xref: /illumos-gate/usr/src/uts/common/fs/zfs/spa.c (revision 98d1cbfec254295273b6a761bc1861c0374bdf02)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
2199653d4eSeschrock 
22fa9e4066Sahrens /*
23*98d1cbfeSGeorge Wilson  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens /*
27fa9e4066Sahrens  * This file contains all the routines used when modifying on-disk SPA state.
28fa9e4066Sahrens  * This includes opening, importing, destroying, exporting a pool, and syncing a
29fa9e4066Sahrens  * pool.
30fa9e4066Sahrens  */
31fa9e4066Sahrens 
32fa9e4066Sahrens #include <sys/zfs_context.h>
33ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h>
34fa9e4066Sahrens #include <sys/spa_impl.h>
35fa9e4066Sahrens #include <sys/zio.h>
36fa9e4066Sahrens #include <sys/zio_checksum.h>
37fa9e4066Sahrens #include <sys/dmu.h>
38fa9e4066Sahrens #include <sys/dmu_tx.h>
39fa9e4066Sahrens #include <sys/zap.h>
40fa9e4066Sahrens #include <sys/zil.h>
41b24ab676SJeff Bonwick #include <sys/ddt.h>
42fa9e4066Sahrens #include <sys/vdev_impl.h>
43fa9e4066Sahrens #include <sys/metaslab.h>
4488ecc943SGeorge Wilson #include <sys/metaslab_impl.h>
45fa9e4066Sahrens #include <sys/uberblock_impl.h>
46fa9e4066Sahrens #include <sys/txg.h>
47fa9e4066Sahrens #include <sys/avl.h>
48fa9e4066Sahrens #include <sys/dmu_traverse.h>
49b1b8ab34Slling #include <sys/dmu_objset.h>
50fa9e4066Sahrens #include <sys/unique.h>
51fa9e4066Sahrens #include <sys/dsl_pool.h>
52b1b8ab34Slling #include <sys/dsl_dataset.h>
53fa9e4066Sahrens #include <sys/dsl_dir.h>
54fa9e4066Sahrens #include <sys/dsl_prop.h>
55b1b8ab34Slling #include <sys/dsl_synctask.h>
56fa9e4066Sahrens #include <sys/fs/zfs.h>
57fa94a07fSbrendan #include <sys/arc.h>
58fa9e4066Sahrens #include <sys/callb.h>
5995173954Sek #include <sys/systeminfo.h>
60e7cbe64fSgw #include <sys/spa_boot.h>
61573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h>
62fa9e4066Sahrens 
635679c89fSjv #ifdef	_KERNEL
64dedec472SJack Meng #include <sys/bootprops.h>
6535a5a358SJonathan Adams #include <sys/callb.h>
6635a5a358SJonathan Adams #include <sys/cpupart.h>
6735a5a358SJonathan Adams #include <sys/pool.h>
6835a5a358SJonathan Adams #include <sys/sysdc.h>
6935a5a358SJonathan Adams #include <sys/zone.h>
705679c89fSjv #endif	/* _KERNEL */
715679c89fSjv 
72990b4856Slling #include "zfs_prop.h"
73b7b97454Sperrin #include "zfs_comutil.h"
74990b4856Slling 
/*
 * Per-taskq thread-count policy.  Each entry of the zio_taskqs table below
 * carries one of these modes plus a mode-specific value.
 */
typedef enum zti_modes {
	zti_mode_fixed,			/* value is # of threads (min 1) */
	zti_mode_online_percent,	/* value is % of online CPUs */
	zti_mode_batch,			/* cpu-intensive; value is ignored */
	zti_mode_null,			/* don't create a taskq */
	zti_nmodes
} zti_modes_t;

/* Initializer shorthands for zio_taskq_info_t entries */
#define	ZTI_FIX(n)	{ zti_mode_fixed, (n) }
#define	ZTI_PCT(n)	{ zti_mode_online_percent, (n) }
#define	ZTI_BATCH	{ zti_mode_batch, 0 }
#define	ZTI_NULL	{ zti_mode_null, 0 }

#define	ZTI_ONE		ZTI_FIX(1)

/* One table entry: how to size the taskq for a (zio type, queue type) pair */
typedef struct zio_taskq_info {
	enum zti_modes zti_mode;
	uint_t zti_value;
} zio_taskq_info_t;

/* Suffixes used when naming the taskqs, indexed by ZIO_TASKQ_* */
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
	"issue", "issue_high", "intr", "intr_high"
};

/*
 * Define the taskq threads for the following I/O types:
 * 	NULL, READ, WRITE, FREE, CLAIM, and IOCTL
 */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
	{ ZTI_FIX(8),	ZTI_NULL,	ZTI_BATCH,	ZTI_NULL },
	{ ZTI_BATCH,	ZTI_FIX(5),	ZTI_FIX(8),	ZTI_FIX(5) },
	{ ZTI_FIX(10),	ZTI_NULL,	ZTI_FIX(10),	ZTI_NULL },
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
};
1122e0c549eSJonathan Adams 
/* Forward declarations for routines defined later in this file */
static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
    spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
    char **ereport);

/* Tunables controlling zio taskq sizing and scheduling (see spa_taskq_create) */
uint_t		zio_taskq_batch_pct = 100;	/* 1 thread per cpu in pset */
id_t		zio_taskq_psrset_bind = PS_NONE;
boolean_t	zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
uint_t		zio_taskq_basedc = 80;		/* base duty cycle */

boolean_t	spa_create_process = B_TRUE;	/* no process ==> no sysdc */

/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"
13135a5a358SJonathan Adams 
132990b4856Slling /*
133990b4856Slling  * ==========================================================================
134990b4856Slling  * SPA properties routines
135990b4856Slling  * ==========================================================================
136990b4856Slling  */
137990b4856Slling 
138990b4856Slling /*
139990b4856Slling  * Add a (source=src, propname=propval) list to an nvlist.
140990b4856Slling  */
1419d82f4f6Slling static void
142990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
143990b4856Slling     uint64_t intval, zprop_source_t src)
144990b4856Slling {
145990b4856Slling 	const char *propname = zpool_prop_to_name(prop);
146990b4856Slling 	nvlist_t *propval;
147990b4856Slling 
1489d82f4f6Slling 	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1499d82f4f6Slling 	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
150990b4856Slling 
1519d82f4f6Slling 	if (strval != NULL)
1529d82f4f6Slling 		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
1539d82f4f6Slling 	else
1549d82f4f6Slling 		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
155990b4856Slling 
1569d82f4f6Slling 	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
157990b4856Slling 	nvlist_free(propval);
158990b4856Slling }
159990b4856Slling 
/*
 * Get property values from the spa configuration.
 *
 * Adds the in-core (non-MOS) pool properties to *nvp: name, size, allocated,
 * free, capacity, dedup ratio, health, version, guid, altroot and cachefile.
 * Caller must hold spa_props_lock (asserted below).
 */
static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
	uint64_t size;
	uint64_t alloc;
	uint64_t cap, version;
	zprop_source_t src = ZPROP_SRC_NONE;
	spa_config_dirent_t *dp;

	ASSERT(MUTEX_HELD(&spa->spa_props_lock));

	/*
	 * Space accounting and vdev-derived properties are only available
	 * once the root vdev exists (i.e. not during early pool creation).
	 */
	if (spa->spa_root_vdev != NULL) {
		alloc = metaslab_class_get_alloc(spa_normal_class(spa));
		size = metaslab_class_get_space(spa_normal_class(spa));
		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
		    size - alloc, src);

		/* capacity as a whole-number percentage; guard size == 0 */
		cap = (size == 0) ? 0 : (alloc * 100 / size);
		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);

		spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
		    ddt_get_pool_dedup_ratio(spa), src);

		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
		    spa->spa_root_vdev->vdev_state, src);

		/* report whether the version is the default or locally set */
		version = spa_version(spa);
		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
			src = ZPROP_SRC_DEFAULT;
		else
			src = ZPROP_SRC_LOCAL;
		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
	}

	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);

	if (spa->spa_root != NULL)
		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
		    0, ZPROP_SRC_LOCAL);

	/*
	 * Only report cachefile when it differs from the default path:
	 * a NULL scd_path means caching is disabled ("none").
	 */
	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
		if (dp->scd_path == NULL) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    "none", 0, ZPROP_SRC_LOCAL);
		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
		}
	}
}
216990b4856Slling 
/*
 * Get zpool property values.
 *
 * Allocates *nvp and fills it with the pool's properties, combining the
 * in-core config properties with those persisted in the MOS pool-property
 * ZAP object.  Returns 0 on success; on failure frees *nvp, sets it to
 * NULL, and returns the error.
 */
int
spa_prop_get(spa_t *spa, nvlist_t **nvp)
{
	objset_t *mos = spa->spa_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;
	int err;

	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	mutex_enter(&spa->spa_props_lock);

	/*
	 * Get properties from the spa config.
	 */
	spa_prop_get_config(spa, nvp);

	/* If no pool property object, no more prop to get. */
	if (mos == NULL || spa->spa_pool_props_object == 0) {
		mutex_exit(&spa->spa_props_lock);
		return (0);
	}

	/*
	 * Get properties from the MOS pool property object.
	 * The loop terminates when zap_cursor_retrieve() returns non-zero;
	 * ENOENT at that point simply means the cursor is exhausted.
	 */
	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
	    zap_cursor_advance(&zc)) {
		uint64_t intval = 0;
		char *strval = NULL;
		zprop_source_t src = ZPROP_SRC_DEFAULT;
		zpool_prop_t prop;

		/* skip ZAP entries that don't map to a known pool property */
		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
			continue;

		switch (za.za_integer_length) {
		case 8:
			/* integer property */
			if (za.za_first_integer !=
			    zpool_prop_default_numeric(prop))
				src = ZPROP_SRC_LOCAL;

			if (prop == ZPOOL_PROP_BOOTFS) {
				/*
				 * bootfs is stored as a dataset object
				 * number; translate it to the dataset name
				 * for the caller.
				 */
				dsl_pool_t *dp;
				dsl_dataset_t *ds = NULL;

				dp = spa_get_dsl(spa);
				rw_enter(&dp->dp_config_rwlock, RW_READER);
				if (err = dsl_dataset_hold_obj(dp,
				    za.za_first_integer, FTAG, &ds)) {
					rw_exit(&dp->dp_config_rwlock);
					break;
				}

				strval = kmem_alloc(
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
				    KM_SLEEP);
				dsl_dataset_name(ds, strval);
				dsl_dataset_rele(ds, FTAG);
				rw_exit(&dp->dp_config_rwlock);
			} else {
				strval = NULL;
				intval = za.za_first_integer;
			}

			spa_prop_add_list(*nvp, prop, strval, intval, src);

			if (strval != NULL)
				kmem_free(strval,
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);

			break;

		case 1:
			/* string property */
			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
			err = zap_lookup(mos, spa->spa_pool_props_object,
			    za.za_name, 1, za.za_num_integers, strval);
			if (err) {
				kmem_free(strval, za.za_num_integers);
				break;
			}
			spa_prop_add_list(*nvp, prop, strval, 0, src);
			kmem_free(strval, za.za_num_integers);
			break;

		default:
			break;
		}
	}
	zap_cursor_fini(&zc);
	mutex_exit(&spa->spa_props_lock);
out:
	/*
	 * Reached by fallthrough: err holds the terminating status of the
	 * cursor loop; ENOENT is the normal end-of-cursor indication.
	 */
	if (err && err != ENOENT) {
		nvlist_free(*nvp);
		*nvp = NULL;
		return (err);
	}

	return (0);
}
323990b4856Slling 
/*
 * Validate the given pool properties nvlist and modify the list
 * for the property values to be set.
 *
 * Returns 0 if every property in 'props' is settable on 'spa'; otherwise
 * returns the first validation error (EINVAL, ENOTSUP, or the special EIO
 * case for failmode on a suspended pool — see below).  As a side effect,
 * a valid bootfs name entry is rewritten in place as its dataset object
 * number, which is what spa_sync_props() persists.
 */
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		/* unknown property names are rejected outright */
		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* version may only move forward, up to SPA_VERSION */
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
		case ZPOOL_PROP_LISTSNAPS:
		case ZPOOL_PROP_AUTOEXPAND:
			/* boolean properties: only 0 or 1 are acceptable */
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			/*
			 * If the pool version is less than SPA_VERSION_BOOTFS,
			 * or the pool is still being created (version == 0),
			 * the bootfs property cannot be set.
			 */
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Make sure the vdev config is bootable
			 */
			if (!vdev_is_bootable(spa->spa_root_vdev)) {
				error = ENOTSUP;
				break;
			}

			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				uint64_t compress;

				/* empty string means "clear bootfs" */
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				if (error = dmu_objset_hold(strval, FTAG, &os))
					break;

				/* Must be ZPL and not gzip compressed. */

				if (dmu_objset_type(os) != DMU_OST_ZFS) {
					error = ENOTSUP;
				} else if ((error = dsl_prop_get_integer(strval,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    &compress, NULL)) == 0 &&
				    !BOOTFS_COMPRESS_VALID(compress)) {
					error = ENOTSUP;
				} else {
					objnum = dmu_objset_id(os);
				}
				dmu_objset_rele(os, FTAG);
			}
			break;

		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_suspended(spa)) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			/* "" (default) and "none" are always acceptable */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			/* otherwise the cachefile must be an absolute path */
			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			/* reject paths ending in "/", "/." or "/.." */
			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;

		case ZPOOL_PROP_DEDUPDITTO:
			/* requires a pool version with dedup support */
			if (spa_version(spa) < SPA_VERSION_DEDUP)
				error = ENOTSUP;
			else
				error = nvpair_value_uint64(elem, &intval);
			if (error == 0 &&
			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	/*
	 * Replace the bootfs name entry with its object number so the
	 * sync task stores a stable identifier rather than a name.
	 */
	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}
488990b4856Slling 
489379c004dSEric Schrock void
490379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
491379c004dSEric Schrock {
492379c004dSEric Schrock 	char *cachefile;
493379c004dSEric Schrock 	spa_config_dirent_t *dp;
494379c004dSEric Schrock 
495379c004dSEric Schrock 	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
496379c004dSEric Schrock 	    &cachefile) != 0)
497379c004dSEric Schrock 		return;
498379c004dSEric Schrock 
499379c004dSEric Schrock 	dp = kmem_alloc(sizeof (spa_config_dirent_t),
500379c004dSEric Schrock 	    KM_SLEEP);
501379c004dSEric Schrock 
502379c004dSEric Schrock 	if (cachefile[0] == '\0')
503379c004dSEric Schrock 		dp->scd_path = spa_strdup(spa_config_path);
504379c004dSEric Schrock 	else if (strcmp(cachefile, "none") == 0)
505379c004dSEric Schrock 		dp->scd_path = NULL;
506379c004dSEric Schrock 	else
507379c004dSEric Schrock 		dp->scd_path = spa_strdup(cachefile);
508379c004dSEric Schrock 
509379c004dSEric Schrock 	list_insert_head(&spa->spa_config_list, dp);
510379c004dSEric Schrock 	if (need_sync)
511379c004dSEric Schrock 		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
512379c004dSEric Schrock }
513379c004dSEric Schrock 
514990b4856Slling int
515990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp)
516990b4856Slling {
517990b4856Slling 	int error;
518379c004dSEric Schrock 	nvpair_t *elem;
519379c004dSEric Schrock 	boolean_t need_sync = B_FALSE;
520379c004dSEric Schrock 	zpool_prop_t prop;
521990b4856Slling 
522990b4856Slling 	if ((error = spa_prop_validate(spa, nvp)) != 0)
523990b4856Slling 		return (error);
524990b4856Slling 
525379c004dSEric Schrock 	elem = NULL;
526379c004dSEric Schrock 	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
527379c004dSEric Schrock 		if ((prop = zpool_name_to_prop(
528379c004dSEric Schrock 		    nvpair_name(elem))) == ZPROP_INVAL)
529379c004dSEric Schrock 			return (EINVAL);
530379c004dSEric Schrock 
531379c004dSEric Schrock 		if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
532379c004dSEric Schrock 			continue;
533379c004dSEric Schrock 
534379c004dSEric Schrock 		need_sync = B_TRUE;
535379c004dSEric Schrock 		break;
536379c004dSEric Schrock 	}
537379c004dSEric Schrock 
538379c004dSEric Schrock 	if (need_sync)
539379c004dSEric Schrock 		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
540379c004dSEric Schrock 		    spa, nvp, 3));
541379c004dSEric Schrock 	else
542379c004dSEric Schrock 		return (0);
543990b4856Slling }
544990b4856Slling 
545990b4856Slling /*
546990b4856Slling  * If the bootfs property value is dsobj, clear it.
547990b4856Slling  */
548990b4856Slling void
549990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
550990b4856Slling {
551990b4856Slling 	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
552990b4856Slling 		VERIFY(zap_remove(spa->spa_meta_objset,
553990b4856Slling 		    spa->spa_pool_props_object,
554990b4856Slling 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
555990b4856Slling 		spa->spa_bootfs = 0;
556990b4856Slling 	}
557990b4856Slling }
558990b4856Slling 
559fa9e4066Sahrens /*
560fa9e4066Sahrens  * ==========================================================================
561fa9e4066Sahrens  * SPA state manipulation (open/create/destroy/import/export)
562fa9e4066Sahrens  * ==========================================================================
563fa9e4066Sahrens  */
564fa9e4066Sahrens 
565ea8dc4b6Seschrock static int
566ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b)
567ea8dc4b6Seschrock {
568ea8dc4b6Seschrock 	spa_error_entry_t *sa = (spa_error_entry_t *)a;
569ea8dc4b6Seschrock 	spa_error_entry_t *sb = (spa_error_entry_t *)b;
570ea8dc4b6Seschrock 	int ret;
571ea8dc4b6Seschrock 
572ea8dc4b6Seschrock 	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
573ea8dc4b6Seschrock 	    sizeof (zbookmark_t));
574ea8dc4b6Seschrock 
575ea8dc4b6Seschrock 	if (ret < 0)
576ea8dc4b6Seschrock 		return (-1);
577ea8dc4b6Seschrock 	else if (ret > 0)
578ea8dc4b6Seschrock 		return (1);
579ea8dc4b6Seschrock 	else
580ea8dc4b6Seschrock 		return (0);
581ea8dc4b6Seschrock }
/*
 * Utility function which retrieves copies of the current logs and
 * re-initializes them in the process.
 *
 * The existing 'last' and 'scrub' error AVL trees (including their nodes)
 * are handed off to the caller via struct copy, and fresh empty trees are
 * created in their place.  Caller must hold spa_errlist_lock and becomes
 * responsible for draining and destroying the returned trees.
 */
void
spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
{
	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));

	/* hand the current trees (root pointers and all) to the caller */
	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));

	/* then rebuild the spa's trees empty; order matters here */
	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}
602ea8dc4b6Seschrock 
60335a5a358SJonathan Adams static taskq_t *
60435a5a358SJonathan Adams spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
60535a5a358SJonathan Adams     uint_t value)
606fa9e4066Sahrens {
60735a5a358SJonathan Adams 	uint_t flags = TASKQ_PREPOPULATE;
60835a5a358SJonathan Adams 	boolean_t batch = B_FALSE;
609fa9e4066Sahrens 
61035a5a358SJonathan Adams 	switch (mode) {
61135a5a358SJonathan Adams 	case zti_mode_null:
61235a5a358SJonathan Adams 		return (NULL);		/* no taskq needed */
613fa9e4066Sahrens 
61435a5a358SJonathan Adams 	case zti_mode_fixed:
61535a5a358SJonathan Adams 		ASSERT3U(value, >=, 1);
61635a5a358SJonathan Adams 		value = MAX(value, 1);
61735a5a358SJonathan Adams 		break;
618fa9e4066Sahrens 
61935a5a358SJonathan Adams 	case zti_mode_batch:
62035a5a358SJonathan Adams 		batch = B_TRUE;
62135a5a358SJonathan Adams 		flags |= TASKQ_THREADS_CPU_PCT;
62235a5a358SJonathan Adams 		value = zio_taskq_batch_pct;
62335a5a358SJonathan Adams 		break;
62435a5a358SJonathan Adams 
62535a5a358SJonathan Adams 	case zti_mode_online_percent:
62635a5a358SJonathan Adams 		flags |= TASKQ_THREADS_CPU_PCT;
62735a5a358SJonathan Adams 		break;
62835a5a358SJonathan Adams 
62935a5a358SJonathan Adams 	default:
63035a5a358SJonathan Adams 		panic("unrecognized mode for %s taskq (%u:%u) in "
63135a5a358SJonathan Adams 		    "spa_activate()",
63235a5a358SJonathan Adams 		    name, mode, value);
63335a5a358SJonathan Adams 		break;
63435a5a358SJonathan Adams 	}
63535a5a358SJonathan Adams 
63635a5a358SJonathan Adams 	if (zio_taskq_sysdc && spa->spa_proc != &p0) {
63735a5a358SJonathan Adams 		if (batch)
63835a5a358SJonathan Adams 			flags |= TASKQ_DC_BATCH;
63935a5a358SJonathan Adams 
64035a5a358SJonathan Adams 		return (taskq_create_sysdc(name, value, 50, INT_MAX,
64135a5a358SJonathan Adams 		    spa->spa_proc, zio_taskq_basedc, flags));
64235a5a358SJonathan Adams 	}
64335a5a358SJonathan Adams 	return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
64435a5a358SJonathan Adams 	    spa->spa_proc, flags));
64535a5a358SJonathan Adams }
64635a5a358SJonathan Adams 
64735a5a358SJonathan Adams static void
64835a5a358SJonathan Adams spa_create_zio_taskqs(spa_t *spa)
64935a5a358SJonathan Adams {
650e14bb325SJeff Bonwick 	for (int t = 0; t < ZIO_TYPES; t++) {
651e14bb325SJeff Bonwick 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
65280eb36f2SGeorge Wilson 			const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
65380eb36f2SGeorge Wilson 			enum zti_modes mode = ztip->zti_mode;
65480eb36f2SGeorge Wilson 			uint_t value = ztip->zti_value;
6552e0c549eSJonathan Adams 			char name[32];
6562e0c549eSJonathan Adams 
6572e0c549eSJonathan Adams 			(void) snprintf(name, sizeof (name),
65880eb36f2SGeorge Wilson 			    "%s_%s", zio_type_name[t], zio_taskq_types[q]);
6592e0c549eSJonathan Adams 
66035a5a358SJonathan Adams 			spa->spa_zio_taskq[t][q] =
66135a5a358SJonathan Adams 			    spa_taskq_create(spa, name, mode, value);
66235a5a358SJonathan Adams 		}
66335a5a358SJonathan Adams 	}
66435a5a358SJonathan Adams }
66535a5a358SJonathan Adams 
66635a5a358SJonathan Adams #ifdef _KERNEL
/*
 * Entry point for the dedicated per-pool "zpool-<name>" process created by
 * spa_activate().  The thread binds itself to the requested processor set
 * (zio_taskq_psrset_bind), optionally enters SDC scheduling
 * (zio_taskq_sysdc), creates the pool's zio taskqs, then announces
 * SPA_PROC_ACTIVE and parks (CPR-safe) until spa_deactivate() moves the
 * state to SPA_PROC_DEACTIVATE, at which point it acknowledges with
 * SPA_PROC_GONE and exits via lwp_exit().
 */
66735a5a358SJonathan Adams static void
66835a5a358SJonathan Adams spa_thread(void *arg)
66935a5a358SJonathan Adams {
67035a5a358SJonathan Adams 	callb_cpr_t cprinfo;
6712e0c549eSJonathan Adams 
67235a5a358SJonathan Adams 	spa_t *spa = arg;
67335a5a358SJonathan Adams 	user_t *pu = PTOU(curproc);
6742e0c549eSJonathan Adams 
67535a5a358SJonathan Adams 	CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr,
67635a5a358SJonathan Adams 	    spa->spa_name);
6772e0c549eSJonathan Adams 
67835a5a358SJonathan Adams 	ASSERT(curproc != &p0);
	/* Name the process "zpool-<poolname>" so it is identifiable in ps(1) */
67935a5a358SJonathan Adams 	(void) snprintf(pu->u_psargs, sizeof (pu->u_psargs),
68035a5a358SJonathan Adams 	    "zpool-%s", spa->spa_name);
68135a5a358SJonathan Adams 	(void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm));
6822e0c549eSJonathan Adams 
68335a5a358SJonathan Adams 	/* bind this thread to the requested psrset */
68435a5a358SJonathan Adams 	if (zio_taskq_psrset_bind != PS_NONE) {
68535a5a358SJonathan Adams 		pool_lock();
68635a5a358SJonathan Adams 		mutex_enter(&cpu_lock);
68735a5a358SJonathan Adams 		mutex_enter(&pidlock);
68835a5a358SJonathan Adams 		mutex_enter(&curproc->p_lock);
68980eb36f2SGeorge Wilson 
69035a5a358SJonathan Adams 		if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind,
69135a5a358SJonathan Adams 		    0, NULL, NULL) == 0)  {
69235a5a358SJonathan Adams 			curthread->t_bind_pset = zio_taskq_psrset_bind;
69335a5a358SJonathan Adams 		} else {
			/* Binding failure is non-fatal; warn and continue unbound */
69435a5a358SJonathan Adams 			cmn_err(CE_WARN,
69535a5a358SJonathan Adams 			    "Couldn't bind process for zfs pool \"%s\" to "
69635a5a358SJonathan Adams 			    "pset %d\n", spa->spa_name, zio_taskq_psrset_bind);
69735a5a358SJonathan Adams 		}
69835a5a358SJonathan Adams 
69935a5a358SJonathan Adams 		mutex_exit(&curproc->p_lock);
70035a5a358SJonathan Adams 		mutex_exit(&pidlock);
70135a5a358SJonathan Adams 		mutex_exit(&cpu_lock);
70235a5a358SJonathan Adams 		pool_unlock();
70335a5a358SJonathan Adams 	}
70435a5a358SJonathan Adams 
70535a5a358SJonathan Adams 	if (zio_taskq_sysdc) {
70635a5a358SJonathan Adams 		sysdc_thread_enter(curthread, 100, 0);
70735a5a358SJonathan Adams 	}
70835a5a358SJonathan Adams 
	/*
	 * Record our process and thread id; spa_deactivate() uses spa_did
	 * to thread_join() with us before letting the module unload.
	 */
70935a5a358SJonathan Adams 	spa->spa_proc = curproc;
71035a5a358SJonathan Adams 	spa->spa_did = curthread->t_did;
71135a5a358SJonathan Adams 
71235a5a358SJonathan Adams 	spa_create_zio_taskqs(spa);
71335a5a358SJonathan Adams 
	/* Handshake: tell spa_activate() (waiting on spa_proc_cv) we're up */
71435a5a358SJonathan Adams 	mutex_enter(&spa->spa_proc_lock);
71535a5a358SJonathan Adams 	ASSERT(spa->spa_proc_state == SPA_PROC_CREATED);
71635a5a358SJonathan Adams 
71735a5a358SJonathan Adams 	spa->spa_proc_state = SPA_PROC_ACTIVE;
71835a5a358SJonathan Adams 	cv_broadcast(&spa->spa_proc_cv);
71935a5a358SJonathan Adams 
	/* Park here (CPR-safe) for the lifetime of the pool */
72035a5a358SJonathan Adams 	CALLB_CPR_SAFE_BEGIN(&cprinfo);
72135a5a358SJonathan Adams 	while (spa->spa_proc_state == SPA_PROC_ACTIVE)
72235a5a358SJonathan Adams 		cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
72335a5a358SJonathan Adams 	CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock);
72435a5a358SJonathan Adams 
	/* spa_deactivate() requested shutdown; acknowledge and exit */
72535a5a358SJonathan Adams 	ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE);
72635a5a358SJonathan Adams 	spa->spa_proc_state = SPA_PROC_GONE;
72735a5a358SJonathan Adams 	spa->spa_proc = &p0;
72835a5a358SJonathan Adams 	cv_broadcast(&spa->spa_proc_cv);
72935a5a358SJonathan Adams 	CALLB_CPR_EXIT(&cprinfo);	/* drops spa_proc_lock */
73035a5a358SJonathan Adams 
73135a5a358SJonathan Adams 	mutex_enter(&curproc->p_lock);
73235a5a358SJonathan Adams 	lwp_exit();
73335a5a358SJonathan Adams }
73435a5a358SJonathan Adams #endif
73535a5a358SJonathan Adams 
73635a5a358SJonathan Adams /*
73735a5a358SJonathan Adams  * Activate an uninitialized pool.
73835a5a358SJonathan Adams  */
73935a5a358SJonathan Adams static void
74035a5a358SJonathan Adams spa_activate(spa_t *spa, int mode)
74135a5a358SJonathan Adams {
74235a5a358SJonathan Adams 	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
74335a5a358SJonathan Adams 
74435a5a358SJonathan Adams 	spa->spa_state = POOL_STATE_ACTIVE;
74535a5a358SJonathan Adams 	spa->spa_mode = mode;
74635a5a358SJonathan Adams 
74735a5a358SJonathan Adams 	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
74835a5a358SJonathan Adams 	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
74935a5a358SJonathan Adams 
75035a5a358SJonathan Adams 	/* Try to create a covering process */
75135a5a358SJonathan Adams 	mutex_enter(&spa->spa_proc_lock);
75235a5a358SJonathan Adams 	ASSERT(spa->spa_proc_state == SPA_PROC_NONE);
75335a5a358SJonathan Adams 	ASSERT(spa->spa_proc == &p0);
75435a5a358SJonathan Adams 	spa->spa_did = 0;
75535a5a358SJonathan Adams 
75635a5a358SJonathan Adams 	/* Only create a process if we're going to be around a while. */
75735a5a358SJonathan Adams 	if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {
75835a5a358SJonathan Adams 		if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri,
75935a5a358SJonathan Adams 		    NULL, 0) == 0) {
			/*
			 * Wait for spa_thread() to finish initializing and
			 * flip the state from SPA_PROC_CREATED to
			 * SPA_PROC_ACTIVE before proceeding.
			 */
76035a5a358SJonathan Adams 			spa->spa_proc_state = SPA_PROC_CREATED;
76135a5a358SJonathan Adams 			while (spa->spa_proc_state == SPA_PROC_CREATED) {
76235a5a358SJonathan Adams 				cv_wait(&spa->spa_proc_cv,
76335a5a358SJonathan Adams 				    &spa->spa_proc_lock);
7642e0c549eSJonathan Adams 			}
76535a5a358SJonathan Adams 			ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
76635a5a358SJonathan Adams 			ASSERT(spa->spa_proc != &p0);
76735a5a358SJonathan Adams 			ASSERT(spa->spa_did != 0);
76835a5a358SJonathan Adams 		} else {
			/* newproc() failed; fall back to in-kernel taskqs below */
76935a5a358SJonathan Adams #ifdef _KERNEL
77035a5a358SJonathan Adams 			cmn_err(CE_WARN,
77135a5a358SJonathan Adams 			    "Couldn't create process for zfs pool \"%s\"\n",
77235a5a358SJonathan Adams 			    spa->spa_name);
77335a5a358SJonathan Adams #endif
774e14bb325SJeff Bonwick 		}
775fa9e4066Sahrens 	}
77635a5a358SJonathan Adams 	mutex_exit(&spa->spa_proc_lock);
77735a5a358SJonathan Adams 
77835a5a358SJonathan Adams 	/* If we didn't create a process, we need to create our taskqs. */
77935a5a358SJonathan Adams 	if (spa->spa_proc == &p0) {
78035a5a358SJonathan Adams 		spa_create_zio_taskqs(spa);
78135a5a358SJonathan Adams 	}
782fa9e4066Sahrens 
	/* Per-pool lists/trees used by vdev dirty tracking and error logging */
783e14bb325SJeff Bonwick 	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
784e14bb325SJeff Bonwick 	    offsetof(vdev_t, vdev_config_dirty_node));
785e14bb325SJeff Bonwick 	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
786e14bb325SJeff Bonwick 	    offsetof(vdev_t, vdev_state_dirty_node));
787fa9e4066Sahrens 
788fa9e4066Sahrens 	txg_list_create(&spa->spa_vdev_txg_list,
789fa9e4066Sahrens 	    offsetof(struct vdev, vdev_txg_node));
790ea8dc4b6Seschrock 
791ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_scrub,
792ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
793ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
794ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_last,
795ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
796ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
797fa9e4066Sahrens }
798fa9e4066Sahrens 
799fa9e4066Sahrens /*
800fa9e4066Sahrens  * Opposite of spa_activate().
801fa9e4066Sahrens  */
802fa9e4066Sahrens static void
803fa9e4066Sahrens spa_deactivate(spa_t *spa)
804fa9e4066Sahrens {
	/* Caller must have already stopped syncing and torn down the pool */
805fa9e4066Sahrens 	ASSERT(spa->spa_sync_on == B_FALSE);
806fa9e4066Sahrens 	ASSERT(spa->spa_dsl_pool == NULL);
807fa9e4066Sahrens 	ASSERT(spa->spa_root_vdev == NULL);
80825f89ee2SJeff Bonwick 	ASSERT(spa->spa_async_zio_root == NULL);
809fa9e4066Sahrens 	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
810fa9e4066Sahrens 
811fa9e4066Sahrens 	txg_list_destroy(&spa->spa_vdev_txg_list);
812fa9e4066Sahrens 
813e14bb325SJeff Bonwick 	list_destroy(&spa->spa_config_dirty_list);
814e14bb325SJeff Bonwick 	list_destroy(&spa->spa_state_dirty_list);
815fa9e4066Sahrens 
	/* Tear down every zio taskq created by spa_create_zio_taskqs() */
816e14bb325SJeff Bonwick 	for (int t = 0; t < ZIO_TYPES; t++) {
817e14bb325SJeff Bonwick 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
81880eb36f2SGeorge Wilson 			if (spa->spa_zio_taskq[t][q] != NULL)
81980eb36f2SGeorge Wilson 				taskq_destroy(spa->spa_zio_taskq[t][q]);
820e14bb325SJeff Bonwick 			spa->spa_zio_taskq[t][q] = NULL;
821e14bb325SJeff Bonwick 		}
822fa9e4066Sahrens 	}
823fa9e4066Sahrens 
824fa9e4066Sahrens 	metaslab_class_destroy(spa->spa_normal_class);
825fa9e4066Sahrens 	spa->spa_normal_class = NULL;
826fa9e4066Sahrens 
8278654d025Sperrin 	metaslab_class_destroy(spa->spa_log_class);
8288654d025Sperrin 	spa->spa_log_class = NULL;
8298654d025Sperrin 
830ea8dc4b6Seschrock 	/*
831ea8dc4b6Seschrock 	 * If this was part of an import or the open otherwise failed, we may
832ea8dc4b6Seschrock 	 * still have errors left in the queues.  Empty them just in case.
833ea8dc4b6Seschrock 	 */
834ea8dc4b6Seschrock 	spa_errlog_drain(spa);
835ea8dc4b6Seschrock 
836ea8dc4b6Seschrock 	avl_destroy(&spa->spa_errlist_scrub);
837ea8dc4b6Seschrock 	avl_destroy(&spa->spa_errlist_last);
838ea8dc4b6Seschrock 
839fa9e4066Sahrens 	spa->spa_state = POOL_STATE_UNINITIALIZED;
84035a5a358SJonathan Adams 
	/*
	 * If spa_activate() created a covering process, ask spa_thread()
	 * to exit (SPA_PROC_DEACTIVATE) and wait until it acknowledges
	 * with SPA_PROC_GONE.
	 */
84135a5a358SJonathan Adams 	mutex_enter(&spa->spa_proc_lock);
84235a5a358SJonathan Adams 	if (spa->spa_proc_state != SPA_PROC_NONE) {
84335a5a358SJonathan Adams 		ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
84435a5a358SJonathan Adams 		spa->spa_proc_state = SPA_PROC_DEACTIVATE;
84535a5a358SJonathan Adams 		cv_broadcast(&spa->spa_proc_cv);
84635a5a358SJonathan Adams 		while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) {
84735a5a358SJonathan Adams 			ASSERT(spa->spa_proc != &p0);
84835a5a358SJonathan Adams 			cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
84935a5a358SJonathan Adams 		}
85035a5a358SJonathan Adams 		ASSERT(spa->spa_proc_state == SPA_PROC_GONE);
85135a5a358SJonathan Adams 		spa->spa_proc_state = SPA_PROC_NONE;
85235a5a358SJonathan Adams 	}
85335a5a358SJonathan Adams 	ASSERT(spa->spa_proc == &p0);
85435a5a358SJonathan Adams 	mutex_exit(&spa->spa_proc_lock);
85535a5a358SJonathan Adams 
85635a5a358SJonathan Adams 	/*
85735a5a358SJonathan Adams 	 * We want to make sure spa_thread() has actually exited the ZFS
85835a5a358SJonathan Adams 	 * module, so that the module can't be unloaded out from underneath
85935a5a358SJonathan Adams 	 * it.
86035a5a358SJonathan Adams 	 */
86135a5a358SJonathan Adams 	if (spa->spa_did != 0) {
86235a5a358SJonathan Adams 		thread_join(spa->spa_did);
86335a5a358SJonathan Adams 		spa->spa_did = 0;
86435a5a358SJonathan Adams 	}
865fa9e4066Sahrens }
866fa9e4066Sahrens 
867fa9e4066Sahrens /*
868fa9e4066Sahrens  * Verify a pool configuration, and construct the vdev tree appropriately.  This
869fa9e4066Sahrens  * will create all the necessary vdevs in the appropriate layout, with each vdev
870fa9e4066Sahrens  * in the CLOSED state.  This will prep the pool before open/creation/import.
871fa9e4066Sahrens  * All vdev validation is done by the vdev_alloc() routine.
872fa9e4066Sahrens  */
87399653d4eSeschrock static int
87499653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
87599653d4eSeschrock     uint_t id, int atype)
876fa9e4066Sahrens {
877fa9e4066Sahrens 	nvlist_t **child;
878573ca77eSGeorge Wilson 	uint_t children;
87999653d4eSeschrock 	int error;
880fa9e4066Sahrens 
	/* Allocate this vdev; on success *vdp roots the (sub)tree we build */
88199653d4eSeschrock 	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
88299653d4eSeschrock 		return (error);
883fa9e4066Sahrens 
	/* Leaf vdevs have no children to recurse into */
88499653d4eSeschrock 	if ((*vdp)->vdev_ops->vdev_op_leaf)
88599653d4eSeschrock 		return (0);
886fa9e4066Sahrens 
887e14bb325SJeff Bonwick 	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
888e14bb325SJeff Bonwick 	    &child, &children);
889e14bb325SJeff Bonwick 
	/* A missing children array is legal: treat as zero children */
890e14bb325SJeff Bonwick 	if (error == ENOENT)
891e14bb325SJeff Bonwick 		return (0);
892e14bb325SJeff Bonwick 
	/* Any other lookup failure means the config is malformed */
893e14bb325SJeff Bonwick 	if (error) {
89499653d4eSeschrock 		vdev_free(*vdp);
89599653d4eSeschrock 		*vdp = NULL;
89699653d4eSeschrock 		return (EINVAL);
897fa9e4066Sahrens 	}
898fa9e4066Sahrens 
	/*
	 * Recursively parse each child; on failure free the entire
	 * partially-constructed subtree rooted at *vdp.
	 */
899573ca77eSGeorge Wilson 	for (int c = 0; c < children; c++) {
90099653d4eSeschrock 		vdev_t *vd;
90199653d4eSeschrock 		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
90299653d4eSeschrock 		    atype)) != 0) {
90399653d4eSeschrock 			vdev_free(*vdp);
90499653d4eSeschrock 			*vdp = NULL;
90599653d4eSeschrock 			return (error);
906fa9e4066Sahrens 		}
907fa9e4066Sahrens 	}
908fa9e4066Sahrens 
90999653d4eSeschrock 	ASSERT(*vdp != NULL);
91099653d4eSeschrock 
91199653d4eSeschrock 	return (0);
912fa9e4066Sahrens }
913fa9e4066Sahrens 
914fa9e4066Sahrens /*
915fa9e4066Sahrens  * Opposite of spa_load().
916fa9e4066Sahrens  */
917fa9e4066Sahrens static void
918fa9e4066Sahrens spa_unload(spa_t *spa)
919fa9e4066Sahrens {
92099653d4eSeschrock 	int i;
92199653d4eSeschrock 
922e14bb325SJeff Bonwick 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
923e14bb325SJeff Bonwick 
924ea8dc4b6Seschrock 	/*
925ea8dc4b6Seschrock 	 * Stop async tasks.
926ea8dc4b6Seschrock 	 */
927ea8dc4b6Seschrock 	spa_async_suspend(spa);
928ea8dc4b6Seschrock 
929fa9e4066Sahrens 	/*
930fa9e4066Sahrens 	 * Stop syncing.
931fa9e4066Sahrens 	 */
932fa9e4066Sahrens 	if (spa->spa_sync_on) {
933fa9e4066Sahrens 		txg_sync_stop(spa->spa_dsl_pool);
934fa9e4066Sahrens 		spa->spa_sync_on = B_FALSE;
935fa9e4066Sahrens 	}
936fa9e4066Sahrens 
937fa9e4066Sahrens 	/*
938e14bb325SJeff Bonwick 	 * Wait for any outstanding async I/O to complete.
939fa9e4066Sahrens 	 */
94054d692b7SGeorge Wilson 	if (spa->spa_async_zio_root != NULL) {
94154d692b7SGeorge Wilson 		(void) zio_wait(spa->spa_async_zio_root);
94254d692b7SGeorge Wilson 		spa->spa_async_zio_root = NULL;
94354d692b7SGeorge Wilson 	}
944fa9e4066Sahrens 
945fa9e4066Sahrens 	/*
946fa9e4066Sahrens 	 * Close the dsl pool.
947fa9e4066Sahrens 	 */
948fa9e4066Sahrens 	if (spa->spa_dsl_pool) {
949fa9e4066Sahrens 		dsl_pool_close(spa->spa_dsl_pool);
950fa9e4066Sahrens 		spa->spa_dsl_pool = NULL;
951afee20e4SGeorge Wilson 		spa->spa_meta_objset = NULL;
952fa9e4066Sahrens 	}
953fa9e4066Sahrens 
	/* Release in-core dedup table state */
954b24ab676SJeff Bonwick 	ddt_unload(spa);
955b24ab676SJeff Bonwick 
	/* Take all config locks as writer for the vdev teardown below */
9568ad4d6ddSJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
9578ad4d6ddSJeff Bonwick 
9588ad4d6ddSJeff Bonwick 	/*
9598ad4d6ddSJeff Bonwick 	 * Drop and purge level 2 cache
9608ad4d6ddSJeff Bonwick 	 */
9618ad4d6ddSJeff Bonwick 	spa_l2cache_drop(spa);
9628ad4d6ddSJeff Bonwick 
963fa9e4066Sahrens 	/*
964fa9e4066Sahrens 	 * Close all vdevs.
965fa9e4066Sahrens 	 */
9660e34b6a7Sbonwick 	if (spa->spa_root_vdev)
967fa9e4066Sahrens 		vdev_free(spa->spa_root_vdev);
9680e34b6a7Sbonwick 	ASSERT(spa->spa_root_vdev == NULL);
969ea8dc4b6Seschrock 
	/* Free the spare vdevs, their pointer array, and their config */
970fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
971fa94a07fSbrendan 		vdev_free(spa->spa_spares.sav_vdevs[i]);
972fa94a07fSbrendan 	if (spa->spa_spares.sav_vdevs) {
973fa94a07fSbrendan 		kmem_free(spa->spa_spares.sav_vdevs,
974fa94a07fSbrendan 		    spa->spa_spares.sav_count * sizeof (void *));
975fa94a07fSbrendan 		spa->spa_spares.sav_vdevs = NULL;
97699653d4eSeschrock 	}
977fa94a07fSbrendan 	if (spa->spa_spares.sav_config) {
978fa94a07fSbrendan 		nvlist_free(spa->spa_spares.sav_config);
979fa94a07fSbrendan 		spa->spa_spares.sav_config = NULL;
980fa94a07fSbrendan 	}
9812ce8af81SEric Schrock 	spa->spa_spares.sav_count = 0;
982fa94a07fSbrendan 
	/* Likewise for the l2cache vdevs */
983fa94a07fSbrendan 	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
984fa94a07fSbrendan 		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
985fa94a07fSbrendan 	if (spa->spa_l2cache.sav_vdevs) {
986fa94a07fSbrendan 		kmem_free(spa->spa_l2cache.sav_vdevs,
987fa94a07fSbrendan 		    spa->spa_l2cache.sav_count * sizeof (void *));
988fa94a07fSbrendan 		spa->spa_l2cache.sav_vdevs = NULL;
989fa94a07fSbrendan 	}
990fa94a07fSbrendan 	if (spa->spa_l2cache.sav_config) {
991fa94a07fSbrendan 		nvlist_free(spa->spa_l2cache.sav_config);
992fa94a07fSbrendan 		spa->spa_l2cache.sav_config = NULL;
99399653d4eSeschrock 	}
9942ce8af81SEric Schrock 	spa->spa_l2cache.sav_count = 0;
99599653d4eSeschrock 
996ea8dc4b6Seschrock 	spa->spa_async_suspended = 0;
9978ad4d6ddSJeff Bonwick 
9988ad4d6ddSJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
999fa9e4066Sahrens }
1000fa9e4066Sahrens 
100199653d4eSeschrock /*
100299653d4eSeschrock  * Load (or re-load) the current list of vdevs describing the active spares for
100399653d4eSeschrock  * this pool.  When this is called, we have some form of basic information in
1004fa94a07fSbrendan  * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
1005fa94a07fSbrendan  * then re-generate a more complete list including status information.
100699653d4eSeschrock  */
100799653d4eSeschrock static void
100899653d4eSeschrock spa_load_spares(spa_t *spa)
100999653d4eSeschrock {
101099653d4eSeschrock 	nvlist_t **spares;
101199653d4eSeschrock 	uint_t nspares;
101299653d4eSeschrock 	int i;
101339c23413Seschrock 	vdev_t *vd, *tvd;
101499653d4eSeschrock 
1015e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
1016e14bb325SJeff Bonwick 
101799653d4eSeschrock 	/*
101899653d4eSeschrock 	 * First, close and free any existing spare vdevs.
101999653d4eSeschrock 	 */
1020fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++) {
1021fa94a07fSbrendan 		vd = spa->spa_spares.sav_vdevs[i];
102239c23413Seschrock 
102339c23413Seschrock 		/* Undo the call to spa_activate() below */
1024c5904d13Seschrock 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
1025c5904d13Seschrock 		    B_FALSE)) != NULL && tvd->vdev_isspare)
102639c23413Seschrock 			spa_spare_remove(tvd);
102739c23413Seschrock 		vdev_close(vd);
102839c23413Seschrock 		vdev_free(vd);
102999653d4eSeschrock 	}
103039c23413Seschrock 
1031fa94a07fSbrendan 	if (spa->spa_spares.sav_vdevs)
1032fa94a07fSbrendan 		kmem_free(spa->spa_spares.sav_vdevs,
1033fa94a07fSbrendan 		    spa->spa_spares.sav_count * sizeof (void *));
103499653d4eSeschrock 
1035fa94a07fSbrendan 	if (spa->spa_spares.sav_config == NULL)
103699653d4eSeschrock 		nspares = 0;
103799653d4eSeschrock 	else
1038fa94a07fSbrendan 		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
103999653d4eSeschrock 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
104099653d4eSeschrock 
1041fa94a07fSbrendan 	spa->spa_spares.sav_count = (int)nspares;
1042fa94a07fSbrendan 	spa->spa_spares.sav_vdevs = NULL;
104399653d4eSeschrock 
104499653d4eSeschrock 	if (nspares == 0)
104599653d4eSeschrock 		return;
104699653d4eSeschrock 
104799653d4eSeschrock 	/*
104899653d4eSeschrock 	 * Construct the array of vdevs, opening them to get status in the
104939c23413Seschrock 	 * process.   For each spare, there is potentially two different vdev_t
105039c23413Seschrock 	 * structures associated with it: one in the list of spares (used only
105139c23413Seschrock 	 * for basic validation purposes) and one in the active vdev
105239c23413Seschrock 	 * configuration (if it's spared in).  During this phase we open and
105339c23413Seschrock 	 * validate each vdev on the spare list.  If the vdev also exists in the
105439c23413Seschrock 	 * active configuration, then we also mark this vdev as an active spare.
105599653d4eSeschrock 	 */
1056fa94a07fSbrendan 	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
1057fa94a07fSbrendan 	    KM_SLEEP);
1058fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++) {
105999653d4eSeschrock 		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
106099653d4eSeschrock 		    VDEV_ALLOC_SPARE) == 0);
106199653d4eSeschrock 		ASSERT(vd != NULL);
106299653d4eSeschrock 
1063fa94a07fSbrendan 		spa->spa_spares.sav_vdevs[i] = vd;
106499653d4eSeschrock 
		/*
		 * If the same guid appears in the active configuration,
		 * mark that in-tree vdev as a spare as well.
		 */
1065c5904d13Seschrock 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
1066c5904d13Seschrock 		    B_FALSE)) != NULL) {
106739c23413Seschrock 			if (!tvd->vdev_isspare)
106839c23413Seschrock 				spa_spare_add(tvd);
106939c23413Seschrock 
107039c23413Seschrock 			/*
107139c23413Seschrock 			 * We only mark the spare active if we were successfully
107239c23413Seschrock 			 * able to load the vdev.  Otherwise, importing a pool
107339c23413Seschrock 			 * with a bad active spare would result in strange
107439c23413Seschrock 			 * behavior, because multiple pool would think the spare
107539c23413Seschrock 			 * is actively in use.
107639c23413Seschrock 			 *
107739c23413Seschrock 			 * There is a vulnerability here to an equally bizarre
107839c23413Seschrock 			 * circumstance, where a dead active spare is later
107939c23413Seschrock 			 * brought back to life (onlined or otherwise).  Given
108039c23413Seschrock 			 * the rarity of this scenario, and the extra complexity
108139c23413Seschrock 			 * it adds, we ignore the possibility.
108239c23413Seschrock 			 */
108339c23413Seschrock 			if (!vdev_is_dead(tvd))
108439c23413Seschrock 				spa_spare_activate(tvd);
108539c23413Seschrock 		}
108639c23413Seschrock 
1087e14bb325SJeff Bonwick 		vd->vdev_top = vd;
10886809eb4eSEric Schrock 		vd->vdev_aux = &spa->spa_spares;
1089e14bb325SJeff Bonwick 
		/* A spare that fails to open simply isn't registered */
109099653d4eSeschrock 		if (vdev_open(vd) != 0)
109199653d4eSeschrock 			continue;
109299653d4eSeschrock 
1093fa94a07fSbrendan 		if (vdev_validate_aux(vd) == 0)
1094fa94a07fSbrendan 			spa_spare_add(vd);
109599653d4eSeschrock 	}
109699653d4eSeschrock 
109799653d4eSeschrock 	/*
109899653d4eSeschrock 	 * Recompute the stashed list of spares, with status information
109999653d4eSeschrock 	 * this time.
110099653d4eSeschrock 	 */
1101fa94a07fSbrendan 	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
110299653d4eSeschrock 	    DATA_TYPE_NVLIST_ARRAY) == 0);
110399653d4eSeschrock 
1104fa94a07fSbrendan 	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
1105fa94a07fSbrendan 	    KM_SLEEP);
1106fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
1107fa94a07fSbrendan 		spares[i] = vdev_config_generate(spa,
1108fa94a07fSbrendan 		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
1109fa94a07fSbrendan 	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
1110fa94a07fSbrendan 	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	/* nvlist_add_nvlist_array copied the elements; free our temporaries */
1111fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
111299653d4eSeschrock 		nvlist_free(spares[i]);
1113fa94a07fSbrendan 	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
1115fa94a07fSbrendan 
1116fa94a07fSbrendan /*
1117fa94a07fSbrendan  * Load (or re-load) the current list of vdevs describing the active l2cache for
1118fa94a07fSbrendan  * this pool.  When this is called, we have some form of basic information in
1119fa94a07fSbrendan  * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
1120fa94a07fSbrendan  * then re-generate a more complete list including status information.
1121fa94a07fSbrendan  * Devices which are already active have their details maintained, and are
1122fa94a07fSbrendan  * not re-opened.
1123fa94a07fSbrendan  */
1124fa94a07fSbrendan static void
1125fa94a07fSbrendan spa_load_l2cache(spa_t *spa)
1126fa94a07fSbrendan {
1127fa94a07fSbrendan 	nvlist_t **l2cache;
1128fa94a07fSbrendan 	uint_t nl2cache;
1129fa94a07fSbrendan 	int i, j, oldnvdevs;
1130573ca77eSGeorge Wilson 	uint64_t guid;
1131fa94a07fSbrendan 	vdev_t *vd, **oldvdevs, **newvdevs;
1132fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
1133fa94a07fSbrendan 
1134e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
1135e14bb325SJeff Bonwick 
	/*
	 * NOTE: when sav_config is NULL, nl2cache is 0 and newvdevs is left
	 * uninitialized -- it is never read on that path (we goto out before
	 * sav->sav_vdevs is assigned).
	 */
1136fa94a07fSbrendan 	if (sav->sav_config != NULL) {
1137fa94a07fSbrendan 		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
1138fa94a07fSbrendan 		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
1139fa94a07fSbrendan 		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
1140fa94a07fSbrendan 	} else {
1141fa94a07fSbrendan 		nl2cache = 0;
1142fa94a07fSbrendan 	}
1143fa94a07fSbrendan 
1144fa94a07fSbrendan 	oldvdevs = sav->sav_vdevs;
1145fa94a07fSbrendan 	oldnvdevs = sav->sav_count;
1146fa94a07fSbrendan 	sav->sav_vdevs = NULL;
1147fa94a07fSbrendan 	sav->sav_count = 0;
1148fa94a07fSbrendan 
1149fa94a07fSbrendan 	/*
1150fa94a07fSbrendan 	 * Process new nvlist of vdevs.
1151fa94a07fSbrendan 	 */
1152fa94a07fSbrendan 	for (i = 0; i < nl2cache; i++) {
1153fa94a07fSbrendan 		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
1154fa94a07fSbrendan 		    &guid) == 0);
1155fa94a07fSbrendan 
1156fa94a07fSbrendan 		newvdevs[i] = NULL;
1157fa94a07fSbrendan 		for (j = 0; j < oldnvdevs; j++) {
1158fa94a07fSbrendan 			vd = oldvdevs[j];
1159fa94a07fSbrendan 			if (vd != NULL && guid == vd->vdev_guid) {
1160fa94a07fSbrendan 				/*
1161fa94a07fSbrendan 				 * Retain previous vdev for add/remove ops.
1162fa94a07fSbrendan 				 */
1163fa94a07fSbrendan 				newvdevs[i] = vd;
				/* NULL out the old slot so it isn't purged below */
1164fa94a07fSbrendan 				oldvdevs[j] = NULL;
1165fa94a07fSbrendan 				break;
1166fa94a07fSbrendan 			}
1167fa94a07fSbrendan 		}
1168fa94a07fSbrendan 
1169fa94a07fSbrendan 		if (newvdevs[i] == NULL) {
1170fa94a07fSbrendan 			/*
1171fa94a07fSbrendan 			 * Create new vdev
1172fa94a07fSbrendan 			 */
1173fa94a07fSbrendan 			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
1174fa94a07fSbrendan 			    VDEV_ALLOC_L2CACHE) == 0);
1175fa94a07fSbrendan 			ASSERT(vd != NULL);
1176fa94a07fSbrendan 			newvdevs[i] = vd;
1177fa94a07fSbrendan 
1178fa94a07fSbrendan 			/*
1179fa94a07fSbrendan 			 * Commit this vdev as an l2cache device,
1180fa94a07fSbrendan 			 * even if it fails to open.
1181fa94a07fSbrendan 			 */
1182fa94a07fSbrendan 			spa_l2cache_add(vd);
1183fa94a07fSbrendan 
1184c5904d13Seschrock 			vd->vdev_top = vd;
1185c5904d13Seschrock 			vd->vdev_aux = sav;
1186c5904d13Seschrock 
1187c5904d13Seschrock 			spa_l2cache_activate(vd);
1188c5904d13Seschrock 
1189fa94a07fSbrendan 			if (vdev_open(vd) != 0)
1190fa94a07fSbrendan 				continue;
1191fa94a07fSbrendan 
1192fa94a07fSbrendan 			(void) vdev_validate_aux(vd);
1193fa94a07fSbrendan 
			/* Only healthy devices are handed to the L2ARC */
1194573ca77eSGeorge Wilson 			if (!vdev_is_dead(vd))
1195573ca77eSGeorge Wilson 				l2arc_add_vdev(spa, vd);
1196fa94a07fSbrendan 		}
1197fa94a07fSbrendan 	}
1198fa94a07fSbrendan 
1199fa94a07fSbrendan 	/*
1200fa94a07fSbrendan 	 * Purge vdevs that were dropped
1201fa94a07fSbrendan 	 */
1202fa94a07fSbrendan 	for (i = 0; i < oldnvdevs; i++) {
1203fa94a07fSbrendan 		uint64_t pool;
1204fa94a07fSbrendan 
1205fa94a07fSbrendan 		vd = oldvdevs[i];
1206fa94a07fSbrendan 		if (vd != NULL) {
12078ad4d6ddSJeff Bonwick 			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
12088ad4d6ddSJeff Bonwick 			    pool != 0ULL && l2arc_vdev_present(vd))
1209fa94a07fSbrendan 				l2arc_remove_vdev(vd);
1210fa94a07fSbrendan 			(void) vdev_close(vd);
1211fa94a07fSbrendan 			spa_l2cache_remove(vd);
1212fa94a07fSbrendan 		}
1213fa94a07fSbrendan 	}
1214fa94a07fSbrendan 
1215fa94a07fSbrendan 	if (oldvdevs)
1216fa94a07fSbrendan 		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
1217fa94a07fSbrendan 
	/* No stashed config to regenerate; sav_count is still 0 here, so the
	 * cleanup loop at 'out' is a no-op and l2cache is never dereferenced. */
1218fa94a07fSbrendan 	if (sav->sav_config == NULL)
1219fa94a07fSbrendan 		goto out;
1220fa94a07fSbrendan 
1221fa94a07fSbrendan 	sav->sav_vdevs = newvdevs;
1222fa94a07fSbrendan 	sav->sav_count = (int)nl2cache;
1223fa94a07fSbrendan 
1224fa94a07fSbrendan 	/*
1225fa94a07fSbrendan 	 * Recompute the stashed list of l2cache devices, with status
1226fa94a07fSbrendan 	 * information this time.
1227fa94a07fSbrendan 	 */
1228fa94a07fSbrendan 	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
1229fa94a07fSbrendan 	    DATA_TYPE_NVLIST_ARRAY) == 0);
1230fa94a07fSbrendan 
1231fa94a07fSbrendan 	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
1232fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++)
1233fa94a07fSbrendan 		l2cache[i] = vdev_config_generate(spa,
1234fa94a07fSbrendan 		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
1235fa94a07fSbrendan 	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
1236fa94a07fSbrendan 	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
1237fa94a07fSbrendan out:
1238fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++)
1239fa94a07fSbrendan 		nvlist_free(l2cache[i]);
1240fa94a07fSbrendan 	if (sav->sav_count)
1241fa94a07fSbrendan 		kmem_free(l2cache, sav->sav_count * sizeof (void *));
124399653d4eSeschrock 
/*
 * Read a packed nvlist stored as the bonus buffer of object 'obj' in the
 * pool's meta objset (the object's bonus holds the packed size) and unpack
 * it into *value.  Returns 0 on success or an error from dmu_read()/
 * nvlist_unpack(); *value is NULL on entry and only set on successful
 * unpack.  NOTE(review): on success the caller presumably owns *value and
 * must nvlist_free() it -- confirm against callers.
 */
124499653d4eSeschrock static int
124599653d4eSeschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
124699653d4eSeschrock {
124799653d4eSeschrock 	dmu_buf_t *db;
124899653d4eSeschrock 	char *packed = NULL;
124999653d4eSeschrock 	size_t nvsize = 0;
125099653d4eSeschrock 	int error;
125199653d4eSeschrock 	*value = NULL;
125299653d4eSeschrock 
	/* The object's bonus buffer holds the size of the packed nvlist */
125399653d4eSeschrock 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
125499653d4eSeschrock 	nvsize = *(uint64_t *)db->db_data;
125599653d4eSeschrock 	dmu_buf_rele(db, FTAG);
125699653d4eSeschrock 
125799653d4eSeschrock 	packed = kmem_alloc(nvsize, KM_SLEEP);
12587bfdf011SNeil Perrin 	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
12597bfdf011SNeil Perrin 	    DMU_READ_PREFETCH);
126099653d4eSeschrock 	if (error == 0)
126199653d4eSeschrock 		error = nvlist_unpack(packed, nvsize, value, 0);
126299653d4eSeschrock 	kmem_free(packed, nvsize);
126399653d4eSeschrock 
126499653d4eSeschrock 	return (error);
126599653d4eSeschrock }
126699653d4eSeschrock 
12673d7072f8Seschrock /*
12683d7072f8Seschrock  * Checks to see if the given vdev could not be opened, in which case we post a
12693d7072f8Seschrock  * sysevent to notify the autoreplace code that the device has been removed.
12703d7072f8Seschrock  */
12713d7072f8Seschrock static void
12723d7072f8Seschrock spa_check_removed(vdev_t *vd)
12733d7072f8Seschrock {
	/* Walk the whole subtree; leaves are checked below */
1274573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++)
12753d7072f8Seschrock 		spa_check_removed(vd->vdev_child[c]);
12763d7072f8Seschrock 
	/* A dead leaf vdev may have been physically removed; notify autoreplace */
12773d7072f8Seschrock 	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
12783d7072f8Seschrock 		zfs_post_autoreplace(vd->vdev_spa, vd);
12793d7072f8Seschrock 		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
12803d7072f8Seschrock 	}
12813d7072f8Seschrock }
12823d7072f8Seschrock 
1283e6ca193dSGeorge Wilson /*
1284e6ca193dSGeorge Wilson  * Load the slog device state from the config object since it's possible
1285e6ca193dSGeorge Wilson  * that the label does not contain the most up-to-date information.
1286e6ca193dSGeorge Wilson  */
1287e6ca193dSGeorge Wilson void
128888ecc943SGeorge Wilson spa_load_log_state(spa_t *spa, nvlist_t *nv)
1289e6ca193dSGeorge Wilson {
129088ecc943SGeorge Wilson 	vdev_t *ovd, *rvd = spa->spa_root_vdev;
1291e6ca193dSGeorge Wilson 
129288ecc943SGeorge Wilson 	/*
129388ecc943SGeorge Wilson 	 * Load the original root vdev tree from the passed config.
129488ecc943SGeorge Wilson 	 */
129588ecc943SGeorge Wilson 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
129688ecc943SGeorge Wilson 	VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
1297e6ca193dSGeorge Wilson 
	/*
	 * Copy log state from the config's tree into each in-core slog
	 * top-level vdev.  NOTE(review): this indexes ovd's children by the
	 * same 'c' as rvd's -- assumes the passed config's child ordering
	 * matches the in-core root vdev; confirm with callers.
	 */
129888ecc943SGeorge Wilson 	for (int c = 0; c < rvd->vdev_children; c++) {
129988ecc943SGeorge Wilson 		vdev_t *cvd = rvd->vdev_child[c];
130088ecc943SGeorge Wilson 		if (cvd->vdev_islog)
130188ecc943SGeorge Wilson 			vdev_load_log_state(cvd, ovd->vdev_child[c]);
1302e6ca193dSGeorge Wilson 	}
	/* The parsed tree was only needed for the copy; free it */
130388ecc943SGeorge Wilson 	vdev_free(ovd);
130488ecc943SGeorge Wilson 	spa_config_exit(spa, SCL_ALL, FTAG);
1305e6ca193dSGeorge Wilson }
1306e6ca193dSGeorge Wilson 
1307b87f3af3Sperrin /*
1308b87f3af3Sperrin  * Check for missing log devices
1309b87f3af3Sperrin  */
1310b87f3af3Sperrin int
1311b87f3af3Sperrin spa_check_logs(spa_t *spa)
1312b87f3af3Sperrin {
	/* Returns nonzero if a log device is missing, 0 otherwise */
1313b87f3af3Sperrin 	switch (spa->spa_log_state) {
1314b87f3af3Sperrin 	case SPA_LOG_MISSING:
1315b87f3af3Sperrin 		/* need to recheck in case slog has been restored */
		/* FALLTHROUGH */
1316b87f3af3Sperrin 	case SPA_LOG_UNKNOWN:
1317b87f3af3Sperrin 		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
1318b87f3af3Sperrin 		    DS_FIND_CHILDREN)) {
13191195e687SMark J Musante 			spa_set_log_state(spa, SPA_LOG_MISSING);
1320b87f3af3Sperrin 			return (1);
1321b87f3af3Sperrin 		}
1322b87f3af3Sperrin 		break;
	/* All other log states: nothing to check */
1323b87f3af3Sperrin 	}
1324b87f3af3Sperrin 	return (0);
1325b87f3af3Sperrin }
1326b87f3af3Sperrin 
/*
 * Passivate the metaslab group of every slog top-level vdev so no new
 * allocations land on the log devices.  Returns B_TRUE if at least one
 * slog was found (and passivated), B_FALSE otherwise.
 */
13271195e687SMark J Musante static boolean_t
13281195e687SMark J Musante spa_passivate_log(spa_t *spa)
13291195e687SMark J Musante {
13301195e687SMark J Musante 	vdev_t *rvd = spa->spa_root_vdev;
13311195e687SMark J Musante 	boolean_t slog_found = B_FALSE;
13321195e687SMark J Musante 
13331195e687SMark J Musante 	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
13341195e687SMark J Musante 
13351195e687SMark J Musante 	if (!spa_has_slogs(spa))
13361195e687SMark J Musante 		return (B_FALSE);
13371195e687SMark J Musante 
13381195e687SMark J Musante 	for (int c = 0; c < rvd->vdev_children; c++) {
13391195e687SMark J Musante 		vdev_t *tvd = rvd->vdev_child[c];
13401195e687SMark J Musante 		metaslab_group_t *mg = tvd->vdev_mg;
13411195e687SMark J Musante 
13421195e687SMark J Musante 		if (tvd->vdev_islog) {
13431195e687SMark J Musante 			metaslab_group_passivate(mg);
13441195e687SMark J Musante 			slog_found = B_TRUE;
13451195e687SMark J Musante 		}
13461195e687SMark J Musante 	}
13471195e687SMark J Musante 
13481195e687SMark J Musante 	return (slog_found);
13491195e687SMark J Musante }
13501195e687SMark J Musante 
13511195e687SMark J Musante static void
13521195e687SMark J Musante spa_activate_log(spa_t *spa)
13531195e687SMark J Musante {
13541195e687SMark J Musante 	vdev_t *rvd = spa->spa_root_vdev;
13551195e687SMark J Musante 
13561195e687SMark J Musante 	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
13571195e687SMark J Musante 
13581195e687SMark J Musante 	for (int c = 0; c < rvd->vdev_children; c++) {
13591195e687SMark J Musante 		vdev_t *tvd = rvd->vdev_child[c];
13601195e687SMark J Musante 		metaslab_group_t *mg = tvd->vdev_mg;
13611195e687SMark J Musante 
13621195e687SMark J Musante 		if (tvd->vdev_islog)
13631195e687SMark J Musante 			metaslab_group_activate(mg);
13641195e687SMark J Musante 	}
13651195e687SMark J Musante }
13661195e687SMark J Musante 
13671195e687SMark J Musante int
13681195e687SMark J Musante spa_offline_log(spa_t *spa)
13691195e687SMark J Musante {
13701195e687SMark J Musante 	int error = 0;
13711195e687SMark J Musante 
13721195e687SMark J Musante 	if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
13731195e687SMark J Musante 	    NULL, DS_FIND_CHILDREN)) == 0) {
13741195e687SMark J Musante 
13751195e687SMark J Musante 		/*
13761195e687SMark J Musante 		 * We successfully offlined the log device, sync out the
13771195e687SMark J Musante 		 * current txg so that the "stubby" block can be removed
13781195e687SMark J Musante 		 * by zil_sync().
13791195e687SMark J Musante 		 */
13801195e687SMark J Musante 		txg_wait_synced(spa->spa_dsl_pool, 0);
13811195e687SMark J Musante 	}
13821195e687SMark J Musante 	return (error);
13831195e687SMark J Musante }
13841195e687SMark J Musante 
1385b693757aSEric Schrock static void
1386b693757aSEric Schrock spa_aux_check_removed(spa_aux_vdev_t *sav)
1387b693757aSEric Schrock {
1388b24ab676SJeff Bonwick 	for (int i = 0; i < sav->sav_count; i++)
1389b693757aSEric Schrock 		spa_check_removed(sav->sav_vdevs[i]);
1390b693757aSEric Schrock }
1391b693757aSEric Schrock 
1392b24ab676SJeff Bonwick void
1393b24ab676SJeff Bonwick spa_claim_notify(zio_t *zio)
1394b24ab676SJeff Bonwick {
1395b24ab676SJeff Bonwick 	spa_t *spa = zio->io_spa;
1396b24ab676SJeff Bonwick 
1397b24ab676SJeff Bonwick 	if (zio->io_error)
1398b24ab676SJeff Bonwick 		return;
1399b24ab676SJeff Bonwick 
1400b24ab676SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);	/* any mutex will do */
1401b24ab676SJeff Bonwick 	if (spa->spa_claim_max_txg < zio->io_bp->blk_birth)
1402b24ab676SJeff Bonwick 		spa->spa_claim_max_txg = zio->io_bp->blk_birth;
1403b24ab676SJeff Bonwick 	mutex_exit(&spa->spa_props_lock);
1404b24ab676SJeff Bonwick }
1405b24ab676SJeff Bonwick 
/*
 * Tallies of read failures accumulated while verifying a pool during
 * load (see spa_load_verify() / spa_load_verify_done()).
 */
typedef struct spa_load_error {
	uint64_t	sle_meta_count;	/* metadata blocks that failed to read */
	uint64_t	sle_data_count;	/* data blocks that failed to read */
} spa_load_error_t;
1410468c413aSTim Haley 
1411468c413aSTim Haley static void
1412468c413aSTim Haley spa_load_verify_done(zio_t *zio)
1413468c413aSTim Haley {
1414468c413aSTim Haley 	blkptr_t *bp = zio->io_bp;
1415468c413aSTim Haley 	spa_load_error_t *sle = zio->io_private;
1416468c413aSTim Haley 	dmu_object_type_t type = BP_GET_TYPE(bp);
1417468c413aSTim Haley 	int error = zio->io_error;
1418468c413aSTim Haley 
1419468c413aSTim Haley 	if (error) {
1420468c413aSTim Haley 		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
1421468c413aSTim Haley 		    type != DMU_OT_INTENT_LOG)
1422c8ee1847SVictor Latushkin 			atomic_add_64(&sle->sle_meta_count, 1);
1423468c413aSTim Haley 		else
1424468c413aSTim Haley 			atomic_add_64(&sle->sle_data_count, 1);
1425468c413aSTim Haley 	}
1426468c413aSTim Haley 	zio_data_buf_free(zio->io_data, zio->io_size);
1427468c413aSTim Haley }
1428468c413aSTim Haley 
1429468c413aSTim Haley /*ARGSUSED*/
1430468c413aSTim Haley static int
1431b24ab676SJeff Bonwick spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
1432b24ab676SJeff Bonwick     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1433468c413aSTim Haley {
1434468c413aSTim Haley 	if (bp != NULL) {
1435468c413aSTim Haley 		zio_t *rio = arg;
1436468c413aSTim Haley 		size_t size = BP_GET_PSIZE(bp);
1437468c413aSTim Haley 		void *data = zio_data_buf_alloc(size);
1438468c413aSTim Haley 
1439468c413aSTim Haley 		zio_nowait(zio_read(rio, spa, bp, data, size,
1440468c413aSTim Haley 		    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
1441468c413aSTim Haley 		    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
1442468c413aSTim Haley 		    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
1443468c413aSTim Haley 	}
1444468c413aSTim Haley 	return (0);
1445468c413aSTim Haley }
1446468c413aSTim Haley 
/*
 * Verify the pool's contents by traversing it from spa_verify_min_txg
 * and reading every block, subject to the rewind policy in the pool's
 * config.  Returns 0 if the error counts are within the policy's
 * thresholds, EIO (or a passed-through ENXIO) otherwise.  Also records
 * the error counts and load/rewind txg bookkeeping in the spa.
 */
static int
spa_load_verify(spa_t *spa)
{
	zio_t *rio;
	spa_load_error_t sle = { 0 };
	zpool_rewind_policy_t policy;
	boolean_t verify_ok = B_FALSE;
	int error;

	zpool_get_rewind_policy(spa->spa_config, &policy);

	/* With rewind disabled there is nothing to verify. */
	if (policy.zrp_request & ZPOOL_NEVER_REWIND)
		return (0);

	/* Parent zio for all verification reads; sle collects the tallies. */
	rio = zio_root(spa, NULL, &sle,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);

	error = traverse_pool(spa, spa->spa_verify_min_txg,
	    TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);

	/* Wait for all the reads issued by spa_load_verify_cb(). */
	(void) zio_wait(rio);

	spa->spa_load_meta_errors = sle.sle_meta_count;
	spa->spa_load_data_errors = sle.sle_data_count;

	/*
	 * The load is acceptable if the traversal succeeded and both
	 * error counts are within the policy's limits; otherwise record
	 * the txg we just examined as the new rewind upper bound.
	 */
	if (!error && sle.sle_meta_count <= policy.zrp_maxmeta &&
	    sle.sle_data_count <= policy.zrp_maxdata) {
		verify_ok = B_TRUE;
		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
	} else {
		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	}

	if (error) {
		/* Normalize traversal errors other than ENXIO to EIO. */
		if (error != ENXIO && error != EIO)
			error = EIO;
		return (error);
	}

	return (verify_ok ? 0 : EIO);
}
1489468c413aSTim Haley 
/*
 * Find a value in the pool props object.  The lookup is best-effort:
 * if the property is not present, *val is left untouched, so callers
 * should pre-initialize it with the desired default.
 */
static void
spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val)
{
	(void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object,
	    zpool_prop_to_name(prop), sizeof (uint64_t), 1, val);
}
14991195e687SMark J Musante 
/*
 * Find a value in the pool directory object.  Returns the zap_lookup()
 * error (e.g. ENOENT if the entry does not exist), with *val filled in
 * on success.
 */
static int
spa_dir_prop(spa_t *spa, const char *name, uint64_t *val)
{
	return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    name, sizeof (uint64_t), 1, val));
}
15091195e687SMark J Musante 
/*
 * Mark the given vdev CANT_OPEN with the supplied aux reason and pass
 * the error code back to the caller.  Convenience helper for the
 * "return (spa_vdev_err(...))" pattern used throughout spa_load_impl().
 */
static int
spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err)
{
	vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux);
	return (err);
}
15161195e687SMark J Musante 
/*
 * Fix up config after a partly-completed split.  This is done with the
 * ZPOOL_CONFIG_SPLIT nvlist.  Both the splitting pool and the split-off
 * pool have that entry in their config, but only the splitting one contains
 * a list of all the guids of the vdevs that are being split off.
 *
 * This function determines what to do with that list: either rejoin
 * all the disks to the pool, or complete the splitting process.  To attempt
 * the rejoin, each disk that is offlined is marked online again, and
 * we do a reopen() call.  If the vdev label for every disk that was
 * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL)
 * then we call vdev_split() on each disk, and complete the split.
 *
 * Otherwise we leave the config alone, with all the vdevs in place in
 * the original pool.
 */
static void
spa_try_repair(spa_t *spa, nvlist_t *config)
{
	uint_t extracted;
	uint64_t *glist;
	uint_t i, gcount;
	nvlist_t *nvl;
	vdev_t **vd;
	boolean_t attempt_reopen;

	/* No split in progress; nothing to repair. */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0)
		return;

	/* check that the config is complete */
	if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
	    &glist, &gcount) != 0)
		return;

	/* One slot per split guid; slots stay NULL for missing vdevs. */
	vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP);

	/* attempt to online all the vdevs & validate */
	attempt_reopen = B_TRUE;
	for (i = 0; i < gcount; i++) {
		if (glist[i] == 0)	/* vdev is hole */
			continue;

		vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE);
		if (vd[i] == NULL) {
			/*
			 * Don't bother attempting to reopen the disks;
			 * just do the split.
			 */
			attempt_reopen = B_FALSE;
		} else {
			/* attempt to re-online it */
			vd[i]->vdev_offline = B_FALSE;
		}
	}

	if (attempt_reopen) {
		vdev_reopen(spa->spa_root_vdev);

		/* check each device to see what state it's in */
		for (extracted = 0, i = 0; i < gcount; i++) {
			if (vd[i] != NULL &&
			    vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL)
				break;
			++extracted;
		}
	}

	/*
	 * If every disk has been moved to the new pool, or if we never
	 * even attempted to look at them, then we split them off for
	 * good.
	 *
	 * Note: when attempt_reopen is false, "extracted" is never set,
	 * but the short-circuit on !attempt_reopen keeps it from being
	 * read in that case.
	 */
	if (!attempt_reopen || gcount == extracted) {
		for (i = 0; i < gcount; i++)
			if (vd[i] != NULL)
				vdev_split(vd[i]);
		vdev_reopen(spa->spa_root_vdev);
	}

	kmem_free(vd, gcount * sizeof (vdev_t *));
}
15981195e687SMark J Musante 
/*
 * Outer pool-load entry point: pull the pool guid, version and txg out
 * of the config, reject duplicate-guid imports, stash any in-progress
 * split config, then hand off to spa_load_impl().  On failure (other
 * than EBADF) an FMA ereport is posted, and spa_load_state/spa_ena are
 * updated to reflect the outcome.  Returns 0 or an errno-style code.
 */
static int
spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
    boolean_t mosconfig)
{
	nvlist_t *config = spa->spa_config;
	char *ereport = FM_EREPORT_ZFS_POOL;	/* may be refined by impl */
	int error;
	uint64_t pool_guid;
	nvlist_t *nvl;

	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid))
		return (EINVAL);

	/*
	 * Versioning wasn't explicitly added to the label until later, so if
	 * it's not present treat it as the initial version.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
	    &spa->spa_ubsync.ub_version) != 0)
		spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;

	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &spa->spa_config_txg);

	/* An import of a guid that already exists in the namespace fails. */
	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
	    spa_guid_exists(pool_guid, 0)) {
		error = EEXIST;
	} else {
		spa->spa_load_guid = pool_guid;

		/*
		 * Preserve any partly-completed split information so
		 * spa_try_repair() can finish or undo the split later.
		 */
		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
		    &nvl) == 0) {
			VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting,
			    KM_SLEEP) == 0);
		}

		error = spa_load_impl(spa, pool_guid, config, state, type,
		    mosconfig, &ereport);
	}

	spa->spa_minref = refcount_count(&spa->spa_refcount);
	/* EBADF (e.g. hostid mismatch) is reported elsewhere; no ereport. */
	if (error && error != EBADF)
		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
	spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
	spa->spa_ena = 0;

	return (error);
}
16471195e687SMark J Musante 
1648fa9e4066Sahrens /*
1649fa9e4066Sahrens  * Load an existing storage pool, using the pool's builtin spa_config as a
1650ea8dc4b6Seschrock  * source of configuration information.
1651fa9e4066Sahrens  */
1652fa9e4066Sahrens static int
16531195e687SMark J Musante spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
16541195e687SMark J Musante     spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
16551195e687SMark J Musante     char **ereport)
1656fa9e4066Sahrens {
1657fa9e4066Sahrens 	int error = 0;
1658871a9500SMark J Musante 	nvlist_t *nvroot = NULL;
1659fa9e4066Sahrens 	vdev_t *rvd;
1660fa9e4066Sahrens 	uberblock_t *ub = &spa->spa_uberblock;
16610373e76bSbonwick 	uint64_t config_cache_txg = spa->spa_config_txg;
16628ad4d6ddSJeff Bonwick 	int orig_mode = spa->spa_mode;
16631195e687SMark J Musante 	int parse;
1664fa9e4066Sahrens 
16658ad4d6ddSJeff Bonwick 	/*
16668ad4d6ddSJeff Bonwick 	 * If this is an untrusted config, access the pool in read-only mode.
16678ad4d6ddSJeff Bonwick 	 * This prevents things like resilvering recently removed devices.
16688ad4d6ddSJeff Bonwick 	 */
16698ad4d6ddSJeff Bonwick 	if (!mosconfig)
16708ad4d6ddSJeff Bonwick 		spa->spa_mode = FREAD;
16718ad4d6ddSJeff Bonwick 
1672e14bb325SJeff Bonwick 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
1673e14bb325SJeff Bonwick 
1674ea8dc4b6Seschrock 	spa->spa_load_state = state;
16750373e76bSbonwick 
16761195e687SMark J Musante 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot))
16771195e687SMark J Musante 		return (EINVAL);
1678fa9e4066Sahrens 
16791195e687SMark J Musante 	parse = (type == SPA_IMPORT_EXISTING ?
16801195e687SMark J Musante 	    VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT);
1681b5989ec7Seschrock 
168254d692b7SGeorge Wilson 	/*
168354d692b7SGeorge Wilson 	 * Create "The Godfather" zio to hold all async IOs
168454d692b7SGeorge Wilson 	 */
168525f89ee2SJeff Bonwick 	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
168625f89ee2SJeff Bonwick 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
168754d692b7SGeorge Wilson 
1688fa9e4066Sahrens 	/*
168999653d4eSeschrock 	 * Parse the configuration into a vdev tree.  We explicitly set the
169099653d4eSeschrock 	 * value that will be returned by spa_version() since parsing the
169199653d4eSeschrock 	 * configuration requires knowing the version number.
1692fa9e4066Sahrens 	 */
1693e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
16941195e687SMark J Musante 	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse);
1695e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
1696fa9e4066Sahrens 
169799653d4eSeschrock 	if (error != 0)
16981195e687SMark J Musante 		return (error);
1699fa9e4066Sahrens 
17000e34b6a7Sbonwick 	ASSERT(spa->spa_root_vdev == rvd);
17011195e687SMark J Musante 
17021195e687SMark J Musante 	if (type != SPA_IMPORT_ASSEMBLE) {
17031195e687SMark J Musante 		ASSERT(spa_guid(spa) == pool_guid);
17041195e687SMark J Musante 	}
1705fa9e4066Sahrens 
1706fa9e4066Sahrens 	/*
1707fa9e4066Sahrens 	 * Try to open all vdevs, loading each label in the process.
1708fa9e4066Sahrens 	 */
1709e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
17100bf246f5Smc 	error = vdev_open(rvd);
1711e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
17120bf246f5Smc 	if (error != 0)
17131195e687SMark J Musante 		return (error);
1714fa9e4066Sahrens 
1715560e6e96Seschrock 	/*
171677e3a39cSMark J Musante 	 * We need to validate the vdev labels against the configuration that
171777e3a39cSMark J Musante 	 * we have in hand, which is dependent on the setting of mosconfig. If
171877e3a39cSMark J Musante 	 * mosconfig is true then we're validating the vdev labels based on
17191195e687SMark J Musante 	 * that config.  Otherwise, we're validating against the cached config
172077e3a39cSMark J Musante 	 * (zpool.cache) that was read when we loaded the zfs module, and then
172177e3a39cSMark J Musante 	 * later we will recursively call spa_load() and validate against
172277e3a39cSMark J Musante 	 * the vdev config.
17231195e687SMark J Musante 	 *
17241195e687SMark J Musante 	 * If we're assembling a new pool that's been split off from an
17251195e687SMark J Musante 	 * existing pool, the labels haven't yet been updated so we skip
17261195e687SMark J Musante 	 * validation for now.
1727560e6e96Seschrock 	 */
17281195e687SMark J Musante 	if (type != SPA_IMPORT_ASSEMBLE) {
17291195e687SMark J Musante 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
17301195e687SMark J Musante 		error = vdev_validate(rvd);
17311195e687SMark J Musante 		spa_config_exit(spa, SCL_ALL, FTAG);
1732560e6e96Seschrock 
17331195e687SMark J Musante 		if (error != 0)
17341195e687SMark J Musante 			return (error);
17351195e687SMark J Musante 
17361195e687SMark J Musante 		if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
17371195e687SMark J Musante 			return (ENXIO);
1738560e6e96Seschrock 	}
1739560e6e96Seschrock 
1740fa9e4066Sahrens 	/*
1741fa9e4066Sahrens 	 * Find the best uberblock.
1742fa9e4066Sahrens 	 */
1743e14bb325SJeff Bonwick 	vdev_uberblock_load(NULL, rvd, ub);
1744fa9e4066Sahrens 
1745fa9e4066Sahrens 	/*
1746fa9e4066Sahrens 	 * If we weren't able to find a single valid uberblock, return failure.
1747fa9e4066Sahrens 	 */
17481195e687SMark J Musante 	if (ub->ub_txg == 0)
17491195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
1750ea8dc4b6Seschrock 
1751ea8dc4b6Seschrock 	/*
1752ea8dc4b6Seschrock 	 * If the pool is newer than the code, we can't open it.
1753ea8dc4b6Seschrock 	 */
17541195e687SMark J Musante 	if (ub->ub_version > SPA_VERSION)
17551195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
1756fa9e4066Sahrens 
1757fa9e4066Sahrens 	/*
1758fa9e4066Sahrens 	 * If the vdev guid sum doesn't match the uberblock, we have an
1759fa9e4066Sahrens 	 * incomplete configuration.
1760fa9e4066Sahrens 	 */
17611195e687SMark J Musante 	if (mosconfig && type != SPA_IMPORT_ASSEMBLE &&
17621195e687SMark J Musante 	    rvd->vdev_guid_sum != ub->ub_guid_sum)
17631195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO));
17641195e687SMark J Musante 
17651195e687SMark J Musante 	if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) {
17661195e687SMark J Musante 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
17671195e687SMark J Musante 		spa_try_repair(spa, config);
17681195e687SMark J Musante 		spa_config_exit(spa, SCL_ALL, FTAG);
17691195e687SMark J Musante 		nvlist_free(spa->spa_config_splitting);
17701195e687SMark J Musante 		spa->spa_config_splitting = NULL;
1771fa9e4066Sahrens 	}
1772fa9e4066Sahrens 
1773fa9e4066Sahrens 	/*
1774fa9e4066Sahrens 	 * Initialize internal SPA structures.
1775fa9e4066Sahrens 	 */
1776fa9e4066Sahrens 	spa->spa_state = POOL_STATE_ACTIVE;
1777fa9e4066Sahrens 	spa->spa_ubsync = spa->spa_uberblock;
1778468c413aSTim Haley 	spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
1779c8ee1847SVictor Latushkin 	    TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1;
1780468c413aSTim Haley 	spa->spa_first_txg = spa->spa_last_ubsync_txg ?
1781468c413aSTim Haley 	    spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
1782b24ab676SJeff Bonwick 	spa->spa_claim_max_txg = spa->spa_first_txg;
1783b24ab676SJeff Bonwick 
1784ea8dc4b6Seschrock 	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
17851195e687SMark J Musante 	if (error)
17861195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1787fa9e4066Sahrens 	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
1788fa9e4066Sahrens 
17891195e687SMark J Musante 	if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
17901195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1791fa9e4066Sahrens 
1792fa9e4066Sahrens 	if (!mosconfig) {
179395173954Sek 		uint64_t hostid;
1794871a9500SMark J Musante 		nvlist_t *policy = NULL, *nvconfig;
1795871a9500SMark J Musante 
1796871a9500SMark J Musante 		if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
1797871a9500SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1798fa9e4066Sahrens 
179988ecc943SGeorge Wilson 		if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
180077650510SLin Ling 		    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
180195173954Sek 			char *hostname;
180295173954Sek 			unsigned long myhostid = 0;
180395173954Sek 
180488ecc943SGeorge Wilson 			VERIFY(nvlist_lookup_string(nvconfig,
180595173954Sek 			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
180695173954Sek 
18075679c89fSjv #ifdef	_KERNEL
18085679c89fSjv 			myhostid = zone_get_hostid(NULL);
18095679c89fSjv #else	/* _KERNEL */
18105679c89fSjv 			/*
18115679c89fSjv 			 * We're emulating the system's hostid in userland, so
18125679c89fSjv 			 * we can't use zone_get_hostid().
18135679c89fSjv 			 */
181495173954Sek 			(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
18155679c89fSjv #endif	/* _KERNEL */
181617194a52Slling 			if (hostid != 0 && myhostid != 0 &&
18175679c89fSjv 			    hostid != myhostid) {
1818871a9500SMark J Musante 				nvlist_free(nvconfig);
181995173954Sek 				cmn_err(CE_WARN, "pool '%s' could not be "
182095173954Sek 				    "loaded as it was last accessed by "
182177650510SLin Ling 				    "another system (host: %s hostid: 0x%lx). "
182295173954Sek 				    "See: http://www.sun.com/msg/ZFS-8000-EY",
1823e14bb325SJeff Bonwick 				    spa_name(spa), hostname,
182495173954Sek 				    (unsigned long)hostid);
18251195e687SMark J Musante 				return (EBADF);
182695173954Sek 			}
182795173954Sek 		}
1828c8ee1847SVictor Latushkin 		if (nvlist_lookup_nvlist(spa->spa_config,
1829c8ee1847SVictor Latushkin 		    ZPOOL_REWIND_POLICY, &policy) == 0)
1830c8ee1847SVictor Latushkin 			VERIFY(nvlist_add_nvlist(nvconfig,
1831c8ee1847SVictor Latushkin 			    ZPOOL_REWIND_POLICY, policy) == 0);
183295173954Sek 
183388ecc943SGeorge Wilson 		spa_config_set(spa, nvconfig);
1834fa9e4066Sahrens 		spa_unload(spa);
1835fa9e4066Sahrens 		spa_deactivate(spa);
18368ad4d6ddSJeff Bonwick 		spa_activate(spa, orig_mode);
1837fa9e4066Sahrens 
18381195e687SMark J Musante 		return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
1839fa9e4066Sahrens 	}
1840fa9e4066Sahrens 
18411195e687SMark J Musante 	if (spa_dir_prop(spa, DMU_POOL_SYNC_BPLIST,
18421195e687SMark J Musante 	    &spa->spa_deferred_bplist_obj) != 0)
18431195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1844fa9e4066Sahrens 
184599653d4eSeschrock 	/*
184699653d4eSeschrock 	 * Load the bit that tells us to use the new accounting function
184799653d4eSeschrock 	 * (raid-z deflation).  If we have an older pool, this will not
184899653d4eSeschrock 	 * be present.
184999653d4eSeschrock 	 */
18501195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate);
18511195e687SMark J Musante 	if (error != 0 && error != ENOENT)
18521195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
185399653d4eSeschrock 
1854fa9e4066Sahrens 	/*
1855ea8dc4b6Seschrock 	 * Load the persistent error log.  If we have an older pool, this will
1856ea8dc4b6Seschrock 	 * not be present.
1857fa9e4066Sahrens 	 */
18581195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last);
18591195e687SMark J Musante 	if (error != 0 && error != ENOENT)
18601195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1861ea8dc4b6Seschrock 
18621195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB,
18631195e687SMark J Musante 	    &spa->spa_errlog_scrub);
18641195e687SMark J Musante 	if (error != 0 && error != ENOENT)
18651195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1866ea8dc4b6Seschrock 
186706eeb2adSek 	/*
186806eeb2adSek 	 * Load the history object.  If we have an older pool, this
186906eeb2adSek 	 * will not be present.
187006eeb2adSek 	 */
18711195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history);
18721195e687SMark J Musante 	if (error != 0 && error != ENOENT)
18731195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
18741195e687SMark J Musante 
18751195e687SMark J Musante 	/*
18761195e687SMark J Musante 	 * If we're assembling the pool from the split-off vdevs of
18771195e687SMark J Musante 	 * an existing pool, we don't want to attach the spares & cache
18781195e687SMark J Musante 	 * devices.
18791195e687SMark J Musante 	 */
188006eeb2adSek 
188199653d4eSeschrock 	/*
188299653d4eSeschrock 	 * Load any hot spares for this pool.
188399653d4eSeschrock 	 */
18841195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object);
18851195e687SMark J Musante 	if (error != 0 && error != ENOENT)
18861195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
18871195e687SMark J Musante 	if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {
1888e7437265Sahrens 		ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
1889fa94a07fSbrendan 		if (load_nvlist(spa, spa->spa_spares.sav_object,
18901195e687SMark J Musante 		    &spa->spa_spares.sav_config) != 0)
18911195e687SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
189299653d4eSeschrock 
1893e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
189499653d4eSeschrock 		spa_load_spares(spa);
1895e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
18961195e687SMark J Musante 	} else if (error == 0) {
18971195e687SMark J Musante 		spa->spa_spares.sav_sync = B_TRUE;
189899653d4eSeschrock 	}
189999653d4eSeschrock 
1900fa94a07fSbrendan 	/*
1901fa94a07fSbrendan 	 * Load any level 2 ARC devices for this pool.
1902fa94a07fSbrendan 	 */
19031195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_L2CACHE,
1904fa94a07fSbrendan 	    &spa->spa_l2cache.sav_object);
19051195e687SMark J Musante 	if (error != 0 && error != ENOENT)
19061195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
19071195e687SMark J Musante 	if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {
1908fa94a07fSbrendan 		ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
1909fa94a07fSbrendan 		if (load_nvlist(spa, spa->spa_l2cache.sav_object,
19101195e687SMark J Musante 		    &spa->spa_l2cache.sav_config) != 0)
19111195e687SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1912fa94a07fSbrendan 
1913e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
1914fa94a07fSbrendan 		spa_load_l2cache(spa);
1915e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
19161195e687SMark J Musante 	} else if (error == 0) {
19171195e687SMark J Musante 		spa->spa_l2cache.sav_sync = B_TRUE;
1918fa94a07fSbrendan 	}
1919fa94a07fSbrendan 
1920990b4856Slling 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
1921ecd6cf80Smarks 
19221195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object);
19231195e687SMark J Musante 	if (error && error != ENOENT)
19241195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1925b1b8ab34Slling 
1926b1b8ab34Slling 	if (error == 0) {
19271195e687SMark J Musante 		uint64_t autoreplace;
19281195e687SMark J Musante 
19291195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
19301195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace);
19311195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
19321195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
19331195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
19341195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
19351195e687SMark J Musante 		    &spa->spa_dedup_ditto);
19361195e687SMark J Musante 
1937b693757aSEric Schrock 		spa->spa_autoreplace = (autoreplace != 0);
1938b1b8ab34Slling 	}
1939b1b8ab34Slling 
19403d7072f8Seschrock 	/*
19413d7072f8Seschrock 	 * If the 'autoreplace' property is set, then post a resource notifying
19423d7072f8Seschrock 	 * the ZFS DE that it should not issue any faults for unopenable
19433d7072f8Seschrock 	 * devices.  We also iterate over the vdevs, and post a sysevent for any
19443d7072f8Seschrock 	 * unopenable vdevs so that the normal autoreplace handler can take
19453d7072f8Seschrock 	 * over.
19463d7072f8Seschrock 	 */
1947b693757aSEric Schrock 	if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) {
19483d7072f8Seschrock 		spa_check_removed(spa->spa_root_vdev);
1949b693757aSEric Schrock 		/*
1950b693757aSEric Schrock 		 * For the import case, this is done in spa_import(), because
1951b693757aSEric Schrock 		 * at this point we're using the spare definitions from
1952b693757aSEric Schrock 		 * the MOS config, not necessarily from the userland config.
1953b693757aSEric Schrock 		 */
1954b693757aSEric Schrock 		if (state != SPA_LOAD_IMPORT) {
1955b693757aSEric Schrock 			spa_aux_check_removed(&spa->spa_spares);
1956b693757aSEric Schrock 			spa_aux_check_removed(&spa->spa_l2cache);
1957b693757aSEric Schrock 		}
1958b693757aSEric Schrock 	}
19593d7072f8Seschrock 
1960ea8dc4b6Seschrock 	/*
1961560e6e96Seschrock 	 * Load the vdev state for all toplevel vdevs.
1962ea8dc4b6Seschrock 	 */
1963560e6e96Seschrock 	vdev_load(rvd);
19640373e76bSbonwick 
1965fa9e4066Sahrens 	/*
1966fa9e4066Sahrens 	 * Propagate the leaf DTLs we just loaded all the way up the tree.
1967fa9e4066Sahrens 	 */
1968e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
1969fa9e4066Sahrens 	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
1970e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
1971fa9e4066Sahrens 
1972fa9e4066Sahrens 	/*
1973fa9e4066Sahrens 	 * Check the state of the root vdev.  If it can't be opened, it
1974fa9e4066Sahrens 	 * indicates one or more toplevel vdevs are faulted.
1975fa9e4066Sahrens 	 */
19761195e687SMark J Musante 	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
19771195e687SMark J Musante 		return (ENXIO);
1978fa9e4066Sahrens 
1979b24ab676SJeff Bonwick 	/*
1980b24ab676SJeff Bonwick 	 * Load the DDTs (dedup tables).
1981b24ab676SJeff Bonwick 	 */
1982b24ab676SJeff Bonwick 	error = ddt_load(spa);
19831195e687SMark J Musante 	if (error != 0)
19841195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1985b24ab676SJeff Bonwick 
1986485bbbf5SGeorge Wilson 	spa_update_dspace(spa);
1987485bbbf5SGeorge Wilson 
1988468c413aSTim Haley 	if (state != SPA_LOAD_TRYIMPORT) {
1989468c413aSTim Haley 		error = spa_load_verify(spa);
19901195e687SMark J Musante 		if (error)
19911195e687SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
19921195e687SMark J Musante 			    error));
1993468c413aSTim Haley 	}
1994468c413aSTim Haley 
1995b24ab676SJeff Bonwick 	/*
19961195e687SMark J Musante 	 * Load the intent log state and check log integrity.  If we're
19971195e687SMark J Musante 	 * assembling a pool from a split, the log is not transferred over.
1998b24ab676SJeff Bonwick 	 */
19991195e687SMark J Musante 	if (type != SPA_IMPORT_ASSEMBLE) {
2000871a9500SMark J Musante 		nvlist_t *nvconfig;
2001871a9500SMark J Musante 
2002871a9500SMark J Musante 		if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
2003871a9500SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2004871a9500SMark J Musante 
20051195e687SMark J Musante 		VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
20061195e687SMark J Musante 		    &nvroot) == 0);
20071195e687SMark J Musante 		spa_load_log_state(spa, nvroot);
20081195e687SMark J Musante 		nvlist_free(nvconfig);
20091195e687SMark J Musante 
20101195e687SMark J Musante 		if (spa_check_logs(spa)) {
20111195e687SMark J Musante 			*ereport = FM_EREPORT_ZFS_LOG_REPLAY;
20121195e687SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO));
20131195e687SMark J Musante 		}
2014b24ab676SJeff Bonwick 	}
2015b24ab676SJeff Bonwick 
2016468c413aSTim Haley 	if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
2017468c413aSTim Haley 	    spa->spa_load_max_txg == UINT64_MAX)) {
20185dabedeeSbonwick 		dmu_tx_t *tx;
20190373e76bSbonwick 		int need_update = B_FALSE;
20208ad4d6ddSJeff Bonwick 
20218ad4d6ddSJeff Bonwick 		ASSERT(state != SPA_LOAD_TRYIMPORT);
20225dabedeeSbonwick 
20230373e76bSbonwick 		/*
20240373e76bSbonwick 		 * Claim log blocks that haven't been committed yet.
20250373e76bSbonwick 		 * This must all happen in a single txg.
2026b24ab676SJeff Bonwick 		 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
2027b24ab676SJeff Bonwick 		 * invoked from zil_claim_log_block()'s i/o done callback.
2028468c413aSTim Haley 		 * Price of rollback is that we abandon the log.
20290373e76bSbonwick 		 */
2030b24ab676SJeff Bonwick 		spa->spa_claiming = B_TRUE;
2031b24ab676SJeff Bonwick 
20325dabedeeSbonwick 		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
2033fa9e4066Sahrens 		    spa_first_txg(spa));
2034e14bb325SJeff Bonwick 		(void) dmu_objset_find(spa_name(spa),
20350b69c2f0Sahrens 		    zil_claim, tx, DS_FIND_CHILDREN);
2036fa9e4066Sahrens 		dmu_tx_commit(tx);
2037fa9e4066Sahrens 
2038b24ab676SJeff Bonwick 		spa->spa_claiming = B_FALSE;
2039b24ab676SJeff Bonwick 
20401195e687SMark J Musante 		spa_set_log_state(spa, SPA_LOG_GOOD);
2041fa9e4066Sahrens 		spa->spa_sync_on = B_TRUE;
2042fa9e4066Sahrens 		txg_sync_start(spa->spa_dsl_pool);
2043fa9e4066Sahrens 
2044fa9e4066Sahrens 		/*
2045b24ab676SJeff Bonwick 		 * Wait for all claims to sync.  We sync up to the highest
2046b24ab676SJeff Bonwick 		 * claimed log block birth time so that claimed log blocks
2047b24ab676SJeff Bonwick 		 * don't appear to be from the future.  spa_claim_max_txg
2048b24ab676SJeff Bonwick 		 * will have been set for us by either zil_check_log_chain()
2049b24ab676SJeff Bonwick 		 * (invoked from spa_check_logs()) or zil_claim() above.
2050fa9e4066Sahrens 		 */
2051b24ab676SJeff Bonwick 		txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
20520e34b6a7Sbonwick 
20530e34b6a7Sbonwick 		/*
20540373e76bSbonwick 		 * If the config cache is stale, or we have uninitialized
20550373e76bSbonwick 		 * metaslabs (see spa_vdev_add()), then update the config.
2056bc758434SLin Ling 		 *
2057bc758434SLin Ling 		 * If spa_load_verbatim is true, trust the current
2058bc758434SLin Ling 		 * in-core spa_config and update the disk labels.
20590e34b6a7Sbonwick 		 */
20600373e76bSbonwick 		if (config_cache_txg != spa->spa_config_txg ||
2061468c413aSTim Haley 		    state == SPA_LOAD_IMPORT || spa->spa_load_verbatim ||
2062468c413aSTim Haley 		    state == SPA_LOAD_RECOVER)
20630373e76bSbonwick 			need_update = B_TRUE;
20640373e76bSbonwick 
20658ad4d6ddSJeff Bonwick 		for (int c = 0; c < rvd->vdev_children; c++)
20660373e76bSbonwick 			if (rvd->vdev_child[c]->vdev_ms_array == 0)
20670373e76bSbonwick 				need_update = B_TRUE;
20680e34b6a7Sbonwick 
20690e34b6a7Sbonwick 		/*
20700373e76bSbonwick 		 * Update the config cache asychronously in case we're the
20710373e76bSbonwick 		 * root pool, in which case the config cache isn't writable yet.
20720e34b6a7Sbonwick 		 */
20730373e76bSbonwick 		if (need_update)
20740373e76bSbonwick 			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
20758ad4d6ddSJeff Bonwick 
20768ad4d6ddSJeff Bonwick 		/*
20778ad4d6ddSJeff Bonwick 		 * Check all DTLs to see if anything needs resilvering.
20788ad4d6ddSJeff Bonwick 		 */
20798ad4d6ddSJeff Bonwick 		if (vdev_resilver_needed(rvd, NULL, NULL))
20808ad4d6ddSJeff Bonwick 			spa_async_request(spa, SPA_ASYNC_RESILVER);
2081503ad85cSMatthew Ahrens 
2082503ad85cSMatthew Ahrens 		/*
2083503ad85cSMatthew Ahrens 		 * Delete any inconsistent datasets.
2084503ad85cSMatthew Ahrens 		 */
2085503ad85cSMatthew Ahrens 		(void) dmu_objset_find(spa_name(spa),
2086503ad85cSMatthew Ahrens 		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
2087ca45db41SChris Kirby 
2088ca45db41SChris Kirby 		/*
2089ca45db41SChris Kirby 		 * Clean up any stale temporary dataset userrefs.
2090ca45db41SChris Kirby 		 */
2091ca45db41SChris Kirby 		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
2092fa9e4066Sahrens 	}
2093fa9e4066Sahrens 
20941195e687SMark J Musante 	return (0);
2095fa9e4066Sahrens }
2096fa9e4066Sahrens 
/*
 * Tear the pool down and retry the load at one txg earlier than the last
 * attempt.  Used by spa_load_best() to walk backwards through uberblocks
 * during pool recovery.  The unload/deactivate pair must complete before
 * reactivation, and async tasks are suspended so nothing races with the
 * retried load.
 */
static int
spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
{
	spa_unload(spa);
	spa_deactivate(spa);

	/* Rewind the load target by one transaction group. */
	spa->spa_load_max_txg--;

	spa_activate(spa, spa_mode_global);
	spa_async_suspend(spa);

	return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
}
2110468c413aSTim Haley 
/*
 * Load the pool, optionally rewinding to an earlier txg on failure.
 *
 * A first load is attempted at spa_load_max_txg (either the caller's
 * recovery target txg or 'max_request').  If that fails and rewind is
 * allowed by 'rewind_flags', lower spa_load_max_txg one txg at a time
 * via spa_load_retry() until the load succeeds, the uberblock txg drops
 * below 'min_txg' (TXG_INITIAL for extreme rewind, otherwise the last
 * synced txg minus TXG_DEFER_SIZE), or no usable uberblock remains.
 * The pre-rewind config, if one could be generated, is annotated with
 * rewind data and installed so userland can report what was discarded.
 * Returns the rewind error for SPA_LOAD_RECOVER, else the original
 * load error.
 */
static int
spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
    uint64_t max_request, int rewind_flags)
{
	nvlist_t *config = NULL;
	int load_error, rewind_error;
	uint64_t safe_rewind_txg;
	uint64_t min_txg;

	if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
		spa->spa_load_max_txg = spa->spa_load_txg;
		spa_set_log_state(spa, SPA_LOG_CLEAR);
	} else {
		spa->spa_load_max_txg = max_request;
	}

	load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
	    mosconfig);
	if (load_error == 0)
		return (0);

	/*
	 * Capture the failed config before any rewind attempt disturbs
	 * the vdev tree, so it can be returned to userland.
	 */
	if (spa->spa_root_vdev != NULL)
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
	spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;

	if (rewind_flags & ZPOOL_NEVER_REWIND) {
		nvlist_free(config);
		return (load_error);
	}

	/* Price of rolling back is discarding txgs, including log */
	if (state == SPA_LOAD_RECOVER)
		spa_set_log_state(spa, SPA_LOG_CLEAR);

	spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
	safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
	min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ?
	    TXG_INITIAL : safe_rewind_txg;

	/*
	 * Continue as long as we're finding errors, we're still within
	 * the acceptable rewind range, and we're still finding uberblocks
	 */
	while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg &&
	    spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) {
		if (spa->spa_load_max_txg < safe_rewind_txg)
			spa->spa_extreme_rewind = B_TRUE;
		rewind_error = spa_load_retry(spa, state, mosconfig);
	}

	if (config)
		spa_rewind_data_to_nvlist(spa, config);

	spa->spa_extreme_rewind = B_FALSE;
	spa->spa_load_max_txg = UINT64_MAX;

	/*
	 * Install the annotated config unless a recovery succeeded, in
	 * which case the rewound in-core config is already current.
	 */
	if (config && (rewind_error || state != SPA_LOAD_RECOVER))
		spa_config_set(spa, config);

	return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
}
2174468c413aSTim Haley 
2175fa9e4066Sahrens /*
2176fa9e4066Sahrens  * Pool Open/Import
2177fa9e4066Sahrens  *
2178fa9e4066Sahrens  * The import case is identical to an open except that the configuration is sent
2179fa9e4066Sahrens  * down from userland, instead of grabbed from the configuration cache.  For the
2180fa9e4066Sahrens  * case of an open, the pool configuration will exist in the
21813d7072f8Seschrock  * POOL_STATE_UNINITIALIZED state.
2182fa9e4066Sahrens  *
2183fa9e4066Sahrens  * The stats information (gen/count/ustats) is used to gather vdev statistics at
2184fa9e4066Sahrens  * the same time open the pool, without having to keep around the spa_t in some
2185fa9e4066Sahrens  * ambiguous state.
2186fa9e4066Sahrens  */
/*
 * Worker for spa_open() and spa_open_rewind().  Look up the pool by name,
 * loading it first (honoring any rewind policy in 'nvpolicy') if it is
 * still POOL_STATE_UNINITIALIZED.  On success a reference is held on
 * behalf of 'tag' and *spapp is set; if 'config' is non-NULL it receives
 * a freshly generated (or, on failure, duplicated) pool config that the
 * caller must free.  On failure *spapp is NULL and an errno is returned.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
    nvlist_t **config)
{
	spa_t *spa;
	int error;
	int locked = B_FALSE;

	*spapp = NULL;

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
		spa_load_state_t state = SPA_LOAD_OPEN;
		zpool_rewind_policy_t policy;

		/*
		 * Fall back to the pool's cached config when the caller
		 * supplied no explicit rewind policy.
		 */
		zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config,
		    &policy);
		if (policy.zrp_request & ZPOOL_DO_REWIND)
			state = SPA_LOAD_RECOVER;

		spa_activate(spa, spa_mode_global);

		/*
		 * If the previous open failed and no rewind is being
		 * requested, return the cached failure without reloading.
		 */
		if (spa->spa_last_open_failed && (policy.zrp_request &
		    (ZPOOL_NO_REWIND | ZPOOL_NEVER_REWIND))) {
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config,
				    config, KM_SLEEP) == 0);
			spa_deactivate(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (spa->spa_last_open_failed);
		}

		if (state != SPA_LOAD_RECOVER)
			spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;

		error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg,
		    policy.zrp_request);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it indicates that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_TRUE, B_TRUE);
			spa_remove(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config, config,
				    KM_SLEEP) == 0);
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = error;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		}

	}

	spa_open_ref(spa, tag);


	if (config != NULL)
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	/*
	 * Clear the open-failure and rewind bookkeeping only when we own
	 * the namespace lock (i.e. this is not a recursive open).
	 */
	if (locked) {
		spa->spa_last_open_failed = 0;
		spa->spa_last_ubsync_txg = 0;
		spa->spa_load_txg = 0;
		mutex_exit(&spa_namespace_lock);
	}

	*spapp = spa;

	return (0);
}
2296fa9e4066Sahrens 
2297468c413aSTim Haley int
2298468c413aSTim Haley spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy,
2299468c413aSTim Haley     nvlist_t **config)
2300468c413aSTim Haley {
2301468c413aSTim Haley 	return (spa_open_common(name, spapp, tag, policy, config));
2302468c413aSTim Haley }
2303468c413aSTim Haley 
2304fa9e4066Sahrens int
2305fa9e4066Sahrens spa_open(const char *name, spa_t **spapp, void *tag)
2306fa9e4066Sahrens {
2307468c413aSTim Haley 	return (spa_open_common(name, spapp, tag, NULL, NULL));
2308fa9e4066Sahrens }
2309fa9e4066Sahrens 
2310ea8dc4b6Seschrock /*
2311ea8dc4b6Seschrock  * Lookup the given spa_t, incrementing the inject count in the process,
2312ea8dc4b6Seschrock  * preventing it from being exported or destroyed.
2313ea8dc4b6Seschrock  */
2314ea8dc4b6Seschrock spa_t *
2315ea8dc4b6Seschrock spa_inject_addref(char *name)
2316ea8dc4b6Seschrock {
2317ea8dc4b6Seschrock 	spa_t *spa;
2318ea8dc4b6Seschrock 
2319ea8dc4b6Seschrock 	mutex_enter(&spa_namespace_lock);
2320ea8dc4b6Seschrock 	if ((spa = spa_lookup(name)) == NULL) {
2321ea8dc4b6Seschrock 		mutex_exit(&spa_namespace_lock);
2322ea8dc4b6Seschrock 		return (NULL);
2323ea8dc4b6Seschrock 	}
2324ea8dc4b6Seschrock 	spa->spa_inject_ref++;
2325ea8dc4b6Seschrock 	mutex_exit(&spa_namespace_lock);
2326ea8dc4b6Seschrock 
2327ea8dc4b6Seschrock 	return (spa);
2328ea8dc4b6Seschrock }
2329ea8dc4b6Seschrock 
/*
 * Drop a reference taken by spa_inject_addref(), re-allowing export or
 * destruction of the pool once the count reaches zero.
 */
void
spa_inject_delref(spa_t *spa)
{
	mutex_enter(&spa_namespace_lock);
	spa->spa_inject_ref--;
	mutex_exit(&spa_namespace_lock);
}
2337ea8dc4b6Seschrock 
2338fa94a07fSbrendan /*
2339fa94a07fSbrendan  * Add spares device information to the nvlist.
2340fa94a07fSbrendan  */
234199653d4eSeschrock static void
234299653d4eSeschrock spa_add_spares(spa_t *spa, nvlist_t *config)
234399653d4eSeschrock {
234499653d4eSeschrock 	nvlist_t **spares;
234599653d4eSeschrock 	uint_t i, nspares;
234699653d4eSeschrock 	nvlist_t *nvroot;
234799653d4eSeschrock 	uint64_t guid;
234899653d4eSeschrock 	vdev_stat_t *vs;
234999653d4eSeschrock 	uint_t vsc;
235039c23413Seschrock 	uint64_t pool;
235199653d4eSeschrock 
23526809eb4eSEric Schrock 	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
23536809eb4eSEric Schrock 
2354fa94a07fSbrendan 	if (spa->spa_spares.sav_count == 0)
235599653d4eSeschrock 		return;
235699653d4eSeschrock 
235799653d4eSeschrock 	VERIFY(nvlist_lookup_nvlist(config,
235899653d4eSeschrock 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2359fa94a07fSbrendan 	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
236099653d4eSeschrock 	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
236199653d4eSeschrock 	if (nspares != 0) {
236299653d4eSeschrock 		VERIFY(nvlist_add_nvlist_array(nvroot,
236399653d4eSeschrock 		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
236499653d4eSeschrock 		VERIFY(nvlist_lookup_nvlist_array(nvroot,
236599653d4eSeschrock 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
236699653d4eSeschrock 
236799653d4eSeschrock 		/*
236899653d4eSeschrock 		 * Go through and find any spares which have since been
236999653d4eSeschrock 		 * repurposed as an active spare.  If this is the case, update
237099653d4eSeschrock 		 * their status appropriately.
237199653d4eSeschrock 		 */
237299653d4eSeschrock 		for (i = 0; i < nspares; i++) {
237399653d4eSeschrock 			VERIFY(nvlist_lookup_uint64(spares[i],
237499653d4eSeschrock 			    ZPOOL_CONFIG_GUID, &guid) == 0);
237589a89ebfSlling 			if (spa_spare_exists(guid, &pool, NULL) &&
237689a89ebfSlling 			    pool != 0ULL) {
237799653d4eSeschrock 				VERIFY(nvlist_lookup_uint64_array(
237899653d4eSeschrock 				    spares[i], ZPOOL_CONFIG_STATS,
237999653d4eSeschrock 				    (uint64_t **)&vs, &vsc) == 0);
238099653d4eSeschrock 				vs->vs_state = VDEV_STATE_CANT_OPEN;
238199653d4eSeschrock 				vs->vs_aux = VDEV_AUX_SPARED;
238299653d4eSeschrock 			}
238399653d4eSeschrock 		}
238499653d4eSeschrock 	}
238599653d4eSeschrock }
238699653d4eSeschrock 
/*
 * Add l2cache device information to the nvlist, including vdev stats.
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	/* Caller holds the config lock so the l2cache list can't change. */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		/*
		 * Copy the cached l2cache list into the config, then look
		 * the copy back up so the stats updates below modify the
		 * copies rather than the cached originals.
		 */
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */

		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/* Find the in-core vdev matching this guid. */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			/* Every configured l2cache entry must have a vdev. */
			ASSERT(vd != NULL);

			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
			vdev_get_stats(vd, vs);
		}
	}
}
2440fa94a07fSbrendan 
/*
 * Retrieve the pool's config (with error count, suspended state, and
 * spare/l2cache information added) into *config, and copy its alternate
 * root, if any, into 'altroot' (a buffer of 'buflen' bytes).  The config
 * is returned even when the open fails, if one is available; the caller
 * must free it.  Returns the result of the underlying open.
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, NULL, config);

	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	/* Drop the config lock and the reference taken by the open. */
	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}
2499fa9e4066Sahrens 
/*
 * Validate that the auxiliary device array is well formed.  We must have an
 * array of nvlists, each which describes a valid leaf vdev.  If this is an
 * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
 * specified, as long as they are well-formed.
 *
 * On success the GUID of each device is written back into its nvlist.
 * Returns 0, or EINVAL / ENOTSUP / an open or label error on failure.
 */
static int
spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
    spa_aux_vdev_t *sav, const char *config, uint64_t version,
    vdev_labeltype_t label)
{
	nvlist_t **dev;
	uint_t i, ndev;
	vdev_t *vd;
	int error;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * It's acceptable to have no devs specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
		return (0);

	if (ndev == 0)
		return (EINVAL);

	/*
	 * Make sure the pool is formatted with a version that supports this
	 * device type.
	 */
	if (spa_version(spa) < version)
		return (ENOTSUP);

	/*
	 * Set the pending device list so we correctly handle device in-use
	 * checking.
	 */
	sav->sav_pending = dev;
	sav->sav_npending = ndev;

	for (i = 0; i < ndev; i++) {
		if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
		    mode)) != 0)
			goto out;

		/* Auxiliary devices must be leaves (no mirrors/raidz). */
		if (!vd->vdev_ops->vdev_op_leaf) {
			vdev_free(vd);
			error = EINVAL;
			goto out;
		}

		/*
		 * The L2ARC currently only supports disk devices in
		 * kernel context.  For user-level testing, we allow it.
		 */
#ifdef _KERNEL
		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
			error = ENOTBLK;
			goto out;
		}
#endif
		vd->vdev_top = vd;

		/*
		 * Open and label the device; on success, record the guid
		 * assigned by labeling back into the caller's nvlist.
		 */
		if ((error = vdev_open(vd)) == 0 &&
		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
			    vd->vdev_guid) == 0);
		}

		vdev_free(vd);

		/*
		 * Open/label failures are tolerated for spare and l2cache
		 * allocations (the device may legitimately be in use or
		 * damaged on import); all other modes fail hard.
		 */
		if (error &&
		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
			goto out;
		else
			error = 0;
	}

out:
	/* Clear the pending list on every exit path. */
	sav->sav_pending = NULL;
	sav->sav_npending = 0;
	return (error);
}
258599653d4eSeschrock 
2586fa94a07fSbrendan static int
2587fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
2588fa94a07fSbrendan {
2589fa94a07fSbrendan 	int error;
2590fa94a07fSbrendan 
2591e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
2592e14bb325SJeff Bonwick 
2593fa94a07fSbrendan 	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
2594fa94a07fSbrendan 	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
2595fa94a07fSbrendan 	    VDEV_LABEL_SPARE)) != 0) {
2596fa94a07fSbrendan 		return (error);
2597fa94a07fSbrendan 	}
2598fa94a07fSbrendan 
2599fa94a07fSbrendan 	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
2600fa94a07fSbrendan 	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
2601fa94a07fSbrendan 	    VDEV_LABEL_L2CACHE));
2602fa94a07fSbrendan }
2603fa94a07fSbrendan 
/*
 * Replace the 'config' array inside sav->sav_config with the union of the
 * existing entries and 'devs', creating sav_config if it does not yet
 * exist.  All entries are duplicated into the nvlist, so the caller
 * retains ownership of 'devs'.
 */
static void
spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
    const char *config)
{
	int i;

	if (sav->sav_config != NULL) {
		nvlist_t **olddevs;
		uint_t oldndevs;
		nvlist_t **newdevs;

		/*
		 * Generate new dev list by concatentating with the
		 * current dev list.
		 */
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
		    &olddevs, &oldndevs) == 0);

		newdevs = kmem_alloc(sizeof (void *) *
		    (ndevs + oldndevs), KM_SLEEP);
		for (i = 0; i < oldndevs; i++)
			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
			    KM_SLEEP) == 0);
		for (i = 0; i < ndevs; i++)
			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
			    KM_SLEEP) == 0);

		/* Drop the old array before installing the merged one. */
		VERIFY(nvlist_remove(sav->sav_config, config,
		    DATA_TYPE_NVLIST_ARRAY) == 0);

		/*
		 * The add copies the array into sav_config, so the
		 * temporary array and its elements are freed afterwards.
		 */
		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
		    config, newdevs, ndevs + oldndevs) == 0);
		for (i = 0; i < oldndevs + ndevs; i++)
			nvlist_free(newdevs[i]);
		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
	} else {
		/*
		 * Generate a new dev list.
		 */
		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
		    devs, ndevs) == 0);
	}
}
2648fa94a07fSbrendan }
2649fa94a07fSbrendan 
2650fa94a07fSbrendan /*
2651fa94a07fSbrendan  * Stop and drop level 2 ARC devices
2652fa94a07fSbrendan  */
2653fa94a07fSbrendan void
2654fa94a07fSbrendan spa_l2cache_drop(spa_t *spa)
2655fa94a07fSbrendan {
2656fa94a07fSbrendan 	vdev_t *vd;
2657fa94a07fSbrendan 	int i;
2658fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
2659fa94a07fSbrendan 
2660fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++) {
2661fa94a07fSbrendan 		uint64_t pool;
2662fa94a07fSbrendan 
2663fa94a07fSbrendan 		vd = sav->sav_vdevs[i];
2664fa94a07fSbrendan 		ASSERT(vd != NULL);
2665fa94a07fSbrendan 
26668ad4d6ddSJeff Bonwick 		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
26678ad4d6ddSJeff Bonwick 		    pool != 0ULL && l2arc_vdev_present(vd))
2668fa94a07fSbrendan 			l2arc_remove_vdev(vd);
2669fa94a07fSbrendan 		if (vd->vdev_isl2cache)
2670fa94a07fSbrendan 			spa_l2cache_remove(vd);
2671fa94a07fSbrendan 		vdev_clear_stats(vd);
2672fa94a07fSbrendan 		(void) vdev_close(vd);
2673fa94a07fSbrendan 	}
2674fa94a07fSbrendan }
2675fa94a07fSbrendan 
/*
 * Pool Creation
 *
 * Create a brand-new pool from the vdev description in 'nvroot',
 * apply any pool properties in 'props', and create the root dataset
 * using 'zplprops'.  If 'history_str' is non-NULL it is logged as the
 * pool-create command in the pool history.  Returns 0 on success or an
 * errno value (e.g. EEXIST if the pool name is already in use).
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    const char *history_str, nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, NULL, altroot);
	spa_activate(spa, spa_mode_global);

	/* Nothing has been loaded yet, so deactivate/remove suffices here. */
	if (props && (error = spa_prop_validate(spa, props))) {
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/* Use the requested on-disk version, defaulting to the current one. */
	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
	    &version) != 0)
		version = SPA_VERSION;
	ASSERT(version <= SPA_VERSION);

	spa->spa_first_txg = txg;
	spa->spa_uberblock.ub_txg = txg - 1;
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create "The Godfather" zio to hold all async IOs
	 */
	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	/* Reject a vdev spec that provides no allocatable devices. */
	if (error == 0 && !zfs_allocatable_devs(nvroot))
		error = EINVAL;

	/*
	 * Initialize the new top-level vdevs, validate any aux devices,
	 * then size and expand each child's metaslabs.
	 */
	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_aux(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (int c = 0; c < rvd->vdev_children; c++) {
			vdev_metaslab_set_size(rvd->vdev_child[c]);
			vdev_expand(rvd->vdev_child[c], txg);
		}
	}

	spa_config_exit(spa, SCL_ALL, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	/*
	 * Get the list of level 2 cache devices, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/* Create the DSL pool; the MOS (meta-objset) comes with it. */
	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	/*
	 * Create DDTs (dedup tables).
	 */
	ddt_create(spa);

	spa_update_dspace(spa);

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	/* Newly created pools with the right version are always deflated. */
	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
		spa->spa_deflate = TRUE;
		if (zap_add(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
			cmn_err(CE_PANIC, "failed to add deflate");
		}
	}

	/*
	 * Create the deferred-free bplist object.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset,
	    1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset,
	    spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bplist");
	}

	/*
	 * Create the pool's history object.
	 */
	if (version >= SPA_VERSION_ZPOOL_HISTORY)
		spa_history_create_obj(spa, tx);

	/*
	 * Set pool properties to their defaults.
	 */
	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);

	/* Apply any caller-supplied property overrides in this txg. */
	if (props != NULL) {
		spa_configfile_set(spa, props, B_FALSE);
		spa_sync_props(spa, props, CRED(), tx);
	}

	dmu_tx_commit(tx);

	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
	spa_history_log_version(spa, LOG_POOL_CREATE);

	/* Baseline reference count for this pool's lifetime. */
	spa->spa_minref = refcount_count(&spa->spa_refcount);

	mutex_exit(&spa_namespace_lock);

	return (0);
}
2890fa9e4066Sahrens 
2891e7cbe64fSgw #ifdef _KERNEL
2892e7cbe64fSgw /*
289321ecdf64SLin Ling  * Get the root pool information from the root disk, then import the root pool
289421ecdf64SLin Ling  * during the system boot up time.
2895e7cbe64fSgw  */
289621ecdf64SLin Ling extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
289721ecdf64SLin Ling 
/*
 * Read the ZFS label from the given boot device and fabricate a pool
 * configuration whose vdev tree contains only that device, placed under
 * a newly constructed root vdev.  On success, the guid of the labeled
 * device is returned through 'guid' and the caller owns the returned
 * config nvlist; NULL is returned if the label cannot be read.
 */
static nvlist_t *
spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
{
	nvlist_t *config;
	nvlist_t *nvtop, *nvroot;
	uint64_t pgid;

	if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0)
		return (NULL);

	/*
	 * Add this top-level vdev to the child array.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pgid) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0);

	/*
	 * Put this pool's top-level vdevs into a root vdev.
	 */
	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &nvtop, 1) == 0);

	/*
	 * Replace the existing vdev_tree with the new root vdev in
	 * this pool's configuration (remove the old, add the new).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);
	return (config);
}
2936e7cbe64fSgw 
2937e7cbe64fSgw /*
293821ecdf64SLin Ling  * Walk the vdev tree and see if we can find a device with "better"
293921ecdf64SLin Ling  * configuration. A configuration is "better" if the label on that
294021ecdf64SLin Ling  * device has a more recent txg.
2941051aabe6Staylor  */
294221ecdf64SLin Ling static void
294321ecdf64SLin Ling spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
2944051aabe6Staylor {
2945573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++)
294621ecdf64SLin Ling 		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);
2947051aabe6Staylor 
294821ecdf64SLin Ling 	if (vd->vdev_ops->vdev_op_leaf) {
294921ecdf64SLin Ling 		nvlist_t *label;
295021ecdf64SLin Ling 		uint64_t label_txg;
2951051aabe6Staylor 
295221ecdf64SLin Ling 		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
295321ecdf64SLin Ling 		    &label) != 0)
295421ecdf64SLin Ling 			return;
2955051aabe6Staylor 
295621ecdf64SLin Ling 		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
295721ecdf64SLin Ling 		    &label_txg) == 0);
2958051aabe6Staylor 
295921ecdf64SLin Ling 		/*
296021ecdf64SLin Ling 		 * Do we have a better boot device?
296121ecdf64SLin Ling 		 */
296221ecdf64SLin Ling 		if (label_txg > *txg) {
296321ecdf64SLin Ling 			*txg = label_txg;
296421ecdf64SLin Ling 			*avd = vd;
2965051aabe6Staylor 		}
296621ecdf64SLin Ling 		nvlist_free(label);
2967051aabe6Staylor 	}
2968051aabe6Staylor }
2969051aabe6Staylor 
2970e7cbe64fSgw /*
2971e7cbe64fSgw  * Import a root pool.
2972e7cbe64fSgw  *
2973051aabe6Staylor  * For x86. devpath_list will consist of devid and/or physpath name of
2974051aabe6Staylor  * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
2975051aabe6Staylor  * The GRUB "findroot" command will return the vdev we should boot.
2976e7cbe64fSgw  *
2977e7cbe64fSgw  * For Sparc, devpath_list consists the physpath name of the booting device
2978e7cbe64fSgw  * no matter the rootpool is a single device pool or a mirrored pool.
2979e7cbe64fSgw  * e.g.
2980e7cbe64fSgw  *	"/pci@1f,0/ide@d/disk@0,0:a"
2981e7cbe64fSgw  */
/*
 * Read the boot device's label, fabricate a root-pool config from it,
 * and insert that pool into the namespace, replacing any stale entry of
 * the same name.  Sanity-checks that we are booting from the best and
 * active device.  Returns 0 on success or an errno value.
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	config = spa_generate_rootconf(devpath, devid, &guid);
#if defined(_OBP) && defined(_KERNEL)
	if (config == NULL) {
		if (strstr(devpath, "/iscsi/ssd") != NULL) {
			/* iscsi boot: resolve the physical path, then retry */
			get_iscsi_bootpath_phy(devpath);
			config = spa_generate_rootconf(devpath, devid, &guid);
		}
	}
#endif
	if (config == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	/* The label config is trusted verbatim during load. */
	spa = spa_add(pname, config, NULL);
	spa->spa_is_root = B_TRUE;
	spa->spa_load_verbatim = B_TRUE;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 * NOTE(review): the message points at vdev_child[1], presumably
	 * the active spare in a spare pair — confirm against vdev layout.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->vdev_child[1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/* The parsed vdev tree was only needed for validation; free it. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}
309121ecdf64SLin Ling 
3092e7cbe64fSgw #endif
3093e7cbe64fSgw 
3094e7cbe64fSgw /*
30956809eb4eSEric Schrock  * Take a pool and insert it into the namespace as if it had been loaded at
30966809eb4eSEric Schrock  * boot.
3097e7cbe64fSgw  */
3098e7cbe64fSgw int
30996809eb4eSEric Schrock spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
3100e7cbe64fSgw {
31016809eb4eSEric Schrock 	spa_t *spa;
31026809eb4eSEric Schrock 	char *altroot = NULL;
31036809eb4eSEric Schrock 
31046809eb4eSEric Schrock 	mutex_enter(&spa_namespace_lock);
31056809eb4eSEric Schrock 	if (spa_lookup(pool) != NULL) {
31066809eb4eSEric Schrock 		mutex_exit(&spa_namespace_lock);
31076809eb4eSEric Schrock 		return (EEXIST);
31086809eb4eSEric Schrock 	}
31096809eb4eSEric Schrock 
31106809eb4eSEric Schrock 	(void) nvlist_lookup_string(props,
31116809eb4eSEric Schrock 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
3112468c413aSTim Haley 	spa = spa_add(pool, config, altroot);
31136809eb4eSEric Schrock 
3114468c413aSTim Haley 	spa->spa_load_verbatim = B_TRUE;
31156809eb4eSEric Schrock 
31166809eb4eSEric Schrock 	if (props != NULL)
31176809eb4eSEric Schrock 		spa_configfile_set(spa, props, B_FALSE);
31186809eb4eSEric Schrock 
31196809eb4eSEric Schrock 	spa_config_sync(spa, B_FALSE, B_TRUE);
31206809eb4eSEric Schrock 
31216809eb4eSEric Schrock 	mutex_exit(&spa_namespace_lock);
3122c8e1f6d2SMark J Musante 	spa_history_log_version(spa, LOG_POOL_IMPORT);
31236809eb4eSEric Schrock 
31246809eb4eSEric Schrock 	return (0);
3125e7cbe64fSgw }
3126e7cbe64fSgw 
/*
 * Import a non-root pool into the system.  The caller-supplied 'config'
 * is trusted as the pool configuration (and may carry a rewind policy);
 * 'props' may carry pool properties such as altroot.  Returns 0 on
 * success or an errno value.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* A rewind request in the config switches us into recovery mode. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    policy.zrp_request);

	/*
	 * Propagate anything learned about failing or best txgs
	 * back to caller
	 */
	spa_rewind_data_to_nvlist(spa, config);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/* Validate the aux devices named in the user-supplied config. */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/* On load failure, or failure to set writable props, tear down. */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}
3285c5904d13Seschrock 
/*
 * Probe a pool described by 'tryconfig' without actually importing it.
 * The pool is loaded read-only under the reserved TRYIMPORT_NAME, a
 * fleshed-out config nvlist is generated for the caller, and then the
 * transient spa_t is torn down again.  Returns the generated config
 * (caller frees), or NULL if 'tryconfig' lacks a pool name or state.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	/* Both the pool name and pool state must be present to proceed. */
	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.  The namespace lock is
	 * held for the entire lifetime of this transient spa_t, through
	 * the spa_remove() below.  FREAD: the pool is only ever opened
	 * read-only here.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 * A non-NULL root vdev means spa_load() got far enough to build a
	 * vdev tree, even if the load itself ultimately failed.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Splice the real pool name in front of the
				 * dataset-relative part of the bootfs name.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* Tear down the transient spa_t; only 'config' survives. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}
3375fa9e4066Sahrens 
/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple.  We make sure there
 * is no more pending I/O and any references to the pool are gone.  Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 *
 * 'new_state' selects the flavor of teardown:
 *	POOL_STATE_DESTROYED	- destroy (spa_destroy())
 *	POOL_STATE_EXPORTED	- export (spa_export())
 *	POOL_STATE_UNINITIALIZED - reset: unload but keep the spa_t in the
 *				  namespace (spa_reset())
 * If 'oldconfig' is non-NULL, a duplicate of the pool's config is returned
 * through it (caller frees).  Returns 0 or an errno.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	/* Exporting requires that ZFS as a whole is writeable. */
	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 * (spa_async_suspend() may block, so it must not be called with
	 * the namespace lock held.)
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 * Skipped for reset and for hardforce, which leave the
		 * on-disk labels untouched.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the (pre-unload) config back to the caller if requested. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	/*
	 * For export/destroy, drop the pool from the config cache and the
	 * namespace entirely; a reset leaves the spa_t in place.
	 */
	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}
3483fa9e4066Sahrens 
3484fa9e4066Sahrens /*
3485fa9e4066Sahrens  * Destroy a storage pool.
3486fa9e4066Sahrens  */
3487fa9e4066Sahrens int
3488fa9e4066Sahrens spa_destroy(char *pool)
3489fa9e4066Sahrens {
3490394ab0cbSGeorge Wilson 	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
3491394ab0cbSGeorge Wilson 	    B_FALSE, B_FALSE));
3492fa9e4066Sahrens }
3493fa9e4066Sahrens 
3494fa9e4066Sahrens /*
3495fa9e4066Sahrens  * Export a storage pool.
3496fa9e4066Sahrens  */
3497fa9e4066Sahrens int
3498394ab0cbSGeorge Wilson spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
3499394ab0cbSGeorge Wilson     boolean_t hardforce)
3500fa9e4066Sahrens {
3501394ab0cbSGeorge Wilson 	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
3502394ab0cbSGeorge Wilson 	    force, hardforce));
3503fa9e4066Sahrens }
3504fa9e4066Sahrens 
3505ea8dc4b6Seschrock /*
3506ea8dc4b6Seschrock  * Similar to spa_export(), this unloads the spa_t without actually removing it
3507ea8dc4b6Seschrock  * from the namespace in any way.
3508ea8dc4b6Seschrock  */
3509ea8dc4b6Seschrock int
3510ea8dc4b6Seschrock spa_reset(char *pool)
3511ea8dc4b6Seschrock {
351289a89ebfSlling 	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
3513394ab0cbSGeorge Wilson 	    B_FALSE, B_FALSE));
3514ea8dc4b6Seschrock }
3515ea8dc4b6Seschrock 
3516fa9e4066Sahrens /*
3517fa9e4066Sahrens  * ==========================================================================
3518fa9e4066Sahrens  * Device manipulation
3519fa9e4066Sahrens  * ==========================================================================
3520fa9e4066Sahrens  */
3521fa9e4066Sahrens 
/*
 * Add a device to a storage pool.  'nvroot' describes the new top-level
 * vdevs and/or spare and l2cache devices to add.  Returns 0 or an errno;
 * all exits go through spa_vdev_exit(), which syncs the config.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	/* Missing spare/l2cache arrays simply mean "none to add". */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* The request must add at least one device of some kind. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 * (If no hole is found, 'id' ends up as rvd->vdev_children,
		 * i.e. the next slot at the end.)
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}
3621fa9e4066Sahrens 
/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 *
 * 'guid' identifies the existing device; 'nvroot' must describe exactly
 * one new leaf vdev.  Returns 0 or an errno via spa_vdev_exit().
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	/* Only leaf vdevs (actual devices) can be attached to. */
	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* The supplied nvroot must describe exactly one new device. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* +5 covers the "/old" suffix plus the NUL terminator. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	newvd->vdev_crtxg = oldvd->vdev_crtxg;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/*
	 * Duplicate the paths before spa_vdev_exit() drops the locks, so
	 * the history log below doesn't touch vdev state unlocked.
	 */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(),  "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}
3814fa9e4066Sahrens 
3815fa9e4066Sahrens /*
3816fa9e4066Sahrens  * Detach a device from a mirror or replacing vdev.
3817fa9e4066Sahrens  * If 'replace_done' is specified, only detach if the parent
3818fa9e4066Sahrens  * is a replacing vdev.
3819fa9e4066Sahrens  */
3820fa9e4066Sahrens int
38218ad4d6ddSJeff Bonwick spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
3822fa9e4066Sahrens {
3823fa9e4066Sahrens 	uint64_t txg;
38248ad4d6ddSJeff Bonwick 	int error;
3825fa9e4066Sahrens 	vdev_t *rvd = spa->spa_root_vdev;
3826fa9e4066Sahrens 	vdev_t *vd, *pvd, *cvd, *tvd;
382799653d4eSeschrock 	boolean_t unspare = B_FALSE;
382899653d4eSeschrock 	uint64_t unspare_guid;
3829bf82a41bSeschrock 	size_t len;
38301195e687SMark J Musante 	char *vdpath;
3831fa9e4066Sahrens 
3832fa9e4066Sahrens 	txg = spa_vdev_enter(spa);
3833fa9e4066Sahrens 
3834c5904d13Seschrock 	vd = spa_lookup_by_guid(spa, guid, B_FALSE);
3835fa9e4066Sahrens 
3836fa9e4066Sahrens 	if (vd == NULL)
3837fa9e4066Sahrens 		return (spa_vdev_exit(spa, NULL, txg, ENODEV));
3838fa9e4066Sahrens 
38390e34b6a7Sbonwick 	if (!vd->vdev_ops->vdev_op_leaf)
38400e34b6a7Sbonwick 		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
38410e34b6a7Sbonwick 
3842fa9e4066Sahrens 	pvd = vd->vdev_parent;
3843fa9e4066Sahrens 
38448ad4d6ddSJeff Bonwick 	/*
38458ad4d6ddSJeff Bonwick 	 * If the parent/child relationship is not as expected, don't do it.
38468ad4d6ddSJeff Bonwick 	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
38478ad4d6ddSJeff Bonwick 	 * vdev that's replacing B with C.  The user's intent in replacing
38488ad4d6ddSJeff Bonwick 	 * is to go from M(A,B) to M(A,C).  If the user decides to cancel
38498ad4d6ddSJeff Bonwick 	 * the replace by detaching C, the expected behavior is to end up
38508ad4d6ddSJeff Bonwick 	 * M(A,B).  But suppose that right after deciding to detach C,
38518ad4d6ddSJeff Bonwick 	 * the replacement of B completes.  We would have M(A,C), and then
38528ad4d6ddSJeff Bonwick 	 * ask to detach C, which would leave us with just A -- not what
38538ad4d6ddSJeff Bonwick 	 * the user wanted.  To prevent this, we make sure that the
38548ad4d6ddSJeff Bonwick 	 * parent/child relationship hasn't changed -- in this example,
38558ad4d6ddSJeff Bonwick 	 * that C's parent is still the replacing vdev R.
38568ad4d6ddSJeff Bonwick 	 */
38578ad4d6ddSJeff Bonwick 	if (pvd->vdev_guid != pguid && pguid != 0)
38588ad4d6ddSJeff Bonwick 		return (spa_vdev_exit(spa, NULL, txg, EBUSY));
38598ad4d6ddSJeff Bonwick 
3860fa9e4066Sahrens 	/*
3861fa9e4066Sahrens 	 * If replace_done is specified, only remove this device if it's
386299653d4eSeschrock 	 * the first child of a replacing vdev.  For the 'spare' vdev, either
386399653d4eSeschrock 	 * disk can be removed.
386499653d4eSeschrock 	 */
386599653d4eSeschrock 	if (replace_done) {
386699653d4eSeschrock 		if (pvd->vdev_ops == &vdev_replacing_ops) {
386799653d4eSeschrock 			if (vd->vdev_id != 0)
386899653d4eSeschrock 				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
386999653d4eSeschrock 		} else if (pvd->vdev_ops != &vdev_spare_ops) {
387099653d4eSeschrock 			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
387199653d4eSeschrock 		}
387299653d4eSeschrock 	}
387399653d4eSeschrock 
387499653d4eSeschrock 	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
3875e7437265Sahrens 	    spa_version(spa) >= SPA_VERSION_SPARES);
3876fa9e4066Sahrens 
3877fa9e4066Sahrens 	/*
387899653d4eSeschrock 	 * Only mirror, replacing, and spare vdevs support detach.
3879fa9e4066Sahrens 	 */
3880fa9e4066Sahrens 	if (pvd->vdev_ops != &vdev_replacing_ops &&
388199653d4eSeschrock 	    pvd->vdev_ops != &vdev_mirror_ops &&
388299653d4eSeschrock 	    pvd->vdev_ops != &vdev_spare_ops)
3883fa9e4066Sahrens 		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
3884fa9e4066Sahrens 
3885fa9e4066Sahrens 	/*
38868ad4d6ddSJeff Bonwick 	 * If this device has the only valid copy of some data,
38878ad4d6ddSJeff Bonwick 	 * we cannot safely detach it.
3888fa9e4066Sahrens 	 */
38898ad4d6ddSJeff Bonwick 	if (vdev_dtl_required(vd))
3890fa9e4066Sahrens 		return (spa_vdev_exit(spa, NULL, txg, EBUSY));
3891fa9e4066Sahrens 
38928ad4d6ddSJeff Bonwick 	ASSERT(pvd->vdev_children >= 2);
3893fa9e4066Sahrens 
3894bf82a41bSeschrock 	/*
3895bf82a41bSeschrock 	 * If we are detaching the second disk from a replacing vdev, then
3896bf82a41bSeschrock 	 * check to see if we changed the original vdev's path to have "/old"
3897bf82a41bSeschrock 	 * at the end in spa_vdev_attach().  If so, undo that change now.
3898bf82a41bSeschrock 	 */
3899bf82a41bSeschrock 	if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
3900bf82a41bSeschrock 	    pvd->vdev_child[0]->vdev_path != NULL &&
3901bf82a41bSeschrock 	    pvd->vdev_child[1]->vdev_path != NULL) {
3902bf82a41bSeschrock 		ASSERT(pvd->vdev_child[1] == vd);
3903bf82a41bSeschrock 		cvd = pvd->vdev_child[0];
3904bf82a41bSeschrock 		len = strlen(vd->vdev_path);
3905bf82a41bSeschrock 		if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
3906bf82a41bSeschrock 		    strcmp(cvd->vdev_path + len, "/old") == 0) {
3907bf82a41bSeschrock 			spa_strfree(cvd->vdev_path);
3908bf82a41bSeschrock 			cvd->vdev_path = spa_strdup(vd->vdev_path);
3909bf82a41bSeschrock 		}
3910bf82a41bSeschrock 	}
3911bf82a41bSeschrock 
391299653d4eSeschrock 	/*
391399653d4eSeschrock 	 * If we are detaching the original disk from a spare, then it implies
391499653d4eSeschrock 	 * that the spare should become a real disk, and be removed from the
391599653d4eSeschrock 	 * active spare list for the pool.
391699653d4eSeschrock 	 */
391799653d4eSeschrock 	if (pvd->vdev_ops == &vdev_spare_ops &&
39188ad4d6ddSJeff Bonwick 	    vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
391999653d4eSeschrock 		unspare = B_TRUE;
392099653d4eSeschrock 
3921fa9e4066Sahrens 	/*
3922fa9e4066Sahrens 	 * Erase the disk labels so the disk can be used for other things.
3923fa9e4066Sahrens 	 * This must be done after all other error cases are handled,
3924fa9e4066Sahrens 	 * but before we disembowel vd (so we can still do I/O to it).
3925fa9e4066Sahrens 	 * But if we can't do it, don't treat the error as fatal --
3926fa9e4066Sahrens 	 * it may be that the unwritability of the disk is the reason
3927fa9e4066Sahrens 	 * it's being detached!
3928fa9e4066Sahrens 	 */
392939c23413Seschrock 	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
3930fa9e4066Sahrens 
3931fa9e4066Sahrens 	/*
3932fa9e4066Sahrens 	 * Remove vd from its parent and compact the parent's children.
3933fa9e4066Sahrens 	 */
3934fa9e4066Sahrens 	vdev_remove_child(pvd, vd);
3935fa9e4066Sahrens 	vdev_compact_children(pvd);
3936fa9e4066Sahrens 
3937fa9e4066Sahrens 	/*
3938fa9e4066Sahrens 	 * Remember one of the remaining children so we can get tvd below.
3939fa9e4066Sahrens 	 */
3940fa9e4066Sahrens 	cvd = pvd->vdev_child[0];
3941fa9e4066Sahrens 
394299653d4eSeschrock 	/*
394399653d4eSeschrock 	 * If we need to remove the remaining child from the list of hot spares,
39448ad4d6ddSJeff Bonwick 	 * do it now, marking the vdev as no longer a spare in the process.
39458ad4d6ddSJeff Bonwick 	 * We must do this before vdev_remove_parent(), because that can
39468ad4d6ddSJeff Bonwick 	 * change the GUID if it creates a new toplevel GUID.  For a similar
39478ad4d6ddSJeff Bonwick 	 * reason, we must remove the spare now, in the same txg as the detach;
39488ad4d6ddSJeff Bonwick 	 * otherwise someone could attach a new sibling, change the GUID, and
39498ad4d6ddSJeff Bonwick 	 * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
395099653d4eSeschrock 	 */
395199653d4eSeschrock 	if (unspare) {
395299653d4eSeschrock 		ASSERT(cvd->vdev_isspare);
395339c23413Seschrock 		spa_spare_remove(cvd);
395499653d4eSeschrock 		unspare_guid = cvd->vdev_guid;
39558ad4d6ddSJeff Bonwick 		(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
395699653d4eSeschrock 	}
395799653d4eSeschrock 
3958fa9e4066Sahrens 	/*
3959fa9e4066Sahrens 	 * If the parent mirror/replacing vdev only has one child,
3960fa9e4066Sahrens 	 * the parent is no longer needed.  Remove it from the tree.
3961fa9e4066Sahrens 	 */
3962fa9e4066Sahrens 	if (pvd->vdev_children == 1)
3963fa9e4066Sahrens 		vdev_remove_parent(cvd);
3964fa9e4066Sahrens 
3965fa9e4066Sahrens 	/*
3966fa9e4066Sahrens 	 * We don't set tvd until now because the parent we just removed
3967fa9e4066Sahrens 	 * may have been the previous top-level vdev.
3968fa9e4066Sahrens 	 */
3969fa9e4066Sahrens 	tvd = cvd->vdev_top;
3970fa9e4066Sahrens 	ASSERT(tvd->vdev_parent == rvd);
3971fa9e4066Sahrens 
3972fa9e4066Sahrens 	/*
397339c23413Seschrock 	 * Reevaluate the parent vdev state.
3974fa9e4066Sahrens 	 */
39753d7072f8Seschrock 	vdev_propagate_state(cvd);
3976fa9e4066Sahrens 
3977fa9e4066Sahrens 	/*
3978573ca77eSGeorge Wilson 	 * If the 'autoexpand' property is set on the pool then automatically
3979573ca77eSGeorge Wilson 	 * try to expand the size of the pool. For example if the device we
3980573ca77eSGeorge Wilson 	 * just detached was smaller than the others, it may be possible to
3981573ca77eSGeorge Wilson 	 * add metaslabs (i.e. grow the pool). We need to reopen the vdev
3982573ca77eSGeorge Wilson 	 * first so that we can obtain the updated sizes of the leaf vdevs.
3983fa9e4066Sahrens 	 */
3984573ca77eSGeorge Wilson 	if (spa->spa_autoexpand) {
3985573ca77eSGeorge Wilson 		vdev_reopen(tvd);
3986573ca77eSGeorge Wilson 		vdev_expand(tvd, txg);
3987573ca77eSGeorge Wilson 	}
3988fa9e4066Sahrens 
3989fa9e4066Sahrens 	vdev_config_dirty(tvd);
3990fa9e4066Sahrens 
3991fa9e4066Sahrens 	/*
399239c23413Seschrock 	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
399339c23413Seschrock 	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
399439c23413Seschrock 	 * But first make sure we're not on any *other* txg's DTL list, to
399539c23413Seschrock 	 * prevent vd from being accessed after it's freed.
3996fa9e4066Sahrens 	 */
39971195e687SMark J Musante 	vdpath = spa_strdup(vd->vdev_path);
39988ad4d6ddSJeff Bonwick 	for (int t = 0; t < TXG_SIZE; t++)
3999fa9e4066Sahrens 		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
4000ecc2d604Sbonwick 	vd->vdev_detached = B_TRUE;
4001ecc2d604Sbonwick 	vdev_dirty(tvd, VDD_DTL, vd, txg);
4002fa9e4066Sahrens 
40033d7072f8Seschrock 	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
40043d7072f8Seschrock 
400599653d4eSeschrock 	error = spa_vdev_exit(spa, vd, txg, 0);
400699653d4eSeschrock 
40071195e687SMark J Musante 	spa_history_internal_log(LOG_POOL_VDEV_DETACH, spa, NULL, CRED(),
40081195e687SMark J Musante 	    "vdev=%s", vdpath);
40091195e687SMark J Musante 	spa_strfree(vdpath);
40101195e687SMark J Musante 
401199653d4eSeschrock 	/*
401239c23413Seschrock 	 * If this was the removal of the original device in a hot spare vdev,
401339c23413Seschrock 	 * then we want to go through and remove the device from the hot spare
401439c23413Seschrock 	 * list of every other pool.
401599653d4eSeschrock 	 */
401699653d4eSeschrock 	if (unspare) {
40178ad4d6ddSJeff Bonwick 		spa_t *myspa = spa;
401899653d4eSeschrock 		spa = NULL;
401999653d4eSeschrock 		mutex_enter(&spa_namespace_lock);
402099653d4eSeschrock 		while ((spa = spa_next(spa)) != NULL) {
402199653d4eSeschrock 			if (spa->spa_state != POOL_STATE_ACTIVE)
402299653d4eSeschrock 				continue;
40238ad4d6ddSJeff Bonwick 			if (spa == myspa)
40248ad4d6ddSJeff Bonwick 				continue;
40259af0a4dfSJeff Bonwick 			spa_open_ref(spa, FTAG);
40269af0a4dfSJeff Bonwick 			mutex_exit(&spa_namespace_lock);
402799653d4eSeschrock 			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
40289af0a4dfSJeff Bonwick 			mutex_enter(&spa_namespace_lock);
40299af0a4dfSJeff Bonwick 			spa_close(spa, FTAG);
403099653d4eSeschrock 		}
403199653d4eSeschrock 		mutex_exit(&spa_namespace_lock);
403299653d4eSeschrock 	}
403399653d4eSeschrock 
403499653d4eSeschrock 	return (error);
403599653d4eSeschrock }
403699653d4eSeschrock 
40371195e687SMark J Musante /*
40381195e687SMark J Musante  * Split a set of devices from their mirrors, and create a new pool from them.
40391195e687SMark J Musante  */
/*
 * Parameters:
 *   spa     - source pool to split
 *   newname - name of the pool to create from the split-off devices
 *   config  - caller-supplied config whose vdev tree names exactly one
 *             child of each top-level mirror to split off
 *   props   - optional property list for the new pool (may be NULL)
 *   exp     - if B_TRUE, export the new pool once the split completes
 *
 * Returns 0 on success or an errno on failure.  On failure, all disks
 * that were offlined for the split are brought back online and the
 * original pool's splitting state is cleared (see the "out:" unwind).
 */
40401195e687SMark J Musante int
40411195e687SMark J Musante spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
40421195e687SMark J Musante     nvlist_t *props, boolean_t exp)
40431195e687SMark J Musante {
40441195e687SMark J Musante 	int error = 0;
40451195e687SMark J Musante 	uint64_t txg, *glist;
40461195e687SMark J Musante 	spa_t *newspa;
40471195e687SMark J Musante 	uint_t c, children, lastlog;
40481195e687SMark J Musante 	nvlist_t **child, *nvl, *tmp;
40491195e687SMark J Musante 	dmu_tx_t *tx;
40501195e687SMark J Musante 	char *altroot = NULL;
40511195e687SMark J Musante 	vdev_t *rvd, **vml = NULL;			/* vdev modify list */
40521195e687SMark J Musante 	boolean_t activate_slog;
40531195e687SMark J Musante 
40541195e687SMark J Musante 	if (!spa_writeable(spa))
40551195e687SMark J Musante 		return (EROFS);
40561195e687SMark J Musante 
40571195e687SMark J Musante 	txg = spa_vdev_enter(spa);
40581195e687SMark J Musante 
40591195e687SMark J Musante 	/* clear the log and flush everything up to now */
40601195e687SMark J Musante 	activate_slog = spa_passivate_log(spa);
	/*
	 * NOTE(review): the config portion of the vdev lock is dropped here
	 * so spa_offline_log() can do I/O, then re-taken below; the
	 * namespace lock taken by spa_vdev_enter() above is presumably held
	 * across the gap -- confirm against spa_vdev_config_exit/enter.
	 */
40611195e687SMark J Musante 	(void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
40621195e687SMark J Musante 	error = spa_offline_log(spa);
40631195e687SMark J Musante 	txg = spa_vdev_config_enter(spa);
40641195e687SMark J Musante 
40651195e687SMark J Musante 	if (activate_slog)
40661195e687SMark J Musante 		spa_activate_log(spa);
40671195e687SMark J Musante 
40681195e687SMark J Musante 	if (error != 0)
40691195e687SMark J Musante 		return (spa_vdev_exit(spa, NULL, txg, error));
40701195e687SMark J Musante 
40711195e687SMark J Musante 	/* check new spa name before going any further */
40721195e687SMark J Musante 	if (spa_lookup(newname) != NULL)
40731195e687SMark J Musante 		return (spa_vdev_exit(spa, NULL, txg, EEXIST));
40741195e687SMark J Musante 
40751195e687SMark J Musante 	/*
40761195e687SMark J Musante 	 * scan through all the children to ensure they're all mirrors
40771195e687SMark J Musante 	 */
40781195e687SMark J Musante 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 ||
40791195e687SMark J Musante 	    nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child,
40801195e687SMark J Musante 	    &children) != 0)
40811195e687SMark J Musante 		return (spa_vdev_exit(spa, NULL, txg, EINVAL));
40821195e687SMark J Musante 
40831195e687SMark J Musante 	/* first, check to ensure we've got the right child count */
40841195e687SMark J Musante 	rvd = spa->spa_root_vdev;
40851195e687SMark J Musante 	lastlog = 0;
40861195e687SMark J Musante 	for (c = 0; c < rvd->vdev_children; c++) {
40871195e687SMark J Musante 		vdev_t *vd = rvd->vdev_child[c];
40881195e687SMark J Musante 
40891195e687SMark J Musante 		/* don't count the holes & logs as children */
40901195e687SMark J Musante 		if (vd->vdev_islog || vd->vdev_ishole) {
40911195e687SMark J Musante 			if (lastlog == 0)
40921195e687SMark J Musante 				lastlog = c;
40931195e687SMark J Musante 			continue;
40941195e687SMark J Musante 		}
40951195e687SMark J Musante 
40961195e687SMark J Musante 		lastlog = 0;
40971195e687SMark J Musante 	}
	/*
	 * After the loop, lastlog is nonzero only if the child array ends
	 * in a run of log/hole vdevs, in which case it is the index of the
	 * first vdev of that run.  The caller must therefore have supplied
	 * exactly one entry per leading non-log top-level vdev.
	 */
40981195e687SMark J Musante 	if (children != (lastlog != 0 ? lastlog : rvd->vdev_children))
40991195e687SMark J Musante 		return (spa_vdev_exit(spa, NULL, txg, EINVAL));
41001195e687SMark J Musante 
41011195e687SMark J Musante 	/* next, ensure no spare or cache devices are part of the split */
41021195e687SMark J Musante 	if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 ||
41031195e687SMark J Musante 	    nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0)
41041195e687SMark J Musante 		return (spa_vdev_exit(spa, NULL, txg, EINVAL));
41051195e687SMark J Musante 
41061195e687SMark J Musante 	vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP);
41071195e687SMark J Musante 	glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP);
41081195e687SMark J Musante 
41091195e687SMark J Musante 	/* then, loop over each vdev and validate it */
41101195e687SMark J Musante 	for (c = 0; c < children; c++) {
41111195e687SMark J Musante 		uint64_t is_hole = 0;
41121195e687SMark J Musante 
41131195e687SMark J Musante 		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
41141195e687SMark J Musante 		    &is_hole);
41151195e687SMark J Musante 
41161195e687SMark J Musante 		if (is_hole != 0) {
			/* holes are only acceptable where the pool has one */
41171195e687SMark J Musante 			if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole ||
41181195e687SMark J Musante 			    spa->spa_root_vdev->vdev_child[c]->vdev_islog) {
41191195e687SMark J Musante 				continue;
41201195e687SMark J Musante 			} else {
41211195e687SMark J Musante 				error = EINVAL;
41221195e687SMark J Musante 				break;
41231195e687SMark J Musante 			}
41241195e687SMark J Musante 		}
41251195e687SMark J Musante 
41261195e687SMark J Musante 		/* which disk is going to be split? */
41271195e687SMark J Musante 		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID,
41281195e687SMark J Musante 		    &glist[c]) != 0) {
41291195e687SMark J Musante 			error = EINVAL;
41301195e687SMark J Musante 			break;
41311195e687SMark J Musante 		}
41321195e687SMark J Musante 
41331195e687SMark J Musante 		/* look it up in the spa */
41341195e687SMark J Musante 		vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE);
41351195e687SMark J Musante 		if (vml[c] == NULL) {
41361195e687SMark J Musante 			error = ENODEV;
41371195e687SMark J Musante 			break;
41381195e687SMark J Musante 		}
41391195e687SMark J Musante 
41401195e687SMark J Musante 		/* make sure there's nothing stopping the split */
41411195e687SMark J Musante 		if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops ||
41421195e687SMark J Musante 		    vml[c]->vdev_islog ||
41431195e687SMark J Musante 		    vml[c]->vdev_ishole ||
41441195e687SMark J Musante 		    vml[c]->vdev_isspare ||
41451195e687SMark J Musante 		    vml[c]->vdev_isl2cache ||
41461195e687SMark J Musante 		    !vdev_writeable(vml[c]) ||
4147d41c4376SMark J Musante 		    vml[c]->vdev_children != 0 ||
41481195e687SMark J Musante 		    vml[c]->vdev_state != VDEV_STATE_HEALTHY ||
41491195e687SMark J Musante 		    c != spa->spa_root_vdev->vdev_child[c]->vdev_id) {
41501195e687SMark J Musante 			error = EINVAL;
41511195e687SMark J Musante 			break;
41521195e687SMark J Musante 		}
41531195e687SMark J Musante 
		/* refusing to split would leave the pool with missing data */
41541195e687SMark J Musante 		if (vdev_dtl_required(vml[c])) {
41551195e687SMark J Musante 			error = EBUSY;
41561195e687SMark J Musante 			break;
41571195e687SMark J Musante 		}
41581195e687SMark J Musante 
41591195e687SMark J Musante 		/* we need certain info from the top level */
41601195e687SMark J Musante 		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
41611195e687SMark J Musante 		    vml[c]->vdev_top->vdev_ms_array) == 0);
41621195e687SMark J Musante 		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
41631195e687SMark J Musante 		    vml[c]->vdev_top->vdev_ms_shift) == 0);
41641195e687SMark J Musante 		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
41651195e687SMark J Musante 		    vml[c]->vdev_top->vdev_asize) == 0);
41661195e687SMark J Musante 		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
41671195e687SMark J Musante 		    vml[c]->vdev_top->vdev_ashift) == 0);
41681195e687SMark J Musante 	}
41691195e687SMark J Musante 
41701195e687SMark J Musante 	if (error != 0) {
41711195e687SMark J Musante 		kmem_free(vml, children * sizeof (vdev_t *));
41721195e687SMark J Musante 		kmem_free(glist, children * sizeof (uint64_t));
41731195e687SMark J Musante 		return (spa_vdev_exit(spa, NULL, txg, error));
41741195e687SMark J Musante 	}
41751195e687SMark J Musante 
41761195e687SMark J Musante 	/* stop writers from using the disks */
41771195e687SMark J Musante 	for (c = 0; c < children; c++) {
41781195e687SMark J Musante 		if (vml[c] != NULL)
41791195e687SMark J Musante 			vml[c]->vdev_offline = B_TRUE;
41801195e687SMark J Musante 	}
41811195e687SMark J Musante 	vdev_reopen(spa->spa_root_vdev);
41821195e687SMark J Musante 
41831195e687SMark J Musante 	/*
41841195e687SMark J Musante 	 * Temporarily record the splitting vdevs in the spa config.  This
41851195e687SMark J Musante 	 * will disappear once the config is regenerated.
41861195e687SMark J Musante 	 */
41871195e687SMark J Musante 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
41881195e687SMark J Musante 	VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
41891195e687SMark J Musante 	    glist, children) == 0);
41901195e687SMark J Musante 	kmem_free(glist, children * sizeof (uint64_t));
41911195e687SMark J Musante 
	/* spa_config is protected by spa_props_lock while we modify it */
419298295d61SMark J Musante 	mutex_enter(&spa->spa_props_lock);
41931195e687SMark J Musante 	VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT,
41941195e687SMark J Musante 	    nvl) == 0);
419598295d61SMark J Musante 	mutex_exit(&spa->spa_props_lock);
41961195e687SMark J Musante 	spa->spa_config_splitting = nvl;
41971195e687SMark J Musante 	vdev_config_dirty(spa->spa_root_vdev);
41981195e687SMark J Musante 
41991195e687SMark J Musante 	/* configure and create the new pool */
42001195e687SMark J Musante 	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0);
42011195e687SMark J Musante 	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
42021195e687SMark J Musante 	    exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0);
42031195e687SMark J Musante 	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
42041195e687SMark J Musante 	    spa_version(spa)) == 0);
42051195e687SMark J Musante 	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
42061195e687SMark J Musante 	    spa->spa_config_txg) == 0);
42071195e687SMark J Musante 	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
42081195e687SMark J Musante 	    spa_generate_guid(NULL)) == 0);
42091195e687SMark J Musante 	(void) nvlist_lookup_string(props,
42101195e687SMark J Musante 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
42111195e687SMark J Musante 
4212d41c4376SMark J Musante 	/* add the new pool to the namespace */
42131195e687SMark J Musante 	newspa = spa_add(newname, config, altroot);
42141195e687SMark J Musante 	newspa->spa_config_txg = spa->spa_config_txg;
42151195e687SMark J Musante 	spa_set_log_state(newspa, SPA_LOG_CLEAR);
42161195e687SMark J Musante 
42171195e687SMark J Musante 	/* release the spa config lock, retaining the namespace lock */
42181195e687SMark J Musante 	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
42191195e687SMark J Musante 
42201195e687SMark J Musante 	if (zio_injection_enabled)
42211195e687SMark J Musante 		zio_handle_panic_injection(spa, FTAG, 1);
42221195e687SMark J Musante 
42231195e687SMark J Musante 	spa_activate(newspa, spa_mode_global);
42241195e687SMark J Musante 	spa_async_suspend(newspa);
42251195e687SMark J Musante 
42261195e687SMark J Musante 	/* create the new pool from the disks of the original pool */
42271195e687SMark J Musante 	error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE);
42281195e687SMark J Musante 	if (error)
42291195e687SMark J Musante 		goto out;
42301195e687SMark J Musante 
42311195e687SMark J Musante 	/* if that worked, generate a real config for the new pool */
42321195e687SMark J Musante 	if (newspa->spa_root_vdev != NULL) {
42331195e687SMark J Musante 		VERIFY(nvlist_alloc(&newspa->spa_config_splitting,
42341195e687SMark J Musante 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
42351195e687SMark J Musante 		VERIFY(nvlist_add_uint64(newspa->spa_config_splitting,
42361195e687SMark J Musante 		    ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0);
42371195e687SMark J Musante 		spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL,
42381195e687SMark J Musante 		    B_TRUE));
42391195e687SMark J Musante 	}
42401195e687SMark J Musante 
42411195e687SMark J Musante 	/* set the props */
42421195e687SMark J Musante 	if (props != NULL) {
42431195e687SMark J Musante 		spa_configfile_set(newspa, props, B_FALSE);
42441195e687SMark J Musante 		error = spa_prop_set(newspa, props);
42451195e687SMark J Musante 		if (error)
42461195e687SMark J Musante 			goto out;
42471195e687SMark J Musante 	}
42481195e687SMark J Musante 
42491195e687SMark J Musante 	/* flush everything */
42501195e687SMark J Musante 	txg = spa_vdev_config_enter(newspa);
42511195e687SMark J Musante 	vdev_config_dirty(newspa->spa_root_vdev);
42521195e687SMark J Musante 	(void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG);
42531195e687SMark J Musante 
42541195e687SMark J Musante 	if (zio_injection_enabled)
42551195e687SMark J Musante 		zio_handle_panic_injection(spa, FTAG, 2);
42561195e687SMark J Musante 
42571195e687SMark J Musante 	spa_async_resume(newspa);
42581195e687SMark J Musante 
42591195e687SMark J Musante 	/* finally, update the original pool's config */
42601195e687SMark J Musante 	txg = spa_vdev_config_enter(spa);
42611195e687SMark J Musante 	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
42621195e687SMark J Musante 	error = dmu_tx_assign(tx, TXG_WAIT);
42631195e687SMark J Musante 	if (error != 0)
42641195e687SMark J Musante 		dmu_tx_abort(tx);
	/*
	 * Detach the split-off children from the original pool.  The tx is
	 * used only for the per-vdev history records: if dmu_tx_assign()
	 * failed above, the split still proceeds but logging is skipped and
	 * the (aborted) tx is not committed below.
	 */
42651195e687SMark J Musante 	for (c = 0; c < children; c++) {
42661195e687SMark J Musante 		if (vml[c] != NULL) {
42671195e687SMark J Musante 			vdev_split(vml[c]);
42681195e687SMark J Musante 			if (error == 0)
42691195e687SMark J Musante 				spa_history_internal_log(LOG_POOL_VDEV_DETACH,
42701195e687SMark J Musante 				    spa, tx, CRED(), "vdev=%s",
42711195e687SMark J Musante 				    vml[c]->vdev_path);
42721195e687SMark J Musante 			vdev_free(vml[c]);
42731195e687SMark J Musante 		}
42741195e687SMark J Musante 	}
42751195e687SMark J Musante 	vdev_config_dirty(spa->spa_root_vdev);
42761195e687SMark J Musante 	spa->spa_config_splitting = NULL;
42771195e687SMark J Musante 	nvlist_free(nvl);
42781195e687SMark J Musante 	if (error == 0)
42791195e687SMark J Musante 		dmu_tx_commit(tx);
42801195e687SMark J Musante 	(void) spa_vdev_exit(spa, NULL, txg, 0);
42811195e687SMark J Musante 
42821195e687SMark J Musante 	if (zio_injection_enabled)
42831195e687SMark J Musante 		zio_handle_panic_injection(spa, FTAG, 3);
42841195e687SMark J Musante 
42851195e687SMark J Musante 	/* split is complete; log a history record */
42861195e687SMark J Musante 	spa_history_internal_log(LOG_POOL_SPLIT, newspa, NULL, CRED(),
42871195e687SMark J Musante 	    "split new pool %s from pool %s", newname, spa_name(spa));
42881195e687SMark J Musante 
42891195e687SMark J Musante 	kmem_free(vml, children * sizeof (vdev_t *));
42901195e687SMark J Musante 
42911195e687SMark J Musante 	/* if we're not going to mount the filesystems in userland, export */
42921195e687SMark J Musante 	if (exp)
42931195e687SMark J Musante 		error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL,
42941195e687SMark J Musante 		    B_FALSE, B_FALSE);
42951195e687SMark J Musante 
42961195e687SMark J Musante 	return (error);
42971195e687SMark J Musante 
42981195e687SMark J Musante out:
	/*
	 * Error unwind: tear down the half-created pool, then re-online the
	 * disks and clear the splitting state on the original pool.
	 */
42991195e687SMark J Musante 	spa_unload(newspa);
43001195e687SMark J Musante 	spa_deactivate(newspa);
43011195e687SMark J Musante 	spa_remove(newspa);
43021195e687SMark J Musante 
43031195e687SMark J Musante 	txg = spa_vdev_config_enter(spa);
430498295d61SMark J Musante 
430598295d61SMark J Musante 	/* re-online all offlined disks */
430698295d61SMark J Musante 	for (c = 0; c < children; c++) {
430798295d61SMark J Musante 		if (vml[c] != NULL)
430898295d61SMark J Musante 			vml[c]->vdev_offline = B_FALSE;
430998295d61SMark J Musante 	}
431098295d61SMark J Musante 	vdev_reopen(spa->spa_root_vdev);
431198295d61SMark J Musante 
43121195e687SMark J Musante 	nvlist_free(spa->spa_config_splitting);
43131195e687SMark J Musante 	spa->spa_config_splitting = NULL;
4314d41c4376SMark J Musante 	(void) spa_vdev_exit(spa, NULL, txg, error);
43151195e687SMark J Musante 
43161195e687SMark J Musante 	kmem_free(vml, children * sizeof (vdev_t *));
43171195e687SMark J Musante 	return (error);
43181195e687SMark J Musante }
43191195e687SMark J Musante 
4320e14bb325SJeff Bonwick static nvlist_t *
4321e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
432299653d4eSeschrock {
4323e14bb325SJeff Bonwick 	for (int i = 0; i < count; i++) {
4324e14bb325SJeff Bonwick 		uint64_t guid;
432599653d4eSeschrock 
4326e14bb325SJeff Bonwick 		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
4327e14bb325SJeff Bonwick 		    &guid) == 0);
432899653d4eSeschrock 
4329e14bb325SJeff Bonwick 		if (guid == target_guid)
4330e14bb325SJeff Bonwick 			return (nvpp[i]);
433199653d4eSeschrock 	}
433299653d4eSeschrock 
4333e14bb325SJeff Bonwick 	return (NULL);
4334fa94a07fSbrendan }
4335fa94a07fSbrendan 
4336e14bb325SJeff Bonwick static void
4337e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
4338e14bb325SJeff Bonwick 	nvlist_t *dev_to_remove)
4339fa94a07fSbrendan {
4340e14bb325SJeff Bonwick 	nvlist_t **newdev = NULL;
4341fa94a07fSbrendan 
4342e14bb325SJeff Bonwick 	if (count > 1)
4343e14bb325SJeff Bonwick 		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);
4344fa94a07fSbrendan 
4345e14bb325SJeff Bonwick 	for (int i = 0, j = 0; i < count; i++) {
4346e14bb325SJeff Bonwick 		if (dev[i] == dev_to_remove)
4347e14bb325SJeff Bonwick 			continue;
4348e14bb325SJeff Bonwick 		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
4349fa94a07fSbrendan 	}
4350fa94a07fSbrendan 
4351e14bb325SJeff Bonwick 	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
4352e14bb325SJeff Bonwick 	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);
4353fa94a07fSbrendan 
4354e14bb325SJeff Bonwick 	for (int i = 0; i < count - 1; i++)
4355e14bb325SJeff Bonwick 		nvlist_free(newdev[i]);
4356fa94a07fSbrendan 
4357e14bb325SJeff Bonwick 	if (count > 1)
4358e14bb325SJeff Bonwick 		kmem_free(newdev, (count - 1) * sizeof (void *));
4359fa94a07fSbrendan }
4360fa94a07fSbrendan 
436188ecc943SGeorge Wilson /*
436288ecc943SGeorge Wilson  * Removing a device from the vdev namespace requires several steps
436388ecc943SGeorge Wilson  * and can take a significant amount of time.  As a result we use
436488ecc943SGeorge Wilson  * the spa_vdev_config_[enter/exit] functions which allow us to
436588ecc943SGeorge Wilson  * grab and release the spa_config_lock while still holding the namespace
436688ecc943SGeorge Wilson  * lock.  During each step the configuration is synced out.
436788ecc943SGeorge Wilson  */
436888ecc943SGeorge Wilson 
436988ecc943SGeorge Wilson /*
437088ecc943SGeorge Wilson  * Evacuate the device.
437188ecc943SGeorge Wilson  */
/*
 * spa_vdev_remove_evacuate() -- migrate all allocated data off top-level
 * vdev 'vd' in preparation for removal.  Only log devices can currently
 * be evacuated; anything else returns ENOTSUP (pending bp rewrite).
 * Returns 0 on success or an errno on failure.
 */
437288ecc943SGeorge Wilson int
437388ecc943SGeorge Wilson spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
437488ecc943SGeorge Wilson {
4375a1521560SJeff Bonwick 	int error = 0;
437688ecc943SGeorge Wilson 	uint64_t txg;
437788ecc943SGeorge Wilson 
	/* caller holds the namespace lock but no config locks, and passes
	 * a top-level vdev */
437888ecc943SGeorge Wilson 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
437988ecc943SGeorge Wilson 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
4380b24ab676SJeff Bonwick 	ASSERT(vd == vd->vdev_top);
438188ecc943SGeorge Wilson 
438288ecc943SGeorge Wilson 	/*
438388ecc943SGeorge Wilson 	 * Evacuate the device.  We don't hold the config lock as writer
438488ecc943SGeorge Wilson 	 * since we need to do I/O but we do keep the
438588ecc943SGeorge Wilson 	 * spa_namespace_lock held.  Once this completes the device
438688ecc943SGeorge Wilson 	 * should no longer have any blocks allocated on it.
438788ecc943SGeorge Wilson 	 */
438888ecc943SGeorge Wilson 	if (vd->vdev_islog) {
		/* offline the ZIL of every dataset in the pool */
4389a1521560SJeff Bonwick 		error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
4390a1521560SJeff Bonwick 		    NULL, DS_FIND_CHILDREN);
4391a1521560SJeff Bonwick 	} else {
4392a1521560SJeff Bonwick 		error = ENOTSUP;	/* until we have bp rewrite */
439388ecc943SGeorge Wilson 	}
439488ecc943SGeorge Wilson 
	/* let the evacuation (or the failed attempt) reach stable storage */
4395a1521560SJeff Bonwick 	txg_wait_synced(spa_get_dsl(spa), 0);
4396a1521560SJeff Bonwick 
4397a1521560SJeff Bonwick 	if (error)
4398a1521560SJeff Bonwick 		return (error);
4399a1521560SJeff Bonwick 
440088ecc943SGeorge Wilson 	/*
4401a1521560SJeff Bonwick 	 * The evacuation succeeded.  Remove any remaining MOS metadata
4402a1521560SJeff Bonwick 	 * associated with this vdev, and wait for these changes to sync.
440388ecc943SGeorge Wilson 	 */
440488ecc943SGeorge Wilson 	txg = spa_vdev_config_enter(spa);
440588ecc943SGeorge Wilson 	vd->vdev_removing = B_TRUE;
440688ecc943SGeorge Wilson 	vdev_dirty(vd, 0, NULL, txg);
440788ecc943SGeorge Wilson 	vdev_config_dirty(vd);
440888ecc943SGeorge Wilson 	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
440988ecc943SGeorge Wilson 
441088ecc943SGeorge Wilson 	return (0);
441188ecc943SGeorge Wilson }
441288ecc943SGeorge Wilson 
441388ecc943SGeorge Wilson /*
441488ecc943SGeorge Wilson  * Complete the removal by cleaning up the namespace.
441588ecc943SGeorge Wilson  */
/*
 * spa_vdev_remove_from_namespace() -- final step of top-level vdev
 * removal: erase the device's labels, free the vdev, and either compact
 * the root's child array (if vd was the last child) or plug the slot
 * with a hole vdev so the remaining children keep their vdev ids.
 * Caller holds the namespace lock and all config locks as writer.
 */
441688ecc943SGeorge Wilson void
4417a1521560SJeff Bonwick spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd)
441888ecc943SGeorge Wilson {
441988ecc943SGeorge Wilson 	vdev_t *rvd = spa->spa_root_vdev;
442088ecc943SGeorge Wilson 	uint64_t id = vd->vdev_id;
442188ecc943SGeorge Wilson 	boolean_t last_vdev = (id == (rvd->vdev_children - 1));
442288ecc943SGeorge Wilson 
442388ecc943SGeorge Wilson 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
442488ecc943SGeorge Wilson 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
4425b24ab676SJeff Bonwick 	ASSERT(vd == vd->vdev_top);
442688ecc943SGeorge Wilson 
	/* erase the labels so the disk can be reused; failure is ignored */
442788ecc943SGeorge Wilson 	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
4428b24ab676SJeff Bonwick 
	/* unhook vd from the dirty lists before it is freed */
4429b24ab676SJeff Bonwick 	if (list_link_active(&vd->vdev_state_dirty_node))
4430b24ab676SJeff Bonwick 		vdev_state_clean(vd);
4431b24ab676SJeff Bonwick 	if (list_link_active(&vd->vdev_config_dirty_node))
4432b24ab676SJeff Bonwick 		vdev_config_clean(vd);
4433b24ab676SJeff Bonwick 
443488ecc943SGeorge Wilson 	vdev_free(vd);
443588ecc943SGeorge Wilson 
	/*
	 * If vd was the last top-level vdev we can simply shrink the child
	 * array; otherwise fill its slot (same id) with a hole vdev so the
	 * ids of the other children are preserved.
	 */
443688ecc943SGeorge Wilson 	if (last_vdev) {
443788ecc943SGeorge Wilson 		vdev_compact_children(rvd);
443888ecc943SGeorge Wilson 	} else {
443988ecc943SGeorge Wilson 		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
444088ecc943SGeorge Wilson 		vdev_add_child(rvd, vd);
444188ecc943SGeorge Wilson 	}
444288ecc943SGeorge Wilson 	vdev_config_dirty(rvd);
444388ecc943SGeorge Wilson 
444488ecc943SGeorge Wilson 	/*
444588ecc943SGeorge Wilson 	 * Reassess the health of our root vdev.
444688ecc943SGeorge Wilson 	 */
444788ecc943SGeorge Wilson 	vdev_reopen(rvd);
444888ecc943SGeorge Wilson }
444988ecc943SGeorge Wilson 
4450fa94a07fSbrendan /*
4451fa94a07fSbrendan  * Remove a device from the pool.  Currently, this supports removing only hot
445288ecc943SGeorge Wilson  * spares, slogs, and level 2 ARC devices.
4453fa94a07fSbrendan  */
4454fa94a07fSbrendan int
4455fa94a07fSbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
4456fa94a07fSbrendan {
4457fa94a07fSbrendan 	vdev_t *vd;
4458a1521560SJeff Bonwick 	metaslab_group_t *mg;
4459e14bb325SJeff Bonwick 	nvlist_t **spares, **l2cache, *nv;
44608ad4d6ddSJeff Bonwick 	uint64_t txg = 0;
446188ecc943SGeorge Wilson 	uint_t nspares, nl2cache;
4462fa94a07fSbrendan 	int error = 0;
44638ad4d6ddSJeff Bonwick 	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);
4464fa94a07fSbrendan 
44658ad4d6ddSJeff Bonwick 	if (!locked)
44668ad4d6ddSJeff Bonwick 		txg = spa_vdev_enter(spa);
4467fa94a07fSbrendan 
4468c5904d13Seschrock 	vd = spa_lookup_by_guid(spa, guid, B_FALSE);
4469fa94a07fSbrendan 
4470fa94a07fSbrendan 	if (spa->spa_spares.sav_vdevs != NULL &&
4471fa94a07fSbrendan 	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
4472e14bb325SJeff Bonwick 	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
4473e14bb325SJeff Bonwick 	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
4474e14bb325SJeff Bonwick 		/*
4475e14bb325SJeff Bonwick 		 * Only remove the hot spare if it's not currently in use
4476e14bb325SJeff Bonwick 		 * in this pool.
4477e14bb325SJeff Bonwick 		 */
4478e14bb325SJeff Bonwick 		if (vd == NULL || unspare) {
4479e14bb325SJeff Bonwick 			spa_vdev_remove_aux(spa->spa_spares.sav_config,
4480e14bb325SJeff Bonwick 			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
4481e14bb325SJeff Bonwick 			spa_load_spares(spa);
4482e14bb325SJeff Bonwick 			spa->spa_spares.sav_sync = B_TRUE;
4483e14bb325SJeff Bonwick 		} else {
4484e14bb325SJeff Bonwick 			error = EBUSY;
4485e14bb325SJeff Bonwick 		}
4486e14bb325SJeff Bonwick 	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
4487fa94a07fSbrendan 	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
4488e14bb325SJeff Bonwick 	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
4489e14bb325SJeff Bonwick 	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
4490e14bb325SJeff Bonwick 		/*
4491e14bb325SJeff Bonwick 		 * Cache devices can always be removed.
4492e14bb325SJeff Bonwick 		 */
4493e14bb325SJeff Bonwick 		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
4494e14bb325SJeff Bonwick 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
4495fa94a07fSbrendan 		spa_load_l2cache(spa);
4496fa94a07fSbrendan 		spa->spa_l2cache.sav_sync = B_TRUE;
449788ecc943SGeorge Wilson 	} else if (vd != NULL && vd->vdev_islog) {
449888ecc943SGeorge Wilson 		ASSERT(!locked);
4499b24ab676SJeff Bonwick 		ASSERT(vd == vd->vdev_top);
450088ecc943SGeorge Wilson 
450188ecc943SGeorge Wilson 		/*
450288ecc943SGeorge Wilson 		 * XXX - Once we have bp-rewrite this should
450388ecc943SGeorge Wilson 		 * become the common case.
450488ecc943SGeorge Wilson 		 */
450588ecc943SGeorge Wilson 
4506a1521560SJeff Bonwick 		mg = vd->vdev_mg;
4507a1521560SJeff Bonwick 
450888ecc943SGeorge Wilson 		/*
4509a1521560SJeff Bonwick 		 * Stop allocating from this vdev.
451088ecc943SGeorge Wilson 		 */
4511a1521560SJeff Bonwick 		metaslab_group_passivate(mg);
451288ecc943SGeorge Wilson 
4513b24ab676SJeff Bonwick 		/*
4514b24ab676SJeff Bonwick 		 * Wait for the youngest allocations and frees to sync,
4515b24ab676SJeff Bonwick 		 * and then wait for the deferral of those frees to finish.
4516b24ab676SJeff Bonwick 		 */
4517b24ab676SJeff Bonwick 		spa_vdev_config_exit(spa, NULL,
4518b24ab676SJeff Bonwick 		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
4519b24ab676SJeff Bonwick 
4520a1521560SJeff Bonwick 		/*
4521a1521560SJeff Bonwick 		 * Attempt to evacuate the vdev.
4522a1521560SJeff Bonwick 		 */
4523a1521560SJeff Bonwick 		error = spa_vdev_remove_evacuate(spa, vd);
4524a1521560SJeff Bonwick 
452588ecc943SGeorge Wilson 		txg = spa_vdev_config_enter(spa);
452688ecc943SGeorge Wilson 
4527a1521560SJeff Bonwick 		/*
4528a1521560SJeff Bonwick 		 * If we couldn't evacuate the vdev, unwind.
4529a1521560SJeff Bonwick 		 */
4530a1521560SJeff Bonwick 		if (error) {
4531a1521560SJeff Bonwick 			metaslab_group_activate(mg);
4532a1521560SJeff Bonwick 			return (spa_vdev_exit(spa, NULL, txg, error));
4533a1521560SJeff Bonwick 		}
4534a1521560SJeff Bonwick 
4535a1521560SJeff Bonwick 		/*
4536a1521560SJeff Bonwick 		 * Clean up the vdev namespace.
4537a1521560SJeff Bonwick 		 */
4538a1521560SJeff Bonwick 		spa_vdev_remove_from_namespace(spa, vd);
453988ecc943SGeorge Wilson 
4540e14bb325SJeff Bonwick 	} else if (vd != NULL) {
4541e14bb325SJeff Bonwick 		/*
4542e14bb325SJeff Bonwick 		 * Normal vdevs cannot be removed (yet).
4543e14bb325SJeff Bonwick 		 */
4544e14bb325SJeff Bonwick 		error = ENOTSUP;
4545e14bb325SJeff Bonwick 	} else {
4546e14bb325SJeff Bonwick 		/*
4547e14bb325SJeff Bonwick 		 * There is no vdev of any kind with the specified guid.
4548e14bb325SJeff Bonwick 		 */
4549e14bb325SJeff Bonwick 		error = ENOENT;
4550fa94a07fSbrendan 	}
455199653d4eSeschrock 
45528ad4d6ddSJeff Bonwick 	if (!locked)
45538ad4d6ddSJeff Bonwick 		return (spa_vdev_exit(spa, NULL, txg, error));
45548ad4d6ddSJeff Bonwick 
45558ad4d6ddSJeff Bonwick 	return (error);
4556fa9e4066Sahrens }
4557fa9e4066Sahrens 
4558fa9e4066Sahrens /*
45593d7072f8Seschrock  * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * currently spared, so we can detach it.
4561fa9e4066Sahrens  */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;

	/*
	 * Depth-first search: if any descendant has a detachable vdev,
	 * report it before considering this vdev itself.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
		oldvd = vd->vdev_child[0];
		newvd = vd->vdev_child[1];

		/*
		 * The replacement is complete when the new device has no
		 * missing or outage DTL entries and the old device is no
		 * longer required for correctness.
		 */
		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    vdev_dtl_empty(newvd, DTL_OUTAGE) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 * Note that for a spare vdev the original device is child 1
	 * and the spare itself is child 0.
	 */
	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
		newvd = vd->vdev_child[0];
		oldvd = vd->vdev_child[1];

		if (newvd->vdev_unspare &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    vdev_dtl_empty(newvd, DTL_OUTAGE) &&
		    !vdev_dtl_required(oldvd)) {
			/* Consume the flag so we only unspare once. */
			newvd->vdev_unspare = 0;
			return (oldvd);
		}
	}

	return (NULL);
}
4604fa9e4066Sahrens 
/*
 * Detach every vdev whose resilver has completed: the original half of
 * each finished 'replacing' pair and, when the replaced device was
 * itself a hot spare, the spare as well.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		/*
		 * Record the guids now: we drop SCL_ALL before calling
		 * spa_vdev_detach(), after which these vdev pointers may
		 * no longer be valid.
		 */
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			ASSERT(ppvd->vdev_children == 2);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		/* Reacquire and rescan: a detach may expose another. */
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}
4640fa9e4066Sahrens 
4641c67d9675Seschrock /*
4642b3388e4fSEric Taylor  * Update the stored path or FRU for this vdev.
4643c67d9675Seschrock  */
4644c67d9675Seschrock int
46456809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value,
46466809eb4eSEric Schrock     boolean_t ispath)
4647c67d9675Seschrock {
4648c5904d13Seschrock 	vdev_t *vd;
4649208044b8SGeorge Wilson 	boolean_t sync = B_FALSE;
4650c67d9675Seschrock 
4651b3388e4fSEric Taylor 	spa_vdev_state_enter(spa, SCL_ALL);
4652c67d9675Seschrock 
46536809eb4eSEric Schrock 	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
4654b3388e4fSEric Taylor 		return (spa_vdev_state_exit(spa, NULL, ENOENT));
4655c67d9675Seschrock 
46560e34b6a7Sbonwick 	if (!vd->vdev_ops->vdev_op_leaf)
4657b3388e4fSEric Taylor 		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
46580e34b6a7Sbonwick 
46596809eb4eSEric Schrock 	if (ispath) {
4660208044b8SGeorge Wilson 		if (strcmp(value, vd->vdev_path) != 0) {
4661208044b8SGeorge Wilson 			spa_strfree(vd->vdev_path);
4662208044b8SGeorge Wilson 			vd->vdev_path = spa_strdup(value);
4663208044b8SGeorge Wilson 			sync = B_TRUE;
4664208044b8SGeorge Wilson 		}
46656809eb4eSEric Schrock 	} else {
4666208044b8SGeorge Wilson 		if (vd->vdev_fru == NULL) {
4667208044b8SGeorge Wilson 			vd->vdev_fru = spa_strdup(value);
4668208044b8SGeorge Wilson 			sync = B_TRUE;
4669208044b8SGeorge Wilson 		} else if (strcmp(value, vd->vdev_fru) != 0) {
46706809eb4eSEric Schrock 			spa_strfree(vd->vdev_fru);
4671208044b8SGeorge Wilson 			vd->vdev_fru = spa_strdup(value);
4672208044b8SGeorge Wilson 			sync = B_TRUE;
4673208044b8SGeorge Wilson 		}
46746809eb4eSEric Schrock 	}
4675c67d9675Seschrock 
4676208044b8SGeorge Wilson 	return (spa_vdev_state_exit(spa, sync ? vd : NULL, 0));
4677c67d9675Seschrock }
4678c67d9675Seschrock 
/*
 * Convenience wrapper: update the stored device path for the vdev
 * identified by 'guid'.
 */
int
spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
{
	return (spa_vdev_set_common(spa, guid, newpath, B_TRUE));
}
46846809eb4eSEric Schrock 
/*
 * Convenience wrapper: update the stored FRU (field-replaceable unit)
 * identifier for the vdev identified by 'guid'.
 */
int
spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
{
	return (spa_vdev_set_common(spa, guid, newfru, B_FALSE));
}
46906809eb4eSEric Schrock 
4691fa9e4066Sahrens /*
4692fa9e4066Sahrens  * ==========================================================================
4693fa9e4066Sahrens  * SPA Scrubbing
4694fa9e4066Sahrens  * ==========================================================================
4695fa9e4066Sahrens  */
4696fa9e4066Sahrens 
4697ea8dc4b6Seschrock int
4698088f3894Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type)
4699fa9e4066Sahrens {
4700e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
4701bb8b5132Sek 
4702fa9e4066Sahrens 	if ((uint_t)type >= POOL_SCRUB_TYPES)
4703fa9e4066Sahrens 		return (ENOTSUP);
4704fa9e4066Sahrens 
4705fa9e4066Sahrens 	/*
4706088f3894Sahrens 	 * If a resilver was requested, but there is no DTL on a
4707088f3894Sahrens 	 * writeable leaf device, we have nothing to do.
4708fa9e4066Sahrens 	 */
4709088f3894Sahrens 	if (type == POOL_SCRUB_RESILVER &&
4710088f3894Sahrens 	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
4711088f3894Sahrens 		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
4712ea8dc4b6Seschrock 		return (0);
4713ea8dc4b6Seschrock 	}
4714fa9e4066Sahrens 
4715088f3894Sahrens 	if (type == POOL_SCRUB_EVERYTHING &&
4716088f3894Sahrens 	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
4717088f3894Sahrens 	    spa->spa_dsl_pool->dp_scrub_isresilver)
4718088f3894Sahrens 		return (EBUSY);
4719fa9e4066Sahrens 
4720088f3894Sahrens 	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
4721088f3894Sahrens 		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
4722088f3894Sahrens 	} else if (type == POOL_SCRUB_NONE) {
4723088f3894Sahrens 		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
4724ea8dc4b6Seschrock 	} else {
4725088f3894Sahrens 		return (EINVAL);
4726fa9e4066Sahrens 	}
4727fa9e4066Sahrens }
4728fa9e4066Sahrens 
4729ea8dc4b6Seschrock /*
4730ea8dc4b6Seschrock  * ==========================================================================
4731ea8dc4b6Seschrock  * SPA async task processing
4732ea8dc4b6Seschrock  * ==========================================================================
4733ea8dc4b6Seschrock  */
4734ea8dc4b6Seschrock 
/*
 * Recursively walk the vdev tree rooted at 'vd' and transition any
 * vdev with 'remove wanted' set to the REMOVED state, zeroing its
 * error counters.  Called with the vdev state lock held (the caller
 * in spa_async_thread() uses spa_vdev_state_enter()).
 */
static void
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = B_FALSE;
		vd->vdev_delayed_close = B_FALSE;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);

		/*
		 * We want to clear the stats, but we don't want to do a full
		 * vdev_clear() as that will cause us to throw away
		 * degraded/faulted state as well as attempt to reopen the
		 * device, all of which is a waste.
		 */
		vd->vdev_stat.vs_read_errors = 0;
		vd->vdev_stat.vs_write_errors = 0;
		vd->vdev_stat.vs_checksum_errors = 0;

		/* Make sure the state change reaches the next config sync. */
		vdev_state_dirty(vd->vdev_top);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}
4759fa9e4066Sahrens 
4760e14bb325SJeff Bonwick static void
4761e14bb325SJeff Bonwick spa_async_probe(spa_t *spa, vdev_t *vd)
4762e14bb325SJeff Bonwick {
4763e14bb325SJeff Bonwick 	if (vd->vdev_probe_wanted) {
4764*98d1cbfeSGeorge Wilson 		vd->vdev_probe_wanted = B_FALSE;
4765e14bb325SJeff Bonwick 		vdev_reopen(vd);	/* vdev_open() does the actual probe */
4766e14bb325SJeff Bonwick 	}
4767e14bb325SJeff Bonwick 
4768e14bb325SJeff Bonwick 	for (int c = 0; c < vd->vdev_children; c++)
4769e14bb325SJeff Bonwick 		spa_async_probe(spa, vd->vdev_child[c]);
4770e14bb325SJeff Bonwick }
4771e14bb325SJeff Bonwick 
4772573ca77eSGeorge Wilson static void
4773573ca77eSGeorge Wilson spa_async_autoexpand(spa_t *spa, vdev_t *vd)
4774573ca77eSGeorge Wilson {
4775573ca77eSGeorge Wilson 	sysevent_id_t eid;
4776573ca77eSGeorge Wilson 	nvlist_t *attr;
4777573ca77eSGeorge Wilson 	char *physpath;
4778573ca77eSGeorge Wilson 
4779573ca77eSGeorge Wilson 	if (!spa->spa_autoexpand)
4780573ca77eSGeorge Wilson 		return;
4781573ca77eSGeorge Wilson 
4782573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++) {
4783573ca77eSGeorge Wilson 		vdev_t *cvd = vd->vdev_child[c];
4784573ca77eSGeorge Wilson 		spa_async_autoexpand(spa, cvd);
4785573ca77eSGeorge Wilson 	}
4786573ca77eSGeorge Wilson 
4787573ca77eSGeorge Wilson 	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
4788573ca77eSGeorge Wilson 		return;
4789573ca77eSGeorge Wilson 
4790573ca77eSGeorge Wilson 	physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4791573ca77eSGeorge Wilson 	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
4792573ca77eSGeorge Wilson 
4793573ca77eSGeorge Wilson 	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4794573ca77eSGeorge Wilson 	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
4795573ca77eSGeorge Wilson 
4796573ca77eSGeorge Wilson 	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
4797573ca77eSGeorge Wilson 	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
4798573ca77eSGeorge Wilson 
4799573ca77eSGeorge Wilson 	nvlist_free(attr);
4800573ca77eSGeorge Wilson 	kmem_free(physpath, MAXPATHLEN);
4801573ca77eSGeorge Wilson }
4802573ca77eSGeorge Wilson 
/*
 * Body of the SPA async worker thread: atomically claim the pending
 * task mask, process each requested task type in turn, then announce
 * completion and exit.  spa_async_dispatch() creates a new thread when
 * more work arrives.
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	/* Atomically grab all currently pending tasks. */
	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		uint64_t old_space, new_space;

		mutex_enter(&spa_namespace_lock);
		old_space = metaslab_class_get_space(spa_normal_class(spa));
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		new_space = metaslab_class_get_space(spa_normal_class(spa));
		mutex_exit(&spa_namespace_lock);

		/*
		 * If the pool grew as a result of the config update,
		 * then log an internal history event.
		 */
		if (new_space != old_space) {
			spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
			    spa, NULL, CRED(),
			    "pool '%s' size: %llu(+%llu)",
			    spa_name(spa), new_space, new_space - old_space);
		}
	}

	/*
	 * See if any devices need to be marked REMOVED.  The aux vdevs
	 * (l2cache and spares) are not part of the root vdev tree, so
	 * they are walked explicitly.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/* Skip autoexpand notification while the pool I/O is suspended. */
	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_async_autoexpand(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}
4888ea8dc4b6Seschrock 
/*
 * Prevent new async work from being dispatched and wait for any
 * running async thread to exit.  Suspensions nest: each call must be
 * balanced by a spa_async_resume().
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}
4898ea8dc4b6Seschrock 
/*
 * Drop one level of async suspension (see spa_async_suspend()).
 * Note that this does not itself dispatch any pending tasks.
 */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}
4907ea8dc4b6Seschrock 
/*
 * Start the async worker thread if there is pending work, we are not
 * suspended, no worker is already running, and the root vnode exists
 * and is writeable.
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL && !vn_is_readonly(rootdir))
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}
4919ea8dc4b6Seschrock 
/*
 * Queue one or more SPA_ASYNC_* tasks ('task' is a bitmask).  This
 * only records the request; spa_async_dispatch() actually starts the
 * worker thread.
 */
void
spa_async_request(spa_t *spa, int task)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
}
4927fa9e4066Sahrens 
4928fa9e4066Sahrens /*
4929fa9e4066Sahrens  * ==========================================================================
4930fa9e4066Sahrens  * SPA syncing routines
4931fa9e4066Sahrens  * ==========================================================================
4932fa9e4066Sahrens  */
/*
 * Free every block pointer recorded on the deferred bplist 'bpl'
 * (each must have been born before 'txg'), then vacate the list.
 */
static void
spa_sync_deferred_bplist(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx, uint64_t txg)
{
	blkptr_t blk;
	uint64_t itor = 0;
	uint8_t c = 1;	/* dummy byte written below to pre-dirty the object */

	while (bplist_iterate(bpl, &itor, &blk) == 0) {
		ASSERT(blk.blk_birth < txg);
		zio_free(spa, txg, &blk);
	}

	bplist_vacate(bpl, tx);

	/*
	 * Pre-dirty the first block so we sync to convergence faster.
	 * (Usually only the first block is needed.)
	 */
	dmu_write(bpl->bpl_mos, spa->spa_deferred_bplist_obj, 0, 1, &c, tx);
}
4953b24ab676SJeff Bonwick 
/*
 * Callback: issue an asynchronous free of 'bp' at the tx's txg, as a
 * child of the parent zio passed in 'arg' (inheriting its flags).
 */
static void
spa_sync_free(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	zio_t *zio = arg;

	zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
	    zio->io_flags));
}
4962fa9e4066Sahrens 
4963fa9e4066Sahrens static void
496499653d4eSeschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
4965fa9e4066Sahrens {
4966fa9e4066Sahrens 	char *packed = NULL;
4967f7991ba4STim Haley 	size_t bufsize;
4968fa9e4066Sahrens 	size_t nvsize = 0;
4969fa9e4066Sahrens 	dmu_buf_t *db;
4970fa9e4066Sahrens 
497199653d4eSeschrock 	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
4972fa9e4066Sahrens 
4973f7991ba4STim Haley 	/*
4974f7991ba4STim Haley 	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
4975f7991ba4STim Haley 	 * information.  This avoids the dbuf_will_dirty() path and
4976f7991ba4STim Haley 	 * saves us a pre-read to get data we don't actually care about.
4977f7991ba4STim Haley 	 */
4978f7991ba4STim Haley 	bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
4979f7991ba4STim Haley 	packed = kmem_alloc(bufsize, KM_SLEEP);
4980fa9e4066Sahrens 
498199653d4eSeschrock 	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
4982ea8dc4b6Seschrock 	    KM_SLEEP) == 0);
4983f7991ba4STim Haley 	bzero(packed + nvsize, bufsize - nvsize);
4984fa9e4066Sahrens 
4985f7991ba4STim Haley 	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
4986fa9e4066Sahrens 
4987f7991ba4STim Haley 	kmem_free(packed, bufsize);
4988fa9e4066Sahrens 
498999653d4eSeschrock 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
4990fa9e4066Sahrens 	dmu_buf_will_dirty(db, tx);
4991fa9e4066Sahrens 	*(uint64_t *)db->db_data = nvsize;
4992ea8dc4b6Seschrock 	dmu_buf_rele(db, FTAG);
4993fa9e4066Sahrens }
4994fa9e4066Sahrens 
499599653d4eSeschrock static void
4996fa94a07fSbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
4997fa94a07fSbrendan     const char *config, const char *entry)
499899653d4eSeschrock {
499999653d4eSeschrock 	nvlist_t *nvroot;
5000fa94a07fSbrendan 	nvlist_t **list;
500199653d4eSeschrock 	int i;
500299653d4eSeschrock 
5003fa94a07fSbrendan 	if (!sav->sav_sync)
500499653d4eSeschrock 		return;
500599653d4eSeschrock 
500699653d4eSeschrock 	/*
5007fa94a07fSbrendan 	 * Update the MOS nvlist describing the list of available devices.
5008fa94a07fSbrendan 	 * spa_validate_aux() will have already made sure this nvlist is
50093d7072f8Seschrock 	 * valid and the vdevs are labeled appropriately.
501099653d4eSeschrock 	 */
5011fa94a07fSbrendan 	if (sav->sav_object == 0) {
5012fa94a07fSbrendan 		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
5013fa94a07fSbrendan 		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
5014fa94a07fSbrendan 		    sizeof (uint64_t), tx);
501599653d4eSeschrock 		VERIFY(zap_update(spa->spa_meta_objset,
5016fa94a07fSbrendan 		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
5017fa94a07fSbrendan 		    &sav->sav_object, tx) == 0);
501899653d4eSeschrock 	}
501999653d4eSeschrock 
502099653d4eSeschrock 	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
5021fa94a07fSbrendan 	if (sav->sav_count == 0) {
5022fa94a07fSbrendan 		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
502399653d4eSeschrock 	} else {
5024fa94a07fSbrendan 		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
5025fa94a07fSbrendan 		for (i = 0; i < sav->sav_count; i++)
5026fa94a07fSbrendan 			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
5027fa94a07fSbrendan 			    B_FALSE, B_FALSE, B_TRUE);
5028fa94a07fSbrendan 		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
5029fa94a07fSbrendan 		    sav->sav_count) == 0);
5030fa94a07fSbrendan 		for (i = 0; i < sav->sav_count; i++)
5031fa94a07fSbrendan 			nvlist_free(list[i]);
5032fa94a07fSbrendan 		kmem_free(list, sav->sav_count * sizeof (void *));
503399653d4eSeschrock 	}
503499653d4eSeschrock 
5035fa94a07fSbrendan 	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
503606eeb2adSek 	nvlist_free(nvroot);
503799653d4eSeschrock 
5038fa94a07fSbrendan 	sav->sav_sync = B_FALSE;
503999653d4eSeschrock }
504099653d4eSeschrock 
504199653d4eSeschrock static void
504299653d4eSeschrock spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
504399653d4eSeschrock {
504499653d4eSeschrock 	nvlist_t *config;
504599653d4eSeschrock 
5046e14bb325SJeff Bonwick 	if (list_is_empty(&spa->spa_config_dirty_list))
504799653d4eSeschrock 		return;
504899653d4eSeschrock 
5049e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
5050e14bb325SJeff Bonwick 
5051e14bb325SJeff Bonwick 	config = spa_config_generate(spa, spa->spa_root_vdev,
5052e14bb325SJeff Bonwick 	    dmu_tx_get_txg(tx), B_FALSE);
5053e14bb325SJeff Bonwick 
5054e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_STATE, FTAG);
505599653d4eSeschrock 
505699653d4eSeschrock 	if (spa->spa_config_syncing)
505799653d4eSeschrock 		nvlist_free(spa->spa_config_syncing);
505899653d4eSeschrock 	spa->spa_config_syncing = config;
505999653d4eSeschrock 
506099653d4eSeschrock 	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
506199653d4eSeschrock }
506299653d4eSeschrock 
/*
 * Set zpool properties.  Sync task: arg1 is the spa_t, arg2 the nvlist
 * of properties to set.  Persistent properties are written into the
 * pool-props ZAP object in the MOS; the corresponding in-core spa_t
 * fields are updated under spa_props_lock.
 */
static void
spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	spa_t *spa = arg1;
	objset_t *mos = spa->spa_meta_objset;
	nvlist_t *nvp = arg2;
	nvpair_t *elem;
	uint64_t intval;
	char *strval;
	zpool_prop_t prop;
	const char *propname;
	zprop_type_t proptype;

	mutex_enter(&spa->spa_props_lock);

	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvp, elem))) {
		switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
		case ZPOOL_PROP_VERSION:
			/*
			 * Only set version for non-zpool-creation cases
			 * (set/import). spa_create() needs special care
			 * for version setting.
			 */
			if (tx->tx_txg != TXG_INITIAL) {
				VERIFY(nvpair_value_uint64(elem,
				    &intval) == 0);
				/* Versions may only move forward. */
				ASSERT(intval <= SPA_VERSION);
				ASSERT(intval >= spa_version(spa));
				spa->spa_uberblock.ub_version = intval;
				vdev_config_dirty(spa->spa_root_vdev);
			}
			break;

		case ZPOOL_PROP_ALTROOT:
			/*
			 * 'altroot' is a non-persistent property. It should
			 * have been set temporarily at creation or import time.
			 */
			ASSERT(spa->spa_root != NULL);
			break;

		case ZPOOL_PROP_CACHEFILE:
			/*
			 * 'cachefile' is also a non-persistent property.
			 */
			break;
		default:
			/*
			 * Set pool property values in the poolprops mos object.
			 */
			if (spa->spa_pool_props_object == 0) {
				/* Lazily create the pool-props ZAP object. */
				VERIFY((spa->spa_pool_props_object =
				    zap_create(mos, DMU_OT_POOL_PROPS,
				    DMU_OT_NONE, 0, tx)) > 0);

				VERIFY(zap_update(mos,
				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
				    8, 1, &spa->spa_pool_props_object, tx)
				    == 0);
			}

			/* normalize the property name */
			propname = zpool_prop_to_name(prop);
			proptype = zpool_prop_get_type(prop);

			if (nvpair_type(elem) == DATA_TYPE_STRING) {
				ASSERT(proptype == PROP_TYPE_STRING);
				VERIFY(nvpair_value_string(elem, &strval) == 0);
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    1, strlen(strval) + 1, strval, tx) == 0);

			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
				VERIFY(nvpair_value_uint64(elem, &intval) == 0);

				if (proptype == PROP_TYPE_INDEX) {
					const char *unused;
					/* Sanity-check the index value. */
					VERIFY(zpool_prop_index_to_string(
					    prop, intval, &unused) == 0);
				}
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    8, 1, &intval, tx) == 0);
			} else {
				ASSERT(0); /* not allowed */
			}

			/*
			 * Keep the in-core copy in sync with the value
			 * just written to the MOS.
			 */
			switch (prop) {
			case ZPOOL_PROP_DELEGATION:
				spa->spa_delegation = intval;
				break;
			case ZPOOL_PROP_BOOTFS:
				spa->spa_bootfs = intval;
				break;
			case ZPOOL_PROP_FAILUREMODE:
				spa->spa_failmode = intval;
				break;
			case ZPOOL_PROP_AUTOEXPAND:
				spa->spa_autoexpand = intval;
				spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
				break;
			case ZPOOL_PROP_DEDUPDITTO:
				spa->spa_dedup_ditto = intval;
				break;
			default:
				break;
			}
		}

		/* log internal history if this is not a zpool create */
		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
		    tx->tx_txg != TXG_INITIAL) {
			spa_history_internal_log(LOG_POOL_PROPSET,
			    spa, tx, cr, "%s %lld %s",
			    nvpair_name(elem), intval, spa_name(spa));
		}
	}

	mutex_exit(&spa->spa_props_lock);
}
5187b1b8ab34Slling 
5188fa9e4066Sahrens /*
5189fa9e4066Sahrens  * Sync the specified transaction group.  New blocks may be dirtied as
5190fa9e4066Sahrens  * part of the process, so we iterate until it converges.
5191fa9e4066Sahrens  */
5192fa9e4066Sahrens void
5193fa9e4066Sahrens spa_sync(spa_t *spa, uint64_t txg)
5194fa9e4066Sahrens {
5195fa9e4066Sahrens 	dsl_pool_t *dp = spa->spa_dsl_pool;
5196fa9e4066Sahrens 	objset_t *mos = spa->spa_meta_objset;
5197b24ab676SJeff Bonwick 	bplist_t *defer_bpl = &spa->spa_deferred_bplist;
5198b24ab676SJeff Bonwick 	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
51990373e76bSbonwick 	vdev_t *rvd = spa->spa_root_vdev;
5200fa9e4066Sahrens 	vdev_t *vd;
5201fa9e4066Sahrens 	dmu_tx_t *tx;
5202e14bb325SJeff Bonwick 	int error;
5203fa9e4066Sahrens 
5204fa9e4066Sahrens 	/*
5205fa9e4066Sahrens 	 * Lock out configuration changes.
5206fa9e4066Sahrens 	 */
5207e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
5208fa9e4066Sahrens 
5209fa9e4066Sahrens 	spa->spa_syncing_txg = txg;
5210fa9e4066Sahrens 	spa->spa_sync_pass = 0;
5211fa9e4066Sahrens 
5212e14bb325SJeff Bonwick 	/*
5213e14bb325SJeff Bonwick 	 * If there are any pending vdev state changes, convert them
5214e14bb325SJeff Bonwick 	 * into config changes that go out with this transaction group.
5215e14bb325SJeff Bonwick 	 */
5216e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
52178ad4d6ddSJeff Bonwick 	while (list_head(&spa->spa_state_dirty_list) != NULL) {
52188ad4d6ddSJeff Bonwick 		/*
52198ad4d6ddSJeff Bonwick 		 * We need the write lock here because, for aux vdevs,
52208ad4d6ddSJeff Bonwick 		 * calling vdev_config_dirty() modifies sav_config.
52218ad4d6ddSJeff Bonwick 		 * This is ugly and will become unnecessary when we
52228ad4d6ddSJeff Bonwick 		 * eliminate the aux vdev wart by integrating all vdevs
52238ad4d6ddSJeff Bonwick 		 * into the root vdev tree.
52248ad4d6ddSJeff Bonwick 		 */
52258ad4d6ddSJeff Bonwick 		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
52268ad4d6ddSJeff Bonwick 		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
52278ad4d6ddSJeff Bonwick 		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
52288ad4d6ddSJeff Bonwick 			vdev_state_clean(vd);
52298ad4d6ddSJeff Bonwick 			vdev_config_dirty(vd);
52308ad4d6ddSJeff Bonwick 		}
52318ad4d6ddSJeff Bonwick 		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
52328ad4d6ddSJeff Bonwick 		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
5233e14bb325SJeff Bonwick 	}
5234e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_STATE, FTAG);
5235e14bb325SJeff Bonwick 
	/*
	 * NOTE(review): reopen the persistent deferred-free list so frees
	 * pushed out of this txg accumulate on disk -- confirm against
	 * bplist_open().
	 */
5236b24ab676SJeff Bonwick 	VERIFY(0 == bplist_open(defer_bpl, mos, spa->spa_deferred_bplist_obj));
5237fa9e4066Sahrens 
523899653d4eSeschrock 	tx = dmu_tx_create_assigned(dp, txg);
523999653d4eSeschrock 
524099653d4eSeschrock 	/*
5241e7437265Sahrens 	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
524299653d4eSeschrock 	 * set spa_deflate if we have no raid-z vdevs.
524399653d4eSeschrock 	 */
5244e7437265Sahrens 	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
5245e7437265Sahrens 	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
524699653d4eSeschrock 		int i;
524799653d4eSeschrock 
524899653d4eSeschrock 		for (i = 0; i < rvd->vdev_children; i++) {
524999653d4eSeschrock 			vd = rvd->vdev_child[i];
525099653d4eSeschrock 			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
525199653d4eSeschrock 				break;
525299653d4eSeschrock 		}
525399653d4eSeschrock 		if (i == rvd->vdev_children) {
525499653d4eSeschrock 			spa->spa_deflate = TRUE;
525599653d4eSeschrock 			VERIFY(0 == zap_add(spa->spa_meta_objset,
525699653d4eSeschrock 			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
525799653d4eSeschrock 			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
525899653d4eSeschrock 		}
525999653d4eSeschrock 	}
526099653d4eSeschrock 
5261088f3894Sahrens 	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
5262088f3894Sahrens 	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
5263088f3894Sahrens 		dsl_pool_create_origin(dp, tx);
5264088f3894Sahrens 
5265088f3894Sahrens 		/* Keeping the origin open increases spa_minref */
5266088f3894Sahrens 		spa->spa_minref += 3;
5267088f3894Sahrens 	}
5268088f3894Sahrens 
5269088f3894Sahrens 	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
5270088f3894Sahrens 	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
5271088f3894Sahrens 		dsl_pool_upgrade_clones(dp, tx);
5272088f3894Sahrens 	}
5273088f3894Sahrens 
5274fa9e4066Sahrens 	/*
5275fa9e4066Sahrens 	 * If anything has changed in this txg, push the deferred frees
5276fa9e4066Sahrens 	 * from the previous txg.  If not, leave them alone so that we
5277fa9e4066Sahrens 	 * don't generate work on an otherwise idle system.
5278fa9e4066Sahrens 	 */
5279fa9e4066Sahrens 	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
52801615a317Sek 	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
52811615a317Sek 	    !txg_list_empty(&dp->dp_sync_tasks, txg))
5282b24ab676SJeff Bonwick 		spa_sync_deferred_bplist(spa, defer_bpl, tx, txg);
5283fa9e4066Sahrens 
5284fa9e4066Sahrens 	/*
5285fa9e4066Sahrens 	 * Iterate to convergence.
5286fa9e4066Sahrens 	 */
5287fa9e4066Sahrens 	do {
5288b24ab676SJeff Bonwick 		int pass = ++spa->spa_sync_pass;
5289fa9e4066Sahrens 
5290fa9e4066Sahrens 		spa_sync_config_object(spa, tx);
5291fa94a07fSbrendan 		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
5292fa94a07fSbrendan 		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
5293fa94a07fSbrendan 		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
5294fa94a07fSbrendan 		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
5295ea8dc4b6Seschrock 		spa_errlog_sync(spa, txg);
5296fa9e4066Sahrens 		dsl_pool_sync(dp, txg);
5297fa9e4066Sahrens 
		/*
		 * In early passes, issue this txg's frees immediately; in
		 * later passes, enqueue them on the deferred list instead
		 * so the convergence loop can terminate.
		 */
5298b24ab676SJeff Bonwick 		if (pass <= SYNC_PASS_DEFERRED_FREE) {
5299b24ab676SJeff Bonwick 			zio_t *zio = zio_root(spa, NULL, NULL, 0);
5300b24ab676SJeff Bonwick 			bplist_sync(free_bpl, spa_sync_free, zio, tx);
5301b24ab676SJeff Bonwick 			VERIFY(zio_wait(zio) == 0);
5302b24ab676SJeff Bonwick 		} else {
5303b24ab676SJeff Bonwick 			bplist_sync(free_bpl, bplist_enqueue_cb, defer_bpl, tx);
5304fa9e4066Sahrens 		}
5305fa9e4066Sahrens 
5306b24ab676SJeff Bonwick 		ddt_sync(spa, txg);
5307b24ab676SJeff Bonwick 
		/* Wait for any scrub I/O still in flight to drain. */
5308afee20e4SGeorge Wilson 		mutex_enter(&spa->spa_scrub_lock);
5309afee20e4SGeorge Wilson 		while (spa->spa_scrub_inflight > 0)
5310afee20e4SGeorge Wilson 			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
5311afee20e4SGeorge Wilson 		mutex_exit(&spa->spa_scrub_lock);
5312afee20e4SGeorge Wilson 
5313b24ab676SJeff Bonwick 		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
5314b24ab676SJeff Bonwick 			vdev_sync(vd, txg);
5315b24ab676SJeff Bonwick 
5316b24ab676SJeff Bonwick 	} while (dmu_objset_is_dirty(mos, txg));
5317fa9e4066Sahrens 
53187a21607fSGeorge Wilson 	ASSERT(list_is_empty(&free_bpl->bpl_queue));
5319fa9e4066Sahrens 
5320b24ab676SJeff Bonwick 	bplist_close(defer_bpl);
5321fa9e4066Sahrens 
5322fa9e4066Sahrens 	/*
5323fa9e4066Sahrens 	 * Rewrite the vdev configuration (which includes the uberblock)
5324fa9e4066Sahrens 	 * to commit the transaction group.
53250373e76bSbonwick 	 *
532617f17c2dSbonwick 	 * If there are no dirty vdevs, we sync the uberblock to a few
532717f17c2dSbonwick 	 * random top-level vdevs that are known to be visible in the
5328e14bb325SJeff Bonwick 	 * config cache (see spa_vdev_add() for a complete description).
5329e14bb325SJeff Bonwick 	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
53300373e76bSbonwick 	 */
5331e14bb325SJeff Bonwick 	for (;;) {
5332e14bb325SJeff Bonwick 		/*
5333e14bb325SJeff Bonwick 		 * We hold SCL_STATE to prevent vdev open/close/etc.
5334e14bb325SJeff Bonwick 		 * while we're attempting to write the vdev labels.
5335e14bb325SJeff Bonwick 		 */
5336e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
5337e14bb325SJeff Bonwick 
5338e14bb325SJeff Bonwick 		if (list_is_empty(&spa->spa_config_dirty_list)) {
5339e14bb325SJeff Bonwick 			vdev_t *svd[SPA_DVAS_PER_BP];
5340e14bb325SJeff Bonwick 			int svdcount = 0;
5341e14bb325SJeff Bonwick 			int children = rvd->vdev_children;
5342e14bb325SJeff Bonwick 			int c0 = spa_get_random(children);
5343e14bb325SJeff Bonwick 
5344573ca77eSGeorge Wilson 			for (int c = 0; c < children; c++) {
5345e14bb325SJeff Bonwick 				vd = rvd->vdev_child[(c0 + c) % children];
5346e14bb325SJeff Bonwick 				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
5347e14bb325SJeff Bonwick 					continue;
5348e14bb325SJeff Bonwick 				svd[svdcount++] = vd;
5349e14bb325SJeff Bonwick 				if (svdcount == SPA_DVAS_PER_BP)
5350e14bb325SJeff Bonwick 					break;
5351e14bb325SJeff Bonwick 			}
			/*
			 * NOTE(review): the B_TRUE retry presumably relaxes
			 * the label-write requirements -- confirm against
			 * vdev_config_sync().
			 */
53528956713aSEric Schrock 			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
53538956713aSEric Schrock 			if (error != 0)
53548956713aSEric Schrock 				error = vdev_config_sync(svd, svdcount, txg,
53558956713aSEric Schrock 				    B_TRUE);
5356e14bb325SJeff Bonwick 		} else {
5357e14bb325SJeff Bonwick 			error = vdev_config_sync(rvd->vdev_child,
53588956713aSEric Schrock 			    rvd->vdev_children, txg, B_FALSE);
53598956713aSEric Schrock 			if (error != 0)
53608956713aSEric Schrock 				error = vdev_config_sync(rvd->vdev_child,
53618956713aSEric Schrock 				    rvd->vdev_children, txg, B_TRUE);
53620373e76bSbonwick 		}
5363e14bb325SJeff Bonwick 
5364e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_STATE, FTAG);
5365e14bb325SJeff Bonwick 
5366e14bb325SJeff Bonwick 		if (error == 0)
5367e14bb325SJeff Bonwick 			break;
		/*
		 * The config sync failed on both attempts; suspend the
		 * pool, wait until I/O can resume, then retry.
		 */
5368e14bb325SJeff Bonwick 		zio_suspend(spa, NULL);
5369e14bb325SJeff Bonwick 		zio_resume_wait(spa);
53700373e76bSbonwick 	}
537199653d4eSeschrock 	dmu_tx_commit(tx);
537299653d4eSeschrock 
53730373e76bSbonwick 	/*
53740373e76bSbonwick 	 * Clear the dirty config list.
5375fa9e4066Sahrens 	 */
5376e14bb325SJeff Bonwick 	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
53770373e76bSbonwick 		vdev_config_clean(vd);
53780373e76bSbonwick 
53790373e76bSbonwick 	/*
53800373e76bSbonwick 	 * Now that the new config has synced transactionally,
53810373e76bSbonwick 	 * let it become visible to the config cache.
53820373e76bSbonwick 	 */
53830373e76bSbonwick 	if (spa->spa_config_syncing != NULL) {
53840373e76bSbonwick 		spa_config_set(spa, spa->spa_config_syncing);
53850373e76bSbonwick 		spa->spa_config_txg = txg;
53860373e76bSbonwick 		spa->spa_config_syncing = NULL;
53870373e76bSbonwick 	}
5388fa9e4066Sahrens 
5389fa9e4066Sahrens 	spa->spa_ubsync = spa->spa_uberblock;
5390fa9e4066Sahrens 
5391b24ab676SJeff Bonwick 	dsl_pool_sync_done(dp, txg);
5392fa9e4066Sahrens 
5393fa9e4066Sahrens 	/*
5394fa9e4066Sahrens 	 * Update usable space statistics.
5395fa9e4066Sahrens 	 */
5396fa9e4066Sahrens 	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
5397fa9e4066Sahrens 		vdev_sync_done(vd, txg);
5398fa9e4066Sahrens 
5399485bbbf5SGeorge Wilson 	spa_update_dspace(spa);
5400485bbbf5SGeorge Wilson 
5401fa9e4066Sahrens 	/*
5402fa9e4066Sahrens 	 * It had better be the case that we didn't dirty anything
540399653d4eSeschrock 	 * since vdev_config_sync().
5404fa9e4066Sahrens 	 */
5405fa9e4066Sahrens 	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
5406fa9e4066Sahrens 	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
5407fa9e4066Sahrens 	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
54087a21607fSGeorge Wilson 	ASSERT(list_is_empty(&defer_bpl->bpl_queue));
54097a21607fSGeorge Wilson 	ASSERT(list_is_empty(&free_bpl->bpl_queue));
5410b24ab676SJeff Bonwick 
5411b24ab676SJeff Bonwick 	spa->spa_sync_pass = 0;
5412fa9e4066Sahrens 
5413e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_CONFIG, FTAG);
5414ea8dc4b6Seschrock 
5415468c413aSTim Haley 	spa_handle_ignored_writes(spa);
5416468c413aSTim Haley 
5417ea8dc4b6Seschrock 	/*
5418ea8dc4b6Seschrock 	 * If any async tasks have been requested, kick them off.
5419ea8dc4b6Seschrock 	 */
5420ea8dc4b6Seschrock 	spa_async_dispatch(spa);
5421fa9e4066Sahrens }
5422fa9e4066Sahrens 
5423fa9e4066Sahrens /*
5424fa9e4066Sahrens  * Sync all pools.  We don't want to hold the namespace lock across these
5425fa9e4066Sahrens  * operations, so we take a reference on the spa_t and drop the lock during the
5426fa9e4066Sahrens  * sync.
5427fa9e4066Sahrens  */
5428fa9e4066Sahrens void
5429fa9e4066Sahrens spa_sync_allpools(void)
5430fa9e4066Sahrens {
5431fa9e4066Sahrens 	spa_t *spa = NULL;
5432fa9e4066Sahrens 	mutex_enter(&spa_namespace_lock);
5433fa9e4066Sahrens 	while ((spa = spa_next(spa)) != NULL) {
5434e14bb325SJeff Bonwick 		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
5435fa9e4066Sahrens 			continue;
5436fa9e4066Sahrens 		spa_open_ref(spa, FTAG);
5437fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
5438fa9e4066Sahrens 		txg_wait_synced(spa_get_dsl(spa), 0);
5439fa9e4066Sahrens 		mutex_enter(&spa_namespace_lock);
5440fa9e4066Sahrens 		spa_close(spa, FTAG);
5441fa9e4066Sahrens 	}
5442fa9e4066Sahrens 	mutex_exit(&spa_namespace_lock);
5443fa9e4066Sahrens }
5444fa9e4066Sahrens 
5445fa9e4066Sahrens /*
5446fa9e4066Sahrens  * ==========================================================================
5447fa9e4066Sahrens  * Miscellaneous routines
5448fa9e4066Sahrens  * ==========================================================================
5449fa9e4066Sahrens  */
5450fa9e4066Sahrens 
5451fa9e4066Sahrens /*
5452fa9e4066Sahrens  * Remove all pools in the system.
5453fa9e4066Sahrens  */
5454fa9e4066Sahrens void
5455fa9e4066Sahrens spa_evict_all(void)
5456fa9e4066Sahrens {
5457fa9e4066Sahrens 	spa_t *spa;
5458fa9e4066Sahrens 
5459fa9e4066Sahrens 	/*
5460fa9e4066Sahrens 	 * Remove all cached state.  All pools should be closed now,
5461fa9e4066Sahrens 	 * so every spa in the AVL tree should be unreferenced.
5462fa9e4066Sahrens 	 */
5463fa9e4066Sahrens 	mutex_enter(&spa_namespace_lock);
	/*
	 * Each iteration restarts from the head of the namespace tree,
	 * since the namespace lock is dropped (and the tree may change)
	 * while async tasks are being suspended.
	 */
5464fa9e4066Sahrens 	while ((spa = spa_next(NULL)) != NULL) {
5465fa9e4066Sahrens 		/*
5466ea8dc4b6Seschrock 		 * Stop async tasks.  The async thread may need to detach
5467ea8dc4b6Seschrock 		 * a device that's been replaced, which requires grabbing
5468ea8dc4b6Seschrock 		 * spa_namespace_lock, so we must drop it here.
5469fa9e4066Sahrens 		 */
5470fa9e4066Sahrens 		spa_open_ref(spa, FTAG);
5471fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
5472ea8dc4b6Seschrock 		spa_async_suspend(spa);
5473fa9e4066Sahrens 		mutex_enter(&spa_namespace_lock);
5474fa9e4066Sahrens 		spa_close(spa, FTAG);
5475fa9e4066Sahrens 
		/* Tear down any remaining in-core state before removal. */
5476fa9e4066Sahrens 		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
5477fa9e4066Sahrens 			spa_unload(spa);
5478fa9e4066Sahrens 			spa_deactivate(spa);
5479fa9e4066Sahrens 		}
5480fa9e4066Sahrens 		spa_remove(spa);
5481fa9e4066Sahrens 	}
5482fa9e4066Sahrens 	mutex_exit(&spa_namespace_lock);
5483fa9e4066Sahrens }
5484ea8dc4b6Seschrock 
5485ea8dc4b6Seschrock vdev_t *
54866809eb4eSEric Schrock spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
5487ea8dc4b6Seschrock {
5488c5904d13Seschrock 	vdev_t *vd;
5489c5904d13Seschrock 	int i;
5490c5904d13Seschrock 
5491c5904d13Seschrock 	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
5492c5904d13Seschrock 		return (vd);
5493c5904d13Seschrock 
54946809eb4eSEric Schrock 	if (aux) {
5495c5904d13Seschrock 		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
5496c5904d13Seschrock 			vd = spa->spa_l2cache.sav_vdevs[i];
54976809eb4eSEric Schrock 			if (vd->vdev_guid == guid)
54986809eb4eSEric Schrock 				return (vd);
54996809eb4eSEric Schrock 		}
55006809eb4eSEric Schrock 
55016809eb4eSEric Schrock 		for (i = 0; i < spa->spa_spares.sav_count; i++) {
55026809eb4eSEric Schrock 			vd = spa->spa_spares.sav_vdevs[i];
5503c5904d13Seschrock 			if (vd->vdev_guid == guid)
5504c5904d13Seschrock 				return (vd);
5505c5904d13Seschrock 		}
5506c5904d13Seschrock 	}
5507c5904d13Seschrock 
5508c5904d13Seschrock 	return (NULL);
5509ea8dc4b6Seschrock }
5510eaca9bbdSeschrock 
/*
 * Upgrade the pool's on-disk format to 'version'.  The new version is
 * recorded in the in-core uberblock, the vdev config is dirtied so the
 * change is written out with the labels, and we wait for it to sync.
 */
5511eaca9bbdSeschrock void
5512990b4856Slling spa_upgrade(spa_t *spa, uint64_t version)
5513eaca9bbdSeschrock {
5514e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
5515eaca9bbdSeschrock 
5516eaca9bbdSeschrock 	/*
5517eaca9bbdSeschrock 	 * This should only be called for a non-faulted pool, and since a
5518eaca9bbdSeschrock 	 * future version would result in an unopenable pool, this shouldn't be
5519eaca9bbdSeschrock 	 * possible.
5520eaca9bbdSeschrock 	 */
5521e7437265Sahrens 	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
5522990b4856Slling 	ASSERT(version >= spa->spa_uberblock.ub_version);
5523eaca9bbdSeschrock 
5524990b4856Slling 	spa->spa_uberblock.ub_version = version;
	/* Dirty the config so the new version goes out with the labels. */
5525eaca9bbdSeschrock 	vdev_config_dirty(spa->spa_root_vdev);
5526eaca9bbdSeschrock 
5527e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
552899653d4eSeschrock 
	/* Wait for the version bump to be committed to disk. */
552999653d4eSeschrock 	txg_wait_synced(spa_get_dsl(spa), 0);
553099653d4eSeschrock }
553199653d4eSeschrock 
553299653d4eSeschrock boolean_t
553399653d4eSeschrock spa_has_spare(spa_t *spa, uint64_t guid)
553499653d4eSeschrock {
553599653d4eSeschrock 	int i;
553639c23413Seschrock 	uint64_t spareguid;
5537fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_spares;
553899653d4eSeschrock 
5539fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++)
5540fa94a07fSbrendan 		if (sav->sav_vdevs[i]->vdev_guid == guid)
554199653d4eSeschrock 			return (B_TRUE);
554299653d4eSeschrock 
5543fa94a07fSbrendan 	for (i = 0; i < sav->sav_npending; i++) {
5544fa94a07fSbrendan 		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
5545fa94a07fSbrendan 		    &spareguid) == 0 && spareguid == guid)
554639c23413Seschrock 			return (B_TRUE);
554739c23413Seschrock 	}
554839c23413Seschrock 
554999653d4eSeschrock 	return (B_FALSE);
5550eaca9bbdSeschrock }
5551b1b8ab34Slling 
555289a89ebfSlling /*
555389a89ebfSlling  * Check if a pool has an active shared spare device.
555489a89ebfSlling  * Note: reference count of an active spare is 2, as a spare and as a replace
555589a89ebfSlling  */
555689a89ebfSlling static boolean_t
555789a89ebfSlling spa_has_active_shared_spare(spa_t *spa)
555889a89ebfSlling {
555989a89ebfSlling 	int i, refcnt;
556089a89ebfSlling 	uint64_t pool;
556189a89ebfSlling 	spa_aux_vdev_t *sav = &spa->spa_spares;
556289a89ebfSlling 
556389a89ebfSlling 	for (i = 0; i < sav->sav_count; i++) {
556489a89ebfSlling 		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
556589a89ebfSlling 		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
556689a89ebfSlling 		    refcnt > 2)
556789a89ebfSlling 			return (B_TRUE);
556889a89ebfSlling 	}
556989a89ebfSlling 
557089a89ebfSlling 	return (B_FALSE);
557189a89ebfSlling }
557289a89ebfSlling 
55733d7072f8Seschrock /*
55743d7072f8Seschrock  * Post a sysevent corresponding to the given event.  The 'name' must be one of
55753d7072f8Seschrock  * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
55763d7072f8Seschrock  * filled in from the spa and (optionally) the vdev.  This doesn't do anything
55773d7072f8Seschrock  * in the userland libzpool, as we don't want consumers to misinterpret ztest
55783d7072f8Seschrock  * or zdb as real changes.
55793d7072f8Seschrock  */
55803d7072f8Seschrock void
55813d7072f8Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
55823d7072f8Seschrock {
55833d7072f8Seschrock #ifdef _KERNEL
55843d7072f8Seschrock 	sysevent_t		*ev;
55853d7072f8Seschrock 	sysevent_attr_list_t	*attr = NULL;
55863d7072f8Seschrock 	sysevent_value_t	value;
55873d7072f8Seschrock 	sysevent_id_t		eid;
55883d7072f8Seschrock 
	/*
	 * NOTE(review): SE_SLEEP presumably blocks until allocation
	 * succeeds, which is why 'ev' is not checked for NULL here --
	 * confirm against sysevent_alloc().
	 */
55893d7072f8Seschrock 	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
55903d7072f8Seschrock 	    SE_SLEEP);
55913d7072f8Seschrock 
	/* Always attach the pool name and pool guid. */
55923d7072f8Seschrock 	value.value_type = SE_DATA_TYPE_STRING;
55933d7072f8Seschrock 	value.value.sv_string = spa_name(spa);
55943d7072f8Seschrock 	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
55953d7072f8Seschrock 		goto done;
55963d7072f8Seschrock 
55973d7072f8Seschrock 	value.value_type = SE_DATA_TYPE_UINT64;
55983d7072f8Seschrock 	value.value.sv_uint64 = spa_guid(spa);
55993d7072f8Seschrock 	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
56003d7072f8Seschrock 		goto done;
56013d7072f8Seschrock 
	/* The vdev guid and path are optional, added only when 'vd' is given. */
56023d7072f8Seschrock 	if (vd) {
56033d7072f8Seschrock 		value.value_type = SE_DATA_TYPE_UINT64;
56043d7072f8Seschrock 		value.value.sv_uint64 = vd->vdev_guid;
56053d7072f8Seschrock 		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
56063d7072f8Seschrock 		    SE_SLEEP) != 0)
56073d7072f8Seschrock 			goto done;
56083d7072f8Seschrock 
56093d7072f8Seschrock 		if (vd->vdev_path) {
56103d7072f8Seschrock 			value.value_type = SE_DATA_TYPE_STRING;
56113d7072f8Seschrock 			value.value.sv_string = vd->vdev_path;
56123d7072f8Seschrock 			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
56133d7072f8Seschrock 			    &value, SE_SLEEP) != 0)
56143d7072f8Seschrock 				goto done;
56153d7072f8Seschrock 		}
56163d7072f8Seschrock 	}
56173d7072f8Seschrock 
	/*
	 * On successful attach the event takes ownership of the
	 * attribute list; clear 'attr' so the cleanup path below
	 * doesn't free it a second time.
	 */
5618b01c3b58Seschrock 	if (sysevent_attach_attributes(ev, attr) != 0)
5619b01c3b58Seschrock 		goto done;
5620b01c3b58Seschrock 	attr = NULL;
5621b01c3b58Seschrock 
56223d7072f8Seschrock 	(void) log_sysevent(ev, SE_SLEEP, &eid);
56233d7072f8Seschrock 
56243d7072f8Seschrock done:
56253d7072f8Seschrock 	if (attr)
56263d7072f8Seschrock 		sysevent_free_attr(attr);
56273d7072f8Seschrock 	sysevent_free(ev);
56283d7072f8Seschrock #endif
56293d7072f8Seschrock }
5630