xref: /illumos-gate/usr/src/uts/common/fs/zfs/spa.c (revision 9a4eed61)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
2199653d4eSeschrock 
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2015, Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 * Copyright 2013 Saso Kiselkov. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2016 Toomas Soome <tsoome@me.com>
 * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright (c) 2017, Intel Corporation.
 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
 */
36fa9e4066Sahrens 
/*
 * SPA: Storage Pool Allocator
 *
 * This file contains all the routines used when modifying on-disk SPA state.
 * This includes opening, importing, destroying, exporting a pool, and syncing a
 * pool.
 */
44fa9e4066Sahrens 
45fa9e4066Sahrens #include <sys/zfs_context.h>
46ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h>
47fa9e4066Sahrens #include <sys/spa_impl.h>
48fa9e4066Sahrens #include <sys/zio.h>
49fa9e4066Sahrens #include <sys/zio_checksum.h>
50fa9e4066Sahrens #include <sys/dmu.h>
51fa9e4066Sahrens #include <sys/dmu_tx.h>
52fa9e4066Sahrens #include <sys/zap.h>
53fa9e4066Sahrens #include <sys/zil.h>
54b24ab676SJeff Bonwick #include <sys/ddt.h>
55fa9e4066Sahrens #include <sys/vdev_impl.h>
565cabbc6bSPrashanth Sreenivasa #include <sys/vdev_removal.h>
575cabbc6bSPrashanth Sreenivasa #include <sys/vdev_indirect_mapping.h>
585cabbc6bSPrashanth Sreenivasa #include <sys/vdev_indirect_births.h>
59094e47e9SGeorge Wilson #include <sys/vdev_initialize.h>
60084fd14fSBrian Behlendorf #include <sys/vdev_trim.h>
61fa9e4066Sahrens #include <sys/metaslab.h>
6288ecc943SGeorge Wilson #include <sys/metaslab_impl.h>
63e0f1c0afSOlaf Faaland #include <sys/mmp.h>
64fa9e4066Sahrens #include <sys/uberblock_impl.h>
65fa9e4066Sahrens #include <sys/txg.h>
66fa9e4066Sahrens #include <sys/avl.h>
675cabbc6bSPrashanth Sreenivasa #include <sys/bpobj.h>
68fa9e4066Sahrens #include <sys/dmu_traverse.h>
69b1b8ab34Slling #include <sys/dmu_objset.h>
70fa9e4066Sahrens #include <sys/unique.h>
71fa9e4066Sahrens #include <sys/dsl_pool.h>
72b1b8ab34Slling #include <sys/dsl_dataset.h>
73fa9e4066Sahrens #include <sys/dsl_dir.h>
74fa9e4066Sahrens #include <sys/dsl_prop.h>
75b1b8ab34Slling #include <sys/dsl_synctask.h>
76fa9e4066Sahrens #include <sys/fs/zfs.h>
77fa94a07fSbrendan #include <sys/arc.h>
78fa9e4066Sahrens #include <sys/callb.h>
7995173954Sek #include <sys/systeminfo.h>
80e7cbe64fSgw #include <sys/spa_boot.h>
81573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h>
823f9d6ad7SLin Ling #include <sys/dsl_scan.h>
83ad135b5dSChristopher Siden #include <sys/zfeature.h>
843b2aab18SMatthew Ahrens #include <sys/dsl_destroy.h>
85770499e1SDan Kimmel #include <sys/abd.h>
86fa9e4066Sahrens 
875679c89fSjv #ifdef	_KERNEL
88dedec472SJack Meng #include <sys/bootprops.h>
8935a5a358SJonathan Adams #include <sys/callb.h>
9035a5a358SJonathan Adams #include <sys/cpupart.h>
9135a5a358SJonathan Adams #include <sys/pool.h>
9235a5a358SJonathan Adams #include <sys/sysdc.h>
9335a5a358SJonathan Adams #include <sys/zone.h>
945679c89fSjv #endif	/* _KERNEL */
955679c89fSjv 
96990b4856Slling #include "zfs_prop.h"
97b7b97454Sperrin #include "zfs_comutil.h"
98990b4856Slling 
/*
 * The interval, in seconds, at which failed configuration cache file writes
 * should be retried.
 */
int zfs_ccw_retry_interval = 300;
1043cb69f73SWill Andrews 
/*
 * How the taskq(s) for one zio_taskqs[][] slot are sized.  ZTI_NMODES is the
 * number of modes, not a mode itself.
 */
typedef enum zti_modes {
	ZTI_MODE_FIXED,			/* value is # of threads (min 1) */
	ZTI_MODE_BATCH,			/* cpu-intensive; value is ignored */
	ZTI_MODE_NULL,			/* don't create a taskq */
	ZTI_NMODES
} zti_modes_t;
111416e0cd8Sek 
/*
 * Brace initializers for a zio_taskq_info_t, in field order
 * { zti_mode, zti_value, zti_count }.
 *
 * ZTI_P(n, q)	fixed mode with value n and count q
 * ZTI_BATCH	batch mode, value 0, count 1
 * ZTI_NULL	null mode, value 0, count 0
 */
#define	ZTI_P(n, q)	{ ZTI_MODE_FIXED, (n), (q) }
#define	ZTI_BATCH	{ ZTI_MODE_BATCH, 0, 1 }
#define	ZTI_NULL	{ ZTI_MODE_NULL, 0, 0 }

/* Single-taskq shorthands built on ZTI_P(). */
#define	ZTI_N(n)	ZTI_P(n, 1)
#define	ZTI_ONE		ZTI_N(1)
1182e0c549eSJonathan Adams 
1192e0c549eSJonathan Adams typedef struct zio_taskq_info {
120ec94d322SAdam Leventhal 	zti_modes_t zti_mode;
12180eb36f2SGeorge Wilson 	uint_t zti_value;
122ec94d322SAdam Leventhal 	uint_t zti_count;
1232e0c549eSJonathan Adams } zio_taskq_info_t;
1242e0c549eSJonathan Adams 
1252e0c549eSJonathan Adams static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
12635a5a358SJonathan Adams 	"issue", "issue_high", "intr", "intr_high"
1272e0c549eSJonathan Adams };
1282e0c549eSJonathan Adams 
12980eb36f2SGeorge Wilson /*
130ec94d322SAdam Leventhal  * This table defines the taskq settings for each ZFS I/O type. When
131ec94d322SAdam Leventhal  * initializing a pool, we use this table to create an appropriately sized
132ec94d322SAdam Leventhal  * taskq. Some operations are low volume and therefore have a small, static
133ec94d322SAdam Leventhal  * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
134ec94d322SAdam Leventhal  * macros. Other operations process a large amount of data; the ZTI_BATCH
135ec94d322SAdam Leventhal  * macro causes us to create a taskq oriented for throughput. Some operations
136084fd14fSBrian Behlendorf  * are so high frequency and short-lived that the taskq itself can become a
137ec94d322SAdam Leventhal  * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
138ec94d322SAdam Leventhal  * additional degree of parallelism specified by the number of threads per-
139ec94d322SAdam Leventhal  * taskq and the number of taskqs; when dispatching an event in this case, the
140ec94d322SAdam Leventhal  * particular taskq is chosen at random.
141ec94d322SAdam Leventhal  *
142ec94d322SAdam Leventhal  * The different taskq priorities are to handle the different contexts (issue
143ec94d322SAdam Leventhal  * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
144ec94d322SAdam Leventhal  * need to be handled with minimum delay.
14580eb36f2SGeorge Wilson  */
14680eb36f2SGeorge Wilson const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
14780eb36f2SGeorge Wilson 	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
148ec94d322SAdam Leventhal 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* NULL */
1491b497ab8SAdam H. Leventhal 	{ ZTI_N(8),	ZTI_NULL,	ZTI_P(12, 8),	ZTI_NULL }, /* READ */
150ec94d322SAdam Leventhal 	{ ZTI_BATCH,	ZTI_N(5),	ZTI_N(8),	ZTI_N(5) }, /* WRITE */
151ec94d322SAdam Leventhal 	{ ZTI_P(12, 8),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* FREE */
152ec94d322SAdam Leventhal 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* CLAIM */
153ec94d322SAdam Leventhal 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* IOCTL */
154084fd14fSBrian Behlendorf 	{ ZTI_N(4),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* TRIM */
1552e0c549eSJonathan Adams };
1562e0c549eSJonathan Adams 
1573b2aab18SMatthew Ahrens static void spa_sync_version(void *arg, dmu_tx_t *tx);
1583b2aab18SMatthew Ahrens static void spa_sync_props(void *arg, dmu_tx_t *tx);
15989a89ebfSlling static boolean_t spa_has_active_shared_spare(spa_t *spa);
16086714001SSerapheim Dimitropoulos static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport);
161cb04b873SMark J Musante static void spa_vdev_resilver_done(spa_t *spa);
162990b4856Slling 
16369962b56SMatthew Ahrens uint_t		zio_taskq_batch_pct = 75;	/* 1 thread per cpu in pset */
16435a5a358SJonathan Adams id_t		zio_taskq_psrset_bind = PS_NONE;
16535a5a358SJonathan Adams boolean_t	zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
16635a5a358SJonathan Adams uint_t		zio_taskq_basedc = 80;		/* base duty cycle */
16735a5a358SJonathan Adams 
16835a5a358SJonathan Adams boolean_t	spa_create_process = B_TRUE;	/* no process ==> no sysdc */
16901f55e48SGeorge Wilson extern int	zfs_sync_pass_deferred_free;
17035a5a358SJonathan Adams 
171e144c4e6SPavel Zakharov /*
172e144c4e6SPavel Zakharov  * Report any spa_load_verify errors found, but do not fail spa_load.
173e144c4e6SPavel Zakharov  * This is used by zdb to analyze non-idle pools.
174e144c4e6SPavel Zakharov  */
175e144c4e6SPavel Zakharov boolean_t	spa_load_verify_dryrun = B_FALSE;
176e144c4e6SPavel Zakharov 
/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"
18235a5a358SJonathan Adams 
1836f793812SPavel Zakharov /*
1846f793812SPavel Zakharov  * For debugging purposes: print out vdev tree during pool import.
1856f793812SPavel Zakharov  */
1866f793812SPavel Zakharov boolean_t	spa_load_print_vdev_tree = B_FALSE;
1876f793812SPavel Zakharov 
/*
 * A non-zero value for zfs_max_missing_tvds means that we allow importing
 * pools with missing top-level vdevs. This is strictly intended for advanced
 * pool recovery cases since missing data is almost inevitable. Pools with
 * missing devices can only be imported read-only for safety reasons, and their
 * fail-mode will be automatically set to "continue".
 *
 * With 1 missing vdev we should be able to import the pool and mount all
 * datasets. User data that was not modified after the missing device has been
 * added should be recoverable. This means that snapshots created prior to the
 * addition of that device should be completely intact.
 *
 * With 2 missing vdevs, some datasets may fail to mount since there are
 * dataset statistics that are stored as regular metadata. Some data might be
 * recoverable if those vdevs were added recently.
 *
 * With 3 or more missing vdevs, the pool is severely damaged and MOS entries
 * may be missing entirely. Chances of data recovery are very low. Note that
 * there are also risks of performing an inadvertent rewind as we might be
 * missing all the vdevs with the latest uberblocks.
 */
uint64_t	zfs_max_missing_tvds = 0;
2106f793812SPavel Zakharov 
2116f793812SPavel Zakharov /*
2126f793812SPavel Zakharov  * The parameters below are similar to zfs_max_missing_tvds but are only
2136f793812SPavel Zakharov  * intended for a preliminary open of the pool with an untrusted config which
2146f793812SPavel Zakharov  * might be incomplete or out-dated.
2156f793812SPavel Zakharov  *
2166f793812SPavel Zakharov  * We are more tolerant for pools opened from a cachefile since we could have
2176f793812SPavel Zakharov  * an out-dated cachefile where a device removal was not registered.
2186f793812SPavel Zakharov  * We could have set the limit arbitrarily high but in the case where devices
2196f793812SPavel Zakharov  * are really missing we would want to return the proper error codes; we chose
2206f793812SPavel Zakharov  * SPA_DVAS_PER_BP - 1 so that some copies of the MOS would still be available
2216f793812SPavel Zakharov  * and we get a chance to retrieve the trusted config.
2226f793812SPavel Zakharov  */
2236f793812SPavel Zakharov uint64_t	zfs_max_missing_tvds_cachefile = SPA_DVAS_PER_BP - 1;
22486714001SSerapheim Dimitropoulos 
/*
 * In the case where config was assembled by scanning device paths (/dev/dsk
 * by default) we are less tolerant since all the existing devices should have
 * been detected and we want spa_load to return the right error codes.
 */
uint64_t	zfs_max_missing_tvds_scan = 0;
2316f793812SPavel Zakharov 
232e830fb12SKody A Kantor /*
233e830fb12SKody A Kantor  * Interval in seconds at which to poll spare vdevs for health.
234e830fb12SKody A Kantor  * Setting this to zero disables spare polling.
235e830fb12SKody A Kantor  * Set to three hours by default.
236e830fb12SKody A Kantor  */
237e830fb12SKody A Kantor uint_t		spa_spare_poll_interval_seconds = 60 * 60 * 3;
238e830fb12SKody A Kantor 
23986714001SSerapheim Dimitropoulos /*
24086714001SSerapheim Dimitropoulos  * Debugging aid that pauses spa_sync() towards the end.
24186714001SSerapheim Dimitropoulos  */
24286714001SSerapheim Dimitropoulos boolean_t	zfs_pause_spa_sync = B_FALSE;
24386714001SSerapheim Dimitropoulos 
/*
 * ==========================================================================
 * SPA properties routines
 * ==========================================================================
 */
249990b4856Slling 
250990b4856Slling /*
251990b4856Slling  * Add a (source=src, propname=propval) list to an nvlist.
252990b4856Slling  */
2539d82f4f6Slling static void
spa_prop_add_list(nvlist_t * nvl,zpool_prop_t prop,char * strval,uint64_t intval,zprop_source_t src)254990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
255990b4856Slling     uint64_t intval, zprop_source_t src)
256990b4856Slling {
257990b4856Slling 	const char *propname = zpool_prop_to_name(prop);
258990b4856Slling 	nvlist_t *propval;
259990b4856Slling 
2609d82f4f6Slling 	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2619d82f4f6Slling 	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
262990b4856Slling 
2639d82f4f6Slling 	if (strval != NULL)
2649d82f4f6Slling 		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
2659d82f4f6Slling 	else
2669d82f4f6Slling 		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
267990b4856Slling 
2689d82f4f6Slling 	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
269990b4856Slling 	nvlist_free(propval);
270990b4856Slling }
271990b4856Slling 
272990b4856Slling /*
273990b4856Slling  * Get property values from the spa configuration.
274990b4856Slling  */
2759d82f4f6Slling static void
spa_prop_get_config(spa_t * spa,nvlist_t ** nvp)276990b4856Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
277990b4856Slling {
2784263d13fSGeorge Wilson 	vdev_t *rvd = spa->spa_root_vdev;
279ad135b5dSChristopher Siden 	dsl_pool_t *pool = spa->spa_dsl_pool;
2802e4c9986SGeorge Wilson 	uint64_t size, alloc, cap, version;
281990b4856Slling 	zprop_source_t src = ZPROP_SRC_NONE;
282c5904d13Seschrock 	spa_config_dirent_t *dp;
2832e4c9986SGeorge Wilson 	metaslab_class_t *mc = spa_normal_class(spa);
284990b4856Slling 
285e14bb325SJeff Bonwick 	ASSERT(MUTEX_HELD(&spa->spa_props_lock));
286e14bb325SJeff Bonwick 
2874263d13fSGeorge Wilson 	if (rvd != NULL) {
288663207adSDon Brady 		alloc = metaslab_class_get_alloc(mc);
289663207adSDon Brady 		alloc += metaslab_class_get_alloc(spa_special_class(spa));
290663207adSDon Brady 		alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
291663207adSDon Brady 
292663207adSDon Brady 		size = metaslab_class_get_space(mc);
293663207adSDon Brady 		size += metaslab_class_get_space(spa_special_class(spa));
294663207adSDon Brady 		size += metaslab_class_get_space(spa_dedup_class(spa));
295663207adSDon Brady 
296379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
297379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
298485bbbf5SGeorge Wilson 		spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
299485bbbf5SGeorge Wilson 		spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
300485bbbf5SGeorge Wilson 		    size - alloc, src);
30186714001SSerapheim Dimitropoulos 		spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL,
30286714001SSerapheim Dimitropoulos 		    spa->spa_checkpoint_info.sci_dspace, src);
3034263d13fSGeorge Wilson 
3042e4c9986SGeorge Wilson 		spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL,
3052e4c9986SGeorge Wilson 		    metaslab_class_fragmentation(mc), src);
3062e4c9986SGeorge Wilson 		spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL,
3072e4c9986SGeorge Wilson 		    metaslab_class_expandable_space(mc), src);
308f9af39baSGeorge Wilson 		spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL,
309f9af39baSGeorge Wilson 		    (spa_mode(spa) == FREAD), src);
310379c004dSEric Schrock 
311485bbbf5SGeorge Wilson 		cap = (size == 0) ? 0 : (alloc * 100 / size);
312379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
313379c004dSEric Schrock 
314b24ab676SJeff Bonwick 		spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
315b24ab676SJeff Bonwick 		    ddt_get_pool_dedup_ratio(spa), src);
316b24ab676SJeff Bonwick 
317379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
3184263d13fSGeorge Wilson 		    rvd->vdev_state, src);
319379c004dSEric Schrock 
320379c004dSEric Schrock 		version = spa_version(spa);
321379c004dSEric Schrock 		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
322379c004dSEric Schrock 			src = ZPROP_SRC_DEFAULT;
323379c004dSEric Schrock 		else
324379c004dSEric Schrock 			src = ZPROP_SRC_LOCAL;
325379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
326379c004dSEric Schrock 	}
327990b4856Slling 
328ad135b5dSChristopher Siden 	if (pool != NULL) {
329ad135b5dSChristopher Siden 		/*
330ad135b5dSChristopher Siden 		 * The $FREE directory was introduced in SPA_VERSION_DEADLISTS,
331ad135b5dSChristopher Siden 		 * when opening pools before this version freedir will be NULL.
332ad135b5dSChristopher Siden 		 */
3337fd05ac4SMatthew Ahrens 		if (pool->dp_free_dir != NULL) {
334ad135b5dSChristopher Siden 			spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
335c1379625SJustin T. Gibbs 			    dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes,
336c1379625SJustin T. Gibbs 			    src);
337ad135b5dSChristopher Siden 		} else {
338ad135b5dSChristopher Siden 			spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
339ad135b5dSChristopher Siden 			    NULL, 0, src);
340ad135b5dSChristopher Siden 		}
3417fd05ac4SMatthew Ahrens 
3427fd05ac4SMatthew Ahrens 		if (pool->dp_leak_dir != NULL) {
3437fd05ac4SMatthew Ahrens 			spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL,
344c1379625SJustin T. Gibbs 			    dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes,
345c1379625SJustin T. Gibbs 			    src);
3467fd05ac4SMatthew Ahrens 		} else {
3477fd05ac4SMatthew Ahrens 			spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED,
3487fd05ac4SMatthew Ahrens 			    NULL, 0, src);
3497fd05ac4SMatthew Ahrens 		}
350ad135b5dSChristopher Siden 	}
351ad135b5dSChristopher Siden 
3529d82f4f6Slling 	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
353990b4856Slling 
3548704186eSDan McDonald 	if (spa->spa_comment != NULL) {
3558704186eSDan McDonald 		spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment,
3568704186eSDan McDonald 		    0, ZPROP_SRC_LOCAL);
3578704186eSDan McDonald 	}
3588704186eSDan McDonald 
3599d82f4f6Slling 	if (spa->spa_root != NULL)
3609d82f4f6Slling 		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
3619d82f4f6Slling 		    0, ZPROP_SRC_LOCAL);
362990b4856Slling 
363b5152584SMatthew Ahrens 	if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) {
364b5152584SMatthew Ahrens 		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
365b5152584SMatthew Ahrens 		    MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE);
366b5152584SMatthew Ahrens 	} else {
367b5152584SMatthew Ahrens 		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
368b5152584SMatthew Ahrens 		    SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE);
369b5152584SMatthew Ahrens 	}
370b5152584SMatthew Ahrens 
37154811da5SToomas Soome 	if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) {
37254811da5SToomas Soome 		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
37354811da5SToomas Soome 		    DNODE_MAX_SIZE, ZPROP_SRC_NONE);
37454811da5SToomas Soome 	} else {
37554811da5SToomas Soome 		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
37654811da5SToomas Soome 		    DNODE_MIN_SIZE, ZPROP_SRC_NONE);
37754811da5SToomas Soome 	}
37854811da5SToomas Soome 
379c5904d13Seschrock 	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
380c5904d13Seschrock 		if (dp->scd_path == NULL) {
3819d82f4f6Slling 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
382c5904d13Seschrock 			    "none", 0, ZPROP_SRC_LOCAL);
383c5904d13Seschrock 		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
3849d82f4f6Slling 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
385c5904d13Seschrock 			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
3862f8aaab3Seschrock 		}
3872f8aaab3Seschrock 	}
388990b4856Slling }
389990b4856Slling 
390990b4856Slling /*
391990b4856Slling  * Get zpool property values.
392990b4856Slling  */
393990b4856Slling int
spa_prop_get(spa_t * spa,nvlist_t ** nvp)394990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp)
395990b4856Slling {
396b24ab676SJeff Bonwick 	objset_t *mos = spa->spa_meta_objset;
397990b4856Slling 	zap_cursor_t zc;
398990b4856Slling 	zap_attribute_t za;
399990b4856Slling 	int err;
400990b4856Slling 
4019d82f4f6Slling 	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
402990b4856Slling 
403e14bb325SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);
404e14bb325SJeff Bonwick 
405990b4856Slling 	/*
406990b4856Slling 	 * Get properties from the spa config.
407990b4856Slling 	 */
4089d82f4f6Slling 	spa_prop_get_config(spa, nvp);
409990b4856Slling 
410990b4856Slling 	/* If no pool property object, no more prop to get. */
411afee20e4SGeorge Wilson 	if (mos == NULL || spa->spa_pool_props_object == 0) {
412990b4856Slling 		mutex_exit(&spa->spa_props_lock);
413990b4856Slling 		return (0);
414990b4856Slling 	}
415990b4856Slling 
416990b4856Slling 	/*
417990b4856Slling 	 * Get properties from the MOS pool property object.
418990b4856Slling 	 */
419990b4856Slling 	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
420990b4856Slling 	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
421990b4856Slling 	    zap_cursor_advance(&zc)) {
422990b4856Slling 		uint64_t intval = 0;
423990b4856Slling 		char *strval = NULL;
424990b4856Slling 		zprop_source_t src = ZPROP_SRC_DEFAULT;
425990b4856Slling 		zpool_prop_t prop;
426990b4856Slling 
4274ae5f5f0SAlan Somers 		if ((prop = zpool_name_to_prop(za.za_name)) == ZPOOL_PROP_INVAL)
428990b4856Slling 			continue;
429990b4856Slling 
430990b4856Slling 		switch (za.za_integer_length) {
431990b4856Slling 		case 8:
432990b4856Slling 			/* integer property */
433990b4856Slling 			if (za.za_first_integer !=
434990b4856Slling 			    zpool_prop_default_numeric(prop))
435990b4856Slling 				src = ZPROP_SRC_LOCAL;
436990b4856Slling 
437990b4856Slling 			if (prop == ZPOOL_PROP_BOOTFS) {
438990b4856Slling 				dsl_pool_t *dp;
439990b4856Slling 				dsl_dataset_t *ds = NULL;
440990b4856Slling 
441990b4856Slling 				dp = spa_get_dsl(spa);
4423b2aab18SMatthew Ahrens 				dsl_pool_config_enter(dp, FTAG);
443094e47e9SGeorge Wilson 				err = dsl_dataset_hold_obj(dp,
444094e47e9SGeorge Wilson 				    za.za_first_integer, FTAG, &ds);
445094e47e9SGeorge Wilson 				if (err != 0) {
4463b2aab18SMatthew Ahrens 					dsl_pool_config_exit(dp, FTAG);
447990b4856Slling 					break;
448990b4856Slling 				}
449990b4856Slling 
4509adfa60dSMatthew Ahrens 				strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN,
451990b4856Slling 				    KM_SLEEP);
452990b4856Slling 				dsl_dataset_name(ds, strval);
453745cd3c5Smaybee 				dsl_dataset_rele(ds, FTAG);
4543b2aab18SMatthew Ahrens 				dsl_pool_config_exit(dp, FTAG);
455990b4856Slling 			} else {
456990b4856Slling 				strval = NULL;
457990b4856Slling 				intval = za.za_first_integer;
458990b4856Slling 			}
459990b4856Slling 
4609d82f4f6Slling 			spa_prop_add_list(*nvp, prop, strval, intval, src);
461990b4856Slling 
462990b4856Slling 			if (strval != NULL)
4639adfa60dSMatthew Ahrens 				kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN);
464990b4856Slling 
465990b4856Slling 			break;
466990b4856Slling 
467990b4856Slling 		case 1:
468990b4856Slling 			/* string property */
469990b4856Slling 			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
470990b4856Slling 			err = zap_lookup(mos, spa->spa_pool_props_object,
471990b4856Slling 			    za.za_name, 1, za.za_num_integers, strval);
472990b4856Slling 			if (err) {
473990b4856Slling 				kmem_free(strval, za.za_num_integers);
474990b4856Slling 				break;
475990b4856Slling 			}
4769d82f4f6Slling 			spa_prop_add_list(*nvp, prop, strval, 0, src);
477990b4856Slling 			kmem_free(strval, za.za_num_integers);
478990b4856Slling 			break;
479990b4856Slling 
480990b4856Slling 		default:
481990b4856Slling 			break;
482990b4856Slling 		}
483990b4856Slling 	}
484990b4856Slling 	zap_cursor_fini(&zc);
485990b4856Slling 	mutex_exit(&spa->spa_props_lock);
486990b4856Slling out:
487990b4856Slling 	if (err && err != ENOENT) {
488990b4856Slling 		nvlist_free(*nvp);
4899d82f4f6Slling 		*nvp = NULL;
490990b4856Slling 		return (err);
491990b4856Slling 	}
492990b4856Slling 
493990b4856Slling 	return (0);
494990b4856Slling }
495990b4856Slling 
496990b4856Slling /*
497990b4856Slling  * Validate the given pool properties nvlist and modify the list
498990b4856Slling  * for the property values to be set.
499990b4856Slling  */
500990b4856Slling static int
spa_prop_validate(spa_t * spa,nvlist_t * props)501990b4856Slling spa_prop_validate(spa_t *spa, nvlist_t *props)
502990b4856Slling {
503990b4856Slling 	nvpair_t *elem;
504990b4856Slling 	int error = 0, reset_bootfs = 0;
505d5285caeSGeorge Wilson 	uint64_t objnum = 0;
506ad135b5dSChristopher Siden 	boolean_t has_feature = B_FALSE;
507990b4856Slling 
508990b4856Slling 	elem = NULL;
509990b4856Slling 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
510990b4856Slling 		uint64_t intval;
511ad135b5dSChristopher Siden 		char *strval, *slash, *check, *fname;
512ad135b5dSChristopher Siden 		const char *propname = nvpair_name(elem);
513ad135b5dSChristopher Siden 		zpool_prop_t prop = zpool_name_to_prop(propname);
514ad135b5dSChristopher Siden 
515ad135b5dSChristopher Siden 		switch (prop) {
5164ae5f5f0SAlan Somers 		case ZPOOL_PROP_INVAL:
517ad135b5dSChristopher Siden 			if (!zpool_prop_feature(propname)) {
518be6fd75aSMatthew Ahrens 				error = SET_ERROR(EINVAL);
519ad135b5dSChristopher Siden 				break;
520ad135b5dSChristopher Siden 			}
521990b4856Slling 
522ad135b5dSChristopher Siden 			/*
523ad135b5dSChristopher Siden 			 * Sanitize the input.
524ad135b5dSChristopher Siden 			 */
525ad135b5dSChristopher Siden 			if (nvpair_type(elem) != DATA_TYPE_UINT64) {
526be6fd75aSMatthew Ahrens 				error = SET_ERROR(EINVAL);
527ad135b5dSChristopher Siden 				break;
528ad135b5dSChristopher Siden 			}
529990b4856Slling 
530ad135b5dSChristopher Siden 			if (nvpair_value_uint64(elem, &intval) != 0) {
531be6fd75aSMatthew Ahrens 				error = SET_ERROR(EINVAL);
532ad135b5dSChristopher Siden 				break;
533ad135b5dSChristopher Siden 			}
534ad135b5dSChristopher Siden 
535ad135b5dSChristopher Siden 			if (intval != 0) {
536be6fd75aSMatthew Ahrens 				error = SET_ERROR(EINVAL);
537ad135b5dSChristopher Siden 				break;
538ad135b5dSChristopher Siden 			}
539ad135b5dSChristopher Siden 
540ad135b5dSChristopher Siden 			fname = strchr(propname, '@') + 1;
541ad135b5dSChristopher Siden 			if (zfeature_lookup_name(fname, NULL) != 0) {
542be6fd75aSMatthew Ahrens 				error = SET_ERROR(EINVAL);
543ad135b5dSChristopher Siden 				break;
544ad135b5dSChristopher Siden 			}
545ad135b5dSChristopher Siden 
546ad135b5dSChristopher Siden 			has_feature = B_TRUE;
547ad135b5dSChristopher Siden 			break;
548990b4856Slling 
549990b4856Slling 		case ZPOOL_PROP_VERSION:
550990b4856Slling 			error = nvpair_value_uint64(elem, &intval);
551990b4856Slling 			if (!error &&
552ad135b5dSChristopher Siden 			    (intval < spa_version(spa) ||
553ad135b5dSChristopher Siden 			    intval > SPA_VERSION_BEFORE_FEATURES ||
554ad135b5dSChristopher Siden 			    has_feature))
555be6fd75aSMatthew Ahrens 				error = SET_ERROR(EINVAL);
556990b4856Slling 			break;
557990b4856Slling 
558990b4856Slling 		case ZPOOL_PROP_DELEGATION:
559990b4856Slling 		case ZPOOL_PROP_AUTOREPLACE:
560d5b5bb25SRich Morris 		case ZPOOL_PROP_LISTSNAPS:
561573ca77eSGeorge Wilson 		case ZPOOL_PROP_AUTOEXPAND:
562084fd14fSBrian Behlendorf 		case ZPOOL_PROP_AUTOTRIM:
563990b4856Slling 			error = nvpair_value_uint64(elem, &intval);
564990b4856Slling 			if (!error && intval > 1)
565be6fd75aSMatthew Ahrens 				error = SET_ERROR(EINVAL);
566990b4856Slling 			break;
567990b4856Slling 
568e0f1c0afSOlaf Faaland 		case ZPOOL_PROP_MULTIHOST:
569e0f1c0afSOlaf Faaland 			error = nvpair_value_uint64(elem, &intval);
570e0f1c0afSOlaf Faaland 			if (!error && intval > 1)
571e0f1c0afSOlaf Faaland 				error = SET_ERROR(EINVAL);
572e0f1c0afSOlaf Faaland 
573e0f1c0afSOlaf Faaland 			if (!error && !spa_get_hostid())
574e0f1c0afSOlaf Faaland 				error = SET_ERROR(ENOTSUP);
575e0f1c0afSOlaf Faaland 
576e0f1c0afSOlaf Faaland 			break;
577e0f1c0afSOlaf Faaland 
578990b4856Slling 		case ZPOOL_PROP_BOOTFS:
57925f89ee2SJeff Bonwick 			/*
58025f89ee2SJeff Bonwick 			 * If the pool version is less than SPA_VERSION_BOOTFS,
58125f89ee2SJeff Bonwick 			 * or the pool is still being created (version == 0),
58225f89ee2SJeff Bonwick 			 * the bootfs property cannot be set.
58325f89ee2SJeff Bonwick 			 */
584990b4856Slling 			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
585be6fd75aSMatthew Ahrens 				error = SET_ERROR(ENOTSUP);
586990b4856Slling 				break;
587990b4856Slling 			}
588990b4856Slling 
589990b4856Slling 			/*
59015e6edf1Sgw 			 * Make sure the vdev config is bootable
591990b4856Slling 			 */
59215e6edf1Sgw 			if (!vdev_is_bootable(spa->spa_root_vdev)) {
593be6fd75aSMatthew Ahrens 				error = SET_ERROR(ENOTSUP);
594990b4856Slling 				break;
595990b4856Slling 			}
596990b4856Slling 
597990b4856Slling 			reset_bootfs = 1;
598990b4856Slling 
599990b4856Slling 			error = nvpair_value_string(elem, &strval);
600990b4856Slling 
601990b4856Slling 			if (!error) {
602ad135b5dSChristopher Siden 				objset_t *os;
603b5152584SMatthew Ahrens 				uint64_t propval;
60415e6edf1Sgw 
605990b4856Slling 				if (strval == NULL || strval[0] == '\0') {
606990b4856Slling 					objnum = zpool_prop_default_numeric(
607990b4856Slling 					    ZPOOL_PROP_BOOTFS);
608990b4856Slling 					break;
609990b4856Slling 				}
610990b4856Slling 
611094e47e9SGeorge Wilson 				error = dmu_objset_hold(strval, FTAG, &os);
612094e47e9SGeorge Wilson 				if (error != 0)
613990b4856Slling 					break;
61415e6edf1Sgw 
615b5152584SMatthew Ahrens 				/*
616b5152584SMatthew Ahrens 				 * Must be ZPL, and its property settings
61754811da5SToomas Soome 				 * must be supported.
618b5152584SMatthew Ahrens 				 */
619503ad85cSMatthew Ahrens 
620503ad85cSMatthew Ahrens 				if (dmu_objset_type(os) != DMU_OST_ZFS) {
621be6fd75aSMatthew Ahrens 					error = SET_ERROR(ENOTSUP);
6223b2aab18SMatthew Ahrens 				} else if ((error =
6233b2aab18SMatthew Ahrens 				    dsl_prop_get_int_ds(dmu_objset_ds(os),
62415e6edf1Sgw 				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
625b5152584SMatthew Ahrens 				    &propval)) == 0 &&
626b5152584SMatthew Ahrens 				    !BOOTFS_COMPRESS_VALID(propval)) {
627b5152584SMatthew Ahrens 					error = SET_ERROR(ENOTSUP);
62815e6edf1Sgw 				} else {
62915e6edf1Sgw 					objnum = dmu_objset_id(os);
63015e6edf1Sgw 				}
631503ad85cSMatthew Ahrens 				dmu_objset_rele(os, FTAG);
632990b4856Slling 			}
633990b4856Slling 			break;
634e14bb325SJeff Bonwick 
6350a4e9518Sgw 		case ZPOOL_PROP_FAILUREMODE:
6360a4e9518Sgw 			error = nvpair_value_uint64(elem, &intval);
6370a4e9518Sgw 			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
6380a4e9518Sgw 			    intval > ZIO_FAILURE_MODE_PANIC))
639be6fd75aSMatthew Ahrens 				error = SET_ERROR(EINVAL);
640