spa.c revision 0ce4bbcb47d8f86307fb8d2c84fd0f4e070f576e
1fa9e406ahrens/*
2fa9e406ahrens * CDDL HEADER START
3fa9e406ahrens *
4fa9e406ahrens * The contents of this file are subject to the terms of the
5ea8dc4beschrock * Common Development and Distribution License (the "License").
6ea8dc4beschrock * You may not use this file except in compliance with the License.
7fa9e406ahrens *
8fa9e406ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e406ahrens * or http://www.opensolaris.org/os/licensing.
10fa9e406ahrens * See the License for the specific language governing permissions
11fa9e406ahrens * and limitations under the License.
12fa9e406ahrens *
13fa9e406ahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e406ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e406ahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e406ahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e406ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e406ahrens *
19fa9e406ahrens * CDDL HEADER END
20fa9e406ahrens */
2199653d4eschrock
22fa9e406ahrens/*
2398d1cbfGeorge Wilson * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
240ce4bbcMatthew Ahrens * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
251437283Hans Rosenfeld * Copyright (c) 2015, Nexenta Systems, Inc.  All rights reserved.
26bc9014eJustin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2745818eeMatthew Ahrens * Copyright 2013 Saso Kiselkov. All rights reserved.
28c3d26abMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
29c8811bdToomas Soome * Copyright 2016 Toomas Soome <tsoome@me.com>
3082f63c3Jerry Jelinek * Copyright 2018 Joyent, Inc.
31663207aDon Brady * Copyright (c) 2017, Intel Corporation.
321702cceAlek Pinchuk * Copyright (c) 2017 Datto Inc.
330fb055eAndy Fiddaman * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
345aeb947Garrett D'Amore */
35fa9e406ahrens
36fa9e406ahrens/*
373e30c24Will Andrews * SPA: Storage Pool Allocator
383e30c24Will Andrews *
39fa9e406ahrens * This file contains all the routines used when modifying on-disk SPA state.
40fa9e406ahrens * This includes opening, importing, destroying, exporting a pool, and syncing a
41fa9e406ahrens * pool.
42fa9e406ahrens */
43fa9e406ahrens
44fa9e406ahrens#include <sys/zfs_context.h>
45ea8dc4beschrock#include <sys/fm/fs/zfs.h>
46fa9e406ahrens#include <sys/spa_impl.h>
47fa9e406ahrens#include <sys/zio.h>
48fa9e406ahrens#include <sys/zio_checksum.h>
49fa9e406ahrens#include <sys/dmu.h>
50fa9e406ahrens#include <sys/dmu_tx.h>
51fa9e406ahrens#include <sys/zap.h>
52fa9e406ahrens#include <sys/zil.h>
53b24ab67Jeff Bonwick#include <sys/ddt.h>
54fa9e406ahrens#include <sys/vdev_impl.h>
555cabbc6Prashanth Sreenivasa#include <sys/vdev_removal.h>
565cabbc6Prashanth Sreenivasa#include <sys/vdev_indirect_mapping.h>
575cabbc6Prashanth Sreenivasa#include <sys/vdev_indirect_births.h>
58094e47eGeorge Wilson#include <sys/vdev_initialize.h>
59fa9e406ahrens#include <sys/metaslab.h>
6088ecc94George Wilson#include <sys/metaslab_impl.h>
61e0f1c0aOlaf Faaland#include <sys/mmp.h>
62fa9e406ahrens#include <sys/uberblock_impl.h>
63fa9e406ahrens#include <sys/txg.h>
64fa9e406ahrens#include <sys/avl.h>
655cabbc6Prashanth Sreenivasa#include <sys/bpobj.h>
66fa9e406ahrens#include <sys/dmu_traverse.h>
67b1b8ab3lling#include <sys/dmu_objset.h>
68fa9e406ahrens#include <sys/unique.h>
69fa9e406ahrens#include <sys/dsl_pool.h>
70b1b8ab3lling#include <sys/dsl_dataset.h>
71fa9e406ahrens#include <sys/dsl_dir.h>
72fa9e406ahrens#include <sys/dsl_prop.h>
73b1b8ab3lling#include <sys/dsl_synctask.h>
74fa9e406ahrens#include <sys/fs/zfs.h>
75fa94a07brendan#include <sys/arc.h>
76fa9e406ahrens#include <sys/callb.h>
779517395ek#include <sys/systeminfo.h>
78e7cbe64gw#include <sys/spa_boot.h>
79573ca77George Wilson#include <sys/zfs_ioctl.h>
803f9d6adLin Ling#include <sys/dsl_scan.h>
81ad135b5Christopher Siden#include <sys/zfeature.h>
823b2aab1Matthew Ahrens#include <sys/dsl_destroy.h>
83770499eDan Kimmel#include <sys/abd.h>
84fa9e406ahrens
855679c89jv#ifdef	_KERNEL
86dedec47Jack Meng#include <sys/bootprops.h>
8735a5a35Jonathan Adams#include <sys/callb.h>
8835a5a35Jonathan Adams#include <sys/cpupart.h>
8935a5a35Jonathan Adams#include <sys/pool.h>
9035a5a35Jonathan Adams#include <sys/sysdc.h>
9135a5a35Jonathan Adams#include <sys/zone.h>
925679c89jv#endif	/* _KERNEL */
935679c89jv
94990b485lling#include "zfs_prop.h"
95b7b9745perrin#include "zfs_comutil.h"
96990b485lling
/*
 * How long to wait, in seconds, before retrying a configuration cache file
 * write that failed.
 */
int zfs_ccw_retry_interval = 300;
1023cb69f7Will Andrews
/*
 * Modes a zio taskq table entry can be in.
 */
typedef enum zti_modes {
	/* value is # of threads (min 1) */
	ZTI_MODE_FIXED,
	/* cpu-intensive; value is ignored */
	ZTI_MODE_BATCH,
	/* don't create a taskq */
	ZTI_MODE_NULL,
	/* count of the modes above; must stay last */
	ZTI_NMODES
} zti_modes_t;
109416e0cdek
/*
 * Initializers for zio_taskq_info_t, in { zti_mode, zti_value, zti_count }
 * order.
 */

/* fixed mode: 'ntaskqs' taskqs of 'nthreads' threads each */
#define	ZTI_P(nthreads, ntaskqs)	\
	{ ZTI_MODE_FIXED, (nthreads), (ntaskqs) }
/* fixed mode: a single taskq of 'nthreads' threads */
#define	ZTI_N(nthreads)	ZTI_P(nthreads, 1)
/* fixed mode: a single taskq with a single thread */
#define	ZTI_ONE		ZTI_N(1)

/* batch mode: a single taskq; the value field is left at 0 */
#define	ZTI_BATCH	{ ZTI_MODE_BATCH, 0, 1 }
/* null mode: value and count are both 0 */
#define	ZTI_NULL	{ ZTI_MODE_NULL, 0, 0 }
1162e0c549Jonathan Adams
1172e0c549Jonathan Adamstypedef struct zio_taskq_info {
118ec94d32Adam Leventhal	zti_modes_t zti_mode;
11980eb36fGeorge Wilson	uint_t zti_value;
120ec94d32Adam Leventhal	uint_t zti_count;
1212e0c549Jonathan Adams} zio_taskq_info_t;
1222e0c549Jonathan Adams
1232e0c549Jonathan Adamsstatic const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
12435a5a35Jonathan Adams	"issue", "issue_high", "intr", "intr_high"
1252e0c549Jonathan Adams};
1262e0c549Jonathan Adams
12780eb36fGeorge Wilson/*
128ec94d32Adam Leventhal * This table defines the taskq settings for each ZFS I/O type. When
129ec94d32Adam Leventhal * initializing a pool, we use this table to create an appropriately sized
130ec94d32Adam Leventhal * taskq. Some operations are low volume and therefore have a small, static
131ec94d32Adam Leventhal * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
132ec94d32Adam Leventhal * macros. Other operations process a large amount of data; the ZTI_BATCH
133ec94d32Adam Leventhal * macro causes us to create a taskq oriented for throughput. Some operations
134ec94d32Adam Leventhal * are so high frequency and short-lived that the taskq itself can become a a
135ec94d32Adam Leventhal * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
136ec94d32Adam Leventhal * additional degree of parallelism specified by the number of threads per-
137ec94d32Adam Leventhal * taskq and the number of taskqs; when dispatching an event in this case, the
138ec94d32Adam Leventhal * particular taskq is chosen at random.
139ec94d32Adam Leventhal *
140ec94d32Adam Leventhal * The different taskq priorities are to handle the different contexts (issue
141ec94d32Adam Leventhal * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
142ec94d32Adam Leventhal * need to be handled with minimum delay.
14380eb36fGeorge Wilson */
14480eb36fGeorge Wilsonconst zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
14580eb36fGeorge Wilson	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
146ec94d32Adam Leventhal	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* NULL */
1471b497abAdam H. Leventhal	{ ZTI_N(8),	ZTI_NULL,	ZTI_P(12, 8),	ZTI_NULL }, /* READ */
148ec94d32Adam Leventhal	{ ZTI_BATCH,	ZTI_N(5),	ZTI_N(8),	ZTI_N(5) }, /* WRITE */
149ec94d32Adam Leventhal	{ ZTI_P(12, 8),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* FREE */
150ec94d32Adam Leventhal	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* CLAIM */
151ec94d32Adam Leventhal	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* IOCTL */
1522e0c549Jonathan Adams};
1532e0c549Jonathan Adams
1543b2aab1Matthew Ahrensstatic void spa_sync_version(void *arg, dmu_tx_t *tx);
1553b2aab1Matthew Ahrensstatic void spa_sync_props(void *arg, dmu_tx_t *tx);
15689a89ebllingstatic boolean_t spa_has_active_shared_spare(spa_t *spa);
1578671400Serapheim Dimitropoulosstatic int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport);
158cb04b87Mark J Musantestatic void spa_vdev_resilver_done(spa_t *spa);
159990b485lling
16069962b5Matthew Ahrensuint_t		zio_taskq_batch_pct = 75;	/* 1 thread per cpu in pset */
16135a5a35Jonathan Adamsid_t		zio_taskq_psrset_bind = PS_NONE;
16235a5a35Jonathan Adamsboolean_t	zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
16335a5a35Jonathan Adamsuint_t		zio_taskq_basedc = 80;		/* base duty cycle */
16435a5a35Jonathan Adams
16535a5a35Jonathan Adamsboolean_t	spa_create_process = B_TRUE;	/* no process ==> no sysdc */
16601f55e4George Wilsonextern int	zfs_sync_pass_deferred_free;
16735a5a35Jonathan Adams
16835a5a35Jonathan Adams/*
169e144c4ePavel Zakharov * Report any spa_load_verify errors found, but do not fail spa_load.
170e144c4ePavel Zakharov * This is used by zdb to analyze non-idle pools.
171e144c4ePavel Zakharov */
172e144c4ePavel Zakharovboolean_t	spa_load_verify_dryrun = B_FALSE;
173e144c4ePavel Zakharov
/*
 * Name given to a spa_t that is imported only temporarily, in order to get
 * the vdev stats associated with the imported devices.  It is deliberately
 * an illegal pool name.
 */
#define	TRYIMPORT_NAME	"$import"
17935a5a35Jonathan Adams
180990b485lling/*
1816f79381Pavel Zakharov * For debugging purposes: print out vdev tree during pool import.
1826f79381Pavel Zakharov */
1836f79381Pavel Zakharovboolean_t	spa_load_print_vdev_tree = B_FALSE;
1846f79381Pavel Zakharov
/*
 * A non-zero value for zfs_max_missing_tvds allows pools with missing
 * top-level vdevs to be imported.  This is strictly intended for advanced
 * pool recovery cases, since missing data is almost inevitable.  For safety
 * reasons, pools with missing devices can only be imported read-only, and
 * their fail-mode is automatically set to "continue".
 *
 * With 1 missing vdev we should be able to import the pool and mount all
 * datasets.  User data that was not modified after the missing device was
 * added should be recoverable, which means that snapshots created prior to
 * the addition of that device should be completely intact.
 *
 * With 2 missing vdevs, some datasets may fail to mount, because some
 * dataset statistics are stored as regular metadata.  Some data might be
 * recoverable if those vdevs were added recently.
 *
 * With 3 or more missing vdevs, the pool is severely damaged and MOS entries
 * may be missing entirely.  Chances of data recovery are very low.  There is
 * also a risk of performing an inadvertent rewind, as we might be missing
 * all the vdevs with the latest uberblocks.
 */
uint64_t	zfs_max_missing_tvds = 0;
2076f79381Pavel Zakharov
2086f79381Pavel Zakharov/*
2096f79381Pavel Zakharov * The parameters below are similar to zfs_max_missing_tvds but are only
2106f79381Pavel Zakharov * intended for a preliminary open of the pool with an untrusted config which
2116f79381Pavel Zakharov * might be incomplete or out-dated.
2126f79381Pavel Zakharov *
2136f79381Pavel Zakharov * We are more tolerant for pools opened from a cachefile since we could have
2146f79381Pavel Zakharov * an out-dated cachefile where a device removal was not registered.
2156f79381Pavel Zakharov * We could have set the limit arbitrarily high but in the case where devices
2166f79381Pavel Zakharov * are really missing we would want to return the proper error codes; we chose
2176f79381Pavel Zakharov * SPA_DVAS_PER_BP - 1 so that some copies of the MOS would still be available
2186f79381Pavel Zakharov * and we get a chance to retrieve the trusted config.
2196f79381Pavel Zakharov */
2206f79381Pavel Zakharovuint64_t	zfs_max_missing_tvds_cachefile = SPA_DVAS_PER_BP - 1;
2218671400Serapheim Dimitropoulos
/*
 * When the config was assembled by scanning device paths (/dev/dsk by
 * default) we are less tolerant, since all the existing devices should have
 * been detected and we want spa_load to return the right error codes.
 */
uint64_t	zfs_max_missing_tvds_scan = 0;
2286f79381Pavel Zakharov
2296f79381Pavel Zakharov/*
2308671400Serapheim Dimitropoulos * Debugging aid that pauses spa_sync() towards the end.
2318671400Serapheim Dimitropoulos */
2328671400Serapheim Dimitropoulosboolean_t	zfs_pause_spa_sync = B_FALSE;
2338671400Serapheim Dimitropoulos
2348671400Serapheim Dimitropoulos/*
235990b485lling * ==========================================================================
236990b485lling * SPA properties routines
237990b485lling * ==========================================================================
238990b485lling */
239990b485lling
240990b485lling/*
241990b485lling * Add a (source=src, propname=propval) list to an nvlist.
242990b485lling */
2439d82f4fllingstatic void
244990b485llingspa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
245990b485lling    uint64_t intval, zprop_source_t src)
246990b485lling{
247990b485lling	const char *propname = zpool_prop_to_name(prop);
248990b485lling	nvlist_t *propval;
249990b485lling
2509d82f4flling	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2519d82f4flling	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
252990b485lling
2539d82f4flling	if (strval != NULL)
2549d82f4flling		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
2559d82f4flling	else
2569d82f4flling		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
257990b485lling
2589d82f4flling	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
259990b485lling	nvlist_free(propval);
260990b485lling}
261990b485lling
262990b485lling/*
263990b485lling * Get property values from the spa configuration.
264990b485lling */
2659d82f4fllingstatic void
266990b485llingspa_prop_get_config(spa_t *spa, nvlist_t **nvp)
267990b485lling{
2684263d13George Wilson	vdev_t *rvd = spa->spa_root_vdev;
269ad135b5Christopher Siden	dsl_pool_t *pool = spa->spa_dsl_pool;
2702e4c998George Wilson	uint64_t size, alloc, cap, version;
271990b485lling	zprop_source_t src = ZPROP_SRC_NONE;
272c5904d1eschrock	spa_config_dirent_t *dp;
2732e4c998George Wilson	metaslab_class_t *mc = spa_normal_class(spa);
274990b485lling
275e14bb32Jeff Bonwick	ASSERT(MUTEX_HELD(&spa->spa_props_lock));
276e14bb32Jeff Bonwick
2774263d13George Wilson	if (rvd != NULL) {
278663207aDon Brady		alloc = metaslab_class_get_alloc(mc);
279663207aDon Brady		alloc += metaslab_class_get_alloc(spa_special_class(spa));
280663207aDon Brady		alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
281663207aDon Brady
282663207aDon Brady		size = metaslab_class_get_space(mc);
283663207aDon Brady		size += metaslab_class_get_space(spa_special_class(spa));
284663207aDon Brady		size += metaslab_class_get_space(spa_dedup_class(spa));
285663207aDon Brady
286379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
287379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
288485bbbfGeorge Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
289485bbbfGeorge Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
290485bbbfGeorge Wilson		    size - alloc, src);
2918671400Serapheim Dimitropoulos		spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL,
2928671400Serapheim Dimitropoulos		    spa->spa_checkpoint_info.sci_dspace, src);
2934263d13George Wilson
2942e4c998George Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL,
2952e4c998George Wilson		    metaslab_class_fragmentation(mc), src);
2962e4c998George Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL,
2972e4c998George Wilson		    metaslab_class_expandable_space(mc), src);
298f9af39bGeorge Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL,
299f9af39bGeorge Wilson		    (spa_mode(spa) == FREAD), src);
300379c004Eric Schrock
301485bbbfGeorge Wilson		cap = (size == 0) ? 0 : (alloc * 100 / size);
302379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
303379c004Eric Schrock
304b24ab67Jeff Bonwick		spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
305b24ab67Jeff Bonwick		    ddt_get_pool_dedup_ratio(spa), src);
306b24ab67Jeff Bonwick
307379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
3084263d13George Wilson		    rvd->vdev_state, src);
309379c004Eric Schrock
310379c004Eric Schrock		version = spa_version(spa);
311379c004Eric Schrock		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
312379c004Eric Schrock			src = ZPROP_SRC_DEFAULT;
313379c004Eric Schrock		else
314379c004Eric Schrock			src = ZPROP_SRC_LOCAL;
315379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
316379c004Eric Schrock	}
317990b485lling
318ad135b5Christopher Siden	if (pool != NULL) {
319ad135b5Christopher Siden		/*
320ad135b5Christopher Siden		 * The $FREE directory was introduced in SPA_VERSION_DEADLISTS,
321ad135b5Christopher Siden		 * when opening pools before this version freedir will be NULL.
322ad135b5Christopher Siden		 */
3237fd05acMatthew Ahrens		if (pool->dp_free_dir != NULL) {
324ad135b5Christopher Siden			spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
325c137962Justin T. Gibbs			    dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes,
326c137962Justin T. Gibbs			    src);
327ad135b5Christopher Siden		} else {
328ad135b5Christopher Siden			spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
329ad135b5Christopher Siden			    NULL, 0, src);
330ad135b5Christopher Siden		}
3317fd05acMatthew Ahrens
3327fd05acMatthew Ahrens		if (pool->dp_leak_dir != NULL) {
3337fd05acMatthew Ahrens			spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL,
334c137962Justin T. Gibbs			    dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes,
335c137962Justin T. Gibbs			    src);
3367fd05acMatthew Ahrens		} else {
3377fd05acMatthew Ahrens			spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED,
3387fd05acMatthew Ahrens			    NULL, 0, src);
3397fd05acMatthew Ahrens		}
340ad135b5Christopher Siden	}
341ad135b5Christopher Siden
3429d82f4flling	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
343990b485lling
3448704186Dan McDonald	if (spa->spa_comment != NULL) {
3458704186Dan McDonald		spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment,
3468704186Dan McDonald		    0, ZPROP_SRC_LOCAL);
3478704186Dan McDonald	}
3488704186Dan McDonald
3499d82f4flling	if (spa->spa_root != NULL)
3509d82f4flling		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
3519d82f4flling		    0, ZPROP_SRC_LOCAL);
352990b485lling
353b515258Matthew Ahrens	if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) {
354b515258Matthew Ahrens		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
355b515258Matthew Ahrens		    MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE);
356b515258Matthew Ahrens	} else {
357b515258Matthew Ahrens		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
358b515258Matthew Ahrens		    SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE);
359b515258Matthew Ahrens	}
360b515258Matthew Ahrens
36154811daToomas Soome	if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) {
36254811daToomas Soome		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
36354811daToomas Soome		    DNODE_MAX_SIZE, ZPROP_SRC_NONE);
36454811daToomas Soome	} else {
36554811daToomas Soome		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
36654811daToomas Soome		    DNODE_MIN_SIZE, ZPROP_SRC_NONE);
36754811daToomas Soome	}
36854811daToomas Soome
369c5904d1eschrock	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
370c5904d1eschrock		if (dp->scd_path == NULL) {
3719d82f4flling			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
372c5904d1eschrock			    "none", 0, ZPROP_SRC_LOCAL);
373c5904d1eschrock		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
3749d82f4flling			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
375c5904d1eschrock			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
3762f8aaabeschrock		}
3772f8aaabeschrock	}
378990b485lling}
379990b485lling
380990b485lling/*
381990b485lling * Get zpool property values.
382990b485lling */
383990b485llingint
384990b485llingspa_prop_get(spa_t *spa, nvlist_t **nvp)
385990b485lling{
386b24ab67Jeff Bonwick	objset_t *mos = spa->spa_meta_objset;
387990b485lling	zap_cursor_t zc;
388990b485lling	zap_attribute_t za;
389990b485lling	int err;
390990b485lling
3919d82f4flling	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
392990b485lling
393e14bb32Jeff Bonwick	mutex_enter(&spa->spa_props_lock);
394e14bb32Jeff Bonwick
395990b485lling	/*
396990b485lling	 * Get properties from the spa config.
397990b485lling	 */
3989d82f4flling	spa_prop_get_config(spa, nvp);
399990b485lling
400990b485lling	/* If no pool property object, no more prop to get. */
401afee20eGeorge Wilson	if (mos == NULL || spa->spa_pool_props_object == 0) {
402990b485lling		mutex_exit(&spa->spa_props_lock);
403990b485lling		return (0);
404990b485lling	}
405990b485lling
406990b485lling	/*
407990b485lling	 * Get properties from the MOS pool property object.
408990b485lling	 */
409990b485lling	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
410990b485lling	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
411990b485lling	    zap_cursor_advance(&zc)) {
412990b485lling		uint64_t intval = 0;
413990b485lling		char *strval = NULL;
414990b485lling		zprop_source_t src = ZPROP_SRC_DEFAULT;
415990b485lling		zpool_prop_t prop;
416990b485lling
4174ae5f5fAlan Somers		if ((prop = zpool_name_to_prop(za.za_name)) == ZPOOL_PROP_INVAL)
418990b485lling			continue;
419990b485lling
420990b485lling		switch (za.za_integer_length) {
421990b485lling		case 8:
422990b485lling			/* integer property */
423990b485lling			if (za.za_first_integer !=
424990b485lling			    zpool_prop_default_numeric(prop))
425990b485lling				src = ZPROP_SRC_LOCAL;
426990b485lling
427990b485lling			if (prop == ZPOOL_PROP_BOOTFS) {
428990b485lling				dsl_pool_t *dp;
429990b485lling				dsl_dataset_t *ds = NULL;
430990b485lling
431990b485lling				dp = spa_get_dsl(spa);
4323b2aab1Matthew Ahrens				dsl_pool_config_enter(dp, FTAG);
433094e47eGeorge Wilson				err = dsl_dataset_hold_obj(dp,
434094e47eGeorge Wilson				    za.za_first_integer, FTAG, &ds);
435094e47eGeorge Wilson				if (err != 0) {
4363b2aab1Matthew Ahrens					dsl_pool_config_exit(dp, FTAG);
437990b485lling					break;
438990b485lling				}
439990b485lling
4409adfa60Matthew Ahrens				strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN,
441990b485lling				    KM_SLEEP);
442990b485lling				dsl_dataset_name(ds, strval);
443745cd3cmaybee				dsl_dataset_rele(ds, FTAG);
4443b2aab1Matthew Ahrens				dsl_pool_config_exit(dp, FTAG);
445990b485lling			} else {
446990b485lling				strval = NULL;
447990b485lling				intval = za.za_first_integer;
448990b485lling			}
449990b485lling
4509d82f4flling			spa_prop_add_list(*nvp, prop, strval, intval, src);
451990b485lling
452990b485lling			if (strval != NULL)
4539adfa60Matthew Ahrens				kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN);
454990b485lling
455990b485lling			break;
456990b485lling
457990b485lling		case 1:
458990b485lling			/* string property */
459990b485lling			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
460990b485lling			err = zap_lookup(mos, spa->spa_pool_props_object,
461990b485lling			    za.za_name, 1, za.za_num_integers, strval);
462990b485lling			if (err) {
463990b485lling				kmem_free(strval, za.za_num_integers);
464990b485lling				break;
465990b485lling			}
4669d82f4flling			spa_prop_add_list(*nvp, prop, strval, 0, src);
467990b485lling			kmem_free(strval, za.za_num_integers);
468990b485lling			break;
469990b485lling
470990b485lling		default:
471990b485lling			break;
472990b485lling		}
473990b485lling	}
474990b485lling	zap_cursor_fini(&zc);
475990b485lling	mutex_exit(&spa->spa_props_lock);
476990b485llingout:
477990b485lling	if (err && err != ENOENT) {
478990b485lling		nvlist_free(*nvp);
4799d82f4flling		*nvp = NULL;
480990b485lling		return (err);
481990b485lling	}
482990b485lling
483990b485lling	return (0);
484990b485lling}
485990b485lling
486990b485lling/*
487990b485lling * Validate the given pool properties nvlist and modify the list
488990b485lling * for the property values to be set.
489990b485lling */
490990b485llingstatic int
491990b485llingspa_prop_validate(spa_t *spa, nvlist_t *props)
492990b485lling{
493990b485lling	nvpair_t *elem;
494990b485lling	int error = 0, reset_bootfs = 0;
495d5285caGeorge Wilson	uint64_t objnum = 0;
496ad135b5Christopher Siden	boolean_t has_feature = B_FALSE;
497990b485lling
498990b485lling	elem = NULL;
499990b485lling	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
500990b485lling		uint64_t intval;
501ad135b5Christopher Siden		char *strval, *slash, *check, *fname;
502ad135b5Christopher Siden		const char *propname = nvpair_name(elem);
503ad135b5Christopher Siden		zpool_prop_t prop = zpool_name_to_prop(propname);
504ad135b5Christopher Siden
505ad135b5Christopher Siden		switch (prop) {
5064ae5f5fAlan Somers		case ZPOOL_PROP_INVAL:
507ad135b5Christopher Siden			if (!zpool_prop_feature(propname)) {
508be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
509ad135b5Christopher Siden				break;
510ad135b5Christopher Siden			}
511990b485lling
512ad135b5Christopher Siden			/*
513ad135b5Christopher Siden			 * Sanitize the input.
514ad135b5Christopher Siden			 */
515ad135b5Christopher Siden			if (nvpair_type(elem) != DATA_TYPE_UINT64) {
516be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
517ad135b5Christopher Siden				break;
518ad135b5Christopher Siden			}
519990b485lling
520ad135b5Christopher Siden			if (nvpair_value_uint64(elem, &intval) != 0) {
521be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
522ad135b5Christopher Siden				break;
523ad135b5Christopher Siden			}
524ad135b5Christopher Siden
525ad135b5Christopher Siden			if (intval != 0) {
526be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
527ad135b5Christopher Siden				break;
528ad135b5Christopher Siden			}
529ad135b5Christopher Siden
530ad135b5Christopher Siden			fname = strchr(propname, '@') + 1;
531ad135b5Christopher Siden			if (zfeature_lookup_name(fname, NULL) != 0) {
532be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
533ad135b5Christopher Siden				break;
534ad135b5Christopher Siden			}
535ad135b5Christopher Siden
536ad135b5Christopher Siden			has_feature = B_TRUE;
537ad135b5Christopher Siden			break;
538990b485lling
539990b485lling		case ZPOOL_PROP_VERSION:
540990b485lling			error = nvpair_value_uint64(elem, &intval);
541990b485lling			if (!error &&
542ad135b5Christopher Siden			    (intval < spa_version(spa) ||
543ad135b5Christopher Siden			    intval > SPA_VERSION_BEFORE_FEATURES ||
544ad135b5Christopher Siden			    has_feature))
545be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
546990b485lling			break;
547990b485lling
548990b485lling		case ZPOOL_PROP_DELEGATION:
549990b485lling		case ZPOOL_PROP_AUTOREPLACE:
550d5b5bb2Rich Morris		case ZPOOL_PROP_LISTSNAPS:
551573ca77George Wilson		case ZPOOL_PROP_AUTOEXPAND:
552990b485lling			error = nvpair_value_uint64(elem, &intval);
553990b485lling			if (!error && intval > 1)
554be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
555990b485lling			break;
556990b485lling
557e0f1c0aOlaf Faaland		case ZPOOL_PROP_MULTIHOST:
558e0f1c0aOlaf Faaland			error = nvpair_value_uint64(elem, &intval);
559e0f1c0aOlaf Faaland			if (!error && intval > 1)
560e0f1c0aOlaf Faaland				error = SET_ERROR(EINVAL);
561e0f1c0aOlaf Faaland
562e0f1c0aOlaf Faaland			if (!error && !spa_get_hostid())
563e0f1c0aOlaf Faaland				error = SET_ERROR(ENOTSUP);
564e0f1c0aOlaf Faaland
565e0f1c0aOlaf Faaland			break;
566e0f1c0aOlaf Faaland
567990b485lling		case ZPOOL_PROP_BOOTFS:
56825f89eeJeff Bonwick			/*
56925f89eeJeff Bonwick			 * If the pool version is less than SPA_VERSION_BOOTFS,
57025f89eeJeff Bonwick			 * or the pool is still being created (version == 0),
57125f89eeJeff Bonwick			 * the bootfs property cannot be set.
57225f89eeJeff Bonwick			 */
573990b485lling			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
574be6fd75Matthew Ahrens				error = SET_ERROR(ENOTSUP);
575990b485lling				break;
576990b485lling			}
577990b485lling
578990b485lling			/*
57915e6edfgw			 * Make sure the vdev config is bootable
580990b485lling			 */
58115e6edfgw			if (!vdev_is_bootable(spa->spa_root_vdev)) {
582be6fd75Matthew Ahrens				error = SET_ERROR(ENOTSUP);
583990b485lling				break;
584990b485lling			}
585990b485lling
586990b485lling			reset_bootfs = 1;
587990b485lling
588990b485lling			error = nvpair_value_string(elem, &strval);
589990b485lling
590990b485lling			if (!error) {
591ad135b5Christopher Siden				objset_t *os;
592b515258Matthew Ahrens				uint64_t propval;
59315e6edfgw
594990b485lling				if (strval == NULL || strval[0] == '\0') {
595990b485lling					objnum = zpool_prop_default_numeric(
596990b485lling					    ZPOOL_PROP_BOOTFS);
597990b485lling					break;
598990b485lling				}
599990b485lling
600094e47eGeorge Wilson				error = dmu_objset_hold(strval, FTAG, &os);
601094e47eGeorge Wilson				if (error != 0)
602990b485lling					break;
60315e6edfgw
604b515258Matthew Ahrens				/*
605b515258Matthew Ahrens				 * Must be ZPL, and its property settings
60654811daToomas Soome				 * must be supported.
607b515258Matthew Ahrens				 */
608503ad85Matthew Ahrens
609503ad85Matthew Ahrens				if (dmu_objset_type(os) != DMU_OST_ZFS) {
610be6fd75Matthew Ahrens					error = SET_ERROR(ENOTSUP);
6113b2aab1Matthew Ahrens				} else if ((error =
6123b2aab1Matthew Ahrens				    dsl_prop_get_int_ds(dmu_objset_ds(os),
61315e6edfgw				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
614b515258Matthew Ahrens				    &propval)) == 0 &&
615b515258Matthew Ahrens				    !BOOTFS_COMPRESS_VALID(propval)) {
616b515258Matthew Ahrens					error = SET_ERROR(ENOTSUP);
61715e6edfgw				} else {
61815e6edfgw					objnum = dmu_objset_id(os);
61915e6edfgw				}
620503ad85Matthew Ahrens				dmu_objset_rele(os, FTAG);
621990b485lling			}
622990b485lling			break;
623e14bb32Jeff Bonwick
6240a4e951gw		case ZPOOL_PROP_FAILUREMODE:
6250a4e951gw			error = nvpair_value_uint64(elem, &intval);
6260a4e951gw			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
6270a4e951gw			    intval > ZIO_FAILURE_MODE_PANIC))
628be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
6290a4e951gw
6300a4e951gw			/*
6310a4e951gw			 * This is a special case which only occurs when
6320a4e951gw			 * the pool has completely failed. This allows
6330a4e951gw			 * the user to change the in-core failmode property
6340a4e951gw			 * without syncing it out to disk (I/Os might
6350a4e951gw			 * currently be blocked). We do this by returning
6360a4e951gw			 * EIO to the caller (spa_prop_set) to trick it
6370a4e951gw			 * into thinking we encountered a property validation
6380a4e951gw			 * error.
6390a4e951gw			 */
640e14bb32Jeff Bonwick			if (!error && spa_suspended(spa)) {
6410a4e951gw				spa->spa_failmode = intval;
642be6fd75Matthew Ahrens				error = SET_ERROR(EIO);
6430a4e951gw			}
6440a4e951gw			break;
6452f8aaabeschrock
6462f8aaabeschrock		case ZPOOL_PROP_CACHEFILE:
6472f8aaabeschrock			if ((error = nvpair_value_string(elem, &strval)) != 0)
6482f8aaabeschrock				break;
6492f8aaabeschrock
6502f8aaabeschrock			if (strval[0] == '\0')
6512f8aaabeschrock				break;
6522f8aaabeschrock
6532f8aaabeschrock			if (strcmp(strval, "none") == 0)
6542f8aaabeschrock				break;
6552f8aaabeschrock
6562f8aaabeschrock			if (strval[0] != '/') {
657be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
6582f8aaabeschrock				break;
6592f8aaabeschrock			}
6602f8aaabeschrock
6612f8aaabeschrock			slash = strrchr(strval, '/');
6622f8aaabeschrock			ASSERT(slash != NULL);
6632f8aaabeschrock
6642f8aaabeschrock			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
6652f8aaabeschrock			    strcmp(slash, "/..") == 0)
666be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
6672f8aaabeschrock			break;
668b24ab67Jeff Bonwick
6698704186Dan McDonald		case ZPOOL_PROP_COMMENT:
6708704186Dan McDonald			if ((error = nvpair_value_string(elem, &strval)) != 0)
6718704186Dan McDonald				break;
6728704186Dan McDonald			for (check = strval; *check != '\0'; check++) {
6738704186Dan McDonald				/*
6748704186Dan McDonald				 * The kernel doesn't have an easy isprint()
6758704186Dan McDonald				 * check.  For this kernel check, we merely
6768704186Dan McDonald				 * check ASCII apart from DEL.  Fix this if
6778704186Dan McDonald				 * there is an easy-to-use kernel isprint().
6788704186Dan McDonald				 */
6798704186Dan McDonald				if (*check >= 0x7f) {
680be6fd75Matthew Ahrens					error = SET_ERROR(EINVAL);
6818704186Dan McDonald					break;
6828704186Dan McDonald				}
6838704186Dan McDonald			}
6848704186Dan McDonald			if (strlen(strval) > ZPROP_MAX_COMMENT)
6858704186Dan McDonald				error = E2BIG;
6868704186Dan McDonald			break;
6878704186Dan McDonald
688b24ab67Jeff Bonwick		case ZPOOL_PROP_DEDUPDITTO:
689b24ab67Jeff Bonwick			if (spa_version(spa) < SPA_VERSION_DEDUP)
690be6fd75Matthew Ahrens				error = SET_ERROR(ENOTSUP);
691b24ab67Jeff Bonwick			else
692b24ab67Jeff Bonwick				error = nvpair_value_uint64(elem, &intval);
693b24ab67Jeff Bonwick			if (error == 0 &&
694b24ab67Jeff Bonwick			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
695be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
696b24ab67Jeff Bonwick			break;
697990b485lling		}
698990b485lling
699990b485lling		if (error)
700990b485lling			break;
701990b485lling	}
702990b485lling
703990b485lling	if (!error && reset_bootfs) {
704990b485lling		error = nvlist_remove(props,
705990b485lling		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);
706990b485lling
707990b485lling		if (!error) {
708990b485lling			error = nvlist_add_uint64(props,
709990b485lling			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
710990b485lling		}
711990b485lling	}
712990b485lling
713990b485lling	return (error);
714990b485lling}
715990b485lling
716379c004Eric Schrockvoid
717379c004Eric Schrockspa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
718379c004Eric Schrock{
719379c004Eric Schrock	char *cachefile;
720379c004Eric Schrock	spa_config_dirent_t *dp;
721379c004Eric Schrock
722379c004Eric Schrock	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
723379c004Eric Schrock	    &cachefile) != 0)
724379c004Eric Schrock		return;
725379c004Eric Schrock
726379c004Eric Schrock	dp = kmem_alloc(sizeof (spa_config_dirent_t),
727379c004Eric Schrock	    KM_SLEEP);
728379c004Eric Schrock
729379c004Eric Schrock	if (cachefile[0] == '\0')
730379c004Eric Schrock		dp->scd_path = spa_strdup(spa_config_path);
731379c004Eric Schrock	else if (strcmp(cachefile, "none") == 0)
732379c004Eric Schrock		dp->scd_path = NULL;
733379c004Eric Schrock	else
734379c004Eric Schrock		dp->scd_path = spa_strdup(cachefile);
735379c004Eric Schrock
736379c004Eric Schrock	list_insert_head(&spa->spa_config_list, dp);
737379c004Eric Schrock	if (need_sync)
738379c004Eric Schrock		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
739379c004Eric Schrock}
740379c004Eric Schrock
741990b485llingint
742990b485llingspa_prop_set(spa_t *spa, nvlist_t *nvp)
743990b485lling{
744990b485lling	int error;
745ad135b5Christopher Siden	nvpair_t *elem = NULL;
746379c004Eric Schrock	boolean_t need_sync = B_FALSE;
747990b485lling
748990b485lling	if ((error = spa_prop_validate(spa, nvp)) != 0)
749990b485lling		return (error);
750990b485lling
751379c004Eric Schrock	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
752ad135b5Christopher Siden		zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem));
753379c004Eric Schrock
754f9af39bGeorge Wilson		if (prop == ZPOOL_PROP_CACHEFILE ||
755f9af39bGeorge Wilson		    prop == ZPOOL_PROP_ALTROOT ||
756f9af39bGeorge Wilson		    prop == ZPOOL_PROP_READONLY)
757379c004Eric Schrock			continue;
758379c004Eric Schrock
7594ae5f5fAlan Somers		if (prop == ZPOOL_PROP_VERSION || prop == ZPOOL_PROP_INVAL) {
760ad135b5Christopher Siden			uint64_t ver;
761ad135b5Christopher Siden
762ad135b5Christopher Siden			if (prop == ZPOOL_PROP_VERSION) {
763ad135b5Christopher Siden				VERIFY(nvpair_value_uint64(elem, &ver) == 0);
764ad135b5Christopher Siden			} else {
765ad135b5Christopher Siden				ASSERT(zpool_prop_feature(nvpair_name(elem)));
766ad135b5Christopher Siden				ver = SPA_VERSION_FEATURES;
767ad135b5Christopher Siden				need_sync = B_TRUE;
768ad135b5Christopher Siden			}
769ad135b5Christopher Siden
770ad135b5Christopher Siden			/* Save time if the version is already set. */
771ad135b5Christopher Siden			if (ver == spa_version(spa))
772ad135b5Christopher Siden				continue;
773ad135b5Christopher Siden
774ad135b5Christopher Siden			/*
775ad135b5Christopher Siden			 * In addition to the pool directory object, we might
776ad135b5Christopher Siden			 * create the pool properties object, the features for
777ad135b5Christopher Siden			 * read object, the features for write object, or the
778ad135b5Christopher Siden			 * feature descriptions object.
779ad135b5Christopher Siden			 */
7803b2aab1Matthew Ahrens			error = dsl_sync_task(spa->spa_name, NULL,
7817d46dc6Matthew Ahrens			    spa_sync_version, &ver,
7827d46dc6Matthew Ahrens			    6, ZFS_SPACE_CHECK_RESERVED);
783ad135b5Christopher Siden			if (error)
784ad135b5Christopher Siden