spa.c revision 1702cce751c5cb7ead878d0205a6c90b027e3de8
1fa9e406ahrens/*
2fa9e406ahrens * CDDL HEADER START
3fa9e406ahrens *
4fa9e406ahrens * The contents of this file are subject to the terms of the
5ea8dc4beschrock * Common Development and Distribution License (the "License").
6ea8dc4beschrock * You may not use this file except in compliance with the License.
7fa9e406ahrens *
8fa9e406ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e406ahrens * or http://www.opensolaris.org/os/licensing.
10fa9e406ahrens * See the License for the specific language governing permissions
11fa9e406ahrens * and limitations under the License.
12fa9e406ahrens *
13fa9e406ahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e406ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e406ahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e406ahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e406ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e406ahrens *
19fa9e406ahrens * CDDL HEADER END
20fa9e406ahrens */
2199653d4eschrock
22fa9e406ahrens/*
2398d1cbfGeorge Wilson * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24b7b2590Matthew Ahrens * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
251437283Hans Rosenfeld * Copyright (c) 2015, Nexenta Systems, Inc.  All rights reserved.
26bc9014eJustin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2745818eeMatthew Ahrens * Copyright 2013 Saso Kiselkov. All rights reserved.
28c3d26abMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
29c8811bdToomas Soome * Copyright 2016 Toomas Soome <tsoome@me.com>
30ce1577bDave Eddy * Copyright 2017 Joyent, Inc.
311702cceAlek Pinchuk * Copyright (c) 2017 Datto Inc.
325aeb947Garrett D'Amore */
33fa9e406ahrens
34fa9e406ahrens/*
353e30c24Will Andrews * SPA: Storage Pool Allocator
363e30c24Will Andrews *
37fa9e406ahrens * This file contains all the routines used when modifying on-disk SPA state.
38fa9e406ahrens * This includes opening, importing, destroying, exporting a pool, and syncing a
39fa9e406ahrens * pool.
40fa9e406ahrens */
41fa9e406ahrens
42fa9e406ahrens#include <sys/zfs_context.h>
43ea8dc4beschrock#include <sys/fm/fs/zfs.h>
44fa9e406ahrens#include <sys/spa_impl.h>
45fa9e406ahrens#include <sys/zio.h>
46fa9e406ahrens#include <sys/zio_checksum.h>
47fa9e406ahrens#include <sys/dmu.h>
48fa9e406ahrens#include <sys/dmu_tx.h>
49fa9e406ahrens#include <sys/zap.h>
50fa9e406ahrens#include <sys/zil.h>
51b24ab67Jeff Bonwick#include <sys/ddt.h>
52fa9e406ahrens#include <sys/vdev_impl.h>
53fa9e406ahrens#include <sys/metaslab.h>
5488ecc94George Wilson#include <sys/metaslab_impl.h>
55fa9e406ahrens#include <sys/uberblock_impl.h>
56fa9e406ahrens#include <sys/txg.h>
57fa9e406ahrens#include <sys/avl.h>
58fa9e406ahrens#include <sys/dmu_traverse.h>
59b1b8ab3lling#include <sys/dmu_objset.h>
60fa9e406ahrens#include <sys/unique.h>
61fa9e406ahrens#include <sys/dsl_pool.h>
62b1b8ab3lling#include <sys/dsl_dataset.h>
63fa9e406ahrens#include <sys/dsl_dir.h>
64fa9e406ahrens#include <sys/dsl_prop.h>
65b1b8ab3lling#include <sys/dsl_synctask.h>
66fa9e406ahrens#include <sys/fs/zfs.h>
67fa94a07brendan#include <sys/arc.h>
68fa9e406ahrens#include <sys/callb.h>
699517395ek#include <sys/systeminfo.h>
70e7cbe64gw#include <sys/spa_boot.h>
71573ca77George Wilson#include <sys/zfs_ioctl.h>
723f9d6adLin Ling#include <sys/dsl_scan.h>
73ad135b5Christopher Siden#include <sys/zfeature.h>
743b2aab1Matthew Ahrens#include <sys/dsl_destroy.h>
75770499eDan Kimmel#include <sys/abd.h>
76fa9e406ahrens
775679c89jv#ifdef	_KERNEL
78dedec47Jack Meng#include <sys/bootprops.h>
7935a5a35Jonathan Adams#include <sys/callb.h>
8035a5a35Jonathan Adams#include <sys/cpupart.h>
8135a5a35Jonathan Adams#include <sys/pool.h>
8235a5a35Jonathan Adams#include <sys/sysdc.h>
8335a5a35Jonathan Adams#include <sys/zone.h>
845679c89jv#endif	/* _KERNEL */
855679c89jv
86990b485lling#include "zfs_prop.h"
87b7b9745perrin#include "zfs_comutil.h"
88990b485lling
/*
 * The interval, in seconds, at which failed configuration cache file writes
 * should be retried.  The default of 300 is five minutes.
 */
static int zfs_ccw_retry_interval = 300;
943cb69f7Will Andrews
/*
 * How the taskq(s) for one slot of the zio_taskqs table are created.  The
 * mode is stored in zio_taskq_info_t.zti_mode.
 */
typedef enum zti_modes {
	ZTI_MODE_FIXED,			/* value is # of threads (min 1) */
	ZTI_MODE_BATCH,			/* cpu-intensive; value is ignored */
	ZTI_MODE_NULL,			/* don't create a taskq */
	ZTI_NMODES			/* count of modes; not a mode itself */
} zti_modes_t;

/*
 * Initializers for a zio_taskq_info_t, in field order
 * { zti_mode, zti_value, zti_count }.
 */
#define	ZTI_P(n, q)	{ ZTI_MODE_FIXED, (n), (q) }
#define	ZTI_BATCH	{ ZTI_MODE_BATCH, 0, 1 }
#define	ZTI_NULL	{ ZTI_MODE_NULL, 0, 0 }

/* Shorthands for a single fixed-size taskq. */
#define	ZTI_N(n)	ZTI_P(n, 1)
#define	ZTI_ONE		ZTI_N(1)

/* Taskq settings for one (I/O type, taskq type) slot. */
typedef struct zio_taskq_info {
	zti_modes_t zti_mode;	/* one of the ZTI_MODE_* values */
	uint_t zti_value;	/* mode argument (see zti_modes_t comments) */
	uint_t zti_count;	/* third initializer of the ZTI_* macros */
} zio_taskq_info_t;

/* Names of the taskq types, in the column order used by zio_taskqs[][]. */
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
	"issue", "issue_high", "intr", "intr_high"
};
1182e0c549Jonathan Adams
/*
 * This table defines the taskq settings for each ZFS I/O type. When
 * initializing a pool, we use this table to create an appropriately sized
 * taskq. Some operations are low volume and therefore have a small, static
 * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
 * macros. Other operations process a large amount of data; the ZTI_BATCH
 * macro causes us to create a taskq oriented for throughput. Some operations
 * are so high frequency and short-lived that the taskq itself can become a
 * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
 * additional degree of parallelism specified by the number of threads per-
 * taskq and the number of taskqs; when dispatching an event in this case, the
 * particular taskq is chosen at random.
 *
 * The different taskq priorities are to handle the different contexts (issue
 * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
 * need to be handled with minimum delay.
 *
 * Rows are indexed by I/O type and columns by taskq type; the trailing
 * comment on each row names the I/O type it configures.
 */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* NULL */
	{ ZTI_N(8),	ZTI_NULL,	ZTI_P(12, 8),	ZTI_NULL }, /* READ */
	{ ZTI_BATCH,	ZTI_N(5),	ZTI_N(8),	ZTI_N(5) }, /* WRITE */
	{ ZTI_P(12, 8),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* FREE */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* CLAIM */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* IOCTL */
};
1452e0c549Jonathan Adams
/*
 * Forward declarations for file-local routines.
 */
static sysevent_t *spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl,
    const char *name);
static void spa_event_post(sysevent_t *ev);
/* Sync-task callbacks dispatched from spa_prop_set(). */
static void spa_sync_version(void *arg, dmu_tx_t *tx);
static void spa_sync_props(void *arg, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
    spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
    char **ereport);
static void spa_vdev_resilver_done(spa_t *spa);
156990b485lling
/*
 * NOTE(review): this was previously commented "1 thread per cpu in pset",
 * which does not match a value of 75.  Presumably a percentage used to size
 * ZTI_MODE_BATCH taskqs -- confirm against the taskq creation code.
 */
uint_t		zio_taskq_batch_pct = 75;
id_t		zio_taskq_psrset_bind = PS_NONE;
boolean_t	zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
uint_t		zio_taskq_basedc = 80;		/* base duty cycle */

boolean_t	spa_create_process = B_TRUE;	/* no process ==> no sysdc */
extern int	zfs_sync_pass_deferred_free;	/* defined outside this file */

/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"
17035a5a35Jonathan Adams
171990b485lling/*
172990b485lling * ==========================================================================
173990b485lling * SPA properties routines
174990b485lling * ==========================================================================
175990b485lling */
176990b485lling
177990b485lling/*
178990b485lling * Add a (source=src, propname=propval) list to an nvlist.
179990b485lling */
1809d82f4fllingstatic void
181990b485llingspa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
182990b485lling    uint64_t intval, zprop_source_t src)
183990b485lling{
184990b485lling	const char *propname = zpool_prop_to_name(prop);
185990b485lling	nvlist_t *propval;
186990b485lling
1879d82f4flling	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1889d82f4flling	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
189990b485lling
1909d82f4flling	if (strval != NULL)
1919d82f4flling		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
1929d82f4flling	else
1939d82f4flling		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
194990b485lling
1959d82f4flling	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
196990b485lling	nvlist_free(propval);
197990b485lling}
198990b485lling
199990b485lling/*
200990b485lling * Get property values from the spa configuration.
201990b485lling */
2029d82f4fllingstatic void
203990b485llingspa_prop_get_config(spa_t *spa, nvlist_t **nvp)
204990b485lling{
2054263d13George Wilson	vdev_t *rvd = spa->spa_root_vdev;
206ad135b5Christopher Siden	dsl_pool_t *pool = spa->spa_dsl_pool;
2072e4c998George Wilson	uint64_t size, alloc, cap, version;
208990b485lling	zprop_source_t src = ZPROP_SRC_NONE;
209c5904d1eschrock	spa_config_dirent_t *dp;
2102e4c998George Wilson	metaslab_class_t *mc = spa_normal_class(spa);
211990b485lling
212e14bb32Jeff Bonwick	ASSERT(MUTEX_HELD(&spa->spa_props_lock));
213e14bb32Jeff Bonwick
2144263d13George Wilson	if (rvd != NULL) {
215485bbbfGeorge Wilson		alloc = metaslab_class_get_alloc(spa_normal_class(spa));
216b24ab67Jeff Bonwick		size = metaslab_class_get_space(spa_normal_class(spa));
217379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
218379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
219485bbbfGeorge Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
220485bbbfGeorge Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
221485bbbfGeorge Wilson		    size - alloc, src);
2224263d13George Wilson
2232e4c998George Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL,
2242e4c998George Wilson		    metaslab_class_fragmentation(mc), src);
2252e4c998George Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL,
2262e4c998George Wilson		    metaslab_class_expandable_space(mc), src);
227f9af39bGeorge Wilson		spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL,
228f9af39bGeorge Wilson		    (spa_mode(spa) == FREAD), src);
229379c004Eric Schrock
230485bbbfGeorge Wilson		cap = (size == 0) ? 0 : (alloc * 100 / size);
231379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
232379c004Eric Schrock
233b24ab67Jeff Bonwick		spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
234b24ab67Jeff Bonwick		    ddt_get_pool_dedup_ratio(spa), src);
235b24ab67Jeff Bonwick
236379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
2374263d13George Wilson		    rvd->vdev_state, src);
238379c004Eric Schrock
239379c004Eric Schrock		version = spa_version(spa);
240379c004Eric Schrock		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
241379c004Eric Schrock			src = ZPROP_SRC_DEFAULT;
242379c004Eric Schrock		else
243379c004Eric Schrock			src = ZPROP_SRC_LOCAL;
244379c004Eric Schrock		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
245379c004Eric Schrock	}
246990b485lling
247ad135b5Christopher Siden	if (pool != NULL) {
248ad135b5Christopher Siden		/*
249ad135b5Christopher Siden		 * The $FREE directory was introduced in SPA_VERSION_DEADLISTS,
250ad135b5Christopher Siden		 * when opening pools before this version freedir will be NULL.
251ad135b5Christopher Siden		 */
2527fd05acMatthew Ahrens		if (pool->dp_free_dir != NULL) {
253ad135b5Christopher Siden			spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
254c137962Justin T. Gibbs			    dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes,
255c137962Justin T. Gibbs			    src);
256ad135b5Christopher Siden		} else {
257ad135b5Christopher Siden			spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
258ad135b5Christopher Siden			    NULL, 0, src);
259ad135b5Christopher Siden		}
2607fd05acMatthew Ahrens
2617fd05acMatthew Ahrens		if (pool->dp_leak_dir != NULL) {
2627fd05acMatthew Ahrens			spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL,
263c137962Justin T. Gibbs			    dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes,
264c137962Justin T. Gibbs			    src);
2657fd05acMatthew Ahrens		} else {
2667fd05acMatthew Ahrens			spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED,
2677fd05acMatthew Ahrens			    NULL, 0, src);
2687fd05acMatthew Ahrens		}
269ad135b5Christopher Siden	}
270ad135b5Christopher Siden
2719d82f4flling	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
272990b485lling
2738704186Dan McDonald	if (spa->spa_comment != NULL) {
2748704186Dan McDonald		spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment,
2758704186Dan McDonald		    0, ZPROP_SRC_LOCAL);
2768704186Dan McDonald	}
2778704186Dan McDonald
2789d82f4flling	if (spa->spa_root != NULL)
2799d82f4flling		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
2809d82f4flling		    0, ZPROP_SRC_LOCAL);
281990b485lling
282b515258Matthew Ahrens	if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) {
283b515258Matthew Ahrens		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
284b515258Matthew Ahrens		    MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE);
285b515258Matthew Ahrens	} else {
286b515258Matthew Ahrens		spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
287b515258Matthew Ahrens		    SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE);
288b515258Matthew Ahrens	}
289b515258Matthew Ahrens
290c5904d1eschrock	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
291c5904d1eschrock		if (dp->scd_path == NULL) {
2929d82f4flling			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
293c5904d1eschrock			    "none", 0, ZPROP_SRC_LOCAL);
294c5904d1eschrock		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
2959d82f4flling			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
296c5904d1eschrock			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
2972f8aaabeschrock		}
2982f8aaabeschrock	}
299990b485lling}
300990b485lling
301990b485lling/*
302990b485lling * Get zpool property values.
303990b485lling */
304990b485llingint
305990b485llingspa_prop_get(spa_t *spa, nvlist_t **nvp)
306990b485lling{
307b24ab67Jeff Bonwick	objset_t *mos = spa->spa_meta_objset;
308990b485lling	zap_cursor_t zc;
309990b485lling	zap_attribute_t za;
310990b485lling	int err;
311990b485lling
3129d82f4flling	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
313990b485lling
314e14bb32Jeff Bonwick	mutex_enter(&spa->spa_props_lock);
315e14bb32Jeff Bonwick
316990b485lling	/*
317990b485lling	 * Get properties from the spa config.
318990b485lling	 */
3199d82f4flling	spa_prop_get_config(spa, nvp);
320990b485lling
321990b485lling	/* If no pool property object, no more prop to get. */
322afee20eGeorge Wilson	if (mos == NULL || spa->spa_pool_props_object == 0) {
323990b485lling		mutex_exit(&spa->spa_props_lock);
324990b485lling		return (0);
325990b485lling	}
326990b485lling
327990b485lling	/*
328990b485lling	 * Get properties from the MOS pool property object.
329990b485lling	 */
330990b485lling	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
331990b485lling	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
332990b485lling	    zap_cursor_advance(&zc)) {
333990b485lling		uint64_t intval = 0;
334990b485lling		char *strval = NULL;
335990b485lling		zprop_source_t src = ZPROP_SRC_DEFAULT;
336990b485lling		zpool_prop_t prop;
337990b485lling
338990b485lling		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
339990b485lling			continue;
340990b485lling
341990b485lling		switch (za.za_integer_length) {
342990b485lling		case 8:
343990b485lling			/* integer property */
344990b485lling			if (za.za_first_integer !=
345990b485lling			    zpool_prop_default_numeric(prop))
346990b485lling				src = ZPROP_SRC_LOCAL;
347990b485lling
348990b485lling			if (prop == ZPOOL_PROP_BOOTFS) {
349990b485lling				dsl_pool_t *dp;
350990b485lling				dsl_dataset_t *ds = NULL;
351990b485lling
352990b485lling				dp = spa_get_dsl(spa);
3533b2aab1Matthew Ahrens				dsl_pool_config_enter(dp, FTAG);
354745cd3cmaybee				if (err = dsl_dataset_hold_obj(dp,
355745cd3cmaybee				    za.za_first_integer, FTAG, &ds)) {
3563b2aab1Matthew Ahrens					dsl_pool_config_exit(dp, FTAG);
357990b485lling					break;
358990b485lling				}
359990b485lling
3609adfa60Matthew Ahrens				strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN,
361990b485lling				    KM_SLEEP);
362990b485lling				dsl_dataset_name(ds, strval);
363745cd3cmaybee				dsl_dataset_rele(ds, FTAG);
3643b2aab1Matthew Ahrens				dsl_pool_config_exit(dp, FTAG);
365990b485lling			} else {
366990b485lling				strval = NULL;
367990b485lling				intval = za.za_first_integer;
368990b485lling			}
369990b485lling
3709d82f4flling			spa_prop_add_list(*nvp, prop, strval, intval, src);
371990b485lling
372990b485lling			if (strval != NULL)
3739adfa60Matthew Ahrens				kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN);
374990b485lling
375990b485lling			break;
376990b485lling
377990b485lling		case 1:
378990b485lling			/* string property */
379990b485lling			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
380990b485lling			err = zap_lookup(mos, spa->spa_pool_props_object,
381990b485lling			    za.za_name, 1, za.za_num_integers, strval);
382990b485lling			if (err) {
383990b485lling				kmem_free(strval, za.za_num_integers);
384990b485lling				break;
385990b485lling			}
3869d82f4flling			spa_prop_add_list(*nvp, prop, strval, 0, src);
387990b485lling			kmem_free(strval, za.za_num_integers);
388990b485lling			break;
389990b485lling
390990b485lling		default:
391990b485lling			break;
392990b485lling		}
393990b485lling	}
394990b485lling	zap_cursor_fini(&zc);
395990b485lling	mutex_exit(&spa->spa_props_lock);
396990b485llingout:
397990b485lling	if (err && err != ENOENT) {
398990b485lling		nvlist_free(*nvp);
3999d82f4flling		*nvp = NULL;
400990b485lling		return (err);
401990b485lling	}
402990b485lling
403990b485lling	return (0);
404990b485lling}
405990b485lling
406990b485lling/*
407990b485lling * Validate the given pool properties nvlist and modify the list
408990b485lling * for the property values to be set.
409990b485lling */
410990b485llingstatic int
411990b485llingspa_prop_validate(spa_t *spa, nvlist_t *props)
412990b485lling{
413990b485lling	nvpair_t *elem;
414990b485lling	int error = 0, reset_bootfs = 0;
415d5285caGeorge Wilson	uint64_t objnum = 0;
416ad135b5Christopher Siden	boolean_t has_feature = B_FALSE;
417990b485lling
418990b485lling	elem = NULL;
419990b485lling	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
420990b485lling		uint64_t intval;
421ad135b5Christopher Siden		char *strval, *slash, *check, *fname;
422ad135b5Christopher Siden		const char *propname = nvpair_name(elem);
423ad135b5Christopher Siden		zpool_prop_t prop = zpool_name_to_prop(propname);
424ad135b5Christopher Siden
425ad135b5Christopher Siden		switch (prop) {
426ad135b5Christopher Siden		case ZPROP_INVAL:
427ad135b5Christopher Siden			if (!zpool_prop_feature(propname)) {
428be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
429ad135b5Christopher Siden				break;
430ad135b5Christopher Siden			}
431990b485lling
432ad135b5Christopher Siden			/*
433ad135b5Christopher Siden			 * Sanitize the input.
434ad135b5Christopher Siden			 */
435ad135b5Christopher Siden			if (nvpair_type(elem) != DATA_TYPE_UINT64) {
436be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
437ad135b5Christopher Siden				break;
438ad135b5Christopher Siden			}
439990b485lling
440ad135b5Christopher Siden			if (nvpair_value_uint64(elem, &intval) != 0) {
441be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
442ad135b5Christopher Siden				break;
443ad135b5Christopher Siden			}
444ad135b5Christopher Siden
445ad135b5Christopher Siden			if (intval != 0) {
446be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
447ad135b5Christopher Siden				break;
448ad135b5Christopher Siden			}
449ad135b5Christopher Siden
450ad135b5Christopher Siden			fname = strchr(propname, '@') + 1;
451ad135b5Christopher Siden			if (zfeature_lookup_name(fname, NULL) != 0) {
452be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
453ad135b5Christopher Siden				break;
454ad135b5Christopher Siden			}
455ad135b5Christopher Siden
456ad135b5Christopher Siden			has_feature = B_TRUE;
457ad135b5Christopher Siden			break;
458990b485lling
459990b485lling		case ZPOOL_PROP_VERSION:
460990b485lling			error = nvpair_value_uint64(elem, &intval);
461990b485lling			if (!error &&
462ad135b5Christopher Siden			    (intval < spa_version(spa) ||
463ad135b5Christopher Siden			    intval > SPA_VERSION_BEFORE_FEATURES ||
464ad135b5Christopher Siden			    has_feature))
465be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
466990b485lling			break;
467990b485lling
468990b485lling		case ZPOOL_PROP_DELEGATION:
469990b485lling		case ZPOOL_PROP_AUTOREPLACE:
470d5b5bb2Rich Morris		case ZPOOL_PROP_LISTSNAPS:
471573ca77George Wilson		case ZPOOL_PROP_AUTOEXPAND:
472990b485lling			error = nvpair_value_uint64(elem, &intval);
473990b485lling			if (!error && intval > 1)
474be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
475990b485lling			break;
476990b485lling
477990b485lling		case ZPOOL_PROP_BOOTFS:
47825f89eeJeff Bonwick			/*
47925f89eeJeff Bonwick			 * If the pool version is less than SPA_VERSION_BOOTFS,
48025f89eeJeff Bonwick			 * or the pool is still being created (version == 0),
48125f89eeJeff Bonwick			 * the bootfs property cannot be set.
48225f89eeJeff Bonwick			 */
483990b485lling			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
484be6fd75Matthew Ahrens				error = SET_ERROR(ENOTSUP);
485990b485lling				break;
486990b485lling			}
487990b485lling
488990b485lling			/*
48915e6edfgw			 * Make sure the vdev config is bootable
490990b485lling			 */
49115e6edfgw			if (!vdev_is_bootable(spa->spa_root_vdev)) {
492be6fd75Matthew Ahrens				error = SET_ERROR(ENOTSUP);
493990b485lling				break;
494990b485lling			}
495990b485lling
496990b485lling			reset_bootfs = 1;
497990b485lling
498990b485lling			error = nvpair_value_string(elem, &strval);
499990b485lling
500990b485lling			if (!error) {
501ad135b5Christopher Siden				objset_t *os;
502b515258Matthew Ahrens				uint64_t propval;
50315e6edfgw
504990b485lling				if (strval == NULL || strval[0] == '\0') {
505990b485lling					objnum = zpool_prop_default_numeric(
506990b485lling					    ZPOOL_PROP_BOOTFS);
507990b485lling					break;
508990b485lling				}
509990b485lling
510503ad85Matthew Ahrens				if (error = dmu_objset_hold(strval, FTAG, &os))
511990b485lling					break;
51215e6edfgw
513b515258Matthew Ahrens				/*
514b515258Matthew Ahrens				 * Must be ZPL, and its property settings
515b515258Matthew Ahrens				 * must be supported by GRUB (compression
516b515258Matthew Ahrens				 * is not gzip, and large blocks are not used).
517b515258Matthew Ahrens				 */
518503ad85Matthew Ahrens
519503ad85Matthew Ahrens				if (dmu_objset_type(os) != DMU_OST_ZFS) {
520be6fd75Matthew Ahrens					error = SET_ERROR(ENOTSUP);
5213b2aab1Matthew Ahrens				} else if ((error =
5223b2aab1Matthew Ahrens				    dsl_prop_get_int_ds(dmu_objset_ds(os),
52315e6edfgw				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
524b515258Matthew Ahrens				    &propval)) == 0 &&
525b515258Matthew Ahrens				    !BOOTFS_COMPRESS_VALID(propval)) {
526b515258Matthew Ahrens					error = SET_ERROR(ENOTSUP);
52715e6edfgw				} else {
52815e6edfgw					objnum = dmu_objset_id(os);
52915e6edfgw				}
530503ad85Matthew Ahrens				dmu_objset_rele(os, FTAG);
531990b485lling			}
532990b485lling			break;
533e14bb32Jeff Bonwick
5340a4e951gw		case ZPOOL_PROP_FAILUREMODE:
5350a4e951gw			error = nvpair_value_uint64(elem, &intval);
5360a4e951gw			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
5370a4e951gw			    intval > ZIO_FAILURE_MODE_PANIC))
538be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
5390a4e951gw
5400a4e951gw			/*
5410a4e951gw			 * This is a special case which only occurs when
5420a4e951gw			 * the pool has completely failed. This allows
5430a4e951gw			 * the user to change the in-core failmode property
5440a4e951gw			 * without syncing it out to disk (I/Os might
5450a4e951gw			 * currently be blocked). We do this by returning
5460a4e951gw			 * EIO to the caller (spa_prop_set) to trick it
5470a4e951gw			 * into thinking we encountered a property validation
5480a4e951gw			 * error.
5490a4e951gw			 */
550e14bb32Jeff Bonwick			if (!error && spa_suspended(spa)) {
5510a4e951gw				spa->spa_failmode = intval;
552be6fd75Matthew Ahrens				error = SET_ERROR(EIO);
5530a4e951gw			}
5540a4e951gw			break;
5552f8aaabeschrock
5562f8aaabeschrock		case ZPOOL_PROP_CACHEFILE:
5572f8aaabeschrock			if ((error = nvpair_value_string(elem, &strval)) != 0)
5582f8aaabeschrock				break;
5592f8aaabeschrock
5602f8aaabeschrock			if (strval[0] == '\0')
5612f8aaabeschrock				break;
5622f8aaabeschrock
5632f8aaabeschrock			if (strcmp(strval, "none") == 0)
5642f8aaabeschrock				break;
5652f8aaabeschrock
5662f8aaabeschrock			if (strval[0] != '/') {
567be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
5682f8aaabeschrock				break;
5692f8aaabeschrock			}
5702f8aaabeschrock
5712f8aaabeschrock			slash = strrchr(strval, '/');
5722f8aaabeschrock			ASSERT(slash != NULL);
5732f8aaabeschrock
5742f8aaabeschrock			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
5752f8aaabeschrock			    strcmp(slash, "/..") == 0)
576be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
5772f8aaabeschrock			break;
578b24ab67Jeff Bonwick
5798704186Dan McDonald		case ZPOOL_PROP_COMMENT:
5808704186Dan McDonald			if ((error = nvpair_value_string(elem, &strval)) != 0)
5818704186Dan McDonald				break;
5828704186Dan McDonald			for (check = strval; *check != '\0'; check++) {
5838704186Dan McDonald				/*
5848704186Dan McDonald				 * The kernel doesn't have an easy isprint()
5858704186Dan McDonald				 * check.  For this kernel check, we merely
5868704186Dan McDonald				 * check ASCII apart from DEL.  Fix this if
5878704186Dan McDonald				 * there is an easy-to-use kernel isprint().
5888704186Dan McDonald				 */
5898704186Dan McDonald				if (*check >= 0x7f) {
590be6fd75Matthew Ahrens					error = SET_ERROR(EINVAL);
5918704186Dan McDonald					break;
5928704186Dan McDonald				}
5938704186Dan McDonald			}
5948704186Dan McDonald			if (strlen(strval) > ZPROP_MAX_COMMENT)
5958704186Dan McDonald				error = E2BIG;
5968704186Dan McDonald			break;
5978704186Dan McDonald
598b24ab67Jeff Bonwick		case ZPOOL_PROP_DEDUPDITTO:
599b24ab67Jeff Bonwick			if (spa_version(spa) < SPA_VERSION_DEDUP)
600be6fd75Matthew Ahrens				error = SET_ERROR(ENOTSUP);
601b24ab67Jeff Bonwick			else
602b24ab67Jeff Bonwick				error = nvpair_value_uint64(elem, &intval);
603b24ab67Jeff Bonwick			if (error == 0 &&
604b24ab67Jeff Bonwick			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
605be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
606b24ab67Jeff Bonwick			break;
607990b485lling		}
608990b485lling
609990b485lling		if (error)
610990b485lling			break;
611990b485lling	}
612990b485lling
613990b485lling	if (!error && reset_bootfs) {
614990b485lling		error = nvlist_remove(props,
615990b485lling		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);
616990b485lling
617990b485lling		if (!error) {
618990b485lling			error = nvlist_add_uint64(props,
619990b485lling			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
620990b485lling		}
621990b485lling	}
622990b485lling
623990b485lling	return (error);
624990b485lling}
625990b485lling
626379c004Eric Schrockvoid
627379c004Eric Schrockspa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
628379c004Eric Schrock{
629379c004Eric Schrock	char *cachefile;
630379c004Eric Schrock	spa_config_dirent_t *dp;
631379c004Eric Schrock
632379c004Eric Schrock	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
633379c004Eric Schrock	    &cachefile) != 0)
634379c004Eric Schrock		return;
635379c004Eric Schrock
636379c004Eric Schrock	dp = kmem_alloc(sizeof (spa_config_dirent_t),
637379c004Eric Schrock	    KM_SLEEP);
638379c004Eric Schrock
639379c004Eric Schrock	if (cachefile[0] == '\0')
640379c004Eric Schrock		dp->scd_path = spa_strdup(spa_config_path);
641379c004Eric Schrock	else if (strcmp(cachefile, "none") == 0)
642379c004Eric Schrock		dp->scd_path = NULL;
643379c004Eric Schrock	else
644379c004Eric Schrock		dp->scd_path = spa_strdup(cachefile);
645379c004Eric Schrock
646379c004Eric Schrock	list_insert_head(&spa->spa_config_list, dp);
647379c004Eric Schrock	if (need_sync)
648379c004Eric Schrock		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
649379c004Eric Schrock}
650379c004Eric Schrock
651990b485llingint
652990b485llingspa_prop_set(spa_t *spa, nvlist_t *nvp)
653990b485lling{
654990b485lling	int error;
655ad135b5Christopher Siden	nvpair_t *elem = NULL;
656379c004Eric Schrock	boolean_t need_sync = B_FALSE;
657990b485lling
658990b485lling	if ((error = spa_prop_validate(spa, nvp)) != 0)
659990b485lling		return (error);
660990b485lling
661379c004Eric Schrock	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
662ad135b5Christopher Siden		zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem));
663379c004Eric Schrock
664f9af39bGeorge Wilson		if (prop == ZPOOL_PROP_CACHEFILE ||
665f9af39bGeorge Wilson		    prop == ZPOOL_PROP_ALTROOT ||
666f9af39bGeorge Wilson		    prop == ZPOOL_PROP_READONLY)
667379c004Eric Schrock			continue;
668379c004Eric Schrock
669ad135b5Christopher Siden		if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) {
670ad135b5Christopher Siden			uint64_t ver;
671ad135b5Christopher Siden
672ad135b5Christopher Siden			if (prop == ZPOOL_PROP_VERSION) {
673ad135b5Christopher Siden				VERIFY(nvpair_value_uint64(elem, &ver) == 0);
674ad135b5Christopher Siden			} else {
675ad135b5Christopher Siden				ASSERT(zpool_prop_feature(nvpair_name(elem)));
676ad135b5Christopher Siden				ver = SPA_VERSION_FEATURES;
677ad135b5Christopher Siden				need_sync = B_TRUE;
678ad135b5Christopher Siden			}
679ad135b5Christopher Siden
680ad135b5Christopher Siden			/* Save time if the version is already set. */
681ad135b5Christopher Siden			if (ver == spa_version(spa))
682ad135b5Christopher Siden				continue;
683ad135b5Christopher Siden
684ad135b5Christopher Siden			/*
685ad135b5Christopher Siden			 * In addition to the pool directory object, we might
686ad135b5Christopher Siden			 * create the pool properties object, the features for
687ad135b5Christopher Siden			 * read object, the features for write object, or the
688ad135b5Christopher Siden			 * feature descriptions object.
689ad135b5Christopher Siden			 */
6903b2aab1Matthew Ahrens			error = dsl_sync_task(spa->spa_name, NULL,
6917d46dc6Matthew Ahrens			    spa_sync_version, &ver,
6927d46dc6Matthew Ahrens			    6, ZFS_SPACE_CHECK_RESERVED);
693ad135b5Christopher Siden			if (error)
694ad135b5Christopher Siden				return (error);
695ad135b5Christopher Siden			continue;
696ad135b5Christopher Siden		}
697ad135b5Christopher Siden
698379c004Eric Schrock		need_sync = B_TRUE;
699379c004Eric Schrock		break;
700379c004Eric Schrock	}
701379c004Eric Schrock
702ad135b5Christopher Siden	if (need_sync) {
7033b2aab1Matthew Ahrens		return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props,
7047d46dc6Matthew Ahrens		    nvp, 6, ZFS_SPACE_CHECK_RESERVED));
705ad135b5Christopher Siden	}
706ad135b5Christopher Siden
707ad135b5Christopher Siden	return (0);
708990b485lling}
709990b485lling
710990b485lling/*
711990b485lling * If the bootfs property value is dsobj, clear it.
712990b485lling */
713990b485llingvoid
714990b485llingspa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
715990b485lling{
716990b485lling	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
717990b485lling		VERIFY(zap_remove(spa->spa_meta_objset,
718990b485lling		    spa->spa_pool_props_object,
719990b485lling		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
720990b485lling		spa->spa_bootfs = 0;
721990b485lling	}
722990b485lling}
723990b485lling
724dfbb943George Wilson/*ARGSUSED*/
725dfbb943George Wilsonstatic int
7263b2aab1Matthew Ahrensspa_change_guid_check(void *arg, dmu_tx_t *tx)
727dfbb943George Wilson{
7283b2aab1Matthew Ahrens	uint64_t *newguid = arg;
7293b2aab1Matthew Ahrens	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
730dfbb943George Wilson	vdev_t *rvd = spa->spa_root_vdev;
731dfbb943George Wilson	uint64_t vdev_state;
732dfbb943George Wilson
733dfbb943George Wilson	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
734dfbb943George Wilson	vdev_state = rvd->vdev_state;
735dfbb943George Wilson	spa_config_exit(spa, SCL_STATE, FTAG);
736dfbb943George Wilson
737dfbb943George Wilson	if (vdev_state != VDEV_STATE_HEALTHY)
738be6fd75Matthew Ahrens		return (SET_ERROR(ENXIO));
739dfbb943George Wilson
740dfbb943George Wilson	ASSERT3U(spa_guid(spa), !=, *newguid);
741dfbb943George Wilson
742dfbb943George Wilson	return (0);
743dfbb943George Wilson}
744dfbb943George Wilson
745dfbb943George Wilsonstatic void
7463b2aab1Matthew Ahrensspa_change_guid_sync(void *arg, dmu_tx_t *tx)
747dfbb943George Wilson{
7483b2aab1Matthew Ahrens	uint64_t *newguid = arg;
7493b2aab1Matthew Ahrens	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
750dfbb943George Wilson	uint64_t oldguid;
751dfbb943George Wilson	vdev_t *rvd = spa->spa_root_vdev;
752dfbb943George Wilson
753dfbb943George Wilson	oldguid = spa_guid(spa);
754dfbb943George Wilson
755dfbb943George Wilson	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
756dfbb943George Wilson	rvd->vdev_guid = *newguid;
757dfbb943George Wilson	rvd->vdev_guid_sum += (*newguid - oldguid);
758dfbb943George Wilson	vdev_config_dirty(rvd);
759dfbb943George Wilson	spa_config_exit(spa, SCL_STATE, FTAG);
760dfbb943George Wilson
76120128a0George Wilson	spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu",
762dfbb943George Wilson	    oldguid, *newguid);
763dfbb943George Wilson}
764dfbb943George Wilson
765fa9e406ahrens/*
766e9103aaGarrett D'Amore * Change the GUID for the pool.  This is done so that we can later
767e9103aaGarrett D'Amore * re-import a pool built from a clone of our own vdevs.  We will modify
768e9103aaGarrett D'Amore * the root vdev's guid, our own pool guid, and then mark all of our
769e9103aaGarrett D'Amore * vdevs dirty.  Note that we must make sure that all our vdevs are
770e9103aaGarrett D'Amore * online when we do this, or else any vdevs that weren't present
771e9103aaGarrett D'Amore * would be orphaned from our pool.  We are also going to issue a
772e9103aaGarrett D'Amore * sysevent to update any watchers.
773e9103aaGarrett D'Amore */
774e9103aaGarrett D'Amoreint
775e9103aaGarrett D'Amorespa_change_guid(spa_t *spa)
776e9103aaGarrett D'Amore{
777dfbb943George Wilson	int error;
778dfbb943George Wilson	uint64_t guid;
779e9103aaGarrett D'Amore
7802c1e2b4George Wilson	mutex_enter(&spa->spa_vdev_top_lock);
781dfbb943George Wilson	mutex_enter(&spa_namespace_lock);
782dfbb943George Wilson	guid = spa_generate_guid(NULL);
783e9103aaGarrett D'Amore
7843b2aab1Matthew Ahrens	error = dsl_sync_task(spa->spa_name, spa_change_guid_check,
7857d46dc6Matthew Ahrens	    spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED);
786e9103aaGarrett D'Amore
787dfbb943George Wilson	if (error == 0) {
788dfbb943George Wilson		spa_config_sync(spa, B_FALSE, B_TRUE);
789ce1577bDave Eddy		spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID);
790dfbb943George Wilson	}
791e9103aaGarrett D'Amore
792dfbb943George Wilson	mutex_exit(&spa_namespace_lock);
7932c1e2b4George Wilson	mutex_exit(&spa->spa_vdev_top_lock);
794e9103aaGarrett D'Amore
795dfbb943George Wilson	return (error);
796e9103aaGarrett D'Amore}
797e9103aaGarrett D'Amore
/*
 * ==========================================================================
 * SPA state manipulation (open/create/destroy/import/export)
 * ==========================================================================
 */
803