1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 2199653d4eSeschrock 22fa9e4066Sahrens /* 23379c004dSEric Schrock * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24fa9e4066Sahrens * Use is subject to license terms. 25fa9e4066Sahrens */ 26fa9e4066Sahrens 27fa9e4066Sahrens /* 28fa9e4066Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29fa9e4066Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30fa9e4066Sahrens * pool. 
31fa9e4066Sahrens */ 32fa9e4066Sahrens 33fa9e4066Sahrens #include <sys/zfs_context.h> 34ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h> 35fa9e4066Sahrens #include <sys/spa_impl.h> 36fa9e4066Sahrens #include <sys/zio.h> 37fa9e4066Sahrens #include <sys/zio_checksum.h> 38fa9e4066Sahrens #include <sys/dmu.h> 39fa9e4066Sahrens #include <sys/dmu_tx.h> 40fa9e4066Sahrens #include <sys/zap.h> 41fa9e4066Sahrens #include <sys/zil.h> 42b24ab676SJeff Bonwick #include <sys/ddt.h> 43fa9e4066Sahrens #include <sys/vdev_impl.h> 44fa9e4066Sahrens #include <sys/metaslab.h> 4588ecc943SGeorge Wilson #include <sys/metaslab_impl.h> 46fa9e4066Sahrens #include <sys/uberblock_impl.h> 47fa9e4066Sahrens #include <sys/txg.h> 48fa9e4066Sahrens #include <sys/avl.h> 49fa9e4066Sahrens #include <sys/dmu_traverse.h> 50b1b8ab34Slling #include <sys/dmu_objset.h> 51fa9e4066Sahrens #include <sys/unique.h> 52fa9e4066Sahrens #include <sys/dsl_pool.h> 53b1b8ab34Slling #include <sys/dsl_dataset.h> 54fa9e4066Sahrens #include <sys/dsl_dir.h> 55fa9e4066Sahrens #include <sys/dsl_prop.h> 56b1b8ab34Slling #include <sys/dsl_synctask.h> 57fa9e4066Sahrens #include <sys/fs/zfs.h> 58fa94a07fSbrendan #include <sys/arc.h> 59fa9e4066Sahrens #include <sys/callb.h> 6095173954Sek #include <sys/systeminfo.h> 61e7cbe64fSgw #include <sys/spa_boot.h> 62573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h> 63fa9e4066Sahrens 645679c89fSjv #ifdef _KERNEL 655679c89fSjv #include <sys/zone.h> 66dedec472SJack Meng #include <sys/bootprops.h> 675679c89fSjv #endif /* _KERNEL */ 685679c89fSjv 69990b4856Slling #include "zfs_prop.h" 70b7b97454Sperrin #include "zfs_comutil.h" 71990b4856Slling 722e0c549eSJonathan Adams enum zti_modes { 732e0c549eSJonathan Adams zti_mode_fixed, /* value is # of threads (min 1) */ 742e0c549eSJonathan Adams zti_mode_online_percent, /* value is % of online CPUs */ 752e0c549eSJonathan Adams zti_mode_tune, /* fill from zio_taskq_tune_* */ 76*80eb36f2SGeorge Wilson zti_mode_null, /* don't create a taskq */ 
772e0c549eSJonathan Adams zti_nmodes 78e14bb325SJeff Bonwick }; 79416e0cd8Sek 80*80eb36f2SGeorge Wilson #define ZTI_FIX(n) { zti_mode_fixed, (n) } 81*80eb36f2SGeorge Wilson #define ZTI_PCT(n) { zti_mode_online_percent, (n) } 82*80eb36f2SGeorge Wilson #define ZTI_TUNE { zti_mode_tune, 0 } 83*80eb36f2SGeorge Wilson #define ZTI_NULL { zti_mode_null, 0 } 842e0c549eSJonathan Adams 85*80eb36f2SGeorge Wilson #define ZTI_ONE ZTI_FIX(1) 862e0c549eSJonathan Adams 872e0c549eSJonathan Adams typedef struct zio_taskq_info { 88*80eb36f2SGeorge Wilson enum zti_modes zti_mode; 89*80eb36f2SGeorge Wilson uint_t zti_value; 902e0c549eSJonathan Adams } zio_taskq_info_t; 912e0c549eSJonathan Adams 922e0c549eSJonathan Adams static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 93*80eb36f2SGeorge Wilson "issue", "issue_high", "intr", "intr_high" 942e0c549eSJonathan Adams }; 952e0c549eSJonathan Adams 96*80eb36f2SGeorge Wilson /* 97*80eb36f2SGeorge Wilson * Define the taskq threads for the following I/O types: 98*80eb36f2SGeorge Wilson * NULL, READ, WRITE, FREE, CLAIM, and IOCTL 99*80eb36f2SGeorge Wilson */ 100*80eb36f2SGeorge Wilson const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { 101*80eb36f2SGeorge Wilson /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ 102*80eb36f2SGeorge Wilson { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 103*80eb36f2SGeorge Wilson { ZTI_FIX(8), ZTI_NULL, ZTI_TUNE, ZTI_NULL }, 104*80eb36f2SGeorge Wilson { ZTI_TUNE, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) }, 105*80eb36f2SGeorge Wilson { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 106*80eb36f2SGeorge Wilson { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 107*80eb36f2SGeorge Wilson { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 1082e0c549eSJonathan Adams }; 1092e0c549eSJonathan Adams 1102e0c549eSJonathan Adams enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent; 1112e0c549eSJonathan Adams uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */ 1122e0c549eSJonathan Adams 113990b4856Slling static void 
spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 11489a89ebfSlling static boolean_t spa_has_active_shared_spare(spa_t *spa); 115990b4856Slling 116990b4856Slling /* 117990b4856Slling * ========================================================================== 118990b4856Slling * SPA properties routines 119990b4856Slling * ========================================================================== 120990b4856Slling */ 121990b4856Slling 122990b4856Slling /* 123990b4856Slling * Add a (source=src, propname=propval) list to an nvlist. 124990b4856Slling */ 1259d82f4f6Slling static void 126990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 127990b4856Slling uint64_t intval, zprop_source_t src) 128990b4856Slling { 129990b4856Slling const char *propname = zpool_prop_to_name(prop); 130990b4856Slling nvlist_t *propval; 131990b4856Slling 1329d82f4f6Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1339d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 134990b4856Slling 
/* strval takes precedence; intval is only used when strval is NULL */
1359d82f4f6Slling if (strval != NULL) 1369d82f4f6Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1379d82f4f6Slling else 1389d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 139990b4856Slling 1409d82f4f6Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 141990b4856Slling nvlist_free(propval); 142990b4856Slling } 143990b4856Slling 144990b4856Slling /* 145990b4856Slling * Get property values from the spa configuration. 
146990b4856Slling */ 1479d82f4f6Slling static void 148990b4856Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 149990b4856Slling { 150379c004dSEric Schrock uint64_t size; 151485bbbf5SGeorge Wilson uint64_t alloc; 152990b4856Slling uint64_t cap, version; 153990b4856Slling zprop_source_t src = ZPROP_SRC_NONE; 154c5904d13Seschrock spa_config_dirent_t *dp; 155990b4856Slling 156e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 157e14bb325SJeff Bonwick 158379c004dSEric Schrock if (spa->spa_root_vdev != NULL) { 159485bbbf5SGeorge Wilson alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 160b24ab676SJeff Bonwick size = metaslab_class_get_space(spa_normal_class(spa)); 161379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 162379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 163485bbbf5SGeorge Wilson spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); 164485bbbf5SGeorge Wilson spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, 165485bbbf5SGeorge Wilson size - alloc, src); 166379c004dSEric Schrock 167485bbbf5SGeorge Wilson cap = (size == 0) ? 
0 : (alloc * 100 / size); 168379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 169379c004dSEric Schrock 170b24ab676SJeff Bonwick spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, 171b24ab676SJeff Bonwick ddt_get_pool_dedup_ratio(spa), src); 172b24ab676SJeff Bonwick 173379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 174379c004dSEric Schrock spa->spa_root_vdev->vdev_state, src); 175379c004dSEric Schrock 176379c004dSEric Schrock version = spa_version(spa); 177379c004dSEric Schrock if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 178379c004dSEric Schrock src = ZPROP_SRC_DEFAULT; 179379c004dSEric Schrock else 180379c004dSEric Schrock src = ZPROP_SRC_LOCAL; 181379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 182379c004dSEric Schrock } 183990b4856Slling 1849d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 185990b4856Slling 1869d82f4f6Slling if (spa->spa_root != NULL) 1879d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1889d82f4f6Slling 0, ZPROP_SRC_LOCAL); 189990b4856Slling 
/*
 * cachefile is reported only when it differs from the default
 * spa_config_path; a NULL scd_path means caching is disabled ("none").
 */
190c5904d13Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 191c5904d13Seschrock if (dp->scd_path == NULL) { 1929d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 193c5904d13Seschrock "none", 0, ZPROP_SRC_LOCAL); 194c5904d13Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1959d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 196c5904d13Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1972f8aaab3Seschrock } 1982f8aaab3Seschrock } 199990b4856Slling } 200990b4856Slling 201990b4856Slling /* 202990b4856Slling * Get zpool property values. 
203990b4856Slling */ 204990b4856Slling int 205990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 206990b4856Slling { 207b24ab676SJeff Bonwick objset_t *mos = spa->spa_meta_objset; 208990b4856Slling zap_cursor_t zc; 209990b4856Slling zap_attribute_t za; 210990b4856Slling int err; 211990b4856Slling 2129d82f4f6Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 213990b4856Slling 214e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 215e14bb325SJeff Bonwick 216990b4856Slling /* 217990b4856Slling * Get properties from the spa config. 218990b4856Slling */ 2199d82f4f6Slling spa_prop_get_config(spa, nvp); 220990b4856Slling 221990b4856Slling /* If no pool property object, no more prop to get. */ 222990b4856Slling if (spa->spa_pool_props_object == 0) { 223990b4856Slling mutex_exit(&spa->spa_props_lock); 224990b4856Slling return (0); 225990b4856Slling } 226990b4856Slling 227990b4856Slling /* 228990b4856Slling * Get properties from the MOS pool property object. 229990b4856Slling */ 230990b4856Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 231990b4856Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 232990b4856Slling zap_cursor_advance(&zc)) { 233990b4856Slling uint64_t intval = 0; 234990b4856Slling char *strval = NULL; 235990b4856Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 236990b4856Slling zpool_prop_t prop; 237990b4856Slling 238990b4856Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 239990b4856Slling continue; 240990b4856Slling 241990b4856Slling switch (za.za_integer_length) { 242990b4856Slling case 8: 243990b4856Slling /* integer property */ 244990b4856Slling if (za.za_first_integer != 245990b4856Slling zpool_prop_default_numeric(prop)) 246990b4856Slling src = ZPROP_SRC_LOCAL; 247990b4856Slling 248990b4856Slling if (prop == ZPOOL_PROP_BOOTFS) { 249990b4856Slling dsl_pool_t *dp; 250990b4856Slling dsl_dataset_t *ds = NULL; 251990b4856Slling 252990b4856Slling dp = spa_get_dsl(spa); 253990b4856Slling 
/* bootfs is stored as a dataset object number; translate to its name */
rw_enter(&dp->dp_config_rwlock, RW_READER); 254745cd3c5Smaybee if (err = dsl_dataset_hold_obj(dp, 255745cd3c5Smaybee za.za_first_integer, FTAG, &ds)) { 256990b4856Slling rw_exit(&dp->dp_config_rwlock); 257990b4856Slling break; 258990b4856Slling } 259990b4856Slling 260990b4856Slling strval = kmem_alloc( 261990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 262990b4856Slling KM_SLEEP); 263990b4856Slling dsl_dataset_name(ds, strval); 264745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 265990b4856Slling rw_exit(&dp->dp_config_rwlock); 266990b4856Slling } else { 267990b4856Slling strval = NULL; 268990b4856Slling intval = za.za_first_integer; 269990b4856Slling } 270990b4856Slling 2719d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 272990b4856Slling 273990b4856Slling if (strval != NULL) 274990b4856Slling kmem_free(strval, 275990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 276990b4856Slling 277990b4856Slling break; 278990b4856Slling 279990b4856Slling case 1: 280990b4856Slling /* string property */ 281990b4856Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 282990b4856Slling err = zap_lookup(mos, spa->spa_pool_props_object, 283990b4856Slling za.za_name, 1, za.za_num_integers, strval); 284990b4856Slling if (err) { 285990b4856Slling kmem_free(strval, za.za_num_integers); 286990b4856Slling break; 287990b4856Slling } 2889d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 289990b4856Slling kmem_free(strval, za.za_num_integers); 290990b4856Slling break; 291990b4856Slling 292990b4856Slling default: 293990b4856Slling break; 294990b4856Slling } 295990b4856Slling } 296990b4856Slling zap_cursor_fini(&zc); 297990b4856Slling mutex_exit(&spa->spa_props_lock); 
/*
 * NOTE(review): no goto targets this label in the visible code; it is
 * reached only by fall-through.  err here is the final value from
 * zap_cursor_retrieve() (or a lookup error from the last iteration);
 * ENOENT simply means the cursor ran off the end of the ZAP.
 */
298990b4856Slling out: 299990b4856Slling if (err && err != ENOENT) { 300990b4856Slling nvlist_free(*nvp); 3019d82f4f6Slling *nvp = NULL; 302990b4856Slling return (err); 303990b4856Slling } 304990b4856Slling 305990b4856Slling return (0); 306990b4856Slling } 307990b4856Slling 308990b4856Slling 
/* 309990b4856Slling * Validate the given pool properties nvlist and modify the list 310990b4856Slling * for the property values to be set. 311990b4856Slling */ 312990b4856Slling static int 313990b4856Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 314990b4856Slling { 315990b4856Slling nvpair_t *elem; 316990b4856Slling int error = 0, reset_bootfs = 0; 317990b4856Slling uint64_t objnum; 318990b4856Slling 319990b4856Slling elem = NULL; 320990b4856Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 321990b4856Slling zpool_prop_t prop; 322990b4856Slling char *propname, *strval; 323990b4856Slling uint64_t intval; 324990b4856Slling objset_t *os; 3252f8aaab3Seschrock char *slash; 326990b4856Slling 327990b4856Slling propname = nvpair_name(elem); 328990b4856Slling 329990b4856Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 330990b4856Slling return (EINVAL); 331990b4856Slling 332990b4856Slling switch (prop) { 333990b4856Slling case ZPOOL_PROP_VERSION: 
/* version may only move forward, and never past SPA_VERSION */
334990b4856Slling error = nvpair_value_uint64(elem, &intval); 335990b4856Slling if (!error && 336990b4856Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 337990b4856Slling error = EINVAL; 338990b4856Slling break; 339990b4856Slling 
/* boolean properties: only 0 or 1 are legal values */
340990b4856Slling case ZPOOL_PROP_DELEGATION: 341990b4856Slling case ZPOOL_PROP_AUTOREPLACE: 342d5b5bb25SRich Morris case ZPOOL_PROP_LISTSNAPS: 343573ca77eSGeorge Wilson case ZPOOL_PROP_AUTOEXPAND: 344990b4856Slling error = nvpair_value_uint64(elem, &intval); 345990b4856Slling if (!error && intval > 1) 346990b4856Slling error = EINVAL; 347990b4856Slling break; 348990b4856Slling 349990b4856Slling case ZPOOL_PROP_BOOTFS: 35025f89ee2SJeff Bonwick /* 35125f89ee2SJeff Bonwick * If the pool version is less than SPA_VERSION_BOOTFS, 35225f89ee2SJeff Bonwick * or the pool is still being created (version == 0), 35325f89ee2SJeff Bonwick * the bootfs property cannot be set. 
35425f89ee2SJeff Bonwick */ 355990b4856Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 356990b4856Slling error = ENOTSUP; 357990b4856Slling break; 358990b4856Slling } 359990b4856Slling 360990b4856Slling /* 36115e6edf1Sgw * Make sure the vdev config is bootable 362990b4856Slling */ 36315e6edf1Sgw if (!vdev_is_bootable(spa->spa_root_vdev)) { 364990b4856Slling error = ENOTSUP; 365990b4856Slling break; 366990b4856Slling } 367990b4856Slling 368990b4856Slling reset_bootfs = 1; 369990b4856Slling 370990b4856Slling error = nvpair_value_string(elem, &strval); 371990b4856Slling 372990b4856Slling if (!error) { 37315e6edf1Sgw uint64_t compress; 37415e6edf1Sgw 
/* empty string means "clear bootfs back to its default" */
375990b4856Slling if (strval == NULL || strval[0] == '\0') { 376990b4856Slling objnum = zpool_prop_default_numeric( 377990b4856Slling ZPOOL_PROP_BOOTFS); 378990b4856Slling break; 379990b4856Slling } 380990b4856Slling 381503ad85cSMatthew Ahrens if (error = dmu_objset_hold(strval, FTAG, &os)) 382990b4856Slling break; 38315e6edf1Sgw 384503ad85cSMatthew Ahrens /* Must be ZPL and not gzip compressed. 
*/ 385503ad85cSMatthew Ahrens 386503ad85cSMatthew Ahrens if (dmu_objset_type(os) != DMU_OST_ZFS) { 387503ad85cSMatthew Ahrens error = ENOTSUP; 388503ad85cSMatthew Ahrens } else if ((error = dsl_prop_get_integer(strval, 38915e6edf1Sgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 39015e6edf1Sgw &compress, NULL)) == 0 && 39115e6edf1Sgw !BOOTFS_COMPRESS_VALID(compress)) { 39215e6edf1Sgw error = ENOTSUP; 39315e6edf1Sgw } else { 39415e6edf1Sgw objnum = dmu_objset_id(os); 39515e6edf1Sgw } 396503ad85cSMatthew Ahrens dmu_objset_rele(os, FTAG); 397990b4856Slling } 398990b4856Slling break; 399e14bb325SJeff Bonwick 4000a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 4010a4e9518Sgw error = nvpair_value_uint64(elem, &intval); 4020a4e9518Sgw if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 4030a4e9518Sgw intval > ZIO_FAILURE_MODE_PANIC)) 4040a4e9518Sgw error = EINVAL; 4050a4e9518Sgw 4060a4e9518Sgw /* 4070a4e9518Sgw * This is a special case which only occurs when 4080a4e9518Sgw * the pool has completely failed. This allows 4090a4e9518Sgw * the user to change the in-core failmode property 4100a4e9518Sgw * without syncing it out to disk (I/Os might 4110a4e9518Sgw * currently be blocked). We do this by returning 4120a4e9518Sgw * EIO to the caller (spa_prop_set) to trick it 4130a4e9518Sgw * into thinking we encountered a property validation 4140a4e9518Sgw * error. 
4150a4e9518Sgw */ 416e14bb325SJeff Bonwick if (!error && spa_suspended(spa)) { 4170a4e9518Sgw spa->spa_failmode = intval; 4180a4e9518Sgw error = EIO; 4190a4e9518Sgw } 4200a4e9518Sgw break; 4212f8aaab3Seschrock 4222f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 4232f8aaab3Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4242f8aaab3Seschrock break; 4252f8aaab3Seschrock 4262f8aaab3Seschrock if (strval[0] == '\0') 4272f8aaab3Seschrock break; 4282f8aaab3Seschrock 4292f8aaab3Seschrock if (strcmp(strval, "none") == 0) 4302f8aaab3Seschrock break; 4312f8aaab3Seschrock 
/* otherwise the cachefile must be an absolute path */
4322f8aaab3Seschrock if (strval[0] != '/') { 4332f8aaab3Seschrock error = EINVAL; 4342f8aaab3Seschrock break; 4352f8aaab3Seschrock } 4362f8aaab3Seschrock 4372f8aaab3Seschrock slash = strrchr(strval, '/'); 4382f8aaab3Seschrock ASSERT(slash != NULL); 4392f8aaab3Seschrock 4402f8aaab3Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4412f8aaab3Seschrock strcmp(slash, "/..") == 0) 4422f8aaab3Seschrock error = EINVAL; 4432f8aaab3Seschrock break; 444b24ab676SJeff Bonwick 445b24ab676SJeff Bonwick case ZPOOL_PROP_DEDUPDITTO: 446b24ab676SJeff Bonwick if (spa_version(spa) < SPA_VERSION_DEDUP) 447b24ab676SJeff Bonwick error = ENOTSUP; 448b24ab676SJeff Bonwick else 449b24ab676SJeff Bonwick error = nvpair_value_uint64(elem, &intval); 450b24ab676SJeff Bonwick if (error == 0 && 451b24ab676SJeff Bonwick intval != 0 && intval < ZIO_DEDUPDITTO_MIN) 452b24ab676SJeff Bonwick error = EINVAL; 453b24ab676SJeff Bonwick break; 
/* properties with no case above need no extra validation here */
454990b4856Slling } 455990b4856Slling 456990b4856Slling if (error) 457990b4856Slling break; 458990b4856Slling } 459990b4856Slling 
/*
 * Replace the bootfs name pair with the dataset object number
 * resolved above, so spa_sync_props() can store it as an integer.
 */
460990b4856Slling if (!error && reset_bootfs) { 461990b4856Slling error = nvlist_remove(props, 462990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 463990b4856Slling 464990b4856Slling if (!error) { 465990b4856Slling error = nvlist_add_uint64(props, 466990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 467990b4856Slling } 
468990b4856Slling } 469990b4856Slling 470990b4856Slling return (error); 471990b4856Slling } 472990b4856Slling 
/*
 * Record a new cachefile setting on spa_config_list.  An empty string
 * restores the default spa_config_path; "none" (scd_path == NULL)
 * disables the cache file.  If need_sync is set, schedule an async
 * config update to write the change out.
 */
473379c004dSEric Schrock void 474379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 475379c004dSEric Schrock { 476379c004dSEric Schrock char *cachefile; 477379c004dSEric Schrock spa_config_dirent_t *dp; 478379c004dSEric Schrock 479379c004dSEric Schrock if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 480379c004dSEric Schrock &cachefile) != 0) 481379c004dSEric Schrock return; 482379c004dSEric Schrock 483379c004dSEric Schrock dp = kmem_alloc(sizeof (spa_config_dirent_t), 484379c004dSEric Schrock KM_SLEEP); 485379c004dSEric Schrock 486379c004dSEric Schrock if (cachefile[0] == '\0') 487379c004dSEric Schrock dp->scd_path = spa_strdup(spa_config_path); 488379c004dSEric Schrock else if (strcmp(cachefile, "none") == 0) 489379c004dSEric Schrock dp->scd_path = NULL; 490379c004dSEric Schrock else 491379c004dSEric Schrock dp->scd_path = spa_strdup(cachefile); 492379c004dSEric Schrock 493379c004dSEric Schrock list_insert_head(&spa->spa_config_list, dp); 494379c004dSEric Schrock if (need_sync) 495379c004dSEric Schrock spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 496379c004dSEric Schrock } 497379c004dSEric Schrock 
/*
 * Validate and apply pool properties.  Properties other than cachefile
 * and altroot must be written to the MOS, so they are applied through a
 * spa_sync_props() DSL sync task; if only cachefile/altroot are present
 * there is nothing to sync and we return success immediately.
 */
498990b4856Slling int 499990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 500990b4856Slling { 501990b4856Slling int error; 502379c004dSEric Schrock nvpair_t *elem; 503379c004dSEric Schrock boolean_t need_sync = B_FALSE; 504379c004dSEric Schrock zpool_prop_t prop; 505990b4856Slling 506990b4856Slling if ((error = spa_prop_validate(spa, nvp)) != 0) 507990b4856Slling return (error); 508990b4856Slling 509379c004dSEric Schrock elem = NULL; 510379c004dSEric Schrock while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 511379c004dSEric Schrock if ((prop = zpool_name_to_prop( 512379c004dSEric Schrock nvpair_name(elem))) == ZPROP_INVAL) 513379c004dSEric Schrock return (EINVAL); 514379c004dSEric 
Schrock 515379c004dSEric Schrock if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 516379c004dSEric Schrock continue; 517379c004dSEric Schrock 518379c004dSEric Schrock need_sync = B_TRUE; 519379c004dSEric Schrock break; 520379c004dSEric Schrock } 521379c004dSEric Schrock 522379c004dSEric Schrock if (need_sync) 523379c004dSEric Schrock return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 524379c004dSEric Schrock spa, nvp, 3)); 525379c004dSEric Schrock else 526379c004dSEric Schrock return (0); 527990b4856Slling } 528990b4856Slling 529990b4856Slling /* 530990b4856Slling * If the bootfs property value is dsobj, clear it. 531990b4856Slling */ 532990b4856Slling void 533990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 534990b4856Slling { 535990b4856Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 536990b4856Slling VERIFY(zap_remove(spa->spa_meta_objset, 537990b4856Slling spa->spa_pool_props_object, 538990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 539990b4856Slling spa->spa_bootfs = 0; 540990b4856Slling } 541990b4856Slling } 542990b4856Slling 543fa9e4066Sahrens /* 544fa9e4066Sahrens * ========================================================================== 545fa9e4066Sahrens * SPA state manipulation (open/create/destroy/import/export) 546fa9e4066Sahrens * ========================================================================== 547fa9e4066Sahrens */ 548fa9e4066Sahrens 
/*
 * AVL comparator for the spa error lists: orders spa_error_entry_t
 * nodes by a raw bcmp() of their bookmarks.
 */
549ea8dc4b6Seschrock static int 550ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b) 551ea8dc4b6Seschrock { 552ea8dc4b6Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 553ea8dc4b6Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 554ea8dc4b6Seschrock int ret; 555ea8dc4b6Seschrock 556ea8dc4b6Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 557ea8dc4b6Seschrock sizeof (zbookmark_t)); 558ea8dc4b6Seschrock 559ea8dc4b6Seschrock if (ret < 0) 560ea8dc4b6Seschrock return 
(-1); 561ea8dc4b6Seschrock else if (ret > 0) 562ea8dc4b6Seschrock return (1); 563ea8dc4b6Seschrock else 564ea8dc4b6Seschrock return (0); 565ea8dc4b6Seschrock } 566ea8dc4b6Seschrock 567ea8dc4b6Seschrock /* 568ea8dc4b6Seschrock * Utility function which retrieves copies of the current logs and 569ea8dc4b6Seschrock * re-initializes them in the process. 570ea8dc4b6Seschrock */ 571ea8dc4b6Seschrock void 572ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 573ea8dc4b6Seschrock { 574ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 575ea8dc4b6Seschrock 576ea8dc4b6Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 577ea8dc4b6Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 578ea8dc4b6Seschrock 
/* the caller now owns the old trees; start the spa's lists fresh */
579ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 580ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 581ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 582ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 583ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 584ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 585ea8dc4b6Seschrock } 586ea8dc4b6Seschrock 587fa9e4066Sahrens /* 588fa9e4066Sahrens * Activate an uninitialized pool. 
589fa9e4066Sahrens */ 590fa9e4066Sahrens static void 5918ad4d6ddSJeff Bonwick spa_activate(spa_t *spa, int mode) 592fa9e4066Sahrens { 593fa9e4066Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 594fa9e4066Sahrens 595fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 5968ad4d6ddSJeff Bonwick spa->spa_mode = mode; 597fa9e4066Sahrens 59888ecc943SGeorge Wilson spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 59988ecc943SGeorge Wilson spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 600fa9e4066Sahrens 
/*
 * Create the per-(I/O type, taskq type) taskqs as described by the
 * zio_taskqs table; zti_mode_tune entries are first resolved via the
 * zio_taskq_tune_* tunables.
 */
601e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 602e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 603*80eb36f2SGeorge Wilson const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; 604*80eb36f2SGeorge Wilson enum zti_modes mode = ztip->zti_mode; 605*80eb36f2SGeorge Wilson uint_t value = ztip->zti_value; 6062e0c549eSJonathan Adams char name[32]; 6072e0c549eSJonathan Adams 6082e0c549eSJonathan Adams (void) snprintf(name, sizeof (name), 609*80eb36f2SGeorge Wilson "%s_%s", zio_type_name[t], zio_taskq_types[q]); 6102e0c549eSJonathan Adams 6112e0c549eSJonathan Adams if (mode == zti_mode_tune) { 6122e0c549eSJonathan Adams mode = zio_taskq_tune_mode; 6132e0c549eSJonathan Adams value = zio_taskq_tune_value; 
/* guard against the tunable itself being set to "tune" */
6142e0c549eSJonathan Adams if (mode == zti_mode_tune) 6152e0c549eSJonathan Adams mode = zti_mode_online_percent; 6162e0c549eSJonathan Adams } 6172e0c549eSJonathan Adams 6182e0c549eSJonathan Adams switch (mode) { 6192e0c549eSJonathan Adams case zti_mode_fixed: 
/* ASSERT catches bad tables in DEBUG; MAX clamps otherwise */
6202e0c549eSJonathan Adams ASSERT3U(value, >=, 1); 6212e0c549eSJonathan Adams value = MAX(value, 1); 6222e0c549eSJonathan Adams 6232e0c549eSJonathan Adams spa->spa_zio_taskq[t][q] = taskq_create(name, 6242e0c549eSJonathan Adams value, maxclsyspri, 50, INT_MAX, 6252e0c549eSJonathan Adams TASKQ_PREPOPULATE); 6262e0c549eSJonathan Adams break; 6272e0c549eSJonathan Adams 6282e0c549eSJonathan Adams case zti_mode_online_percent: 6292e0c549eSJonathan 
Adams spa->spa_zio_taskq[t][q] = taskq_create(name, 6302e0c549eSJonathan Adams value, maxclsyspri, 50, INT_MAX, 6312e0c549eSJonathan Adams TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 6322e0c549eSJonathan Adams break; 6332e0c549eSJonathan Adams 634*80eb36f2SGeorge Wilson case zti_mode_null: 635*80eb36f2SGeorge Wilson spa->spa_zio_taskq[t][q] = NULL; 636*80eb36f2SGeorge Wilson break; 637*80eb36f2SGeorge Wilson 6382e0c549eSJonathan Adams case zti_mode_tune: 6392e0c549eSJonathan Adams default: 6402e0c549eSJonathan Adams panic("unrecognized mode for " 6412e0c549eSJonathan Adams "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) " 6422e0c549eSJonathan Adams "in spa_activate()", 6432e0c549eSJonathan Adams t, q, mode, value); 6442e0c549eSJonathan Adams break; 6452e0c549eSJonathan Adams } 646e14bb325SJeff Bonwick } 647fa9e4066Sahrens } 648fa9e4066Sahrens 649e14bb325SJeff Bonwick list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 650e14bb325SJeff Bonwick offsetof(vdev_t, vdev_config_dirty_node)); 651e14bb325SJeff Bonwick list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 652e14bb325SJeff Bonwick offsetof(vdev_t, vdev_state_dirty_node)); 653fa9e4066Sahrens 654fa9e4066Sahrens txg_list_create(&spa->spa_vdev_txg_list, 655fa9e4066Sahrens offsetof(struct vdev, vdev_txg_node)); 656ea8dc4b6Seschrock 657ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 658ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 659ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 660ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 661ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 662ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 663fa9e4066Sahrens } 664fa9e4066Sahrens 665fa9e4066Sahrens /* 666fa9e4066Sahrens * Opposite of spa_activate(). 
667fa9e4066Sahrens */ 668fa9e4066Sahrens static void 669fa9e4066Sahrens spa_deactivate(spa_t *spa) 670fa9e4066Sahrens { 671fa9e4066Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 672fa9e4066Sahrens ASSERT(spa->spa_dsl_pool == NULL); 673fa9e4066Sahrens ASSERT(spa->spa_root_vdev == NULL); 67425f89ee2SJeff Bonwick ASSERT(spa->spa_async_zio_root == NULL); 675fa9e4066Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 676fa9e4066Sahrens 677fa9e4066Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 678fa9e4066Sahrens 679e14bb325SJeff Bonwick list_destroy(&spa->spa_config_dirty_list); 680e14bb325SJeff Bonwick list_destroy(&spa->spa_state_dirty_list); 681fa9e4066Sahrens 
/* destroy the I/O taskqs; ZTI_NULL entries were never created (NULL) */
682e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 683e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 684*80eb36f2SGeorge Wilson if (spa->spa_zio_taskq[t][q] != NULL) 685*80eb36f2SGeorge Wilson taskq_destroy(spa->spa_zio_taskq[t][q]); 686e14bb325SJeff Bonwick spa->spa_zio_taskq[t][q] = NULL; 687e14bb325SJeff Bonwick } 688fa9e4066Sahrens } 689fa9e4066Sahrens 690fa9e4066Sahrens metaslab_class_destroy(spa->spa_normal_class); 691fa9e4066Sahrens spa->spa_normal_class = NULL; 692fa9e4066Sahrens 6938654d025Sperrin metaslab_class_destroy(spa->spa_log_class); 6948654d025Sperrin spa->spa_log_class = NULL; 6958654d025Sperrin 696ea8dc4b6Seschrock /* 697ea8dc4b6Seschrock * If this was part of an import or the open otherwise failed, we may 698ea8dc4b6Seschrock * still have errors left in the queues. Empty them just in case. 699ea8dc4b6Seschrock */ 700ea8dc4b6Seschrock spa_errlog_drain(spa); 701ea8dc4b6Seschrock 702ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_scrub); 703ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_last); 704ea8dc4b6Seschrock 705fa9e4066Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 706fa9e4066Sahrens } 707fa9e4066Sahrens 708fa9e4066Sahrens /* 709fa9e4066Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. 
This 710fa9e4066Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 711fa9e4066Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 712fa9e4066Sahrens * All vdev validation is done by the vdev_alloc() routine. 713fa9e4066Sahrens */ 71499653d4eSeschrock static int 71599653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 71699653d4eSeschrock uint_t id, int atype) 717fa9e4066Sahrens { 718fa9e4066Sahrens nvlist_t **child; 719573ca77eSGeorge Wilson uint_t children; 72099653d4eSeschrock int error; 721fa9e4066Sahrens 72299653d4eSeschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 72399653d4eSeschrock return (error); 724fa9e4066Sahrens 
/* leaf vdevs have no children; nothing more to parse */
72599653d4eSeschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 72699653d4eSeschrock return (0); 727fa9e4066Sahrens 728e14bb325SJeff Bonwick error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 729e14bb325SJeff Bonwick &child, &children); 730e14bb325SJeff Bonwick 
/* an interior vdev with no child array in the nvlist is acceptable */
731e14bb325SJeff Bonwick if (error == ENOENT) 732e14bb325SJeff Bonwick return (0); 733e14bb325SJeff Bonwick 734e14bb325SJeff Bonwick if (error) { 73599653d4eSeschrock vdev_free(*vdp); 73699653d4eSeschrock *vdp = NULL; 73799653d4eSeschrock return (EINVAL); 738fa9e4066Sahrens } 739fa9e4066Sahrens 
/* recurse on each child; on failure free the whole subtree built so far */
740573ca77eSGeorge Wilson for (int c = 0; c < children; c++) { 74199653d4eSeschrock vdev_t *vd; 74299653d4eSeschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 74399653d4eSeschrock atype)) != 0) { 74499653d4eSeschrock vdev_free(*vdp); 74599653d4eSeschrock *vdp = NULL; 74699653d4eSeschrock return (error); 747fa9e4066Sahrens } 748fa9e4066Sahrens } 749fa9e4066Sahrens 75099653d4eSeschrock ASSERT(*vdp != NULL); 75199653d4eSeschrock 75299653d4eSeschrock return (0); 753fa9e4066Sahrens } 754fa9e4066Sahrens 755fa9e4066Sahrens /* 756fa9e4066Sahrens * Opposite of spa_load(). 
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding async I/O to complete.
	 */
	if (spa->spa_async_zio_root != NULL) {
		(void) zio_wait(spa->spa_async_zio_root);
		spa->spa_async_zio_root = NULL;
	}

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	ddt_unload(spa);

	/*
	 * Everything below tears down vdev state, so take all of the
	 * config locks as writer until we're done.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/*
	 * Free the spare vdev list, its backing array, and its config.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}
	spa->spa_spares.sav_count = 0;

	/*
	 * Likewise for the l2cache vdev list.
	 */
	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}
	spa->spa_l2cache.sav_count = 0;

	spa->spa_async_suspended = 0;

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_spares.sav_config'.
 * We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL && tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.   For each spare, there is potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in the
	 * active configuration, then we also mark this vdev as an active spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev.  Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pool would think the spare
			 * is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		/* An aux vdev is its own top-level vdev. */
		vd->vdev_top = vd;
		vd->vdev_aux = &spa->spa_spares;

		if (vdev_open(vd) != 0)
			continue;

		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}

/*
 * Load (or re-load) the current list of vdevs describing the active l2cache for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 * Devices which are already active have their details maintained, and are
 * not re-opened.
 */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		nl2cache = 0;
	}

	/* Detach the old list so we can diff it against the new config. */
	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			/* An aux vdev is its own top-level vdev. */
			vd->vdev_top = vd;
			vd->vdev_aux = sav;

			spa_l2cache_activate(vd);

			if (vdev_open(vd) != 0)
				continue;

			(void) vdev_validate_aux(vd);

			if (!vdev_is_dead(vd))
				l2arc_add_vdev(spa, vd);
		}
	}

	/*
	 * Purge vdevs that were dropped
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL && l2arc_vdev_present(vd))
				l2arc_remove_vdev(vd);
			(void) vdev_close(vd);
			spa_l2cache_remove(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	/*
	 * If we jumped here because sav_config was NULL, sav_count is still
	 * zero (set above), so neither the free loop nor the kmem_free runs.
	 */
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}

/*
 * Read the packed nvlist stored in MOS object 'obj' and unpack it into
 * *value.  The object's bonus buffer holds the packed size.  *value is
 * initialized to NULL and is only set on successful unpack; returns 0 or
 * the error from the read/unpack.
 */
static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	/* The bonus buffer contains the size of the packed nvlist. */
	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
	    DMU_READ_PREFETCH);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}

/*
 * Checks to see if the given vdev could not be opened, in which case we post a
 * sysevent to notify the autoreplace code that the device has been removed.
 * Recurses depth-first over the whole subtree rooted at 'vd'.
 */
static void
spa_check_removed(vdev_t *vd)
{
	for (int c = 0; c < vd->vdev_children; c++)
		spa_check_removed(vd->vdev_child[c]);

	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
		zfs_post_autoreplace(vd->vdev_spa, vd);
		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
	}
}

/*
 * Load the slog device state from the config object since it's possible
 * that the label does not contain the most up-to-date information.
 */
void
spa_load_log_state(spa_t *spa, nvlist_t *nv)
{
	vdev_t *ovd, *rvd = spa->spa_root_vdev;

	/*
	 * Load the original root vdev tree from the passed config.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);

	/*
	 * Copy the log state from each log child of the parsed tree onto the
	 * corresponding child of the live root vdev, then discard the copy.
	 */
	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *cvd = rvd->vdev_child[c];
		if (cvd->vdev_islog)
			vdev_load_log_state(cvd, ovd->vdev_child[c]);
	}
	vdev_free(ovd);
	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Check for missing log devices.  Returns 1 (and latches SPA_LOG_MISSING)
 * if a log chain check fails, 0 otherwise.
 */
int
spa_check_logs(spa_t *spa)
{
	switch (spa->spa_log_state) {
	case SPA_LOG_MISSING:
		/* need to recheck in case slog has been restored */
		/* FALLTHROUGH */
	case SPA_LOG_UNKNOWN:
		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
		    DS_FIND_CHILDREN)) {
			spa->spa_log_state = SPA_LOG_MISSING;
			return (1);
		}
		break;
	}
	return (0);
}

/*
 * Run the removal check over every vdev in an aux (spare/l2cache) list.
 */
static void
spa_aux_check_removed(spa_aux_vdev_t *sav)
{
	for (int i = 0; i < sav->sav_count; i++)
		spa_check_removed(sav->sav_vdevs[i]);
}

/*
 * zio completion callback used during claiming: record the highest block
 * birth txg seen among successfully claimed blocks in spa_claim_max_txg.
 */
void
spa_claim_notify(zio_t *zio)
{
	spa_t *spa = zio->io_spa;

	if (zio->io_error)
		return;

	mutex_enter(&spa->spa_props_lock);	/* any mutex will do */
	if (spa->spa_claim_max_txg < zio->io_bp->blk_birth)
		spa->spa_claim_max_txg = zio->io_bp->blk_birth;
	mutex_exit(&spa->spa_props_lock);
}

/*
 * Error tallies accumulated (atomically) by spa_load_verify_done().
 */
typedef struct spa_load_error {
	uint64_t	sle_metadata_count;	/* failed metadata reads */
	uint64_t	sle_data_count;		/* failed plain-data reads */
} spa_load_error_t;

/*
 * Completion callback for the verification reads issued by
 * spa_load_verify_cb(): classify any read error as metadata or data
 * (intent log blocks count as data), then free the temporary buffer.
 */
static void
spa_load_verify_done(zio_t *zio)
{
	blkptr_t *bp = zio->io_bp;
	spa_load_error_t *sle = zio->io_private;
	dmu_object_type_t type = BP_GET_TYPE(bp);
	int error = zio->io_error;

	if (error) {
		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
		    type != DMU_OT_INTENT_LOG)
			atomic_add_64(&sle->sle_metadata_count, 1);
		else
			atomic_add_64(&sle->sle_data_count, 1);
	}
	zio_data_buf_free(zio->io_data, zio->io_size);
}

/*
 * traverse_pool() callback: issue an async, speculative scrub read for each
 * block pointer; errors are tallied by spa_load_verify_done().  'arg' is the
 * root zio created in spa_load_verify().
 */
/*ARGSUSED*/
static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	if (bp != NULL) {
		zio_t *rio = arg;
		size_t size = BP_GET_PSIZE(bp);
		void *data = zio_data_buf_alloc(size);

		zio_nowait(zio_read(rio, spa, bp, data, size,
		    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
		    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
		    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
	}
	return (0);
}

/*
 * Walk the pool from spa_verify_min_txg, reading every block, and decide
 * whether the error counts fall within the rewind policy's limits.  On
 * success records the load txg/timestamp; otherwise records the maximum
 * rewind txg.  Returns 0 if verification passed, else an errno.
 */
static int
spa_load_verify(spa_t *spa)
{
	zio_t *rio;
	spa_load_error_t sle = { 0 };
	zpool_rewind_policy_t policy;
	boolean_t verify_ok = B_FALSE;
	int error;

	rio = zio_root(spa, NULL, &sle,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);

	error = traverse_pool(spa, spa->spa_verify_min_txg,
	    TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);

	/* Wait for all the async verification reads to complete. */
	(void) zio_wait(rio);

	zpool_get_rewind_policy(spa->spa_config, &policy);

	spa->spa_load_meta_errors = sle.sle_metadata_count;
	spa->spa_load_data_errors = sle.sle_data_count;

	if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta &&
	    sle.sle_data_count <= policy.zrp_maxdata) {
		verify_ok = B_TRUE;
		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
	} else {
		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	}

	if (error) {
		/* Normalize unexpected traversal errors to EIO. */
		if (error != ENXIO && error != EIO)
Haley error = EIO; 1263468c413aSTim Haley return (error); 1264468c413aSTim Haley } 1265468c413aSTim Haley 1266468c413aSTim Haley return (verify_ok ? 0 : EIO); 1267468c413aSTim Haley } 1268468c413aSTim Haley 1269fa9e4066Sahrens /* 1270fa9e4066Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 1271ea8dc4b6Seschrock * source of configuration information. 1272fa9e4066Sahrens */ 1273fa9e4066Sahrens static int 1274468c413aSTim Haley spa_load(spa_t *spa, spa_load_state_t state, int mosconfig) 1275fa9e4066Sahrens { 1276fa9e4066Sahrens int error = 0; 127788ecc943SGeorge Wilson nvlist_t *nvconfig, *nvroot = NULL; 1278fa9e4066Sahrens vdev_t *rvd; 1279fa9e4066Sahrens uberblock_t *ub = &spa->spa_uberblock; 12800373e76bSbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1281fa9e4066Sahrens uint64_t pool_guid; 128299653d4eSeschrock uint64_t version; 12833d7072f8Seschrock uint64_t autoreplace = 0; 12848ad4d6ddSJeff Bonwick int orig_mode = spa->spa_mode; 1285b87f3af3Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1286468c413aSTim Haley nvlist_t *config = spa->spa_config; 1287fa9e4066Sahrens 12888ad4d6ddSJeff Bonwick /* 12898ad4d6ddSJeff Bonwick * If this is an untrusted config, access the pool in read-only mode. 12908ad4d6ddSJeff Bonwick * This prevents things like resilvering recently removed devices. 
12918ad4d6ddSJeff Bonwick */ 12928ad4d6ddSJeff Bonwick if (!mosconfig) 12938ad4d6ddSJeff Bonwick spa->spa_mode = FREAD; 12948ad4d6ddSJeff Bonwick 1295e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1296e14bb325SJeff Bonwick 1297ea8dc4b6Seschrock spa->spa_load_state = state; 12980373e76bSbonwick 1299fa9e4066Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1300a9926bf0Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1301ea8dc4b6Seschrock error = EINVAL; 1302ea8dc4b6Seschrock goto out; 1303ea8dc4b6Seschrock } 1304fa9e4066Sahrens 130599653d4eSeschrock /* 130699653d4eSeschrock * Versioning wasn't explicitly added to the label until later, so if 130799653d4eSeschrock * it's not present treat it as the initial version. 130899653d4eSeschrock */ 130999653d4eSeschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1310e7437265Sahrens version = SPA_VERSION_INITIAL; 131199653d4eSeschrock 1312a9926bf0Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1313a9926bf0Sbonwick &spa->spa_config_txg); 1314a9926bf0Sbonwick 13150373e76bSbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1316ea8dc4b6Seschrock spa_guid_exists(pool_guid, 0)) { 1317ea8dc4b6Seschrock error = EEXIST; 1318ea8dc4b6Seschrock goto out; 1319ea8dc4b6Seschrock } 1320fa9e4066Sahrens 1321b5989ec7Seschrock spa->spa_load_guid = pool_guid; 1322b5989ec7Seschrock 132354d692b7SGeorge Wilson /* 132454d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 132554d692b7SGeorge Wilson */ 132625f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 132725f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 132854d692b7SGeorge Wilson 1329fa9e4066Sahrens /* 133099653d4eSeschrock * Parse the configuration into a vdev tree. 
We explicitly set the 133199653d4eSeschrock * value that will be returned by spa_version() since parsing the 133299653d4eSeschrock * configuration requires knowing the version number. 1333fa9e4066Sahrens */ 1334e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 133599653d4eSeschrock spa->spa_ubsync.ub_version = version; 133699653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 1337e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1338fa9e4066Sahrens 133999653d4eSeschrock if (error != 0) 1340ea8dc4b6Seschrock goto out; 1341fa9e4066Sahrens 13420e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1343fa9e4066Sahrens ASSERT(spa_guid(spa) == pool_guid); 1344fa9e4066Sahrens 1345fa9e4066Sahrens /* 1346fa9e4066Sahrens * Try to open all vdevs, loading each label in the process. 1347fa9e4066Sahrens */ 1348e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 13490bf246f5Smc error = vdev_open(rvd); 1350e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 13510bf246f5Smc if (error != 0) 1352ea8dc4b6Seschrock goto out; 1353fa9e4066Sahrens 1354560e6e96Seschrock /* 135577e3a39cSMark J Musante * We need to validate the vdev labels against the configuration that 135677e3a39cSMark J Musante * we have in hand, which is dependent on the setting of mosconfig. If 135777e3a39cSMark J Musante * mosconfig is true then we're validating the vdev labels based on 135877e3a39cSMark J Musante * that config. Otherwise, we're validating against the cached config 135977e3a39cSMark J Musante * (zpool.cache) that was read when we loaded the zfs module, and then 136077e3a39cSMark J Musante * later we will recursively call spa_load() and validate against 136177e3a39cSMark J Musante * the vdev config. 
1362560e6e96Seschrock */ 136377e3a39cSMark J Musante spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 136477e3a39cSMark J Musante error = vdev_validate(rvd); 136577e3a39cSMark J Musante spa_config_exit(spa, SCL_ALL, FTAG); 136677e3a39cSMark J Musante if (error != 0) 136777e3a39cSMark J Musante goto out; 1368560e6e96Seschrock 1369560e6e96Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1370560e6e96Seschrock error = ENXIO; 1371560e6e96Seschrock goto out; 1372560e6e96Seschrock } 1373560e6e96Seschrock 1374fa9e4066Sahrens /* 1375fa9e4066Sahrens * Find the best uberblock. 1376fa9e4066Sahrens */ 1377e14bb325SJeff Bonwick vdev_uberblock_load(NULL, rvd, ub); 1378fa9e4066Sahrens 1379fa9e4066Sahrens /* 1380fa9e4066Sahrens * If we weren't able to find a single valid uberblock, return failure. 1381fa9e4066Sahrens */ 1382fa9e4066Sahrens if (ub->ub_txg == 0) { 1383eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1384eaca9bbdSeschrock VDEV_AUX_CORRUPT_DATA); 1385ea8dc4b6Seschrock error = ENXIO; 1386ea8dc4b6Seschrock goto out; 1387ea8dc4b6Seschrock } 1388ea8dc4b6Seschrock 1389ea8dc4b6Seschrock /* 1390ea8dc4b6Seschrock * If the pool is newer than the code, we can't open it. 1391ea8dc4b6Seschrock */ 1392e7437265Sahrens if (ub->ub_version > SPA_VERSION) { 1393eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1394eaca9bbdSeschrock VDEV_AUX_VERSION_NEWER); 1395ea8dc4b6Seschrock error = ENOTSUP; 1396ea8dc4b6Seschrock goto out; 1397fa9e4066Sahrens } 1398fa9e4066Sahrens 1399fa9e4066Sahrens /* 1400fa9e4066Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1401fa9e4066Sahrens * incomplete configuration. 
1402fa9e4066Sahrens */ 1403ecc2d604Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 1404ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1405ea8dc4b6Seschrock VDEV_AUX_BAD_GUID_SUM); 1406ea8dc4b6Seschrock error = ENXIO; 1407ea8dc4b6Seschrock goto out; 1408fa9e4066Sahrens } 1409fa9e4066Sahrens 1410fa9e4066Sahrens /* 1411fa9e4066Sahrens * Initialize internal SPA structures. 1412fa9e4066Sahrens */ 1413fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1414fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 1415468c413aSTim Haley spa->spa_verify_min_txg = spa->spa_extreme_rewind ? 1416468c413aSTim Haley TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE; 1417468c413aSTim Haley spa->spa_first_txg = spa->spa_last_ubsync_txg ? 1418468c413aSTim Haley spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; 1419b24ab676SJeff Bonwick spa->spa_claim_max_txg = spa->spa_first_txg; 1420b24ab676SJeff Bonwick 1421ea8dc4b6Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 1422ea8dc4b6Seschrock if (error) { 1423ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1424ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1425468c413aSTim Haley error = EIO; 1426ea8dc4b6Seschrock goto out; 1427ea8dc4b6Seschrock } 1428fa9e4066Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1429fa9e4066Sahrens 1430ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1431fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 1432ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 1433ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1434ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1435ea8dc4b6Seschrock error = EIO; 1436ea8dc4b6Seschrock goto out; 1437ea8dc4b6Seschrock } 1438fa9e4066Sahrens 143988ecc943SGeorge Wilson if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 144088ecc943SGeorge Wilson vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 
144188ecc943SGeorge Wilson VDEV_AUX_CORRUPT_DATA); 144288ecc943SGeorge Wilson error = EIO; 144388ecc943SGeorge Wilson goto out; 144488ecc943SGeorge Wilson } 144588ecc943SGeorge Wilson 1446fa9e4066Sahrens if (!mosconfig) { 144795173954Sek uint64_t hostid; 1448fa9e4066Sahrens 144988ecc943SGeorge Wilson if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 145077650510SLin Ling ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 145195173954Sek char *hostname; 145295173954Sek unsigned long myhostid = 0; 145395173954Sek 145488ecc943SGeorge Wilson VERIFY(nvlist_lookup_string(nvconfig, 145595173954Sek ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 145695173954Sek 14575679c89fSjv #ifdef _KERNEL 14585679c89fSjv myhostid = zone_get_hostid(NULL); 14595679c89fSjv #else /* _KERNEL */ 14605679c89fSjv /* 14615679c89fSjv * We're emulating the system's hostid in userland, so 14625679c89fSjv * we can't use zone_get_hostid(). 14635679c89fSjv */ 146495173954Sek (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 14655679c89fSjv #endif /* _KERNEL */ 146617194a52Slling if (hostid != 0 && myhostid != 0 && 14675679c89fSjv hostid != myhostid) { 146895173954Sek cmn_err(CE_WARN, "pool '%s' could not be " 146995173954Sek "loaded as it was last accessed by " 147077650510SLin Ling "another system (host: %s hostid: 0x%lx). 
" 147195173954Sek "See: http://www.sun.com/msg/ZFS-8000-EY", 1472e14bb325SJeff Bonwick spa_name(spa), hostname, 147395173954Sek (unsigned long)hostid); 147495173954Sek error = EBADF; 147595173954Sek goto out; 147695173954Sek } 147795173954Sek } 147895173954Sek 147988ecc943SGeorge Wilson spa_config_set(spa, nvconfig); 1480fa9e4066Sahrens spa_unload(spa); 1481fa9e4066Sahrens spa_deactivate(spa); 14828ad4d6ddSJeff Bonwick spa_activate(spa, orig_mode); 1483fa9e4066Sahrens 1484468c413aSTim Haley return (spa_load(spa, state, B_TRUE)); 1485fa9e4066Sahrens } 1486fa9e4066Sahrens 1487ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1488fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 1489b24ab676SJeff Bonwick sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) { 1490ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1491ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1492ea8dc4b6Seschrock error = EIO; 1493ea8dc4b6Seschrock goto out; 1494ea8dc4b6Seschrock } 1495fa9e4066Sahrens 149699653d4eSeschrock /* 149799653d4eSeschrock * Load the bit that tells us to use the new accounting function 149899653d4eSeschrock * (raid-z deflation). If we have an older pool, this will not 149999653d4eSeschrock * be present. 150099653d4eSeschrock */ 150199653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, 150299653d4eSeschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 150399653d4eSeschrock sizeof (uint64_t), 1, &spa->spa_deflate); 150499653d4eSeschrock if (error != 0 && error != ENOENT) { 150599653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 150699653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 150799653d4eSeschrock error = EIO; 150899653d4eSeschrock goto out; 150999653d4eSeschrock } 151099653d4eSeschrock 1511fa9e4066Sahrens /* 1512ea8dc4b6Seschrock * Load the persistent error log. If we have an older pool, this will 1513ea8dc4b6Seschrock * not be present. 
1514fa9e4066Sahrens */ 1515ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1516ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 1517ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 1518d80c45e0Sbonwick if (error != 0 && error != ENOENT) { 1519ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1520ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1521ea8dc4b6Seschrock error = EIO; 1522ea8dc4b6Seschrock goto out; 1523ea8dc4b6Seschrock } 1524ea8dc4b6Seschrock 1525ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1526ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 1527ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 1528ea8dc4b6Seschrock if (error != 0 && error != ENOENT) { 1529ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1530ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1531ea8dc4b6Seschrock error = EIO; 1532ea8dc4b6Seschrock goto out; 1533ea8dc4b6Seschrock } 1534ea8dc4b6Seschrock 153506eeb2adSek /* 153606eeb2adSek * Load the history object. If we have an older pool, this 153706eeb2adSek * will not be present. 153806eeb2adSek */ 153906eeb2adSek error = zap_lookup(spa->spa_meta_objset, 154006eeb2adSek DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 154106eeb2adSek sizeof (uint64_t), 1, &spa->spa_history); 154206eeb2adSek if (error != 0 && error != ENOENT) { 154306eeb2adSek vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 154406eeb2adSek VDEV_AUX_CORRUPT_DATA); 154506eeb2adSek error = EIO; 154606eeb2adSek goto out; 154706eeb2adSek } 154806eeb2adSek 154999653d4eSeschrock /* 155099653d4eSeschrock * Load any hot spares for this pool. 
155199653d4eSeschrock */ 155299653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1553fa94a07fSbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 155499653d4eSeschrock if (error != 0 && error != ENOENT) { 155599653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 155699653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 155799653d4eSeschrock error = EIO; 155899653d4eSeschrock goto out; 155999653d4eSeschrock } 156099653d4eSeschrock if (error == 0) { 1561e7437265Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 1562fa94a07fSbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 1563fa94a07fSbrendan &spa->spa_spares.sav_config) != 0) { 156499653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 156599653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 156699653d4eSeschrock error = EIO; 156799653d4eSeschrock goto out; 156899653d4eSeschrock } 156999653d4eSeschrock 1570e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 157199653d4eSeschrock spa_load_spares(spa); 1572e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 157399653d4eSeschrock } 157499653d4eSeschrock 1575fa94a07fSbrendan /* 1576fa94a07fSbrendan * Load any level 2 ARC devices for this pool. 
1577fa94a07fSbrendan */ 1578fa94a07fSbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1579fa94a07fSbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 1580fa94a07fSbrendan &spa->spa_l2cache.sav_object); 1581fa94a07fSbrendan if (error != 0 && error != ENOENT) { 1582fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1583fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1584fa94a07fSbrendan error = EIO; 1585fa94a07fSbrendan goto out; 1586fa94a07fSbrendan } 1587fa94a07fSbrendan if (error == 0) { 1588fa94a07fSbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 1589fa94a07fSbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 1590fa94a07fSbrendan &spa->spa_l2cache.sav_config) != 0) { 1591fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, 1592fa94a07fSbrendan VDEV_STATE_CANT_OPEN, 1593fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1594fa94a07fSbrendan error = EIO; 1595fa94a07fSbrendan goto out; 1596fa94a07fSbrendan } 1597fa94a07fSbrendan 1598e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1599fa94a07fSbrendan spa_load_l2cache(spa); 1600e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1601fa94a07fSbrendan } 1602fa94a07fSbrendan 1603990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 1604ecd6cf80Smarks 1605b1b8ab34Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1606b1b8ab34Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 1607b1b8ab34Slling 1608b1b8ab34Slling if (error && error != ENOENT) { 1609b1b8ab34Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1610b1b8ab34Slling VDEV_AUX_CORRUPT_DATA); 1611b1b8ab34Slling error = EIO; 1612b1b8ab34Slling goto out; 1613b1b8ab34Slling } 1614b1b8ab34Slling 1615b1b8ab34Slling if (error == 0) { 1616b1b8ab34Slling (void) zap_lookup(spa->spa_meta_objset, 1617b1b8ab34Slling spa->spa_pool_props_object, 16183d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 1619b1b8ab34Slling sizeof 
(uint64_t), 1, &spa->spa_bootfs); 16203d7072f8Seschrock (void) zap_lookup(spa->spa_meta_objset, 16213d7072f8Seschrock spa->spa_pool_props_object, 16223d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 16233d7072f8Seschrock sizeof (uint64_t), 1, &autoreplace); 1624b693757aSEric Schrock spa->spa_autoreplace = (autoreplace != 0); 1625ecd6cf80Smarks (void) zap_lookup(spa->spa_meta_objset, 1626ecd6cf80Smarks spa->spa_pool_props_object, 1627ecd6cf80Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 1628ecd6cf80Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 16290a4e9518Sgw (void) zap_lookup(spa->spa_meta_objset, 16300a4e9518Sgw spa->spa_pool_props_object, 16310a4e9518Sgw zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 16320a4e9518Sgw sizeof (uint64_t), 1, &spa->spa_failmode); 1633573ca77eSGeorge Wilson (void) zap_lookup(spa->spa_meta_objset, 1634573ca77eSGeorge Wilson spa->spa_pool_props_object, 1635573ca77eSGeorge Wilson zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 1636573ca77eSGeorge Wilson sizeof (uint64_t), 1, &spa->spa_autoexpand); 1637b24ab676SJeff Bonwick (void) zap_lookup(spa->spa_meta_objset, 1638b24ab676SJeff Bonwick spa->spa_pool_props_object, 1639b24ab676SJeff Bonwick zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO), 1640b24ab676SJeff Bonwick sizeof (uint64_t), 1, &spa->spa_dedup_ditto); 1641b1b8ab34Slling } 1642b1b8ab34Slling 16433d7072f8Seschrock /* 16443d7072f8Seschrock * If the 'autoreplace' property is set, then post a resource notifying 16453d7072f8Seschrock * the ZFS DE that it should not issue any faults for unopenable 16463d7072f8Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 16473d7072f8Seschrock * unopenable vdevs so that the normal autoreplace handler can take 16483d7072f8Seschrock * over. 
16493d7072f8Seschrock */ 1650b693757aSEric Schrock if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 16513d7072f8Seschrock spa_check_removed(spa->spa_root_vdev); 1652b693757aSEric Schrock /* 1653b693757aSEric Schrock * For the import case, this is done in spa_import(), because 1654b693757aSEric Schrock * at this point we're using the spare definitions from 1655b693757aSEric Schrock * the MOS config, not necessarily from the userland config. 1656b693757aSEric Schrock */ 1657b693757aSEric Schrock if (state != SPA_LOAD_IMPORT) { 1658b693757aSEric Schrock spa_aux_check_removed(&spa->spa_spares); 1659b693757aSEric Schrock spa_aux_check_removed(&spa->spa_l2cache); 1660b693757aSEric Schrock } 1661b693757aSEric Schrock } 16623d7072f8Seschrock 1663ea8dc4b6Seschrock /* 1664560e6e96Seschrock * Load the vdev state for all toplevel vdevs. 1665ea8dc4b6Seschrock */ 1666560e6e96Seschrock vdev_load(rvd); 16670373e76bSbonwick 1668fa9e4066Sahrens /* 1669fa9e4066Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1670fa9e4066Sahrens */ 1671e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1672fa9e4066Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 1673e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1674fa9e4066Sahrens 1675fa9e4066Sahrens /* 1676fa9e4066Sahrens * Check the state of the root vdev. If it can't be opened, it 1677fa9e4066Sahrens * indicates one or more toplevel vdevs are faulted. 1678fa9e4066Sahrens */ 1679ea8dc4b6Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1680ea8dc4b6Seschrock error = ENXIO; 1681ea8dc4b6Seschrock goto out; 1682ea8dc4b6Seschrock } 1683fa9e4066Sahrens 1684b24ab676SJeff Bonwick /* 1685b24ab676SJeff Bonwick * Load the DDTs (dedup tables). 
1686b24ab676SJeff Bonwick */ 1687b24ab676SJeff Bonwick error = ddt_load(spa); 1688b24ab676SJeff Bonwick if (error != 0) { 1689b24ab676SJeff Bonwick vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1690b24ab676SJeff Bonwick VDEV_AUX_CORRUPT_DATA); 1691b24ab676SJeff Bonwick error = EIO; 1692b24ab676SJeff Bonwick goto out; 1693b24ab676SJeff Bonwick } 1694b24ab676SJeff Bonwick 1695485bbbf5SGeorge Wilson spa_update_dspace(spa); 1696485bbbf5SGeorge Wilson 1697468c413aSTim Haley if (state != SPA_LOAD_TRYIMPORT) { 1698468c413aSTim Haley error = spa_load_verify(spa); 1699468c413aSTim Haley if (error) { 1700468c413aSTim Haley vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1701468c413aSTim Haley VDEV_AUX_CORRUPT_DATA); 1702468c413aSTim Haley goto out; 1703468c413aSTim Haley } 1704468c413aSTim Haley } 1705468c413aSTim Haley 1706b24ab676SJeff Bonwick /* 1707b24ab676SJeff Bonwick * Load the intent log state and check log integrity. 1708b24ab676SJeff Bonwick */ 1709b24ab676SJeff Bonwick VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, 1710b24ab676SJeff Bonwick &nvroot) == 0); 1711b24ab676SJeff Bonwick spa_load_log_state(spa, nvroot); 1712b24ab676SJeff Bonwick nvlist_free(nvconfig); 1713b24ab676SJeff Bonwick 1714b24ab676SJeff Bonwick if (spa_check_logs(spa)) { 1715b24ab676SJeff Bonwick vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1716b24ab676SJeff Bonwick VDEV_AUX_BAD_LOG); 1717b24ab676SJeff Bonwick error = ENXIO; 1718b24ab676SJeff Bonwick ereport = FM_EREPORT_ZFS_LOG_REPLAY; 1719b24ab676SJeff Bonwick goto out; 1720b24ab676SJeff Bonwick } 1721b24ab676SJeff Bonwick 1722468c413aSTim Haley if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || 1723468c413aSTim Haley spa->spa_load_max_txg == UINT64_MAX)) { 17245dabedeeSbonwick dmu_tx_t *tx; 17250373e76bSbonwick int need_update = B_FALSE; 17268ad4d6ddSJeff Bonwick 17278ad4d6ddSJeff Bonwick ASSERT(state != SPA_LOAD_TRYIMPORT); 17285dabedeeSbonwick 17290373e76bSbonwick /* 17300373e76bSbonwick * Claim log blocks 
that haven't been committed yet. 17310373e76bSbonwick * This must all happen in a single txg. 1732b24ab676SJeff Bonwick * Note: spa_claim_max_txg is updated by spa_claim_notify(), 1733b24ab676SJeff Bonwick * invoked from zil_claim_log_block()'s i/o done callback. 1734468c413aSTim Haley * Price of rollback is that we abandon the log. 17350373e76bSbonwick */ 1736b24ab676SJeff Bonwick spa->spa_claiming = B_TRUE; 1737b24ab676SJeff Bonwick 17385dabedeeSbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1739fa9e4066Sahrens spa_first_txg(spa)); 1740e14bb325SJeff Bonwick (void) dmu_objset_find(spa_name(spa), 17410b69c2f0Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1742fa9e4066Sahrens dmu_tx_commit(tx); 1743fa9e4066Sahrens 1744b24ab676SJeff Bonwick spa->spa_claiming = B_FALSE; 1745b24ab676SJeff Bonwick 1746e6ca193dSGeorge Wilson spa->spa_log_state = SPA_LOG_GOOD; 1747fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 1748fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 1749fa9e4066Sahrens 1750fa9e4066Sahrens /* 1751b24ab676SJeff Bonwick * Wait for all claims to sync. We sync up to the highest 1752b24ab676SJeff Bonwick * claimed log block birth time so that claimed log blocks 1753b24ab676SJeff Bonwick * don't appear to be from the future. spa_claim_max_txg 1754b24ab676SJeff Bonwick * will have been set for us by either zil_check_log_chain() 1755b24ab676SJeff Bonwick * (invoked from spa_check_logs()) or zil_claim() above. 1756fa9e4066Sahrens */ 1757b24ab676SJeff Bonwick txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); 17580e34b6a7Sbonwick 17590e34b6a7Sbonwick /* 17600373e76bSbonwick * If the config cache is stale, or we have uninitialized 17610373e76bSbonwick * metaslabs (see spa_vdev_add()), then update the config. 1762bc758434SLin Ling * 1763bc758434SLin Ling * If spa_load_verbatim is true, trust the current 1764bc758434SLin Ling * in-core spa_config and update the disk labels. 
17650e34b6a7Sbonwick */ 17660373e76bSbonwick if (config_cache_txg != spa->spa_config_txg || 1767468c413aSTim Haley state == SPA_LOAD_IMPORT || spa->spa_load_verbatim || 1768468c413aSTim Haley state == SPA_LOAD_RECOVER) 17690373e76bSbonwick need_update = B_TRUE; 17700373e76bSbonwick 17718ad4d6ddSJeff Bonwick for (int c = 0; c < rvd->vdev_children; c++) 17720373e76bSbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 17730373e76bSbonwick need_update = B_TRUE; 17740e34b6a7Sbonwick 17750e34b6a7Sbonwick /* 17760373e76bSbonwick * Update the config cache asychronously in case we're the 17770373e76bSbonwick * root pool, in which case the config cache isn't writable yet. 17780e34b6a7Sbonwick */ 17790373e76bSbonwick if (need_update) 17800373e76bSbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 17818ad4d6ddSJeff Bonwick 17828ad4d6ddSJeff Bonwick /* 17838ad4d6ddSJeff Bonwick * Check all DTLs to see if anything needs resilvering. 17848ad4d6ddSJeff Bonwick */ 17858ad4d6ddSJeff Bonwick if (vdev_resilver_needed(rvd, NULL, NULL)) 17868ad4d6ddSJeff Bonwick spa_async_request(spa, SPA_ASYNC_RESILVER); 1787503ad85cSMatthew Ahrens 1788503ad85cSMatthew Ahrens /* 1789503ad85cSMatthew Ahrens * Delete any inconsistent datasets. 1790503ad85cSMatthew Ahrens */ 1791503ad85cSMatthew Ahrens (void) dmu_objset_find(spa_name(spa), 1792503ad85cSMatthew Ahrens dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 1793ca45db41SChris Kirby 1794ca45db41SChris Kirby /* 1795ca45db41SChris Kirby * Clean up any stale temporary dataset userrefs. 
1796ca45db41SChris Kirby */ 1797ca45db41SChris Kirby dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 1798fa9e4066Sahrens } 1799fa9e4066Sahrens 1800ea8dc4b6Seschrock error = 0; 1801ea8dc4b6Seschrock out: 1802468c413aSTim Haley 1803088f3894Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 180499653d4eSeschrock if (error && error != EBADF) 1805b87f3af3Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 1806ea8dc4b6Seschrock spa->spa_load_state = SPA_LOAD_NONE; 1807ea8dc4b6Seschrock spa->spa_ena = 0; 1808ea8dc4b6Seschrock 1809ea8dc4b6Seschrock return (error); 1810fa9e4066Sahrens } 1811fa9e4066Sahrens 1812468c413aSTim Haley static int 1813468c413aSTim Haley spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) 1814468c413aSTim Haley { 1815468c413aSTim Haley spa_unload(spa); 1816468c413aSTim Haley spa_deactivate(spa); 1817468c413aSTim Haley 1818468c413aSTim Haley spa->spa_load_max_txg--; 1819468c413aSTim Haley 1820468c413aSTim Haley spa_activate(spa, spa_mode_global); 1821468c413aSTim Haley spa_async_suspend(spa); 1822468c413aSTim Haley 1823468c413aSTim Haley return (spa_load(spa, state, mosconfig)); 1824468c413aSTim Haley } 1825468c413aSTim Haley 1826468c413aSTim Haley static int 1827468c413aSTim Haley spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, 1828468c413aSTim Haley uint64_t max_request, boolean_t extreme) 1829468c413aSTim Haley { 1830468c413aSTim Haley nvlist_t *config = NULL; 1831468c413aSTim Haley int load_error, rewind_error; 1832468c413aSTim Haley uint64_t safe_rollback_txg; 1833468c413aSTim Haley uint64_t min_txg; 1834468c413aSTim Haley 1835a33cae98STim Haley if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { 1836468c413aSTim Haley spa->spa_load_max_txg = spa->spa_load_txg; 1837a33cae98STim Haley spa->spa_log_state = SPA_LOG_CLEAR; 1838a33cae98STim Haley } else { 1839468c413aSTim Haley spa->spa_load_max_txg = max_request; 1840a33cae98STim Haley } 1841468c413aSTim Haley 1842468c413aSTim Haley load_error = 
rewind_error = spa_load(spa, state, mosconfig); 1843468c413aSTim Haley if (load_error == 0) 1844468c413aSTim Haley return (0); 1845468c413aSTim Haley 1846468c413aSTim Haley if (spa->spa_root_vdev != NULL) 1847468c413aSTim Haley config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1848468c413aSTim Haley 1849468c413aSTim Haley spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; 1850468c413aSTim Haley spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; 1851468c413aSTim Haley 1852468c413aSTim Haley /* specific txg requested */ 1853468c413aSTim Haley if (spa->spa_load_max_txg != UINT64_MAX && !extreme) { 1854468c413aSTim Haley nvlist_free(config); 1855468c413aSTim Haley return (load_error); 1856468c413aSTim Haley } 1857468c413aSTim Haley 1858468c413aSTim Haley /* Price of rolling back is discarding txgs, including log */ 1859468c413aSTim Haley if (state == SPA_LOAD_RECOVER) 1860468c413aSTim Haley spa->spa_log_state = SPA_LOG_CLEAR; 1861468c413aSTim Haley 1862468c413aSTim Haley spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; 1863468c413aSTim Haley safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE; 1864468c413aSTim Haley 1865468c413aSTim Haley min_txg = extreme ? 
TXG_INITIAL : safe_rollback_txg; 1866468c413aSTim Haley while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) { 1867468c413aSTim Haley if (spa->spa_load_max_txg < safe_rollback_txg) 1868468c413aSTim Haley spa->spa_extreme_rewind = B_TRUE; 1869468c413aSTim Haley rewind_error = spa_load_retry(spa, state, mosconfig); 1870468c413aSTim Haley } 1871468c413aSTim Haley 1872468c413aSTim Haley if (config) 1873468c413aSTim Haley spa_rewind_data_to_nvlist(spa, config); 1874468c413aSTim Haley 1875468c413aSTim Haley spa->spa_extreme_rewind = B_FALSE; 1876468c413aSTim Haley spa->spa_load_max_txg = UINT64_MAX; 1877468c413aSTim Haley 1878468c413aSTim Haley if (config && (rewind_error || state != SPA_LOAD_RECOVER)) 1879468c413aSTim Haley spa_config_set(spa, config); 1880468c413aSTim Haley 1881468c413aSTim Haley return (state == SPA_LOAD_RECOVER ? rewind_error : load_error); 1882468c413aSTim Haley } 1883468c413aSTim Haley 1884fa9e4066Sahrens /* 1885fa9e4066Sahrens * Pool Open/Import 1886fa9e4066Sahrens * 1887fa9e4066Sahrens * The import case is identical to an open except that the configuration is sent 1888fa9e4066Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1889fa9e4066Sahrens * case of an open, the pool configuration will exist in the 18903d7072f8Seschrock * POOL_STATE_UNINITIALIZED state. 1891fa9e4066Sahrens * 1892fa9e4066Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1893fa9e4066Sahrens * the same time open the pool, without having to keep around the spa_t in some 1894fa9e4066Sahrens * ambiguous state. 
1895fa9e4066Sahrens */ 1896fa9e4066Sahrens static int 1897468c413aSTim Haley spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, 1898468c413aSTim Haley nvlist_t **config) 1899fa9e4066Sahrens { 1900fa9e4066Sahrens spa_t *spa; 1901468c413aSTim Haley boolean_t norewind; 1902468c413aSTim Haley boolean_t extreme; 1903468c413aSTim Haley zpool_rewind_policy_t policy; 1904468c413aSTim Haley spa_load_state_t state = SPA_LOAD_OPEN; 1905fa9e4066Sahrens int error; 1906fa9e4066Sahrens int locked = B_FALSE; 1907fa9e4066Sahrens 1908fa9e4066Sahrens *spapp = NULL; 1909fa9e4066Sahrens 1910468c413aSTim Haley zpool_get_rewind_policy(nvpolicy, &policy); 1911468c413aSTim Haley if (policy.zrp_request & ZPOOL_DO_REWIND) 1912468c413aSTim Haley state = SPA_LOAD_RECOVER; 1913468c413aSTim Haley norewind = (policy.zrp_request == ZPOOL_NO_REWIND); 1914468c413aSTim Haley extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0); 1915468c413aSTim Haley 1916fa9e4066Sahrens /* 1917fa9e4066Sahrens * As disgusting as this is, we need to support recursive calls to this 1918fa9e4066Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 1919fa9e4066Sahrens * up calling spa_open() again. The real fix is to figure out how to 1920fa9e4066Sahrens * avoid dsl_dir_open() calling this in the first place. 
1921fa9e4066Sahrens */ 1922fa9e4066Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 1923fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 1924fa9e4066Sahrens locked = B_TRUE; 1925fa9e4066Sahrens } 1926fa9e4066Sahrens 1927fa9e4066Sahrens if ((spa = spa_lookup(pool)) == NULL) { 1928fa9e4066Sahrens if (locked) 1929fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1930fa9e4066Sahrens return (ENOENT); 1931fa9e4066Sahrens } 1932468c413aSTim Haley 1933fa9e4066Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1934fa9e4066Sahrens 19358ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 1936fa9e4066Sahrens 1937468c413aSTim Haley if (spa->spa_last_open_failed && norewind) { 1938468c413aSTim Haley if (config != NULL && spa->spa_config) 1939468c413aSTim Haley VERIFY(nvlist_dup(spa->spa_config, 1940468c413aSTim Haley config, KM_SLEEP) == 0); 1941468c413aSTim Haley spa_deactivate(spa); 1942468c413aSTim Haley if (locked) 1943468c413aSTim Haley mutex_exit(&spa_namespace_lock); 1944468c413aSTim Haley return (spa->spa_last_open_failed); 1945468c413aSTim Haley } 1946468c413aSTim Haley 1947468c413aSTim Haley if (state != SPA_LOAD_RECOVER) 1948468c413aSTim Haley spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; 1949468c413aSTim Haley 1950468c413aSTim Haley error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, 1951468c413aSTim Haley extreme); 1952fa9e4066Sahrens 1953fa9e4066Sahrens if (error == EBADF) { 1954fa9e4066Sahrens /* 1955560e6e96Seschrock * If vdev_validate() returns failure (indicated by 1956560e6e96Seschrock * EBADF), it indicates that one of the vdevs indicates 1957560e6e96Seschrock * that the pool has been exported or destroyed. If 1958560e6e96Seschrock * this is the case, the config cache is out of sync and 1959560e6e96Seschrock * we should remove the pool from the namespace. 
1960fa9e4066Sahrens */ 1961fa9e4066Sahrens spa_unload(spa); 1962fa9e4066Sahrens spa_deactivate(spa); 1963c5904d13Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 1964fa9e4066Sahrens spa_remove(spa); 1965fa9e4066Sahrens if (locked) 1966fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1967fa9e4066Sahrens return (ENOENT); 1968ea8dc4b6Seschrock } 1969ea8dc4b6Seschrock 1970ea8dc4b6Seschrock if (error) { 1971fa9e4066Sahrens /* 1972fa9e4066Sahrens * We can't open the pool, but we still have useful 1973fa9e4066Sahrens * information: the state of each vdev after the 1974fa9e4066Sahrens * attempted vdev_open(). Return this to the user. 1975fa9e4066Sahrens */ 1976468c413aSTim Haley if (config != NULL && spa->spa_config) 1977468c413aSTim Haley VERIFY(nvlist_dup(spa->spa_config, config, 1978468c413aSTim Haley KM_SLEEP) == 0); 1979fa9e4066Sahrens spa_unload(spa); 1980fa9e4066Sahrens spa_deactivate(spa); 1981468c413aSTim Haley spa->spa_last_open_failed = error; 1982fa9e4066Sahrens if (locked) 1983fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1984fa9e4066Sahrens *spapp = NULL; 1985fa9e4066Sahrens return (error); 1986fa9e4066Sahrens } 1987468c413aSTim Haley 1988fa9e4066Sahrens } 1989fa9e4066Sahrens 1990fa9e4066Sahrens spa_open_ref(spa, tag); 19913d7072f8Seschrock 1992468c413aSTim Haley 1993468c413aSTim Haley if (config != NULL) 1994468c413aSTim Haley *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1995468c413aSTim Haley 1996a33cae98STim Haley if (locked) { 1997a33cae98STim Haley spa->spa_last_open_failed = 0; 1998a33cae98STim Haley spa->spa_last_ubsync_txg = 0; 1999a33cae98STim Haley spa->spa_load_txg = 0; 2000fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2001a33cae98STim Haley } 2002fa9e4066Sahrens 2003fa9e4066Sahrens *spapp = spa; 2004fa9e4066Sahrens 2005fa9e4066Sahrens return (0); 2006fa9e4066Sahrens } 2007fa9e4066Sahrens 2008468c413aSTim Haley int 2009468c413aSTim Haley spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, 2010468c413aSTim 
Haley nvlist_t **config) 2011468c413aSTim Haley { 2012468c413aSTim Haley return (spa_open_common(name, spapp, tag, policy, config)); 2013468c413aSTim Haley } 2014468c413aSTim Haley 2015fa9e4066Sahrens int 2016fa9e4066Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 2017fa9e4066Sahrens { 2018468c413aSTim Haley return (spa_open_common(name, spapp, tag, NULL, NULL)); 2019fa9e4066Sahrens } 2020fa9e4066Sahrens 2021ea8dc4b6Seschrock /* 2022ea8dc4b6Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 2023ea8dc4b6Seschrock * preventing it from being exported or destroyed. 2024ea8dc4b6Seschrock */ 2025ea8dc4b6Seschrock spa_t * 2026ea8dc4b6Seschrock spa_inject_addref(char *name) 2027ea8dc4b6Seschrock { 2028ea8dc4b6Seschrock spa_t *spa; 2029ea8dc4b6Seschrock 2030ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 2031ea8dc4b6Seschrock if ((spa = spa_lookup(name)) == NULL) { 2032ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2033ea8dc4b6Seschrock return (NULL); 2034ea8dc4b6Seschrock } 2035ea8dc4b6Seschrock spa->spa_inject_ref++; 2036ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2037ea8dc4b6Seschrock 2038ea8dc4b6Seschrock return (spa); 2039ea8dc4b6Seschrock } 2040ea8dc4b6Seschrock 2041ea8dc4b6Seschrock void 2042ea8dc4b6Seschrock spa_inject_delref(spa_t *spa) 2043ea8dc4b6Seschrock { 2044ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 2045ea8dc4b6Seschrock spa->spa_inject_ref--; 2046ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2047ea8dc4b6Seschrock } 2048ea8dc4b6Seschrock 2049fa94a07fSbrendan /* 2050fa94a07fSbrendan * Add spares device information to the nvlist. 
2051fa94a07fSbrendan */ 205299653d4eSeschrock static void 205399653d4eSeschrock spa_add_spares(spa_t *spa, nvlist_t *config) 205499653d4eSeschrock { 205599653d4eSeschrock nvlist_t **spares; 205699653d4eSeschrock uint_t i, nspares; 205799653d4eSeschrock nvlist_t *nvroot; 205899653d4eSeschrock uint64_t guid; 205999653d4eSeschrock vdev_stat_t *vs; 206099653d4eSeschrock uint_t vsc; 206139c23413Seschrock uint64_t pool; 206299653d4eSeschrock 20636809eb4eSEric Schrock ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 20646809eb4eSEric Schrock 2065fa94a07fSbrendan if (spa->spa_spares.sav_count == 0) 206699653d4eSeschrock return; 206799653d4eSeschrock 206899653d4eSeschrock VERIFY(nvlist_lookup_nvlist(config, 206999653d4eSeschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 2070fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 207199653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 207299653d4eSeschrock if (nspares != 0) { 207399653d4eSeschrock VERIFY(nvlist_add_nvlist_array(nvroot, 207499653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 207599653d4eSeschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 207699653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 207799653d4eSeschrock 207899653d4eSeschrock /* 207999653d4eSeschrock * Go through and find any spares which have since been 208099653d4eSeschrock * repurposed as an active spare. If this is the case, update 208199653d4eSeschrock * their status appropriately. 
208299653d4eSeschrock */ 208399653d4eSeschrock for (i = 0; i < nspares; i++) { 208499653d4eSeschrock VERIFY(nvlist_lookup_uint64(spares[i], 208599653d4eSeschrock ZPOOL_CONFIG_GUID, &guid) == 0); 208689a89ebfSlling if (spa_spare_exists(guid, &pool, NULL) && 208789a89ebfSlling pool != 0ULL) { 208899653d4eSeschrock VERIFY(nvlist_lookup_uint64_array( 208999653d4eSeschrock spares[i], ZPOOL_CONFIG_STATS, 209099653d4eSeschrock (uint64_t **)&vs, &vsc) == 0); 209199653d4eSeschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 209299653d4eSeschrock vs->vs_aux = VDEV_AUX_SPARED; 209399653d4eSeschrock } 209499653d4eSeschrock } 209599653d4eSeschrock } 209699653d4eSeschrock } 209799653d4eSeschrock 2098fa94a07fSbrendan /* 2099fa94a07fSbrendan * Add l2cache device information to the nvlist, including vdev stats. 2100fa94a07fSbrendan */ 2101fa94a07fSbrendan static void 2102fa94a07fSbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 2103fa94a07fSbrendan { 2104fa94a07fSbrendan nvlist_t **l2cache; 2105fa94a07fSbrendan uint_t i, j, nl2cache; 2106fa94a07fSbrendan nvlist_t *nvroot; 2107fa94a07fSbrendan uint64_t guid; 2108fa94a07fSbrendan vdev_t *vd; 2109fa94a07fSbrendan vdev_stat_t *vs; 2110fa94a07fSbrendan uint_t vsc; 2111fa94a07fSbrendan 21126809eb4eSEric Schrock ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 21136809eb4eSEric Schrock 2114fa94a07fSbrendan if (spa->spa_l2cache.sav_count == 0) 2115fa94a07fSbrendan return; 2116fa94a07fSbrendan 2117fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist(config, 2118fa94a07fSbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 2119fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 2120fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 2121fa94a07fSbrendan if (nl2cache != 0) { 2122fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 2123fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2124fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 2125fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, 
&l2cache, &nl2cache) == 0); 2126fa94a07fSbrendan 2127fa94a07fSbrendan /* 2128fa94a07fSbrendan * Update level 2 cache device stats. 2129fa94a07fSbrendan */ 2130fa94a07fSbrendan 2131fa94a07fSbrendan for (i = 0; i < nl2cache; i++) { 2132fa94a07fSbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 2133fa94a07fSbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 2134fa94a07fSbrendan 2135fa94a07fSbrendan vd = NULL; 2136fa94a07fSbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 2137fa94a07fSbrendan if (guid == 2138fa94a07fSbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 2139fa94a07fSbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 2140fa94a07fSbrendan break; 2141fa94a07fSbrendan } 2142fa94a07fSbrendan } 2143fa94a07fSbrendan ASSERT(vd != NULL); 2144fa94a07fSbrendan 2145fa94a07fSbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 2146fa94a07fSbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 2147fa94a07fSbrendan vdev_get_stats(vd, vs); 2148fa94a07fSbrendan } 2149fa94a07fSbrendan } 2150fa94a07fSbrendan } 2151fa94a07fSbrendan 2152fa9e4066Sahrens int 2153ea8dc4b6Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 2154fa9e4066Sahrens { 2155fa9e4066Sahrens int error; 2156fa9e4066Sahrens spa_t *spa; 2157fa9e4066Sahrens 2158fa9e4066Sahrens *config = NULL; 2159468c413aSTim Haley error = spa_open_common(name, &spa, FTAG, NULL, config); 2160fa9e4066Sahrens 21616809eb4eSEric Schrock if (spa != NULL) { 21626809eb4eSEric Schrock /* 21636809eb4eSEric Schrock * This still leaves a window of inconsistency where the spares 21646809eb4eSEric Schrock * or l2cache devices could change and the config would be 21656809eb4eSEric Schrock * self-inconsistent. 
21666809eb4eSEric Schrock */ 21676809eb4eSEric Schrock spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 2168ea8dc4b6Seschrock 21696809eb4eSEric Schrock if (*config != NULL) { 2170e14bb325SJeff Bonwick VERIFY(nvlist_add_uint64(*config, 21716809eb4eSEric Schrock ZPOOL_CONFIG_ERRCOUNT, 21726809eb4eSEric Schrock spa_get_errlog_size(spa)) == 0); 2173e14bb325SJeff Bonwick 21746809eb4eSEric Schrock if (spa_suspended(spa)) 21756809eb4eSEric Schrock VERIFY(nvlist_add_uint64(*config, 21766809eb4eSEric Schrock ZPOOL_CONFIG_SUSPENDED, 21776809eb4eSEric Schrock spa->spa_failmode) == 0); 21786809eb4eSEric Schrock 21796809eb4eSEric Schrock spa_add_spares(spa, *config); 21806809eb4eSEric Schrock spa_add_l2cache(spa, *config); 21816809eb4eSEric Schrock } 218299653d4eSeschrock } 218399653d4eSeschrock 2184ea8dc4b6Seschrock /* 2185ea8dc4b6Seschrock * We want to get the alternate root even for faulted pools, so we cheat 2186ea8dc4b6Seschrock * and call spa_lookup() directly. 2187ea8dc4b6Seschrock */ 2188ea8dc4b6Seschrock if (altroot) { 2189ea8dc4b6Seschrock if (spa == NULL) { 2190ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 2191ea8dc4b6Seschrock spa = spa_lookup(name); 2192ea8dc4b6Seschrock if (spa) 2193ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 2194ea8dc4b6Seschrock else 2195ea8dc4b6Seschrock altroot[0] = '\0'; 2196ea8dc4b6Seschrock spa = NULL; 2197ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2198ea8dc4b6Seschrock } else { 2199ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 2200ea8dc4b6Seschrock } 2201ea8dc4b6Seschrock } 2202ea8dc4b6Seschrock 22036809eb4eSEric Schrock if (spa != NULL) { 22046809eb4eSEric Schrock spa_config_exit(spa, SCL_CONFIG, FTAG); 2205fa9e4066Sahrens spa_close(spa, FTAG); 22066809eb4eSEric Schrock } 2207fa9e4066Sahrens 2208fa9e4066Sahrens return (error); 2209fa9e4066Sahrens } 2210fa9e4066Sahrens 221199653d4eSeschrock /* 2212fa94a07fSbrendan * Validate that the auxiliary device array is well formed. 
We must have an 2213fa94a07fSbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 2214fa94a07fSbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 2215fa94a07fSbrendan * specified, as long as they are well-formed. 221699653d4eSeschrock */ 221799653d4eSeschrock static int 2218fa94a07fSbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 2219fa94a07fSbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 2220fa94a07fSbrendan vdev_labeltype_t label) 222199653d4eSeschrock { 2222fa94a07fSbrendan nvlist_t **dev; 2223fa94a07fSbrendan uint_t i, ndev; 222499653d4eSeschrock vdev_t *vd; 222599653d4eSeschrock int error; 222699653d4eSeschrock 2227e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 2228e14bb325SJeff Bonwick 222999653d4eSeschrock /* 2230fa94a07fSbrendan * It's acceptable to have no devs specified. 223199653d4eSeschrock */ 2232fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 223399653d4eSeschrock return (0); 223499653d4eSeschrock 2235fa94a07fSbrendan if (ndev == 0) 223699653d4eSeschrock return (EINVAL); 223799653d4eSeschrock 223899653d4eSeschrock /* 2239fa94a07fSbrendan * Make sure the pool is formatted with a version that supports this 2240fa94a07fSbrendan * device type. 224199653d4eSeschrock */ 2242fa94a07fSbrendan if (spa_version(spa) < version) 224399653d4eSeschrock return (ENOTSUP); 224499653d4eSeschrock 224539c23413Seschrock /* 2246fa94a07fSbrendan * Set the pending device list so we correctly handle device in-use 224739c23413Seschrock * checking. 
224839c23413Seschrock */ 2249fa94a07fSbrendan sav->sav_pending = dev; 2250fa94a07fSbrendan sav->sav_npending = ndev; 225139c23413Seschrock 2252fa94a07fSbrendan for (i = 0; i < ndev; i++) { 2253fa94a07fSbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 225499653d4eSeschrock mode)) != 0) 225539c23413Seschrock goto out; 225699653d4eSeschrock 225799653d4eSeschrock if (!vd->vdev_ops->vdev_op_leaf) { 225899653d4eSeschrock vdev_free(vd); 225939c23413Seschrock error = EINVAL; 226039c23413Seschrock goto out; 226199653d4eSeschrock } 226299653d4eSeschrock 2263fa94a07fSbrendan /* 2264e14bb325SJeff Bonwick * The L2ARC currently only supports disk devices in 2265e14bb325SJeff Bonwick * kernel context. For user-level testing, we allow it. 2266fa94a07fSbrendan */ 2267e14bb325SJeff Bonwick #ifdef _KERNEL 2268fa94a07fSbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 2269fa94a07fSbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 2270fa94a07fSbrendan error = ENOTBLK; 2271fa94a07fSbrendan goto out; 2272fa94a07fSbrendan } 2273e14bb325SJeff Bonwick #endif 227499653d4eSeschrock vd->vdev_top = vd; 227599653d4eSeschrock 227639c23413Seschrock if ((error = vdev_open(vd)) == 0 && 2277fa94a07fSbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 2278fa94a07fSbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 227939c23413Seschrock vd->vdev_guid) == 0); 228039c23413Seschrock } 228199653d4eSeschrock 228299653d4eSeschrock vdev_free(vd); 228339c23413Seschrock 2284fa94a07fSbrendan if (error && 2285fa94a07fSbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 228639c23413Seschrock goto out; 228739c23413Seschrock else 228839c23413Seschrock error = 0; 228999653d4eSeschrock } 229099653d4eSeschrock 229139c23413Seschrock out: 2292fa94a07fSbrendan sav->sav_pending = NULL; 2293fa94a07fSbrendan sav->sav_npending = 0; 229439c23413Seschrock return (error); 229599653d4eSeschrock } 229699653d4eSeschrock 2297fa94a07fSbrendan static int 
2298fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 2299fa94a07fSbrendan { 2300fa94a07fSbrendan int error; 2301fa94a07fSbrendan 2302e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 2303e14bb325SJeff Bonwick 2304fa94a07fSbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 2305fa94a07fSbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 2306fa94a07fSbrendan VDEV_LABEL_SPARE)) != 0) { 2307fa94a07fSbrendan return (error); 2308fa94a07fSbrendan } 2309fa94a07fSbrendan 2310fa94a07fSbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 2311fa94a07fSbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 2312fa94a07fSbrendan VDEV_LABEL_L2CACHE)); 2313fa94a07fSbrendan } 2314fa94a07fSbrendan 2315fa94a07fSbrendan static void 2316fa94a07fSbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 2317fa94a07fSbrendan const char *config) 2318fa94a07fSbrendan { 2319fa94a07fSbrendan int i; 2320fa94a07fSbrendan 2321fa94a07fSbrendan if (sav->sav_config != NULL) { 2322fa94a07fSbrendan nvlist_t **olddevs; 2323fa94a07fSbrendan uint_t oldndevs; 2324fa94a07fSbrendan nvlist_t **newdevs; 2325fa94a07fSbrendan 2326fa94a07fSbrendan /* 2327fa94a07fSbrendan * Generate new dev list by concatentating with the 2328fa94a07fSbrendan * current dev list. 
2329fa94a07fSbrendan */ 2330fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 2331fa94a07fSbrendan &olddevs, &oldndevs) == 0); 2332fa94a07fSbrendan 2333fa94a07fSbrendan newdevs = kmem_alloc(sizeof (void *) * 2334fa94a07fSbrendan (ndevs + oldndevs), KM_SLEEP); 2335fa94a07fSbrendan for (i = 0; i < oldndevs; i++) 2336fa94a07fSbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 2337fa94a07fSbrendan KM_SLEEP) == 0); 2338fa94a07fSbrendan for (i = 0; i < ndevs; i++) 2339fa94a07fSbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 2340fa94a07fSbrendan KM_SLEEP) == 0); 2341fa94a07fSbrendan 2342fa94a07fSbrendan VERIFY(nvlist_remove(sav->sav_config, config, 2343fa94a07fSbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 2344fa94a07fSbrendan 2345fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 2346fa94a07fSbrendan config, newdevs, ndevs + oldndevs) == 0); 2347fa94a07fSbrendan for (i = 0; i < oldndevs + ndevs; i++) 2348fa94a07fSbrendan nvlist_free(newdevs[i]); 2349fa94a07fSbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 2350fa94a07fSbrendan } else { 2351fa94a07fSbrendan /* 2352fa94a07fSbrendan * Generate a new dev list. 
2353fa94a07fSbrendan */ 2354fa94a07fSbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 2355fa94a07fSbrendan KM_SLEEP) == 0); 2356fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 2357fa94a07fSbrendan devs, ndevs) == 0); 2358fa94a07fSbrendan } 2359fa94a07fSbrendan } 2360fa94a07fSbrendan 2361fa94a07fSbrendan /* 2362fa94a07fSbrendan * Stop and drop level 2 ARC devices 2363fa94a07fSbrendan */ 2364fa94a07fSbrendan void 2365fa94a07fSbrendan spa_l2cache_drop(spa_t *spa) 2366fa94a07fSbrendan { 2367fa94a07fSbrendan vdev_t *vd; 2368fa94a07fSbrendan int i; 2369fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 2370fa94a07fSbrendan 2371fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) { 2372fa94a07fSbrendan uint64_t pool; 2373fa94a07fSbrendan 2374fa94a07fSbrendan vd = sav->sav_vdevs[i]; 2375fa94a07fSbrendan ASSERT(vd != NULL); 2376fa94a07fSbrendan 23778ad4d6ddSJeff Bonwick if (spa_l2cache_exists(vd->vdev_guid, &pool) && 23788ad4d6ddSJeff Bonwick pool != 0ULL && l2arc_vdev_present(vd)) 2379fa94a07fSbrendan l2arc_remove_vdev(vd); 2380fa94a07fSbrendan if (vd->vdev_isl2cache) 2381fa94a07fSbrendan spa_l2cache_remove(vd); 2382fa94a07fSbrendan vdev_clear_stats(vd); 2383fa94a07fSbrendan (void) vdev_close(vd); 2384fa94a07fSbrendan } 2385fa94a07fSbrendan } 2386fa94a07fSbrendan 2387fa9e4066Sahrens /* 2388fa9e4066Sahrens * Pool Creation 2389fa9e4066Sahrens */ 2390fa9e4066Sahrens int 2391990b4856Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 23920a48a24eStimh const char *history_str, nvlist_t *zplprops) 2393fa9e4066Sahrens { 2394fa9e4066Sahrens spa_t *spa; 2395990b4856Slling char *altroot = NULL; 23960373e76bSbonwick vdev_t *rvd; 2397fa9e4066Sahrens dsl_pool_t *dp; 2398fa9e4066Sahrens dmu_tx_t *tx; 2399573ca77eSGeorge Wilson int error = 0; 2400fa9e4066Sahrens uint64_t txg = TXG_INITIAL; 2401fa94a07fSbrendan nvlist_t **spares, **l2cache; 2402fa94a07fSbrendan uint_t nspares, nl2cache; 2403990b4856Slling uint64_t version; 
2404fa9e4066Sahrens 2405fa9e4066Sahrens /* 2406fa9e4066Sahrens * If this pool already exists, return failure. 2407fa9e4066Sahrens */ 2408fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 2409fa9e4066Sahrens if (spa_lookup(pool) != NULL) { 2410fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2411fa9e4066Sahrens return (EEXIST); 2412fa9e4066Sahrens } 2413fa9e4066Sahrens 2414fa9e4066Sahrens /* 2415fa9e4066Sahrens * Allocate a new spa_t structure. 2416fa9e4066Sahrens */ 2417990b4856Slling (void) nvlist_lookup_string(props, 2418990b4856Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 2419468c413aSTim Haley spa = spa_add(pool, NULL, altroot); 24208ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 2421fa9e4066Sahrens 2422990b4856Slling if (props && (error = spa_prop_validate(spa, props))) { 2423990b4856Slling spa_deactivate(spa); 2424990b4856Slling spa_remove(spa); 2425c5904d13Seschrock mutex_exit(&spa_namespace_lock); 2426990b4856Slling return (error); 2427990b4856Slling } 2428990b4856Slling 2429990b4856Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 2430990b4856Slling &version) != 0) 2431990b4856Slling version = SPA_VERSION; 2432990b4856Slling ASSERT(version <= SPA_VERSION); 2433b24ab676SJeff Bonwick 2434b24ab676SJeff Bonwick spa->spa_first_txg = txg; 2435b24ab676SJeff Bonwick spa->spa_uberblock.ub_txg = txg - 1; 2436990b4856Slling spa->spa_uberblock.ub_version = version; 2437fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 2438fa9e4066Sahrens 243954d692b7SGeorge Wilson /* 244054d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 244154d692b7SGeorge Wilson */ 244225f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 244325f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 244454d692b7SGeorge Wilson 24450373e76bSbonwick /* 24460373e76bSbonwick * Create the root vdev. 
24470373e76bSbonwick */ 2448e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 24490373e76bSbonwick 245099653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 24510373e76bSbonwick 245299653d4eSeschrock ASSERT(error != 0 || rvd != NULL); 245399653d4eSeschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 24540373e76bSbonwick 2455b7b97454Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 24560373e76bSbonwick error = EINVAL; 245799653d4eSeschrock 245899653d4eSeschrock if (error == 0 && 245999653d4eSeschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 2460fa94a07fSbrendan (error = spa_validate_aux(spa, nvroot, txg, 246199653d4eSeschrock VDEV_ALLOC_ADD)) == 0) { 2462573ca77eSGeorge Wilson for (int c = 0; c < rvd->vdev_children; c++) { 2463573ca77eSGeorge Wilson vdev_metaslab_set_size(rvd->vdev_child[c]); 2464573ca77eSGeorge Wilson vdev_expand(rvd->vdev_child[c], txg); 2465573ca77eSGeorge Wilson } 24660373e76bSbonwick } 24670373e76bSbonwick 2468e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2469fa9e4066Sahrens 247099653d4eSeschrock if (error != 0) { 2471fa9e4066Sahrens spa_unload(spa); 2472fa9e4066Sahrens spa_deactivate(spa); 2473fa9e4066Sahrens spa_remove(spa); 2474fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2475fa9e4066Sahrens return (error); 2476fa9e4066Sahrens } 2477fa9e4066Sahrens 247899653d4eSeschrock /* 247999653d4eSeschrock * Get the list of spares, if specified. 
248099653d4eSeschrock */ 248199653d4eSeschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 248299653d4eSeschrock &spares, &nspares) == 0) { 2483fa94a07fSbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 248499653d4eSeschrock KM_SLEEP) == 0); 2485fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 248699653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 2487e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 248899653d4eSeschrock spa_load_spares(spa); 2489e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2490fa94a07fSbrendan spa->spa_spares.sav_sync = B_TRUE; 2491fa94a07fSbrendan } 2492fa94a07fSbrendan 2493fa94a07fSbrendan /* 2494fa94a07fSbrendan * Get the list of level 2 cache devices, if specified. 2495fa94a07fSbrendan */ 2496fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 2497fa94a07fSbrendan &l2cache, &nl2cache) == 0) { 2498fa94a07fSbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 2499fa94a07fSbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 2500fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 2501fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2502e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2503fa94a07fSbrendan spa_load_l2cache(spa); 2504e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2505fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 250699653d4eSeschrock } 250799653d4eSeschrock 25080a48a24eStimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2509fa9e4066Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2510fa9e4066Sahrens 2511485bbbf5SGeorge Wilson /* 2512485bbbf5SGeorge Wilson * Create DDTs (dedup tables). 
2513485bbbf5SGeorge Wilson */ 2514485bbbf5SGeorge Wilson ddt_create(spa); 2515485bbbf5SGeorge Wilson 2516485bbbf5SGeorge Wilson spa_update_dspace(spa); 2517485bbbf5SGeorge Wilson 2518fa9e4066Sahrens tx = dmu_tx_create_assigned(dp, txg); 2519fa9e4066Sahrens 2520fa9e4066Sahrens /* 2521fa9e4066Sahrens * Create the pool config object. 2522fa9e4066Sahrens */ 2523fa9e4066Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 2524f7991ba4STim Haley DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2525fa9e4066Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2526fa9e4066Sahrens 2527ea8dc4b6Seschrock if (zap_add(spa->spa_meta_objset, 2528fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 2529ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 2530ea8dc4b6Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 2531ea8dc4b6Seschrock } 2532fa9e4066Sahrens 2533990b4856Slling /* Newly created pools with the right version are always deflated. */ 2534990b4856Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 2535990b4856Slling spa->spa_deflate = TRUE; 2536990b4856Slling if (zap_add(spa->spa_meta_objset, 2537990b4856Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 2538990b4856Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 2539990b4856Slling cmn_err(CE_PANIC, "failed to add deflate"); 2540990b4856Slling } 254199653d4eSeschrock } 254299653d4eSeschrock 2543fa9e4066Sahrens /* 2544fa9e4066Sahrens * Create the deferred-free bplist object. Turn off compression 2545fa9e4066Sahrens * because sync-to-convergence takes longer if the blocksize 2546fa9e4066Sahrens * keeps changing. 
2547fa9e4066Sahrens */ 2548b24ab676SJeff Bonwick spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset, 2549fa9e4066Sahrens 1 << 14, tx); 2550b24ab676SJeff Bonwick dmu_object_set_compress(spa->spa_meta_objset, 2551b24ab676SJeff Bonwick spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx); 2552fa9e4066Sahrens 2553ea8dc4b6Seschrock if (zap_add(spa->spa_meta_objset, 2554fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 2555b24ab676SJeff Bonwick sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) { 2556ea8dc4b6Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 2557ea8dc4b6Seschrock } 2558fa9e4066Sahrens 255906eeb2adSek /* 256006eeb2adSek * Create the pool's history object. 256106eeb2adSek */ 2562990b4856Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 2563990b4856Slling spa_history_create_obj(spa, tx); 2564990b4856Slling 2565990b4856Slling /* 2566990b4856Slling * Set pool properties. 2567990b4856Slling */ 2568990b4856Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 2569990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 25700a4e9518Sgw spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 2571573ca77eSGeorge Wilson spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 2572b24ab676SJeff Bonwick 2573379c004dSEric Schrock if (props != NULL) { 2574379c004dSEric Schrock spa_configfile_set(spa, props, B_FALSE); 2575990b4856Slling spa_sync_props(spa, props, CRED(), tx); 2576379c004dSEric Schrock } 257706eeb2adSek 2578fa9e4066Sahrens dmu_tx_commit(tx); 2579fa9e4066Sahrens 2580fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 2581fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 2582fa9e4066Sahrens 2583fa9e4066Sahrens /* 2584fa9e4066Sahrens * We explicitly wait for the first transaction to complete so that our 2585fa9e4066Sahrens * bean counters are appropriately updated. 
2586fa9e4066Sahrens */ 2587fa9e4066Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2588fa9e4066Sahrens 2589c5904d13Seschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2590fa9e4066Sahrens 2591990b4856Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 2592228975ccSek (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 2593c8e1f6d2SMark J Musante spa_history_log_version(spa, LOG_POOL_CREATE); 2594228975ccSek 2595088f3894Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 2596088f3894Sahrens 2597daaa36a7SGeorge Wilson mutex_exit(&spa_namespace_lock); 2598daaa36a7SGeorge Wilson 2599fa9e4066Sahrens return (0); 2600fa9e4066Sahrens } 2601fa9e4066Sahrens 2602e7cbe64fSgw #ifdef _KERNEL 2603e7cbe64fSgw /* 260421ecdf64SLin Ling * Get the root pool information from the root disk, then import the root pool 260521ecdf64SLin Ling * during the system boot up time. 2606e7cbe64fSgw */ 260721ecdf64SLin Ling extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 260821ecdf64SLin Ling 260921ecdf64SLin Ling static nvlist_t * 261021ecdf64SLin Ling spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 2611e7cbe64fSgw { 261221ecdf64SLin Ling nvlist_t *config; 2613e7cbe64fSgw nvlist_t *nvtop, *nvroot; 2614e7cbe64fSgw uint64_t pgid; 2615e7cbe64fSgw 261621ecdf64SLin Ling if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 261721ecdf64SLin Ling return (NULL); 261821ecdf64SLin Ling 2619e7cbe64fSgw /* 2620e7cbe64fSgw * Add this top-level vdev to the child array. 2621e7cbe64fSgw */ 262221ecdf64SLin Ling VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 262321ecdf64SLin Ling &nvtop) == 0); 262421ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 262521ecdf64SLin Ling &pgid) == 0); 262621ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 2627e7cbe64fSgw 2628e7cbe64fSgw /* 2629e7cbe64fSgw * Put this pool's top-level vdevs into a root vdev. 
2630e7cbe64fSgw */ 2631e7cbe64fSgw VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 263221ecdf64SLin Ling VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 263321ecdf64SLin Ling VDEV_TYPE_ROOT) == 0); 2634e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 2635e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 2636e7cbe64fSgw VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 2637e7cbe64fSgw &nvtop, 1) == 0); 2638e7cbe64fSgw 2639e7cbe64fSgw /* 2640e7cbe64fSgw * Replace the existing vdev_tree with the new root vdev in 2641e7cbe64fSgw * this pool's configuration (remove the old, add the new). 2642e7cbe64fSgw */ 2643e7cbe64fSgw VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 2644e7cbe64fSgw nvlist_free(nvroot); 264521ecdf64SLin Ling return (config); 2646e7cbe64fSgw } 2647e7cbe64fSgw 2648e7cbe64fSgw /* 264921ecdf64SLin Ling * Walk the vdev tree and see if we can find a device with "better" 265021ecdf64SLin Ling * configuration. A configuration is "better" if the label on that 265121ecdf64SLin Ling * device has a more recent txg. 
2652051aabe6Staylor */ 265321ecdf64SLin Ling static void 265421ecdf64SLin Ling spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 2655051aabe6Staylor { 2656573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) 265721ecdf64SLin Ling spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 2658051aabe6Staylor 265921ecdf64SLin Ling if (vd->vdev_ops->vdev_op_leaf) { 266021ecdf64SLin Ling nvlist_t *label; 266121ecdf64SLin Ling uint64_t label_txg; 2662051aabe6Staylor 266321ecdf64SLin Ling if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 266421ecdf64SLin Ling &label) != 0) 266521ecdf64SLin Ling return; 2666051aabe6Staylor 266721ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 266821ecdf64SLin Ling &label_txg) == 0); 2669051aabe6Staylor 267021ecdf64SLin Ling /* 267121ecdf64SLin Ling * Do we have a better boot device? 267221ecdf64SLin Ling */ 267321ecdf64SLin Ling if (label_txg > *txg) { 267421ecdf64SLin Ling *txg = label_txg; 267521ecdf64SLin Ling *avd = vd; 2676051aabe6Staylor } 267721ecdf64SLin Ling nvlist_free(label); 2678051aabe6Staylor } 2679051aabe6Staylor } 2680051aabe6Staylor 2681e7cbe64fSgw /* 2682e7cbe64fSgw * Import a root pool. 2683e7cbe64fSgw * 2684051aabe6Staylor * For x86. devpath_list will consist of devid and/or physpath name of 2685051aabe6Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 2686051aabe6Staylor * The GRUB "findroot" command will return the vdev we should boot. 2687e7cbe64fSgw * 2688e7cbe64fSgw * For Sparc, devpath_list consists the physpath name of the booting device 2689e7cbe64fSgw * no matter the rootpool is a single device pool or a mirrored pool. 2690e7cbe64fSgw * e.g. 
2691e7cbe64fSgw * "/pci@1f,0/ide@d/disk@0,0:a" 2692e7cbe64fSgw */ 2693e7cbe64fSgw int 2694051aabe6Staylor spa_import_rootpool(char *devpath, char *devid) 2695e7cbe64fSgw { 269621ecdf64SLin Ling spa_t *spa; 269721ecdf64SLin Ling vdev_t *rvd, *bvd, *avd = NULL; 269821ecdf64SLin Ling nvlist_t *config, *nvtop; 269921ecdf64SLin Ling uint64_t guid, txg; 2700e7cbe64fSgw char *pname; 2701e7cbe64fSgw int error; 2702e7cbe64fSgw 2703e7cbe64fSgw /* 270421ecdf64SLin Ling * Read the label from the boot device and generate a configuration. 2705e7cbe64fSgw */ 2706dedec472SJack Meng config = spa_generate_rootconf(devpath, devid, &guid); 2707dedec472SJack Meng #if defined(_OBP) && defined(_KERNEL) 2708dedec472SJack Meng if (config == NULL) { 2709dedec472SJack Meng if (strstr(devpath, "/iscsi/ssd") != NULL) { 2710dedec472SJack Meng /* iscsi boot */ 2711dedec472SJack Meng get_iscsi_bootpath_phy(devpath); 2712dedec472SJack Meng config = spa_generate_rootconf(devpath, devid, &guid); 2713dedec472SJack Meng } 2714dedec472SJack Meng } 2715dedec472SJack Meng #endif 2716dedec472SJack Meng if (config == NULL) { 271721ecdf64SLin Ling cmn_err(CE_NOTE, "Can not read the pool label from '%s'", 271821ecdf64SLin Ling devpath); 271921ecdf64SLin Ling return (EIO); 272021ecdf64SLin Ling } 2721e7cbe64fSgw 272221ecdf64SLin Ling VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 272321ecdf64SLin Ling &pname) == 0); 272421ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 2725e7cbe64fSgw 27266809eb4eSEric Schrock mutex_enter(&spa_namespace_lock); 27276809eb4eSEric Schrock if ((spa = spa_lookup(pname)) != NULL) { 27286809eb4eSEric Schrock /* 27296809eb4eSEric Schrock * Remove the existing root pool from the namespace so that we 27306809eb4eSEric Schrock * can replace it with the correct config we just read in. 
27316809eb4eSEric Schrock */ 27326809eb4eSEric Schrock spa_remove(spa); 27336809eb4eSEric Schrock } 27346809eb4eSEric Schrock 2735468c413aSTim Haley spa = spa_add(pname, config, NULL); 27366809eb4eSEric Schrock spa->spa_is_root = B_TRUE; 2737bc758434SLin Ling spa->spa_load_verbatim = B_TRUE; 2738e7cbe64fSgw 273921ecdf64SLin Ling /* 274021ecdf64SLin Ling * Build up a vdev tree based on the boot device's label config. 274121ecdf64SLin Ling */ 274221ecdf64SLin Ling VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 274321ecdf64SLin Ling &nvtop) == 0); 274421ecdf64SLin Ling spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 274521ecdf64SLin Ling error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, 274621ecdf64SLin Ling VDEV_ALLOC_ROOTPOOL); 274721ecdf64SLin Ling spa_config_exit(spa, SCL_ALL, FTAG); 274821ecdf64SLin Ling if (error) { 274921ecdf64SLin Ling mutex_exit(&spa_namespace_lock); 275021ecdf64SLin Ling nvlist_free(config); 275121ecdf64SLin Ling cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", 275221ecdf64SLin Ling pname); 275321ecdf64SLin Ling return (error); 275421ecdf64SLin Ling } 275521ecdf64SLin Ling 275621ecdf64SLin Ling /* 275721ecdf64SLin Ling * Get the boot vdev. 275821ecdf64SLin Ling */ 275921ecdf64SLin Ling if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 276021ecdf64SLin Ling cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", 276121ecdf64SLin Ling (u_longlong_t)guid); 276221ecdf64SLin Ling error = ENOENT; 276321ecdf64SLin Ling goto out; 276421ecdf64SLin Ling } 2765e7cbe64fSgw 276621ecdf64SLin Ling /* 276721ecdf64SLin Ling * Determine if there is a better boot device. 276821ecdf64SLin Ling */ 276921ecdf64SLin Ling avd = bvd; 277021ecdf64SLin Ling spa_alt_rootvdev(rvd, &avd, &txg); 277121ecdf64SLin Ling if (avd != bvd) { 277221ecdf64SLin Ling cmn_err(CE_NOTE, "The boot device is 'degraded'. 
Please " 277321ecdf64SLin Ling "try booting from '%s'", avd->vdev_path); 277421ecdf64SLin Ling error = EINVAL; 277521ecdf64SLin Ling goto out; 277621ecdf64SLin Ling } 2777e7cbe64fSgw 277821ecdf64SLin Ling /* 277921ecdf64SLin Ling * If the boot device is part of a spare vdev then ensure that 278021ecdf64SLin Ling * we're booting off the active spare. 278121ecdf64SLin Ling */ 278221ecdf64SLin Ling if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && 278321ecdf64SLin Ling !bvd->vdev_isspare) { 278421ecdf64SLin Ling cmn_err(CE_NOTE, "The boot device is currently spared. Please " 278521ecdf64SLin Ling "try booting from '%s'", 278621ecdf64SLin Ling bvd->vdev_parent->vdev_child[1]->vdev_path); 278721ecdf64SLin Ling error = EINVAL; 278821ecdf64SLin Ling goto out; 278921ecdf64SLin Ling } 279021ecdf64SLin Ling 279121ecdf64SLin Ling error = 0; 2792c8e1f6d2SMark J Musante spa_history_log_version(spa, LOG_POOL_IMPORT); 279321ecdf64SLin Ling out: 279421ecdf64SLin Ling spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 279521ecdf64SLin Ling vdev_free(rvd); 279621ecdf64SLin Ling spa_config_exit(spa, SCL_ALL, FTAG); 279721ecdf64SLin Ling mutex_exit(&spa_namespace_lock); 279821ecdf64SLin Ling 279921ecdf64SLin Ling nvlist_free(config); 2800e7cbe64fSgw return (error); 2801e7cbe64fSgw } 280221ecdf64SLin Ling 2803e7cbe64fSgw #endif 2804e7cbe64fSgw 2805e7cbe64fSgw /* 28066809eb4eSEric Schrock * Take a pool and insert it into the namespace as if it had been loaded at 28076809eb4eSEric Schrock * boot. 
2808e7cbe64fSgw */ 2809e7cbe64fSgw int 28106809eb4eSEric Schrock spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props) 2811e7cbe64fSgw { 28126809eb4eSEric Schrock spa_t *spa; 2813468c413aSTim Haley zpool_rewind_policy_t policy; 28146809eb4eSEric Schrock char *altroot = NULL; 28156809eb4eSEric Schrock 28166809eb4eSEric Schrock mutex_enter(&spa_namespace_lock); 28176809eb4eSEric Schrock if (spa_lookup(pool) != NULL) { 28186809eb4eSEric Schrock mutex_exit(&spa_namespace_lock); 28196809eb4eSEric Schrock return (EEXIST); 28206809eb4eSEric Schrock } 28216809eb4eSEric Schrock 28226809eb4eSEric Schrock (void) nvlist_lookup_string(props, 28236809eb4eSEric Schrock zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 2824468c413aSTim Haley spa = spa_add(pool, config, altroot); 28256809eb4eSEric Schrock 2826468c413aSTim Haley zpool_get_rewind_policy(config, &policy); 2827468c413aSTim Haley spa->spa_load_max_txg = policy.zrp_txg; 28284f0f5e5bSVictor Latushkin 2829468c413aSTim Haley spa->spa_load_verbatim = B_TRUE; 28306809eb4eSEric Schrock 28316809eb4eSEric Schrock if (props != NULL) 28326809eb4eSEric Schrock spa_configfile_set(spa, props, B_FALSE); 28336809eb4eSEric Schrock 28346809eb4eSEric Schrock spa_config_sync(spa, B_FALSE, B_TRUE); 28356809eb4eSEric Schrock 28366809eb4eSEric Schrock mutex_exit(&spa_namespace_lock); 2837c8e1f6d2SMark J Musante spa_history_log_version(spa, LOG_POOL_IMPORT); 28386809eb4eSEric Schrock 28396809eb4eSEric Schrock return (0); 2840e7cbe64fSgw } 2841e7cbe64fSgw 28426809eb4eSEric Schrock /* 28436809eb4eSEric Schrock * Import a non-root pool into the system. 
 */
/*
 * Full import: create the spa_t, load the pool from disk (optionally with
 * rewind/recovery per the policy embedded in 'config'), reconcile spares and
 * L2ARC devices against the user-supplied config, apply properties, and
 * update the config cache.
 *
 * 'pool'   - pool name; EEXIST if already in the namespace.
 * 'config' - user-supplied configuration; trusted as the mosconfig
 *            (B_TRUE passed to spa_load_best) and also used as the output
 *            channel for rewind data (spa_rewind_data_to_nvlist).
 * 'props'  - optional properties; altroot is consumed pre-add, the rest are
 *            applied after load.
 *
 * Returns 0 on success; EEXIST, or any error from spa_load_best(),
 * spa_validate_aux(), or spa_prop_set().  On failure the spa_t is fully
 * torn down.  Caller must not hold spa_namespace_lock.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* A rewind request upgrades the load state to RECOVER. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0));

	/*
	 * Propagate anything learned about failing or best txgs
	 * back to caller via the 'config' nvlist.
	 */
	spa_rewind_data_to_nvlist(spa, config);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/*
	 * Validate the aux devices from the user-supplied vdev tree even if
	 * the load itself failed; note error is only updated when still 0.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/* On any failure so far, tear down the half-built spa_t. */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}


/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"

/*
 * Probe an importable pool: temporarily import 'tryconfig' read-only under
 * the reserved name TRYIMPORT_NAME, generate a current config (with pool
 * name/state restored from 'tryconfig', plus timestamp, bootfs, spares and
 * L2ARC info), then fully tear the spa_t back down.
 *
 * Returns a newly allocated config nvlist (caller frees), or NULL if
 * 'tryconfig' lacks a pool name/state or the vdev tree couldn't be parsed.
 * Caller must not hold spa_namespace_lock.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.  Activate read-only
	 * (FREAD): a tryimport must not modify on-disk state.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		/* Restore the real pool name and state over TRYIMPORT_NAME. */
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.  (EEXIST from spa_load is tolerated
		 * here; the pool state is still usable for this purpose.)
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Splice the real pool name in front of the
				 * dataset's relative path, if it has one.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* Tear the temporary import back down unconditionally. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple. We make sure there
 * is no more pending I/O and any references to the pool are gone. Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 */
/*
 * Common worker for spa_export(), spa_destroy() and spa_reset().
 *
 * 'new_state' selects the operation: POOL_STATE_EXPORTED (export),
 * POOL_STATE_DESTROYED (destroy), or POOL_STATE_UNINITIALIZED (reset --
 * unload without removing from the namespace).
 * 'oldconfig' - if non-NULL, receives a dup of the pool's config on success.
 * 'force'     - export even with an active shared spare.
 * 'hardforce' - skip the final label dirty/sync and config-cache sync.
 *
 * Returns 0, EROFS (read-only mode), ENOENT (no such pool), EBUSY (active
 * references), or EXDEV (active shared spare without 'force').
 * Caller must not hold spa_namespace_lock.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 * (spa_async_suspend() may block, so it can't run under the lock.)
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	/*
	 * NOTE(review): this event fires for export and reset as well as
	 * destroy -- confirm that consumers of ESC_ZFS_POOL_DESTROY expect
	 * that before relying on it.
	 */
	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	/* Reset keeps the spa_t registered; export/destroy remove it. */
	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
3218fa9e4066Sahrens */ 3219fa9e4066Sahrens int 3220394ab0cbSGeorge Wilson spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, 3221394ab0cbSGeorge Wilson boolean_t hardforce) 3222fa9e4066Sahrens { 3223394ab0cbSGeorge Wilson return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, 3224394ab0cbSGeorge Wilson force, hardforce)); 3225fa9e4066Sahrens } 3226fa9e4066Sahrens 3227ea8dc4b6Seschrock /* 3228ea8dc4b6Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 3229ea8dc4b6Seschrock * from the namespace in any way. 3230ea8dc4b6Seschrock */ 3231ea8dc4b6Seschrock int 3232ea8dc4b6Seschrock spa_reset(char *pool) 3233ea8dc4b6Seschrock { 323489a89ebfSlling return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, 3235394ab0cbSGeorge Wilson B_FALSE, B_FALSE)); 3236ea8dc4b6Seschrock } 3237ea8dc4b6Seschrock 3238fa9e4066Sahrens /* 3239fa9e4066Sahrens * ========================================================================== 3240fa9e4066Sahrens * Device manipulation 3241fa9e4066Sahrens * ========================================================================== 3242fa9e4066Sahrens */ 3243fa9e4066Sahrens 3244fa9e4066Sahrens /* 32458654d025Sperrin * Add a device to a storage pool. 
 */
/*
 * Add the top-level vdevs, spares and/or L2ARC devices described by 'nvroot'
 * to 'spa'.  Returns 0 or an error via spa_vdev_exit() (EINVAL if 'nvroot'
 * describes nothing to add, or errors from spa_config_parse()/vdev_create()/
 * spa_validate_aux()).  Runs under spa_vdev_enter()/spa_vdev_exit().
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* Nothing at all to add is a caller error. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 * (Reuses slots left by removed devices; otherwise 'id'
		 * falls through as rvd->vdev_children, i.e. append.)
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 */
/*
 * 'guid'      - guid of the existing leaf vdev to attach to / replace.
 * 'nvroot'    - config describing exactly one new leaf device.
 * 'replacing' - nonzero for replace semantics (see block comment above).
 *
 * Returns 0 or an error via spa_vdev_exit(): ENODEV (no such guid), ENOTSUP
 * (non-leaf target, disallowed parent/spare combination, spare-for-log),
 * EINVAL (bad nvroot), EOVERFLOW (new device too small), EDOM (new device's
 * ashift exceeds the top-level vdev's), or errors from vdev_create().
 * On success a resilver is kicked off.
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* Exactly one new device may be attached at a time. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the
		 * root vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise,
		 * we want to create a replacing vdev.  The user is not allowed
		 * to attach to a spared vdev child unless the 'isspare' state
		 * is the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* +5 covers the appended "/old" plus the terminating NUL. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	newvd->vdev_crtxg = oldvd->vdev_crtxg;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/* Snapshot the paths now; the vdevs may be gone after spa_vdev_exit. */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(), "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}

/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
3541fa9e4066Sahrens */ 3542fa9e4066Sahrens int 35438ad4d6ddSJeff Bonwick spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 3544fa9e4066Sahrens { 3545fa9e4066Sahrens uint64_t txg; 35468ad4d6ddSJeff Bonwick int error; 3547fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 3548fa9e4066Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 354999653d4eSeschrock boolean_t unspare = B_FALSE; 355099653d4eSeschrock uint64_t unspare_guid; 3551bf82a41bSeschrock size_t len; 3552fa9e4066Sahrens 3553fa9e4066Sahrens txg = spa_vdev_enter(spa); 3554fa9e4066Sahrens 3555c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3556fa9e4066Sahrens 3557fa9e4066Sahrens if (vd == NULL) 3558fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3559fa9e4066Sahrens 35600e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 35610e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 35620e34b6a7Sbonwick 3563fa9e4066Sahrens pvd = vd->vdev_parent; 3564fa9e4066Sahrens 35658ad4d6ddSJeff Bonwick /* 35668ad4d6ddSJeff Bonwick * If the parent/child relationship is not as expected, don't do it. 35678ad4d6ddSJeff Bonwick * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 35688ad4d6ddSJeff Bonwick * vdev that's replacing B with C. The user's intent in replacing 35698ad4d6ddSJeff Bonwick * is to go from M(A,B) to M(A,C). If the user decides to cancel 35708ad4d6ddSJeff Bonwick * the replace by detaching C, the expected behavior is to end up 35718ad4d6ddSJeff Bonwick * M(A,B). But suppose that right after deciding to detach C, 35728ad4d6ddSJeff Bonwick * the replacement of B completes. We would have M(A,C), and then 35738ad4d6ddSJeff Bonwick * ask to detach C, which would leave us with just A -- not what 35748ad4d6ddSJeff Bonwick * the user wanted. To prevent this, we make sure that the 35758ad4d6ddSJeff Bonwick * parent/child relationship hasn't changed -- in this example, 35768ad4d6ddSJeff Bonwick * that C's parent is still the replacing vdev R. 
35778ad4d6ddSJeff Bonwick */ 35788ad4d6ddSJeff Bonwick if (pvd->vdev_guid != pguid && pguid != 0) 35798ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 35808ad4d6ddSJeff Bonwick 3581fa9e4066Sahrens /* 3582fa9e4066Sahrens * If replace_done is specified, only remove this device if it's 358399653d4eSeschrock * the first child of a replacing vdev. For the 'spare' vdev, either 358499653d4eSeschrock * disk can be removed. 358599653d4eSeschrock */ 358699653d4eSeschrock if (replace_done) { 358799653d4eSeschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 358899653d4eSeschrock if (vd->vdev_id != 0) 358999653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 359099653d4eSeschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 359199653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 359299653d4eSeschrock } 359399653d4eSeschrock } 359499653d4eSeschrock 359599653d4eSeschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 3596e7437265Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 3597fa9e4066Sahrens 3598fa9e4066Sahrens /* 359999653d4eSeschrock * Only mirror, replacing, and spare vdevs support detach. 3600fa9e4066Sahrens */ 3601fa9e4066Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 360299653d4eSeschrock pvd->vdev_ops != &vdev_mirror_ops && 360399653d4eSeschrock pvd->vdev_ops != &vdev_spare_ops) 3604fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3605fa9e4066Sahrens 3606fa9e4066Sahrens /* 36078ad4d6ddSJeff Bonwick * If this device has the only valid copy of some data, 36088ad4d6ddSJeff Bonwick * we cannot safely detach it. 
3609fa9e4066Sahrens */ 36108ad4d6ddSJeff Bonwick if (vdev_dtl_required(vd)) 3611fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3612fa9e4066Sahrens 36138ad4d6ddSJeff Bonwick ASSERT(pvd->vdev_children >= 2); 3614fa9e4066Sahrens 3615bf82a41bSeschrock /* 3616bf82a41bSeschrock * If we are detaching the second disk from a replacing vdev, then 3617bf82a41bSeschrock * check to see if we changed the original vdev's path to have "/old" 3618bf82a41bSeschrock * at the end in spa_vdev_attach(). If so, undo that change now. 3619bf82a41bSeschrock */ 3620bf82a41bSeschrock if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 3621bf82a41bSeschrock pvd->vdev_child[0]->vdev_path != NULL && 3622bf82a41bSeschrock pvd->vdev_child[1]->vdev_path != NULL) { 3623bf82a41bSeschrock ASSERT(pvd->vdev_child[1] == vd); 3624bf82a41bSeschrock cvd = pvd->vdev_child[0]; 3625bf82a41bSeschrock len = strlen(vd->vdev_path); 3626bf82a41bSeschrock if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 3627bf82a41bSeschrock strcmp(cvd->vdev_path + len, "/old") == 0) { 3628bf82a41bSeschrock spa_strfree(cvd->vdev_path); 3629bf82a41bSeschrock cvd->vdev_path = spa_strdup(vd->vdev_path); 3630bf82a41bSeschrock } 3631bf82a41bSeschrock } 3632bf82a41bSeschrock 363399653d4eSeschrock /* 363499653d4eSeschrock * If we are detaching the original disk from a spare, then it implies 363599653d4eSeschrock * that the spare should become a real disk, and be removed from the 363699653d4eSeschrock * active spare list for the pool. 363799653d4eSeschrock */ 363899653d4eSeschrock if (pvd->vdev_ops == &vdev_spare_ops && 36398ad4d6ddSJeff Bonwick vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) 364099653d4eSeschrock unspare = B_TRUE; 364199653d4eSeschrock 3642fa9e4066Sahrens /* 3643fa9e4066Sahrens * Erase the disk labels so the disk can be used for other things. 
3644fa9e4066Sahrens * This must be done after all other error cases are handled, 3645fa9e4066Sahrens * but before we disembowel vd (so we can still do I/O to it). 3646fa9e4066Sahrens * But if we can't do it, don't treat the error as fatal -- 3647fa9e4066Sahrens * it may be that the unwritability of the disk is the reason 3648fa9e4066Sahrens * it's being detached! 3649fa9e4066Sahrens */ 365039c23413Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3651fa9e4066Sahrens 3652fa9e4066Sahrens /* 3653fa9e4066Sahrens * Remove vd from its parent and compact the parent's children. 3654fa9e4066Sahrens */ 3655fa9e4066Sahrens vdev_remove_child(pvd, vd); 3656fa9e4066Sahrens vdev_compact_children(pvd); 3657fa9e4066Sahrens 3658fa9e4066Sahrens /* 3659fa9e4066Sahrens * Remember one of the remaining children so we can get tvd below. 3660fa9e4066Sahrens */ 3661fa9e4066Sahrens cvd = pvd->vdev_child[0]; 3662fa9e4066Sahrens 366399653d4eSeschrock /* 366499653d4eSeschrock * If we need to remove the remaining child from the list of hot spares, 36658ad4d6ddSJeff Bonwick * do it now, marking the vdev as no longer a spare in the process. 36668ad4d6ddSJeff Bonwick * We must do this before vdev_remove_parent(), because that can 36678ad4d6ddSJeff Bonwick * change the GUID if it creates a new toplevel GUID. For a similar 36688ad4d6ddSJeff Bonwick * reason, we must remove the spare now, in the same txg as the detach; 36698ad4d6ddSJeff Bonwick * otherwise someone could attach a new sibling, change the GUID, and 36708ad4d6ddSJeff Bonwick * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
367199653d4eSeschrock */ 367299653d4eSeschrock if (unspare) { 367399653d4eSeschrock ASSERT(cvd->vdev_isspare); 367439c23413Seschrock spa_spare_remove(cvd); 367599653d4eSeschrock unspare_guid = cvd->vdev_guid; 36768ad4d6ddSJeff Bonwick (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 367799653d4eSeschrock } 367899653d4eSeschrock 3679fa9e4066Sahrens /* 3680fa9e4066Sahrens * If the parent mirror/replacing vdev only has one child, 3681fa9e4066Sahrens * the parent is no longer needed. Remove it from the tree. 3682fa9e4066Sahrens */ 3683fa9e4066Sahrens if (pvd->vdev_children == 1) 3684fa9e4066Sahrens vdev_remove_parent(cvd); 3685fa9e4066Sahrens 3686fa9e4066Sahrens /* 3687fa9e4066Sahrens * We don't set tvd until now because the parent we just removed 3688fa9e4066Sahrens * may have been the previous top-level vdev. 3689fa9e4066Sahrens */ 3690fa9e4066Sahrens tvd = cvd->vdev_top; 3691fa9e4066Sahrens ASSERT(tvd->vdev_parent == rvd); 3692fa9e4066Sahrens 3693fa9e4066Sahrens /* 369439c23413Seschrock * Reevaluate the parent vdev state. 3695fa9e4066Sahrens */ 36963d7072f8Seschrock vdev_propagate_state(cvd); 3697fa9e4066Sahrens 3698fa9e4066Sahrens /* 3699573ca77eSGeorge Wilson * If the 'autoexpand' property is set on the pool then automatically 3700573ca77eSGeorge Wilson * try to expand the size of the pool. For example if the device we 3701573ca77eSGeorge Wilson * just detached was smaller than the others, it may be possible to 3702573ca77eSGeorge Wilson * add metaslabs (i.e. grow the pool). We need to reopen the vdev 3703573ca77eSGeorge Wilson * first so that we can obtain the updated sizes of the leaf vdevs. 3704fa9e4066Sahrens */ 3705573ca77eSGeorge Wilson if (spa->spa_autoexpand) { 3706573ca77eSGeorge Wilson vdev_reopen(tvd); 3707573ca77eSGeorge Wilson vdev_expand(tvd, txg); 3708573ca77eSGeorge Wilson } 3709fa9e4066Sahrens 3710fa9e4066Sahrens vdev_config_dirty(tvd); 3711fa9e4066Sahrens 3712fa9e4066Sahrens /* 371339c23413Seschrock * Mark vd's DTL as dirty in this txg. 
vdev_dtl_sync() will see that 371439c23413Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 371539c23413Seschrock * But first make sure we're not on any *other* txg's DTL list, to 371639c23413Seschrock * prevent vd from being accessed after it's freed. 3717fa9e4066Sahrens */ 37188ad4d6ddSJeff Bonwick for (int t = 0; t < TXG_SIZE; t++) 3719fa9e4066Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 3720ecc2d604Sbonwick vd->vdev_detached = B_TRUE; 3721ecc2d604Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 3722fa9e4066Sahrens 37233d7072f8Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 37243d7072f8Seschrock 372599653d4eSeschrock error = spa_vdev_exit(spa, vd, txg, 0); 372699653d4eSeschrock 372799653d4eSeschrock /* 372839c23413Seschrock * If this was the removal of the original device in a hot spare vdev, 372939c23413Seschrock * then we want to go through and remove the device from the hot spare 373039c23413Seschrock * list of every other pool. 
373199653d4eSeschrock */ 373299653d4eSeschrock if (unspare) { 37338ad4d6ddSJeff Bonwick spa_t *myspa = spa; 373499653d4eSeschrock spa = NULL; 373599653d4eSeschrock mutex_enter(&spa_namespace_lock); 373699653d4eSeschrock while ((spa = spa_next(spa)) != NULL) { 373799653d4eSeschrock if (spa->spa_state != POOL_STATE_ACTIVE) 373899653d4eSeschrock continue; 37398ad4d6ddSJeff Bonwick if (spa == myspa) 37408ad4d6ddSJeff Bonwick continue; 37419af0a4dfSJeff Bonwick spa_open_ref(spa, FTAG); 37429af0a4dfSJeff Bonwick mutex_exit(&spa_namespace_lock); 374399653d4eSeschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 37449af0a4dfSJeff Bonwick mutex_enter(&spa_namespace_lock); 37459af0a4dfSJeff Bonwick spa_close(spa, FTAG); 374699653d4eSeschrock } 374799653d4eSeschrock mutex_exit(&spa_namespace_lock); 374899653d4eSeschrock } 374999653d4eSeschrock 375099653d4eSeschrock return (error); 375199653d4eSeschrock } 375299653d4eSeschrock 3753e14bb325SJeff Bonwick static nvlist_t * 3754e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 375599653d4eSeschrock { 3756e14bb325SJeff Bonwick for (int i = 0; i < count; i++) { 3757e14bb325SJeff Bonwick uint64_t guid; 375899653d4eSeschrock 3759e14bb325SJeff Bonwick VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 3760e14bb325SJeff Bonwick &guid) == 0); 376199653d4eSeschrock 3762e14bb325SJeff Bonwick if (guid == target_guid) 3763e14bb325SJeff Bonwick return (nvpp[i]); 376499653d4eSeschrock } 376599653d4eSeschrock 3766e14bb325SJeff Bonwick return (NULL); 3767fa94a07fSbrendan } 3768fa94a07fSbrendan 3769e14bb325SJeff Bonwick static void 3770e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 3771e14bb325SJeff Bonwick nvlist_t *dev_to_remove) 3772fa94a07fSbrendan { 3773e14bb325SJeff Bonwick nvlist_t **newdev = NULL; 3774fa94a07fSbrendan 3775e14bb325SJeff Bonwick if (count > 1) 3776e14bb325SJeff Bonwick newdev = kmem_alloc((count - 1) * sizeof (void 
*), KM_SLEEP); 3777fa94a07fSbrendan 3778e14bb325SJeff Bonwick for (int i = 0, j = 0; i < count; i++) { 3779e14bb325SJeff Bonwick if (dev[i] == dev_to_remove) 3780e14bb325SJeff Bonwick continue; 3781e14bb325SJeff Bonwick VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 3782fa94a07fSbrendan } 3783fa94a07fSbrendan 3784e14bb325SJeff Bonwick VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 3785e14bb325SJeff Bonwick VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 3786fa94a07fSbrendan 3787e14bb325SJeff Bonwick for (int i = 0; i < count - 1; i++) 3788e14bb325SJeff Bonwick nvlist_free(newdev[i]); 3789fa94a07fSbrendan 3790e14bb325SJeff Bonwick if (count > 1) 3791e14bb325SJeff Bonwick kmem_free(newdev, (count - 1) * sizeof (void *)); 3792fa94a07fSbrendan } 3793fa94a07fSbrendan 379488ecc943SGeorge Wilson /* 379588ecc943SGeorge Wilson * Removing a device from the vdev namespace requires several steps 379688ecc943SGeorge Wilson * and can take a significant amount of time. As a result we use 379788ecc943SGeorge Wilson * the spa_vdev_config_[enter/exit] functions which allow us to 379888ecc943SGeorge Wilson * grab and release the spa_config_lock while still holding the namespace 379988ecc943SGeorge Wilson * lock. During each step the configuration is synced out. 380088ecc943SGeorge Wilson */ 380188ecc943SGeorge Wilson 380288ecc943SGeorge Wilson /* 380388ecc943SGeorge Wilson * Evacuate the device. 
380488ecc943SGeorge Wilson */ 380588ecc943SGeorge Wilson int 380688ecc943SGeorge Wilson spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 380788ecc943SGeorge Wilson { 3808a1521560SJeff Bonwick int error = 0; 380988ecc943SGeorge Wilson uint64_t txg; 381088ecc943SGeorge Wilson 381188ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 381288ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 3813b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 381488ecc943SGeorge Wilson 381588ecc943SGeorge Wilson /* 381688ecc943SGeorge Wilson * Evacuate the device. We don't hold the config lock as writer 381788ecc943SGeorge Wilson * since we need to do I/O but we do keep the 381888ecc943SGeorge Wilson * spa_namespace_lock held. Once this completes the device 381988ecc943SGeorge Wilson * should no longer have any blocks allocated on it. 382088ecc943SGeorge Wilson */ 382188ecc943SGeorge Wilson if (vd->vdev_islog) { 3822a1521560SJeff Bonwick error = dmu_objset_find(spa_name(spa), zil_vdev_offline, 3823a1521560SJeff Bonwick NULL, DS_FIND_CHILDREN); 3824a1521560SJeff Bonwick } else { 3825a1521560SJeff Bonwick error = ENOTSUP; /* until we have bp rewrite */ 382688ecc943SGeorge Wilson } 382788ecc943SGeorge Wilson 3828a1521560SJeff Bonwick txg_wait_synced(spa_get_dsl(spa), 0); 3829a1521560SJeff Bonwick 3830a1521560SJeff Bonwick if (error) 3831a1521560SJeff Bonwick return (error); 3832a1521560SJeff Bonwick 383388ecc943SGeorge Wilson /* 3834a1521560SJeff Bonwick * The evacuation succeeded. Remove any remaining MOS metadata 3835a1521560SJeff Bonwick * associated with this vdev, and wait for these changes to sync. 
383688ecc943SGeorge Wilson */ 383788ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 383888ecc943SGeorge Wilson vd->vdev_removing = B_TRUE; 383988ecc943SGeorge Wilson vdev_dirty(vd, 0, NULL, txg); 384088ecc943SGeorge Wilson vdev_config_dirty(vd); 384188ecc943SGeorge Wilson spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 384288ecc943SGeorge Wilson 384388ecc943SGeorge Wilson return (0); 384488ecc943SGeorge Wilson } 384588ecc943SGeorge Wilson 384688ecc943SGeorge Wilson /* 384788ecc943SGeorge Wilson * Complete the removal by cleaning up the namespace. 384888ecc943SGeorge Wilson */ 384988ecc943SGeorge Wilson void 3850a1521560SJeff Bonwick spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) 385188ecc943SGeorge Wilson { 385288ecc943SGeorge Wilson vdev_t *rvd = spa->spa_root_vdev; 385388ecc943SGeorge Wilson uint64_t id = vd->vdev_id; 385488ecc943SGeorge Wilson boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 385588ecc943SGeorge Wilson 385688ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 385788ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3858b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 385988ecc943SGeorge Wilson 386088ecc943SGeorge Wilson (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3861b24ab676SJeff Bonwick 3862b24ab676SJeff Bonwick if (list_link_active(&vd->vdev_state_dirty_node)) 3863b24ab676SJeff Bonwick vdev_state_clean(vd); 3864b24ab676SJeff Bonwick if (list_link_active(&vd->vdev_config_dirty_node)) 3865b24ab676SJeff Bonwick vdev_config_clean(vd); 3866b24ab676SJeff Bonwick 386788ecc943SGeorge Wilson vdev_free(vd); 386888ecc943SGeorge Wilson 386988ecc943SGeorge Wilson if (last_vdev) { 387088ecc943SGeorge Wilson vdev_compact_children(rvd); 387188ecc943SGeorge Wilson } else { 387288ecc943SGeorge Wilson vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 387388ecc943SGeorge Wilson vdev_add_child(rvd, vd); 387488ecc943SGeorge Wilson } 387588ecc943SGeorge Wilson vdev_config_dirty(rvd); 
387688ecc943SGeorge Wilson 387788ecc943SGeorge Wilson /* 387888ecc943SGeorge Wilson * Reassess the health of our root vdev. 387988ecc943SGeorge Wilson */ 388088ecc943SGeorge Wilson vdev_reopen(rvd); 388188ecc943SGeorge Wilson } 388288ecc943SGeorge Wilson 3883fa94a07fSbrendan /* 3884fa94a07fSbrendan * Remove a device from the pool. Currently, this supports removing only hot 388588ecc943SGeorge Wilson * spares, slogs, and level 2 ARC devices. 3886fa94a07fSbrendan */ 3887fa94a07fSbrendan int 3888fa94a07fSbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 3889fa94a07fSbrendan { 3890fa94a07fSbrendan vdev_t *vd; 3891a1521560SJeff Bonwick metaslab_group_t *mg; 3892e14bb325SJeff Bonwick nvlist_t **spares, **l2cache, *nv; 38938ad4d6ddSJeff Bonwick uint64_t txg = 0; 389488ecc943SGeorge Wilson uint_t nspares, nl2cache; 3895fa94a07fSbrendan int error = 0; 38968ad4d6ddSJeff Bonwick boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 3897fa94a07fSbrendan 38988ad4d6ddSJeff Bonwick if (!locked) 38998ad4d6ddSJeff Bonwick txg = spa_vdev_enter(spa); 3900fa94a07fSbrendan 3901c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3902fa94a07fSbrendan 3903fa94a07fSbrendan if (spa->spa_spares.sav_vdevs != NULL && 3904fa94a07fSbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 3905e14bb325SJeff Bonwick ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 3906e14bb325SJeff Bonwick (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 3907e14bb325SJeff Bonwick /* 3908e14bb325SJeff Bonwick * Only remove the hot spare if it's not currently in use 3909e14bb325SJeff Bonwick * in this pool. 
3910e14bb325SJeff Bonwick */ 3911e14bb325SJeff Bonwick if (vd == NULL || unspare) { 3912e14bb325SJeff Bonwick spa_vdev_remove_aux(spa->spa_spares.sav_config, 3913e14bb325SJeff Bonwick ZPOOL_CONFIG_SPARES, spares, nspares, nv); 3914e14bb325SJeff Bonwick spa_load_spares(spa); 3915e14bb325SJeff Bonwick spa->spa_spares.sav_sync = B_TRUE; 3916e14bb325SJeff Bonwick } else { 3917e14bb325SJeff Bonwick error = EBUSY; 3918e14bb325SJeff Bonwick } 3919e14bb325SJeff Bonwick } else if (spa->spa_l2cache.sav_vdevs != NULL && 3920fa94a07fSbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 3921e14bb325SJeff Bonwick ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 3922e14bb325SJeff Bonwick (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 3923e14bb325SJeff Bonwick /* 3924e14bb325SJeff Bonwick * Cache devices can always be removed. 3925e14bb325SJeff Bonwick */ 3926e14bb325SJeff Bonwick spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 3927e14bb325SJeff Bonwick ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 3928fa94a07fSbrendan spa_load_l2cache(spa); 3929fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 393088ecc943SGeorge Wilson } else if (vd != NULL && vd->vdev_islog) { 393188ecc943SGeorge Wilson ASSERT(!locked); 3932b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 393388ecc943SGeorge Wilson 393488ecc943SGeorge Wilson /* 393588ecc943SGeorge Wilson * XXX - Once we have bp-rewrite this should 393688ecc943SGeorge Wilson * become the common case. 393788ecc943SGeorge Wilson */ 393888ecc943SGeorge Wilson 3939a1521560SJeff Bonwick mg = vd->vdev_mg; 3940a1521560SJeff Bonwick 394188ecc943SGeorge Wilson /* 3942a1521560SJeff Bonwick * Stop allocating from this vdev. 
394388ecc943SGeorge Wilson */ 3944a1521560SJeff Bonwick metaslab_group_passivate(mg); 394588ecc943SGeorge Wilson 3946b24ab676SJeff Bonwick /* 3947b24ab676SJeff Bonwick * Wait for the youngest allocations and frees to sync, 3948b24ab676SJeff Bonwick * and then wait for the deferral of those frees to finish. 3949b24ab676SJeff Bonwick */ 3950b24ab676SJeff Bonwick spa_vdev_config_exit(spa, NULL, 3951b24ab676SJeff Bonwick txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); 3952b24ab676SJeff Bonwick 3953a1521560SJeff Bonwick /* 3954a1521560SJeff Bonwick * Attempt to evacuate the vdev. 3955a1521560SJeff Bonwick */ 3956a1521560SJeff Bonwick error = spa_vdev_remove_evacuate(spa, vd); 3957a1521560SJeff Bonwick 395888ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 395988ecc943SGeorge Wilson 3960a1521560SJeff Bonwick /* 3961a1521560SJeff Bonwick * If we couldn't evacuate the vdev, unwind. 3962a1521560SJeff Bonwick */ 3963a1521560SJeff Bonwick if (error) { 3964a1521560SJeff Bonwick metaslab_group_activate(mg); 3965a1521560SJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, error)); 3966a1521560SJeff Bonwick } 3967a1521560SJeff Bonwick 3968a1521560SJeff Bonwick /* 3969a1521560SJeff Bonwick * Clean up the vdev namespace. 3970a1521560SJeff Bonwick */ 3971a1521560SJeff Bonwick spa_vdev_remove_from_namespace(spa, vd); 397288ecc943SGeorge Wilson 3973e14bb325SJeff Bonwick } else if (vd != NULL) { 3974e14bb325SJeff Bonwick /* 3975e14bb325SJeff Bonwick * Normal vdevs cannot be removed (yet). 3976e14bb325SJeff Bonwick */ 3977e14bb325SJeff Bonwick error = ENOTSUP; 3978e14bb325SJeff Bonwick } else { 3979e14bb325SJeff Bonwick /* 3980e14bb325SJeff Bonwick * There is no vdev of any kind with the specified guid. 
3981e14bb325SJeff Bonwick */ 3982e14bb325SJeff Bonwick error = ENOENT; 3983fa94a07fSbrendan } 398499653d4eSeschrock 39858ad4d6ddSJeff Bonwick if (!locked) 39868ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, error)); 39878ad4d6ddSJeff Bonwick 39888ad4d6ddSJeff Bonwick return (error); 3989fa9e4066Sahrens } 3990fa9e4066Sahrens 3991fa9e4066Sahrens /* 39923d7072f8Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's 39933d7072f8Seschrock * current spared, so we can detach it. 3994fa9e4066Sahrens */ 3995ea8dc4b6Seschrock static vdev_t * 39963d7072f8Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd) 3997fa9e4066Sahrens { 3998ea8dc4b6Seschrock vdev_t *newvd, *oldvd; 3999fa9e4066Sahrens 4000573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) { 40013d7072f8Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 4002ea8dc4b6Seschrock if (oldvd != NULL) 4003ea8dc4b6Seschrock return (oldvd); 4004ea8dc4b6Seschrock } 4005fa9e4066Sahrens 40063d7072f8Seschrock /* 40073d7072f8Seschrock * Check for a completed replacement. 40083d7072f8Seschrock */ 4009fa9e4066Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 4010ea8dc4b6Seschrock oldvd = vd->vdev_child[0]; 4011ea8dc4b6Seschrock newvd = vd->vdev_child[1]; 4012ea8dc4b6Seschrock 40138ad4d6ddSJeff Bonwick if (vdev_dtl_empty(newvd, DTL_MISSING) && 40148ad4d6ddSJeff Bonwick !vdev_dtl_required(oldvd)) 4015ea8dc4b6Seschrock return (oldvd); 4016fa9e4066Sahrens } 4017ea8dc4b6Seschrock 40183d7072f8Seschrock /* 40193d7072f8Seschrock * Check for a completed resilver with the 'unspare' flag set. 
40203d7072f8Seschrock */ 40213d7072f8Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 40223d7072f8Seschrock newvd = vd->vdev_child[0]; 40233d7072f8Seschrock oldvd = vd->vdev_child[1]; 40243d7072f8Seschrock 40253d7072f8Seschrock if (newvd->vdev_unspare && 40268ad4d6ddSJeff Bonwick vdev_dtl_empty(newvd, DTL_MISSING) && 40278ad4d6ddSJeff Bonwick !vdev_dtl_required(oldvd)) { 40283d7072f8Seschrock newvd->vdev_unspare = 0; 40293d7072f8Seschrock return (oldvd); 40303d7072f8Seschrock } 40313d7072f8Seschrock } 40323d7072f8Seschrock 4033ea8dc4b6Seschrock return (NULL); 4034fa9e4066Sahrens } 4035fa9e4066Sahrens 4036ea8dc4b6Seschrock static void 40373d7072f8Seschrock spa_vdev_resilver_done(spa_t *spa) 4038fa9e4066Sahrens { 40398ad4d6ddSJeff Bonwick vdev_t *vd, *pvd, *ppvd; 40408ad4d6ddSJeff Bonwick uint64_t guid, sguid, pguid, ppguid; 4041ea8dc4b6Seschrock 40428ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4043ea8dc4b6Seschrock 40443d7072f8Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 40458ad4d6ddSJeff Bonwick pvd = vd->vdev_parent; 40468ad4d6ddSJeff Bonwick ppvd = pvd->vdev_parent; 4047ea8dc4b6Seschrock guid = vd->vdev_guid; 40488ad4d6ddSJeff Bonwick pguid = pvd->vdev_guid; 40498ad4d6ddSJeff Bonwick ppguid = ppvd->vdev_guid; 40508ad4d6ddSJeff Bonwick sguid = 0; 405199653d4eSeschrock /* 405299653d4eSeschrock * If we have just finished replacing a hot spared device, then 405399653d4eSeschrock * we need to detach the parent's first child (the original hot 405499653d4eSeschrock * spare) as well. 
405599653d4eSeschrock */ 40568ad4d6ddSJeff Bonwick if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) { 405799653d4eSeschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 40588ad4d6ddSJeff Bonwick ASSERT(ppvd->vdev_children == 2); 40598ad4d6ddSJeff Bonwick sguid = ppvd->vdev_child[1]->vdev_guid; 406099653d4eSeschrock } 40618ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 40628ad4d6ddSJeff Bonwick if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 4063ea8dc4b6Seschrock return; 40648ad4d6ddSJeff Bonwick if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 406599653d4eSeschrock return; 40668ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4067fa9e4066Sahrens } 4068fa9e4066Sahrens 40698ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 4070fa9e4066Sahrens } 4071fa9e4066Sahrens 4072c67d9675Seschrock /* 4073b3388e4fSEric Taylor * Update the stored path or FRU for this vdev. 4074c67d9675Seschrock */ 4075c67d9675Seschrock int 40766809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 40776809eb4eSEric Schrock boolean_t ispath) 4078c67d9675Seschrock { 4079c5904d13Seschrock vdev_t *vd; 4080c67d9675Seschrock 4081b3388e4fSEric Taylor spa_vdev_state_enter(spa, SCL_ALL); 4082c67d9675Seschrock 40836809eb4eSEric Schrock if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 4084b3388e4fSEric Taylor return (spa_vdev_state_exit(spa, NULL, ENOENT)); 4085c67d9675Seschrock 40860e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 4087b3388e4fSEric Taylor return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); 40880e34b6a7Sbonwick 40896809eb4eSEric Schrock if (ispath) { 40906809eb4eSEric Schrock spa_strfree(vd->vdev_path); 40916809eb4eSEric Schrock vd->vdev_path = spa_strdup(value); 40926809eb4eSEric Schrock } else { 40936809eb4eSEric Schrock if (vd->vdev_fru != NULL) 40946809eb4eSEric Schrock spa_strfree(vd->vdev_fru); 40956809eb4eSEric Schrock vd->vdev_fru = spa_strdup(value); 40966809eb4eSEric 
Schrock } 4097c67d9675Seschrock 4098b3388e4fSEric Taylor return (spa_vdev_state_exit(spa, vd, 0)); 4099c67d9675Seschrock } 4100c67d9675Seschrock 41016809eb4eSEric Schrock int 41026809eb4eSEric Schrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 41036809eb4eSEric Schrock { 41046809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 41056809eb4eSEric Schrock } 41066809eb4eSEric Schrock 41076809eb4eSEric Schrock int 41086809eb4eSEric Schrock spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 41096809eb4eSEric Schrock { 41106809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 41116809eb4eSEric Schrock } 41126809eb4eSEric Schrock 4113fa9e4066Sahrens /* 4114fa9e4066Sahrens * ========================================================================== 4115fa9e4066Sahrens * SPA Scrubbing 4116fa9e4066Sahrens * ========================================================================== 4117fa9e4066Sahrens */ 4118fa9e4066Sahrens 4119ea8dc4b6Seschrock int 4120088f3894Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type) 4121fa9e4066Sahrens { 4122e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 4123bb8b5132Sek 4124fa9e4066Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 4125fa9e4066Sahrens return (ENOTSUP); 4126fa9e4066Sahrens 4127fa9e4066Sahrens /* 4128088f3894Sahrens * If a resilver was requested, but there is no DTL on a 4129088f3894Sahrens * writeable leaf device, we have nothing to do. 
4130fa9e4066Sahrens */ 4131088f3894Sahrens if (type == POOL_SCRUB_RESILVER && 4132088f3894Sahrens !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 4133088f3894Sahrens spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 4134ea8dc4b6Seschrock return (0); 4135ea8dc4b6Seschrock } 4136fa9e4066Sahrens 4137088f3894Sahrens if (type == POOL_SCRUB_EVERYTHING && 4138088f3894Sahrens spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE && 4139088f3894Sahrens spa->spa_dsl_pool->dp_scrub_isresilver) 4140088f3894Sahrens return (EBUSY); 4141fa9e4066Sahrens 4142088f3894Sahrens if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) { 4143088f3894Sahrens return (dsl_pool_scrub_clean(spa->spa_dsl_pool)); 4144088f3894Sahrens } else if (type == POOL_SCRUB_NONE) { 4145088f3894Sahrens return (dsl_pool_scrub_cancel(spa->spa_dsl_pool)); 4146ea8dc4b6Seschrock } else { 4147088f3894Sahrens return (EINVAL); 4148fa9e4066Sahrens } 4149fa9e4066Sahrens } 4150fa9e4066Sahrens 4151ea8dc4b6Seschrock /* 4152ea8dc4b6Seschrock * ========================================================================== 4153ea8dc4b6Seschrock * SPA async task processing 4154ea8dc4b6Seschrock * ========================================================================== 4155ea8dc4b6Seschrock */ 4156ea8dc4b6Seschrock 4157ea8dc4b6Seschrock static void 41583d7072f8Seschrock spa_async_remove(spa_t *spa, vdev_t *vd) 4159fa9e4066Sahrens { 416049cf58c0SBrendan Gregg - Sun Microsystems if (vd->vdev_remove_wanted) { 416149cf58c0SBrendan Gregg - Sun Microsystems vd->vdev_remove_wanted = 0; 416249cf58c0SBrendan Gregg - Sun Microsystems vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 41631d713200SEric Schrock 41641d713200SEric Schrock /* 41651d713200SEric Schrock * We want to clear the stats, but we don't want to do a full 41661d713200SEric Schrock * vdev_clear() as that will cause us to throw away 41671d713200SEric Schrock * degraded/faulted state as well as attempt to reopen the 41681d713200SEric Schrock * 
device, all of which is a waste. 41691d713200SEric Schrock */ 41701d713200SEric Schrock vd->vdev_stat.vs_read_errors = 0; 41711d713200SEric Schrock vd->vdev_stat.vs_write_errors = 0; 41721d713200SEric Schrock vd->vdev_stat.vs_checksum_errors = 0; 41731d713200SEric Schrock 4174e14bb325SJeff Bonwick vdev_state_dirty(vd->vdev_top); 4175ea8dc4b6Seschrock } 417649cf58c0SBrendan Gregg - Sun Microsystems 4177e14bb325SJeff Bonwick for (int c = 0; c < vd->vdev_children; c++) 417849cf58c0SBrendan Gregg - Sun Microsystems spa_async_remove(spa, vd->vdev_child[c]); 4179ea8dc4b6Seschrock } 4180fa9e4066Sahrens 4181e14bb325SJeff Bonwick static void 4182e14bb325SJeff Bonwick spa_async_probe(spa_t *spa, vdev_t *vd) 4183e14bb325SJeff Bonwick { 4184e14bb325SJeff Bonwick if (vd->vdev_probe_wanted) { 4185e14bb325SJeff Bonwick vd->vdev_probe_wanted = 0; 4186e14bb325SJeff Bonwick vdev_reopen(vd); /* vdev_open() does the actual probe */ 4187e14bb325SJeff Bonwick } 4188e14bb325SJeff Bonwick 4189e14bb325SJeff Bonwick for (int c = 0; c < vd->vdev_children; c++) 4190e14bb325SJeff Bonwick spa_async_probe(spa, vd->vdev_child[c]); 4191e14bb325SJeff Bonwick } 4192e14bb325SJeff Bonwick 4193573ca77eSGeorge Wilson static void 4194573ca77eSGeorge Wilson spa_async_autoexpand(spa_t *spa, vdev_t *vd) 4195573ca77eSGeorge Wilson { 4196573ca77eSGeorge Wilson sysevent_id_t eid; 4197573ca77eSGeorge Wilson nvlist_t *attr; 4198573ca77eSGeorge Wilson char *physpath; 4199573ca77eSGeorge Wilson 4200573ca77eSGeorge Wilson if (!spa->spa_autoexpand) 4201573ca77eSGeorge Wilson return; 4202573ca77eSGeorge Wilson 4203573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) { 4204573ca77eSGeorge Wilson vdev_t *cvd = vd->vdev_child[c]; 4205573ca77eSGeorge Wilson spa_async_autoexpand(spa, cvd); 4206573ca77eSGeorge Wilson } 4207573ca77eSGeorge Wilson 4208573ca77eSGeorge Wilson if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) 4209573ca77eSGeorge Wilson return; 4210573ca77eSGeorge Wilson 
4211573ca77eSGeorge Wilson physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4212573ca77eSGeorge Wilson (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); 4213573ca77eSGeorge Wilson 4214573ca77eSGeorge Wilson VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 4215573ca77eSGeorge Wilson VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 4216573ca77eSGeorge Wilson 4217573ca77eSGeorge Wilson (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 4218573ca77eSGeorge Wilson ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 4219573ca77eSGeorge Wilson 4220573ca77eSGeorge Wilson nvlist_free(attr); 4221573ca77eSGeorge Wilson kmem_free(physpath, MAXPATHLEN); 4222573ca77eSGeorge Wilson } 4223573ca77eSGeorge Wilson 4224ea8dc4b6Seschrock static void 4225ea8dc4b6Seschrock spa_async_thread(spa_t *spa) 4226ea8dc4b6Seschrock { 4227e14bb325SJeff Bonwick int tasks; 4228ea8dc4b6Seschrock 4229ea8dc4b6Seschrock ASSERT(spa->spa_sync_on); 4230ea8dc4b6Seschrock 4231ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4232ea8dc4b6Seschrock tasks = spa->spa_async_tasks; 4233ea8dc4b6Seschrock spa->spa_async_tasks = 0; 4234ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4235ea8dc4b6Seschrock 42360373e76bSbonwick /* 42370373e76bSbonwick * See if the config needs to be updated. 
42380373e76bSbonwick */ 42390373e76bSbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 4240b24ab676SJeff Bonwick uint64_t old_space, new_space; 4241573ca77eSGeorge Wilson 42420373e76bSbonwick mutex_enter(&spa_namespace_lock); 4243b24ab676SJeff Bonwick old_space = metaslab_class_get_space(spa_normal_class(spa)); 42440373e76bSbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 4245b24ab676SJeff Bonwick new_space = metaslab_class_get_space(spa_normal_class(spa)); 42460373e76bSbonwick mutex_exit(&spa_namespace_lock); 4247573ca77eSGeorge Wilson 4248573ca77eSGeorge Wilson /* 4249573ca77eSGeorge Wilson * If the pool grew as a result of the config update, 4250573ca77eSGeorge Wilson * then log an internal history event. 4251573ca77eSGeorge Wilson */ 4252b24ab676SJeff Bonwick if (new_space != old_space) { 4253c8e1f6d2SMark J Musante spa_history_internal_log(LOG_POOL_VDEV_ONLINE, 4254c8e1f6d2SMark J Musante spa, NULL, CRED(), 4255c8e1f6d2SMark J Musante "pool '%s' size: %llu(+%llu)", 4256b24ab676SJeff Bonwick spa_name(spa), new_space, new_space - old_space); 4257573ca77eSGeorge Wilson } 42580373e76bSbonwick } 42590373e76bSbonwick 4260ea8dc4b6Seschrock /* 42613d7072f8Seschrock * See if any devices need to be marked REMOVED. 
4262ea8dc4b6Seschrock */ 4263e14bb325SJeff Bonwick if (tasks & SPA_ASYNC_REMOVE) { 42648f18d1faSGeorge Wilson spa_vdev_state_enter(spa, SCL_NONE); 42653d7072f8Seschrock spa_async_remove(spa, spa->spa_root_vdev); 4266e14bb325SJeff Bonwick for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 426749cf58c0SBrendan Gregg - Sun Microsystems spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 4268e14bb325SJeff Bonwick for (int i = 0; i < spa->spa_spares.sav_count; i++) 426949cf58c0SBrendan Gregg - Sun Microsystems spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 4270e14bb325SJeff Bonwick (void) spa_vdev_state_exit(spa, NULL, 0); 4271e14bb325SJeff Bonwick } 4272e14bb325SJeff Bonwick 4273573ca77eSGeorge Wilson if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { 4274573ca77eSGeorge Wilson spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4275573ca77eSGeorge Wilson spa_async_autoexpand(spa, spa->spa_root_vdev); 4276573ca77eSGeorge Wilson spa_config_exit(spa, SCL_CONFIG, FTAG); 4277573ca77eSGeorge Wilson } 4278573ca77eSGeorge Wilson 4279e14bb325SJeff Bonwick /* 4280e14bb325SJeff Bonwick * See if any devices need to be probed. 4281e14bb325SJeff Bonwick */ 4282e14bb325SJeff Bonwick if (tasks & SPA_ASYNC_PROBE) { 42838f18d1faSGeorge Wilson spa_vdev_state_enter(spa, SCL_NONE); 4284e14bb325SJeff Bonwick spa_async_probe(spa, spa->spa_root_vdev); 4285e14bb325SJeff Bonwick (void) spa_vdev_state_exit(spa, NULL, 0); 42863d7072f8Seschrock } 4287ea8dc4b6Seschrock 4288ea8dc4b6Seschrock /* 4289ea8dc4b6Seschrock * If any devices are done replacing, detach them. 4290ea8dc4b6Seschrock */ 42913d7072f8Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 42923d7072f8Seschrock spa_vdev_resilver_done(spa); 4293fa9e4066Sahrens 4294ea8dc4b6Seschrock /* 4295ea8dc4b6Seschrock * Kick off a resilver. 
4296ea8dc4b6Seschrock */ 4297088f3894Sahrens if (tasks & SPA_ASYNC_RESILVER) 4298088f3894Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0); 4299ea8dc4b6Seschrock 4300ea8dc4b6Seschrock /* 4301ea8dc4b6Seschrock * Let the world know that we're done. 4302ea8dc4b6Seschrock */ 4303ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4304ea8dc4b6Seschrock spa->spa_async_thread = NULL; 4305ea8dc4b6Seschrock cv_broadcast(&spa->spa_async_cv); 4306ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4307ea8dc4b6Seschrock thread_exit(); 4308ea8dc4b6Seschrock } 4309ea8dc4b6Seschrock 4310ea8dc4b6Seschrock void 4311ea8dc4b6Seschrock spa_async_suspend(spa_t *spa) 4312ea8dc4b6Seschrock { 4313ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4314ea8dc4b6Seschrock spa->spa_async_suspended++; 4315ea8dc4b6Seschrock while (spa->spa_async_thread != NULL) 4316ea8dc4b6Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 4317ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4318ea8dc4b6Seschrock } 4319ea8dc4b6Seschrock 4320ea8dc4b6Seschrock void 4321ea8dc4b6Seschrock spa_async_resume(spa_t *spa) 4322ea8dc4b6Seschrock { 4323ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4324ea8dc4b6Seschrock ASSERT(spa->spa_async_suspended != 0); 4325ea8dc4b6Seschrock spa->spa_async_suspended--; 4326ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4327ea8dc4b6Seschrock } 4328ea8dc4b6Seschrock 4329ea8dc4b6Seschrock static void 4330ea8dc4b6Seschrock spa_async_dispatch(spa_t *spa) 4331ea8dc4b6Seschrock { 4332ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4333ea8dc4b6Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 43340373e76bSbonwick spa->spa_async_thread == NULL && 43350373e76bSbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 4336ea8dc4b6Seschrock spa->spa_async_thread = thread_create(NULL, 0, 4337ea8dc4b6Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 4338ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4339ea8dc4b6Seschrock } 
4340ea8dc4b6Seschrock 4341ea8dc4b6Seschrock void 4342ea8dc4b6Seschrock spa_async_request(spa_t *spa, int task) 4343ea8dc4b6Seschrock { 4344ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4345ea8dc4b6Seschrock spa->spa_async_tasks |= task; 4346ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4347fa9e4066Sahrens } 4348fa9e4066Sahrens 4349fa9e4066Sahrens /* 4350fa9e4066Sahrens * ========================================================================== 4351fa9e4066Sahrens * SPA syncing routines 4352fa9e4066Sahrens * ========================================================================== 4353fa9e4066Sahrens */ 4354fa9e4066Sahrens static void 4355b24ab676SJeff Bonwick spa_sync_deferred_bplist(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx, uint64_t txg) 4356fa9e4066Sahrens { 4357fa9e4066Sahrens blkptr_t blk; 4358fa9e4066Sahrens uint64_t itor = 0; 4359fa9e4066Sahrens uint8_t c = 1; 4360fa9e4066Sahrens 4361e14bb325SJeff Bonwick while (bplist_iterate(bpl, &itor, &blk) == 0) { 4362e14bb325SJeff Bonwick ASSERT(blk.blk_birth < txg); 4363b24ab676SJeff Bonwick zio_free(spa, txg, &blk); 4364e14bb325SJeff Bonwick } 4365fa9e4066Sahrens 4366fa9e4066Sahrens bplist_vacate(bpl, tx); 4367fa9e4066Sahrens 4368fa9e4066Sahrens /* 4369fa9e4066Sahrens * Pre-dirty the first block so we sync to convergence faster. 4370fa9e4066Sahrens * (Usually only the first block is needed.) 
4371fa9e4066Sahrens */ 4372b24ab676SJeff Bonwick dmu_write(bpl->bpl_mos, spa->spa_deferred_bplist_obj, 0, 1, &c, tx); 4373b24ab676SJeff Bonwick } 4374b24ab676SJeff Bonwick 4375b24ab676SJeff Bonwick static void 4376b24ab676SJeff Bonwick spa_sync_free(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 4377b24ab676SJeff Bonwick { 4378b24ab676SJeff Bonwick zio_t *zio = arg; 4379b24ab676SJeff Bonwick 4380b24ab676SJeff Bonwick zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, 4381b24ab676SJeff Bonwick zio->io_flags)); 4382fa9e4066Sahrens } 4383fa9e4066Sahrens 4384fa9e4066Sahrens static void 438599653d4eSeschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 4386fa9e4066Sahrens { 4387fa9e4066Sahrens char *packed = NULL; 4388f7991ba4STim Haley size_t bufsize; 4389fa9e4066Sahrens size_t nvsize = 0; 4390fa9e4066Sahrens dmu_buf_t *db; 4391fa9e4066Sahrens 439299653d4eSeschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 4393fa9e4066Sahrens 4394f7991ba4STim Haley /* 4395f7991ba4STim Haley * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 4396f7991ba4STim Haley * information. This avoids the dbuf_will_dirty() path and 4397f7991ba4STim Haley * saves us a pre-read to get data we don't actually care about. 
4398f7991ba4STim Haley */ 4399f7991ba4STim Haley bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 4400f7991ba4STim Haley packed = kmem_alloc(bufsize, KM_SLEEP); 4401fa9e4066Sahrens 440299653d4eSeschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 4403ea8dc4b6Seschrock KM_SLEEP) == 0); 4404f7991ba4STim Haley bzero(packed + nvsize, bufsize - nvsize); 4405fa9e4066Sahrens 4406f7991ba4STim Haley dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 4407fa9e4066Sahrens 4408f7991ba4STim Haley kmem_free(packed, bufsize); 4409fa9e4066Sahrens 441099653d4eSeschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 4411fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 4412fa9e4066Sahrens *(uint64_t *)db->db_data = nvsize; 4413ea8dc4b6Seschrock dmu_buf_rele(db, FTAG); 4414fa9e4066Sahrens } 4415fa9e4066Sahrens 441699653d4eSeschrock static void 4417fa94a07fSbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 4418fa94a07fSbrendan const char *config, const char *entry) 441999653d4eSeschrock { 442099653d4eSeschrock nvlist_t *nvroot; 4421fa94a07fSbrendan nvlist_t **list; 442299653d4eSeschrock int i; 442399653d4eSeschrock 4424fa94a07fSbrendan if (!sav->sav_sync) 442599653d4eSeschrock return; 442699653d4eSeschrock 442799653d4eSeschrock /* 4428fa94a07fSbrendan * Update the MOS nvlist describing the list of available devices. 4429fa94a07fSbrendan * spa_validate_aux() will have already made sure this nvlist is 44303d7072f8Seschrock * valid and the vdevs are labeled appropriately. 
443199653d4eSeschrock */ 4432fa94a07fSbrendan if (sav->sav_object == 0) { 4433fa94a07fSbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 4434fa94a07fSbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 4435fa94a07fSbrendan sizeof (uint64_t), tx); 443699653d4eSeschrock VERIFY(zap_update(spa->spa_meta_objset, 4437fa94a07fSbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 4438fa94a07fSbrendan &sav->sav_object, tx) == 0); 443999653d4eSeschrock } 444099653d4eSeschrock 444199653d4eSeschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 4442fa94a07fSbrendan if (sav->sav_count == 0) { 4443fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 444499653d4eSeschrock } else { 4445fa94a07fSbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 4446fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4447fa94a07fSbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 4448fa94a07fSbrendan B_FALSE, B_FALSE, B_TRUE); 4449fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 4450fa94a07fSbrendan sav->sav_count) == 0); 4451fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4452fa94a07fSbrendan nvlist_free(list[i]); 4453fa94a07fSbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 445499653d4eSeschrock } 445599653d4eSeschrock 4456fa94a07fSbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 445706eeb2adSek nvlist_free(nvroot); 445899653d4eSeschrock 4459fa94a07fSbrendan sav->sav_sync = B_FALSE; 446099653d4eSeschrock } 446199653d4eSeschrock 446299653d4eSeschrock static void 446399653d4eSeschrock spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 446499653d4eSeschrock { 446599653d4eSeschrock nvlist_t *config; 446699653d4eSeschrock 4467e14bb325SJeff Bonwick if (list_is_empty(&spa->spa_config_dirty_list)) 446899653d4eSeschrock return; 446999653d4eSeschrock 4470e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4471e14bb325SJeff 
Bonwick 4472e14bb325SJeff Bonwick config = spa_config_generate(spa, spa->spa_root_vdev, 4473e14bb325SJeff Bonwick dmu_tx_get_txg(tx), B_FALSE); 4474e14bb325SJeff Bonwick 4475e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 447699653d4eSeschrock 447799653d4eSeschrock if (spa->spa_config_syncing) 447899653d4eSeschrock nvlist_free(spa->spa_config_syncing); 447999653d4eSeschrock spa->spa_config_syncing = config; 448099653d4eSeschrock 448199653d4eSeschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 448299653d4eSeschrock } 448399653d4eSeschrock 4484990b4856Slling /* 4485990b4856Slling * Set zpool properties. 4486990b4856Slling */ 4487b1b8ab34Slling static void 4488ecd6cf80Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 4489b1b8ab34Slling { 4490b1b8ab34Slling spa_t *spa = arg1; 4491b1b8ab34Slling objset_t *mos = spa->spa_meta_objset; 4492990b4856Slling nvlist_t *nvp = arg2; 4493990b4856Slling nvpair_t *elem; 44943d7072f8Seschrock uint64_t intval; 4495c5904d13Seschrock char *strval; 4496990b4856Slling zpool_prop_t prop; 4497990b4856Slling const char *propname; 4498990b4856Slling zprop_type_t proptype; 4499b1b8ab34Slling 4500e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 4501e14bb325SJeff Bonwick 4502990b4856Slling elem = NULL; 4503990b4856Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 4504990b4856Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 4505990b4856Slling case ZPOOL_PROP_VERSION: 4506990b4856Slling /* 4507990b4856Slling * Only set version for non-zpool-creation cases 4508990b4856Slling * (set/import). spa_create() needs special care 4509990b4856Slling * for version setting. 
4510990b4856Slling */ 4511990b4856Slling if (tx->tx_txg != TXG_INITIAL) { 4512990b4856Slling VERIFY(nvpair_value_uint64(elem, 4513990b4856Slling &intval) == 0); 4514990b4856Slling ASSERT(intval <= SPA_VERSION); 4515990b4856Slling ASSERT(intval >= spa_version(spa)); 4516990b4856Slling spa->spa_uberblock.ub_version = intval; 4517990b4856Slling vdev_config_dirty(spa->spa_root_vdev); 4518990b4856Slling } 4519ecd6cf80Smarks break; 4520990b4856Slling 4521990b4856Slling case ZPOOL_PROP_ALTROOT: 4522990b4856Slling /* 4523990b4856Slling * 'altroot' is a non-persistent property. It should 4524990b4856Slling * have been set temporarily at creation or import time. 4525990b4856Slling */ 4526990b4856Slling ASSERT(spa->spa_root != NULL); 4527b1b8ab34Slling break; 45283d7072f8Seschrock 45292f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 4530990b4856Slling /* 4531379c004dSEric Schrock * 'cachefile' is also a non-persisitent property. 4532990b4856Slling */ 45333d7072f8Seschrock break; 4534990b4856Slling default: 4535990b4856Slling /* 4536990b4856Slling * Set pool property values in the poolprops mos object. 
4537990b4856Slling */ 4538990b4856Slling if (spa->spa_pool_props_object == 0) { 4539990b4856Slling VERIFY((spa->spa_pool_props_object = 4540990b4856Slling zap_create(mos, DMU_OT_POOL_PROPS, 4541990b4856Slling DMU_OT_NONE, 0, tx)) > 0); 4542990b4856Slling 4543990b4856Slling VERIFY(zap_update(mos, 4544990b4856Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 4545990b4856Slling 8, 1, &spa->spa_pool_props_object, tx) 4546990b4856Slling == 0); 4547990b4856Slling } 4548990b4856Slling 4549990b4856Slling /* normalize the property name */ 4550990b4856Slling propname = zpool_prop_to_name(prop); 4551990b4856Slling proptype = zpool_prop_get_type(prop); 4552990b4856Slling 4553990b4856Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 4554990b4856Slling ASSERT(proptype == PROP_TYPE_STRING); 4555990b4856Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 4556990b4856Slling VERIFY(zap_update(mos, 4557990b4856Slling spa->spa_pool_props_object, propname, 4558990b4856Slling 1, strlen(strval) + 1, strval, tx) == 0); 4559990b4856Slling 4560990b4856Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 4561990b4856Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 4562990b4856Slling 4563990b4856Slling if (proptype == PROP_TYPE_INDEX) { 4564990b4856Slling const char *unused; 4565990b4856Slling VERIFY(zpool_prop_index_to_string( 4566990b4856Slling prop, intval, &unused) == 0); 4567990b4856Slling } 4568990b4856Slling VERIFY(zap_update(mos, 4569990b4856Slling spa->spa_pool_props_object, propname, 4570990b4856Slling 8, 1, &intval, tx) == 0); 4571990b4856Slling } else { 4572990b4856Slling ASSERT(0); /* not allowed */ 4573990b4856Slling } 4574990b4856Slling 45750a4e9518Sgw switch (prop) { 45760a4e9518Sgw case ZPOOL_PROP_DELEGATION: 4577990b4856Slling spa->spa_delegation = intval; 45780a4e9518Sgw break; 45790a4e9518Sgw case ZPOOL_PROP_BOOTFS: 4580990b4856Slling spa->spa_bootfs = intval; 45810a4e9518Sgw break; 45820a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 45830a4e9518Sgw 
spa->spa_failmode = intval; 45840a4e9518Sgw break; 4585573ca77eSGeorge Wilson case ZPOOL_PROP_AUTOEXPAND: 4586573ca77eSGeorge Wilson spa->spa_autoexpand = intval; 4587573ca77eSGeorge Wilson spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); 4588573ca77eSGeorge Wilson break; 4589b24ab676SJeff Bonwick case ZPOOL_PROP_DEDUPDITTO: 4590b24ab676SJeff Bonwick spa->spa_dedup_ditto = intval; 4591b24ab676SJeff Bonwick break; 45920a4e9518Sgw default: 45930a4e9518Sgw break; 45940a4e9518Sgw } 4595990b4856Slling } 4596990b4856Slling 4597990b4856Slling /* log internal history if this is not a zpool create */ 4598990b4856Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 4599990b4856Slling tx->tx_txg != TXG_INITIAL) { 4600990b4856Slling spa_history_internal_log(LOG_POOL_PROPSET, 4601990b4856Slling spa, tx, cr, "%s %lld %s", 4602e14bb325SJeff Bonwick nvpair_name(elem), intval, spa_name(spa)); 4603b1b8ab34Slling } 4604b1b8ab34Slling } 4605e14bb325SJeff Bonwick 4606e14bb325SJeff Bonwick mutex_exit(&spa->spa_props_lock); 4607b1b8ab34Slling } 4608b1b8ab34Slling 4609fa9e4066Sahrens /* 4610fa9e4066Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4611fa9e4066Sahrens * part of the process, so we iterate until it converges. 4612fa9e4066Sahrens */ 4613fa9e4066Sahrens void 4614fa9e4066Sahrens spa_sync(spa_t *spa, uint64_t txg) 4615fa9e4066Sahrens { 4616fa9e4066Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 4617fa9e4066Sahrens objset_t *mos = spa->spa_meta_objset; 4618b24ab676SJeff Bonwick bplist_t *defer_bpl = &spa->spa_deferred_bplist; 4619b24ab676SJeff Bonwick bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; 46200373e76bSbonwick vdev_t *rvd = spa->spa_root_vdev; 4621fa9e4066Sahrens vdev_t *vd; 4622fa9e4066Sahrens dmu_tx_t *tx; 4623e14bb325SJeff Bonwick int error; 4624fa9e4066Sahrens 4625fa9e4066Sahrens /* 4626fa9e4066Sahrens * Lock out configuration changes. 
4627fa9e4066Sahrens */ 4628e14bb325SJeff Bonwick spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4629fa9e4066Sahrens 4630fa9e4066Sahrens spa->spa_syncing_txg = txg; 4631fa9e4066Sahrens spa->spa_sync_pass = 0; 4632fa9e4066Sahrens 4633e14bb325SJeff Bonwick /* 4634e14bb325SJeff Bonwick * If there are any pending vdev state changes, convert them 4635e14bb325SJeff Bonwick * into config changes that go out with this transaction group. 4636e14bb325SJeff Bonwick */ 4637e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 46388ad4d6ddSJeff Bonwick while (list_head(&spa->spa_state_dirty_list) != NULL) { 46398ad4d6ddSJeff Bonwick /* 46408ad4d6ddSJeff Bonwick * We need the write lock here because, for aux vdevs, 46418ad4d6ddSJeff Bonwick * calling vdev_config_dirty() modifies sav_config. 46428ad4d6ddSJeff Bonwick * This is ugly and will become unnecessary when we 46438ad4d6ddSJeff Bonwick * eliminate the aux vdev wart by integrating all vdevs 46448ad4d6ddSJeff Bonwick * into the root vdev tree. 
46458ad4d6ddSJeff Bonwick */ 46468ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 46478ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); 46488ad4d6ddSJeff Bonwick while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 46498ad4d6ddSJeff Bonwick vdev_state_clean(vd); 46508ad4d6ddSJeff Bonwick vdev_config_dirty(vd); 46518ad4d6ddSJeff Bonwick } 46528ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 46538ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); 4654e14bb325SJeff Bonwick } 4655e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 4656e14bb325SJeff Bonwick 4657b24ab676SJeff Bonwick VERIFY(0 == bplist_open(defer_bpl, mos, spa->spa_deferred_bplist_obj)); 4658fa9e4066Sahrens 465999653d4eSeschrock tx = dmu_tx_create_assigned(dp, txg); 466099653d4eSeschrock 466199653d4eSeschrock /* 4662e7437265Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 466399653d4eSeschrock * set spa_deflate if we have no raid-z vdevs. 
466499653d4eSeschrock */ 4665e7437265Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 4666e7437265Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 466799653d4eSeschrock int i; 466899653d4eSeschrock 466999653d4eSeschrock for (i = 0; i < rvd->vdev_children; i++) { 467099653d4eSeschrock vd = rvd->vdev_child[i]; 467199653d4eSeschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 467299653d4eSeschrock break; 467399653d4eSeschrock } 467499653d4eSeschrock if (i == rvd->vdev_children) { 467599653d4eSeschrock spa->spa_deflate = TRUE; 467699653d4eSeschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 467799653d4eSeschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 467899653d4eSeschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 467999653d4eSeschrock } 468099653d4eSeschrock } 468199653d4eSeschrock 4682088f3894Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 4683088f3894Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 4684088f3894Sahrens dsl_pool_create_origin(dp, tx); 4685088f3894Sahrens 4686088f3894Sahrens /* Keeping the origin open increases spa_minref */ 4687088f3894Sahrens spa->spa_minref += 3; 4688088f3894Sahrens } 4689088f3894Sahrens 4690088f3894Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 4691088f3894Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 4692088f3894Sahrens dsl_pool_upgrade_clones(dp, tx); 4693088f3894Sahrens } 4694088f3894Sahrens 4695fa9e4066Sahrens /* 4696fa9e4066Sahrens * If anything has changed in this txg, push the deferred frees 4697fa9e4066Sahrens * from the previous txg. If not, leave them alone so that we 4698fa9e4066Sahrens * don't generate work on an otherwise idle system. 
4699fa9e4066Sahrens */ 4700fa9e4066Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 47011615a317Sek !txg_list_empty(&dp->dp_dirty_dirs, txg) || 47021615a317Sek !txg_list_empty(&dp->dp_sync_tasks, txg)) 4703b24ab676SJeff Bonwick spa_sync_deferred_bplist(spa, defer_bpl, tx, txg); 4704fa9e4066Sahrens 4705fa9e4066Sahrens /* 4706fa9e4066Sahrens * Iterate to convergence. 4707fa9e4066Sahrens */ 4708fa9e4066Sahrens do { 4709b24ab676SJeff Bonwick int pass = ++spa->spa_sync_pass; 4710fa9e4066Sahrens 4711fa9e4066Sahrens spa_sync_config_object(spa, tx); 4712fa94a07fSbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 4713fa94a07fSbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 4714fa94a07fSbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 4715fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 4716ea8dc4b6Seschrock spa_errlog_sync(spa, txg); 4717fa9e4066Sahrens dsl_pool_sync(dp, txg); 4718fa9e4066Sahrens 4719b24ab676SJeff Bonwick if (pass <= SYNC_PASS_DEFERRED_FREE) { 4720b24ab676SJeff Bonwick zio_t *zio = zio_root(spa, NULL, NULL, 0); 4721b24ab676SJeff Bonwick bplist_sync(free_bpl, spa_sync_free, zio, tx); 4722b24ab676SJeff Bonwick VERIFY(zio_wait(zio) == 0); 4723b24ab676SJeff Bonwick } else { 4724b24ab676SJeff Bonwick bplist_sync(free_bpl, bplist_enqueue_cb, defer_bpl, tx); 4725fa9e4066Sahrens } 4726fa9e4066Sahrens 4727b24ab676SJeff Bonwick ddt_sync(spa, txg); 4728b24ab676SJeff Bonwick 4729b24ab676SJeff Bonwick while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) 4730b24ab676SJeff Bonwick vdev_sync(vd, txg); 4731b24ab676SJeff Bonwick 4732b24ab676SJeff Bonwick } while (dmu_objset_is_dirty(mos, txg)); 4733fa9e4066Sahrens 4734b24ab676SJeff Bonwick ASSERT(free_bpl->bpl_queue == NULL); 4735fa9e4066Sahrens 4736b24ab676SJeff Bonwick bplist_close(defer_bpl); 4737fa9e4066Sahrens 4738fa9e4066Sahrens /* 4739fa9e4066Sahrens * Rewrite the vdev configuration (which includes the uberblock) 4740fa9e4066Sahrens * to commit the transaction group. 
47410373e76bSbonwick * 474217f17c2dSbonwick * If there are no dirty vdevs, we sync the uberblock to a few 474317f17c2dSbonwick * random top-level vdevs that are known to be visible in the 4744e14bb325SJeff Bonwick * config cache (see spa_vdev_add() for a complete description). 4745e14bb325SJeff Bonwick * If there *are* dirty vdevs, sync the uberblock to all vdevs. 47460373e76bSbonwick */ 4747e14bb325SJeff Bonwick for (;;) { 4748e14bb325SJeff Bonwick /* 4749e14bb325SJeff Bonwick * We hold SCL_STATE to prevent vdev open/close/etc. 4750e14bb325SJeff Bonwick * while we're attempting to write the vdev labels. 4751e14bb325SJeff Bonwick */ 4752e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4753e14bb325SJeff Bonwick 4754e14bb325SJeff Bonwick if (list_is_empty(&spa->spa_config_dirty_list)) { 4755e14bb325SJeff Bonwick vdev_t *svd[SPA_DVAS_PER_BP]; 4756e14bb325SJeff Bonwick int svdcount = 0; 4757e14bb325SJeff Bonwick int children = rvd->vdev_children; 4758e14bb325SJeff Bonwick int c0 = spa_get_random(children); 4759e14bb325SJeff Bonwick 4760573ca77eSGeorge Wilson for (int c = 0; c < children; c++) { 4761e14bb325SJeff Bonwick vd = rvd->vdev_child[(c0 + c) % children]; 4762e14bb325SJeff Bonwick if (vd->vdev_ms_array == 0 || vd->vdev_islog) 4763e14bb325SJeff Bonwick continue; 4764e14bb325SJeff Bonwick svd[svdcount++] = vd; 4765e14bb325SJeff Bonwick if (svdcount == SPA_DVAS_PER_BP) 4766e14bb325SJeff Bonwick break; 4767e14bb325SJeff Bonwick } 47688956713aSEric Schrock error = vdev_config_sync(svd, svdcount, txg, B_FALSE); 47698956713aSEric Schrock if (error != 0) 47708956713aSEric Schrock error = vdev_config_sync(svd, svdcount, txg, 47718956713aSEric Schrock B_TRUE); 4772e14bb325SJeff Bonwick } else { 4773e14bb325SJeff Bonwick error = vdev_config_sync(rvd->vdev_child, 47748956713aSEric Schrock rvd->vdev_children, txg, B_FALSE); 47758956713aSEric Schrock if (error != 0) 47768956713aSEric Schrock error = vdev_config_sync(rvd->vdev_child, 47778956713aSEric 
Schrock rvd->vdev_children, txg, B_TRUE); 47780373e76bSbonwick } 4779e14bb325SJeff Bonwick 4780e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 4781e14bb325SJeff Bonwick 4782e14bb325SJeff Bonwick if (error == 0) 4783e14bb325SJeff Bonwick break; 4784e14bb325SJeff Bonwick zio_suspend(spa, NULL); 4785e14bb325SJeff Bonwick zio_resume_wait(spa); 47860373e76bSbonwick } 478799653d4eSeschrock dmu_tx_commit(tx); 478899653d4eSeschrock 47890373e76bSbonwick /* 47900373e76bSbonwick * Clear the dirty config list. 4791fa9e4066Sahrens */ 4792e14bb325SJeff Bonwick while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 47930373e76bSbonwick vdev_config_clean(vd); 47940373e76bSbonwick 47950373e76bSbonwick /* 47960373e76bSbonwick * Now that the new config has synced transactionally, 47970373e76bSbonwick * let it become visible to the config cache. 47980373e76bSbonwick */ 47990373e76bSbonwick if (spa->spa_config_syncing != NULL) { 48000373e76bSbonwick spa_config_set(spa, spa->spa_config_syncing); 48010373e76bSbonwick spa->spa_config_txg = txg; 48020373e76bSbonwick spa->spa_config_syncing = NULL; 48030373e76bSbonwick } 4804fa9e4066Sahrens 4805fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 4806fa9e4066Sahrens 4807b24ab676SJeff Bonwick dsl_pool_sync_done(dp, txg); 4808fa9e4066Sahrens 4809fa9e4066Sahrens /* 4810fa9e4066Sahrens * Update usable space statistics. 4811fa9e4066Sahrens */ 4812fa9e4066Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4813fa9e4066Sahrens vdev_sync_done(vd, txg); 4814fa9e4066Sahrens 4815485bbbf5SGeorge Wilson spa_update_dspace(spa); 4816485bbbf5SGeorge Wilson 4817fa9e4066Sahrens /* 4818fa9e4066Sahrens * It had better be the case that we didn't dirty anything 481999653d4eSeschrock * since vdev_config_sync(). 
4820fa9e4066Sahrens */ 4821fa9e4066Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4822fa9e4066Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4823fa9e4066Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 4824b24ab676SJeff Bonwick ASSERT(defer_bpl->bpl_queue == NULL); 4825b24ab676SJeff Bonwick ASSERT(free_bpl->bpl_queue == NULL); 4826b24ab676SJeff Bonwick 4827b24ab676SJeff Bonwick spa->spa_sync_pass = 0; 4828fa9e4066Sahrens 4829e14bb325SJeff Bonwick spa_config_exit(spa, SCL_CONFIG, FTAG); 4830ea8dc4b6Seschrock 4831468c413aSTim Haley spa_handle_ignored_writes(spa); 4832468c413aSTim Haley 4833ea8dc4b6Seschrock /* 4834ea8dc4b6Seschrock * If any async tasks have been requested, kick them off. 4835ea8dc4b6Seschrock */ 4836ea8dc4b6Seschrock spa_async_dispatch(spa); 4837fa9e4066Sahrens } 4838fa9e4066Sahrens 4839fa9e4066Sahrens /* 4840fa9e4066Sahrens * Sync all pools. We don't want to hold the namespace lock across these 4841fa9e4066Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 4842fa9e4066Sahrens * sync. 
4843fa9e4066Sahrens */ 4844fa9e4066Sahrens void 4845fa9e4066Sahrens spa_sync_allpools(void) 4846fa9e4066Sahrens { 4847fa9e4066Sahrens spa_t *spa = NULL; 4848fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4849fa9e4066Sahrens while ((spa = spa_next(spa)) != NULL) { 4850e14bb325SJeff Bonwick if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa)) 4851fa9e4066Sahrens continue; 4852fa9e4066Sahrens spa_open_ref(spa, FTAG); 4853fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4854fa9e4066Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 4855fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4856fa9e4066Sahrens spa_close(spa, FTAG); 4857fa9e4066Sahrens } 4858fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4859fa9e4066Sahrens } 4860fa9e4066Sahrens 4861fa9e4066Sahrens /* 4862fa9e4066Sahrens * ========================================================================== 4863fa9e4066Sahrens * Miscellaneous routines 4864fa9e4066Sahrens * ========================================================================== 4865fa9e4066Sahrens */ 4866fa9e4066Sahrens 4867fa9e4066Sahrens /* 4868fa9e4066Sahrens * Remove all pools in the system. 4869fa9e4066Sahrens */ 4870fa9e4066Sahrens void 4871fa9e4066Sahrens spa_evict_all(void) 4872fa9e4066Sahrens { 4873fa9e4066Sahrens spa_t *spa; 4874fa9e4066Sahrens 4875fa9e4066Sahrens /* 4876fa9e4066Sahrens * Remove all cached state. All pools should be closed now, 4877fa9e4066Sahrens * so every spa in the AVL tree should be unreferenced. 4878fa9e4066Sahrens */ 4879fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4880fa9e4066Sahrens while ((spa = spa_next(NULL)) != NULL) { 4881fa9e4066Sahrens /* 4882ea8dc4b6Seschrock * Stop async tasks. The async thread may need to detach 4883ea8dc4b6Seschrock * a device that's been replaced, which requires grabbing 4884ea8dc4b6Seschrock * spa_namespace_lock, so we must drop it here. 
4885fa9e4066Sahrens */ 4886fa9e4066Sahrens spa_open_ref(spa, FTAG); 4887fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4888ea8dc4b6Seschrock spa_async_suspend(spa); 4889fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4890fa9e4066Sahrens spa_close(spa, FTAG); 4891fa9e4066Sahrens 4892fa9e4066Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4893fa9e4066Sahrens spa_unload(spa); 4894fa9e4066Sahrens spa_deactivate(spa); 4895fa9e4066Sahrens } 4896fa9e4066Sahrens spa_remove(spa); 4897fa9e4066Sahrens } 4898fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4899fa9e4066Sahrens } 4900ea8dc4b6Seschrock 4901ea8dc4b6Seschrock vdev_t * 49026809eb4eSEric Schrock spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) 4903ea8dc4b6Seschrock { 4904c5904d13Seschrock vdev_t *vd; 4905c5904d13Seschrock int i; 4906c5904d13Seschrock 4907c5904d13Seschrock if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) 4908c5904d13Seschrock return (vd); 4909c5904d13Seschrock 49106809eb4eSEric Schrock if (aux) { 4911c5904d13Seschrock for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 4912c5904d13Seschrock vd = spa->spa_l2cache.sav_vdevs[i]; 49136809eb4eSEric Schrock if (vd->vdev_guid == guid) 49146809eb4eSEric Schrock return (vd); 49156809eb4eSEric Schrock } 49166809eb4eSEric Schrock 49176809eb4eSEric Schrock for (i = 0; i < spa->spa_spares.sav_count; i++) { 49186809eb4eSEric Schrock vd = spa->spa_spares.sav_vdevs[i]; 4919c5904d13Seschrock if (vd->vdev_guid == guid) 4920c5904d13Seschrock return (vd); 4921c5904d13Seschrock } 4922c5904d13Seschrock } 4923c5904d13Seschrock 4924c5904d13Seschrock return (NULL); 4925ea8dc4b6Seschrock } 4926eaca9bbdSeschrock 4927eaca9bbdSeschrock void 4928990b4856Slling spa_upgrade(spa_t *spa, uint64_t version) 4929eaca9bbdSeschrock { 4930e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4931eaca9bbdSeschrock 4932eaca9bbdSeschrock /* 4933eaca9bbdSeschrock * This should only be called for a non-faulted pool, and since a 
4934eaca9bbdSeschrock * future version would result in an unopenable pool, this shouldn't be 4935eaca9bbdSeschrock * possible. 4936eaca9bbdSeschrock */ 4937e7437265Sahrens ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); 4938990b4856Slling ASSERT(version >= spa->spa_uberblock.ub_version); 4939eaca9bbdSeschrock 4940990b4856Slling spa->spa_uberblock.ub_version = version; 4941eaca9bbdSeschrock vdev_config_dirty(spa->spa_root_vdev); 4942eaca9bbdSeschrock 4943e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 494499653d4eSeschrock 494599653d4eSeschrock txg_wait_synced(spa_get_dsl(spa), 0); 494699653d4eSeschrock } 494799653d4eSeschrock 494899653d4eSeschrock boolean_t 494999653d4eSeschrock spa_has_spare(spa_t *spa, uint64_t guid) 495099653d4eSeschrock { 495199653d4eSeschrock int i; 495239c23413Seschrock uint64_t spareguid; 4953fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_spares; 495499653d4eSeschrock 4955fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4956fa94a07fSbrendan if (sav->sav_vdevs[i]->vdev_guid == guid) 495799653d4eSeschrock return (B_TRUE); 495899653d4eSeschrock 4959fa94a07fSbrendan for (i = 0; i < sav->sav_npending; i++) { 4960fa94a07fSbrendan if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 4961fa94a07fSbrendan &spareguid) == 0 && spareguid == guid) 496239c23413Seschrock return (B_TRUE); 496339c23413Seschrock } 496439c23413Seschrock 496599653d4eSeschrock return (B_FALSE); 4966eaca9bbdSeschrock } 4967b1b8ab34Slling 496889a89ebfSlling /* 496989a89ebfSlling * Check if a pool has an active shared spare device. 
497089a89ebfSlling * Note: reference count of an active spare is 2, as a spare and as a replace 497189a89ebfSlling */ 497289a89ebfSlling static boolean_t 497389a89ebfSlling spa_has_active_shared_spare(spa_t *spa) 497489a89ebfSlling { 497589a89ebfSlling int i, refcnt; 497689a89ebfSlling uint64_t pool; 497789a89ebfSlling spa_aux_vdev_t *sav = &spa->spa_spares; 497889a89ebfSlling 497989a89ebfSlling for (i = 0; i < sav->sav_count; i++) { 498089a89ebfSlling if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 498189a89ebfSlling &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 498289a89ebfSlling refcnt > 2) 498389a89ebfSlling return (B_TRUE); 498489a89ebfSlling } 498589a89ebfSlling 498689a89ebfSlling return (B_FALSE); 498789a89ebfSlling } 498889a89ebfSlling 49893d7072f8Seschrock /* 49903d7072f8Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 49913d7072f8Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 49923d7072f8Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 49933d7072f8Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 49943d7072f8Seschrock * or zdb as real changes. 
49953d7072f8Seschrock */ 49963d7072f8Seschrock void 49973d7072f8Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 49983d7072f8Seschrock { 49993d7072f8Seschrock #ifdef _KERNEL 50003d7072f8Seschrock sysevent_t *ev; 50013d7072f8Seschrock sysevent_attr_list_t *attr = NULL; 50023d7072f8Seschrock sysevent_value_t value; 50033d7072f8Seschrock sysevent_id_t eid; 50043d7072f8Seschrock 50053d7072f8Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 50063d7072f8Seschrock SE_SLEEP); 50073d7072f8Seschrock 50083d7072f8Seschrock value.value_type = SE_DATA_TYPE_STRING; 50093d7072f8Seschrock value.value.sv_string = spa_name(spa); 50103d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 50113d7072f8Seschrock goto done; 50123d7072f8Seschrock 50133d7072f8Seschrock value.value_type = SE_DATA_TYPE_UINT64; 50143d7072f8Seschrock value.value.sv_uint64 = spa_guid(spa); 50153d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 50163d7072f8Seschrock goto done; 50173d7072f8Seschrock 50183d7072f8Seschrock if (vd) { 50193d7072f8Seschrock value.value_type = SE_DATA_TYPE_UINT64; 50203d7072f8Seschrock value.value.sv_uint64 = vd->vdev_guid; 50213d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 50223d7072f8Seschrock SE_SLEEP) != 0) 50233d7072f8Seschrock goto done; 50243d7072f8Seschrock 50253d7072f8Seschrock if (vd->vdev_path) { 50263d7072f8Seschrock value.value_type = SE_DATA_TYPE_STRING; 50273d7072f8Seschrock value.value.sv_string = vd->vdev_path; 50283d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 50293d7072f8Seschrock &value, SE_SLEEP) != 0) 50303d7072f8Seschrock goto done; 50313d7072f8Seschrock } 50323d7072f8Seschrock } 50333d7072f8Seschrock 5034b01c3b58Seschrock if (sysevent_attach_attributes(ev, attr) != 0) 5035b01c3b58Seschrock goto done; 5036b01c3b58Seschrock attr = NULL; 5037b01c3b58Seschrock 50383d7072f8Seschrock (void) log_sysevent(ev, 
SE_SLEEP, &eid); 50393d7072f8Seschrock 50403d7072f8Seschrock done: 50413d7072f8Seschrock if (attr) 50423d7072f8Seschrock sysevent_free_attr(attr); 50433d7072f8Seschrock sysevent_free(ev); 50443d7072f8Seschrock #endif 50453d7072f8Seschrock } 5046