1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 2199653d4eSeschrock 22fa9e4066Sahrens /* 23b01c3b58Seschrock * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24fa9e4066Sahrens * Use is subject to license terms. 25fa9e4066Sahrens */ 26fa9e4066Sahrens 27fa9e4066Sahrens /* 28fa9e4066Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29fa9e4066Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30fa9e4066Sahrens * pool. 
31fa9e4066Sahrens */ 32fa9e4066Sahrens 33fa9e4066Sahrens #include <sys/zfs_context.h> 34ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h> 35fa9e4066Sahrens #include <sys/spa_impl.h> 36fa9e4066Sahrens #include <sys/zio.h> 37fa9e4066Sahrens #include <sys/zio_checksum.h> 38fa9e4066Sahrens #include <sys/zio_compress.h> 39fa9e4066Sahrens #include <sys/dmu.h> 40fa9e4066Sahrens #include <sys/dmu_tx.h> 41fa9e4066Sahrens #include <sys/zap.h> 42fa9e4066Sahrens #include <sys/zil.h> 43fa9e4066Sahrens #include <sys/vdev_impl.h> 44fa9e4066Sahrens #include <sys/metaslab.h> 45fa9e4066Sahrens #include <sys/uberblock_impl.h> 46fa9e4066Sahrens #include <sys/txg.h> 47fa9e4066Sahrens #include <sys/avl.h> 48fa9e4066Sahrens #include <sys/dmu_traverse.h> 49b1b8ab34Slling #include <sys/dmu_objset.h> 50fa9e4066Sahrens #include <sys/unique.h> 51fa9e4066Sahrens #include <sys/dsl_pool.h> 52b1b8ab34Slling #include <sys/dsl_dataset.h> 53fa9e4066Sahrens #include <sys/dsl_dir.h> 54fa9e4066Sahrens #include <sys/dsl_prop.h> 55b1b8ab34Slling #include <sys/dsl_synctask.h> 56fa9e4066Sahrens #include <sys/fs/zfs.h> 57fa94a07fSbrendan #include <sys/arc.h> 58fa9e4066Sahrens #include <sys/callb.h> 5995173954Sek #include <sys/systeminfo.h> 6095173954Sek #include <sys/sunddi.h> 61e7cbe64fSgw #include <sys/spa_boot.h> 62fa9e4066Sahrens 63990b4856Slling #include "zfs_prop.h" 64b7b97454Sperrin #include "zfs_comutil.h" 65990b4856Slling 66e14bb325SJeff Bonwick int zio_taskq_threads[ZIO_TYPES][ZIO_TASKQ_TYPES] = { 67e14bb325SJeff Bonwick /* ISSUE INTR */ 68e14bb325SJeff Bonwick { 1, 1 }, /* ZIO_TYPE_NULL */ 69e14bb325SJeff Bonwick { 1, 8 }, /* ZIO_TYPE_READ */ 70e14bb325SJeff Bonwick { 8, 1 }, /* ZIO_TYPE_WRITE */ 71e14bb325SJeff Bonwick { 1, 1 }, /* ZIO_TYPE_FREE */ 72e14bb325SJeff Bonwick { 1, 1 }, /* ZIO_TYPE_CLAIM */ 73e14bb325SJeff Bonwick { 1, 1 }, /* ZIO_TYPE_IOCTL */ 74e14bb325SJeff Bonwick }; 75416e0cd8Sek 76990b4856Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 
7789a89ebfSlling static boolean_t spa_has_active_shared_spare(spa_t *spa); 78990b4856Slling 79990b4856Slling /* 80990b4856Slling * ========================================================================== 81990b4856Slling * SPA properties routines 82990b4856Slling * ========================================================================== 83990b4856Slling */ 84990b4856Slling 85990b4856Slling /* 86990b4856Slling * Add a (source=src, propname=propval) list to an nvlist. 87990b4856Slling */ 889d82f4f6Slling static void 89990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 90990b4856Slling uint64_t intval, zprop_source_t src) 91990b4856Slling { 92990b4856Slling const char *propname = zpool_prop_to_name(prop); 93990b4856Slling nvlist_t *propval; 94990b4856Slling 959d82f4f6Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 969d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 97990b4856Slling 989d82f4f6Slling if (strval != NULL) 999d82f4f6Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1009d82f4f6Slling else 1019d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 102990b4856Slling 1039d82f4f6Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 104990b4856Slling nvlist_free(propval); 105990b4856Slling } 106990b4856Slling 107990b4856Slling /* 108990b4856Slling * Get property values from the spa configuration. 
109990b4856Slling */ 1109d82f4f6Slling static void 111990b4856Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 112990b4856Slling { 113990b4856Slling uint64_t size = spa_get_space(spa); 114990b4856Slling uint64_t used = spa_get_alloc(spa); 115990b4856Slling uint64_t cap, version; 116990b4856Slling zprop_source_t src = ZPROP_SRC_NONE; 117c5904d13Seschrock spa_config_dirent_t *dp; 118990b4856Slling 119e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 120e14bb325SJeff Bonwick 121990b4856Slling /* 122990b4856Slling * readonly properties 123990b4856Slling */ 124e14bb325SJeff Bonwick spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 1259d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 1269d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 1279d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, size - used, src); 128990b4856Slling 129990b4856Slling cap = (size == 0) ? 0 : (used * 100 / size); 1309d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 131990b4856Slling 1329d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 1339d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 1349d82f4f6Slling spa->spa_root_vdev->vdev_state, src); 135990b4856Slling 136990b4856Slling /* 137990b4856Slling * settable properties that are not stored in the pool property object. 
138990b4856Slling */ 139990b4856Slling version = spa_version(spa); 140990b4856Slling if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 141990b4856Slling src = ZPROP_SRC_DEFAULT; 142990b4856Slling else 143990b4856Slling src = ZPROP_SRC_LOCAL; 1449d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 145990b4856Slling 1469d82f4f6Slling if (spa->spa_root != NULL) 1479d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1489d82f4f6Slling 0, ZPROP_SRC_LOCAL); 149990b4856Slling 150c5904d13Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 151c5904d13Seschrock if (dp->scd_path == NULL) { 1529d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 153c5904d13Seschrock "none", 0, ZPROP_SRC_LOCAL); 154c5904d13Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1559d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 156c5904d13Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1572f8aaab3Seschrock } 1582f8aaab3Seschrock } 159990b4856Slling } 160990b4856Slling 161990b4856Slling /* 162990b4856Slling * Get zpool property values. 163990b4856Slling */ 164990b4856Slling int 165990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 166990b4856Slling { 167990b4856Slling zap_cursor_t zc; 168990b4856Slling zap_attribute_t za; 169990b4856Slling objset_t *mos = spa->spa_meta_objset; 170990b4856Slling int err; 171990b4856Slling 1729d82f4f6Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 173990b4856Slling 174e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 175e14bb325SJeff Bonwick 176990b4856Slling /* 177990b4856Slling * Get properties from the spa config. 178990b4856Slling */ 1799d82f4f6Slling spa_prop_get_config(spa, nvp); 180990b4856Slling 181990b4856Slling /* If no pool property object, no more prop to get. 
*/ 182990b4856Slling if (spa->spa_pool_props_object == 0) { 183990b4856Slling mutex_exit(&spa->spa_props_lock); 184990b4856Slling return (0); 185990b4856Slling } 186990b4856Slling 187990b4856Slling /* 188990b4856Slling * Get properties from the MOS pool property object. 189990b4856Slling */ 190990b4856Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 191990b4856Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 192990b4856Slling zap_cursor_advance(&zc)) { 193990b4856Slling uint64_t intval = 0; 194990b4856Slling char *strval = NULL; 195990b4856Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 196990b4856Slling zpool_prop_t prop; 197990b4856Slling 198990b4856Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 199990b4856Slling continue; 200990b4856Slling 201990b4856Slling switch (za.za_integer_length) { 202990b4856Slling case 8: 203990b4856Slling /* integer property */ 204990b4856Slling if (za.za_first_integer != 205990b4856Slling zpool_prop_default_numeric(prop)) 206990b4856Slling src = ZPROP_SRC_LOCAL; 207990b4856Slling 208990b4856Slling if (prop == ZPOOL_PROP_BOOTFS) { 209990b4856Slling dsl_pool_t *dp; 210990b4856Slling dsl_dataset_t *ds = NULL; 211990b4856Slling 212990b4856Slling dp = spa_get_dsl(spa); 213990b4856Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 214745cd3c5Smaybee if (err = dsl_dataset_hold_obj(dp, 215745cd3c5Smaybee za.za_first_integer, FTAG, &ds)) { 216990b4856Slling rw_exit(&dp->dp_config_rwlock); 217990b4856Slling break; 218990b4856Slling } 219990b4856Slling 220990b4856Slling strval = kmem_alloc( 221990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 222990b4856Slling KM_SLEEP); 223990b4856Slling dsl_dataset_name(ds, strval); 224745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 225990b4856Slling rw_exit(&dp->dp_config_rwlock); 226990b4856Slling } else { 227990b4856Slling strval = NULL; 228990b4856Slling intval = za.za_first_integer; 229990b4856Slling } 230990b4856Slling 2319d82f4f6Slling spa_prop_add_list(*nvp, prop, 
strval, intval, src); 232990b4856Slling 233990b4856Slling if (strval != NULL) 234990b4856Slling kmem_free(strval, 235990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 236990b4856Slling 237990b4856Slling break; 238990b4856Slling 239990b4856Slling case 1: 240990b4856Slling /* string property */ 241990b4856Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 242990b4856Slling err = zap_lookup(mos, spa->spa_pool_props_object, 243990b4856Slling za.za_name, 1, za.za_num_integers, strval); 244990b4856Slling if (err) { 245990b4856Slling kmem_free(strval, za.za_num_integers); 246990b4856Slling break; 247990b4856Slling } 2489d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 249990b4856Slling kmem_free(strval, za.za_num_integers); 250990b4856Slling break; 251990b4856Slling 252990b4856Slling default: 253990b4856Slling break; 254990b4856Slling } 255990b4856Slling } 256990b4856Slling zap_cursor_fini(&zc); 257990b4856Slling mutex_exit(&spa->spa_props_lock); 258990b4856Slling out: 259990b4856Slling if (err && err != ENOENT) { 260990b4856Slling nvlist_free(*nvp); 2619d82f4f6Slling *nvp = NULL; 262990b4856Slling return (err); 263990b4856Slling } 264990b4856Slling 265990b4856Slling return (0); 266990b4856Slling } 267990b4856Slling 268990b4856Slling /* 269990b4856Slling * Validate the given pool properties nvlist and modify the list 270990b4856Slling * for the property values to be set. 
271990b4856Slling */ 272990b4856Slling static int 273990b4856Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 274990b4856Slling { 275990b4856Slling nvpair_t *elem; 276990b4856Slling int error = 0, reset_bootfs = 0; 277990b4856Slling uint64_t objnum; 278990b4856Slling 279990b4856Slling elem = NULL; 280990b4856Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 281990b4856Slling zpool_prop_t prop; 282990b4856Slling char *propname, *strval; 283990b4856Slling uint64_t intval; 284990b4856Slling objset_t *os; 2852f8aaab3Seschrock char *slash; 286990b4856Slling 287990b4856Slling propname = nvpair_name(elem); 288990b4856Slling 289990b4856Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 290990b4856Slling return (EINVAL); 291990b4856Slling 292990b4856Slling switch (prop) { 293990b4856Slling case ZPOOL_PROP_VERSION: 294990b4856Slling error = nvpair_value_uint64(elem, &intval); 295990b4856Slling if (!error && 296990b4856Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 297990b4856Slling error = EINVAL; 298990b4856Slling break; 299990b4856Slling 300990b4856Slling case ZPOOL_PROP_DELEGATION: 301990b4856Slling case ZPOOL_PROP_AUTOREPLACE: 302d5b5bb25SRich Morris case ZPOOL_PROP_LISTSNAPS: 303990b4856Slling error = nvpair_value_uint64(elem, &intval); 304990b4856Slling if (!error && intval > 1) 305990b4856Slling error = EINVAL; 306990b4856Slling break; 307990b4856Slling 308990b4856Slling case ZPOOL_PROP_BOOTFS: 309990b4856Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 310990b4856Slling error = ENOTSUP; 311990b4856Slling break; 312990b4856Slling } 313990b4856Slling 314990b4856Slling /* 31515e6edf1Sgw * Make sure the vdev config is bootable 316990b4856Slling */ 31715e6edf1Sgw if (!vdev_is_bootable(spa->spa_root_vdev)) { 318990b4856Slling error = ENOTSUP; 319990b4856Slling break; 320990b4856Slling } 321990b4856Slling 322990b4856Slling reset_bootfs = 1; 323990b4856Slling 324990b4856Slling error = nvpair_value_string(elem, &strval); 
325990b4856Slling 326990b4856Slling if (!error) { 32715e6edf1Sgw uint64_t compress; 32815e6edf1Sgw 329990b4856Slling if (strval == NULL || strval[0] == '\0') { 330990b4856Slling objnum = zpool_prop_default_numeric( 331990b4856Slling ZPOOL_PROP_BOOTFS); 332990b4856Slling break; 333990b4856Slling } 334990b4856Slling 335990b4856Slling if (error = dmu_objset_open(strval, DMU_OST_ZFS, 336745cd3c5Smaybee DS_MODE_USER | DS_MODE_READONLY, &os)) 337990b4856Slling break; 33815e6edf1Sgw 33915e6edf1Sgw /* We don't support gzip bootable datasets */ 34015e6edf1Sgw if ((error = dsl_prop_get_integer(strval, 34115e6edf1Sgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 34215e6edf1Sgw &compress, NULL)) == 0 && 34315e6edf1Sgw !BOOTFS_COMPRESS_VALID(compress)) { 34415e6edf1Sgw error = ENOTSUP; 34515e6edf1Sgw } else { 34615e6edf1Sgw objnum = dmu_objset_id(os); 34715e6edf1Sgw } 348990b4856Slling dmu_objset_close(os); 349990b4856Slling } 350990b4856Slling break; 351e14bb325SJeff Bonwick 3520a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 3530a4e9518Sgw error = nvpair_value_uint64(elem, &intval); 3540a4e9518Sgw if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 3550a4e9518Sgw intval > ZIO_FAILURE_MODE_PANIC)) 3560a4e9518Sgw error = EINVAL; 3570a4e9518Sgw 3580a4e9518Sgw /* 3590a4e9518Sgw * This is a special case which only occurs when 3600a4e9518Sgw * the pool has completely failed. This allows 3610a4e9518Sgw * the user to change the in-core failmode property 3620a4e9518Sgw * without syncing it out to disk (I/Os might 3630a4e9518Sgw * currently be blocked). We do this by returning 3640a4e9518Sgw * EIO to the caller (spa_prop_set) to trick it 3650a4e9518Sgw * into thinking we encountered a property validation 3660a4e9518Sgw * error. 
3670a4e9518Sgw */ 368e14bb325SJeff Bonwick if (!error && spa_suspended(spa)) { 3690a4e9518Sgw spa->spa_failmode = intval; 3700a4e9518Sgw error = EIO; 3710a4e9518Sgw } 3720a4e9518Sgw break; 3732f8aaab3Seschrock 3742f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 3752f8aaab3Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 3762f8aaab3Seschrock break; 3772f8aaab3Seschrock 3782f8aaab3Seschrock if (strval[0] == '\0') 3792f8aaab3Seschrock break; 3802f8aaab3Seschrock 3812f8aaab3Seschrock if (strcmp(strval, "none") == 0) 3822f8aaab3Seschrock break; 3832f8aaab3Seschrock 3842f8aaab3Seschrock if (strval[0] != '/') { 3852f8aaab3Seschrock error = EINVAL; 3862f8aaab3Seschrock break; 3872f8aaab3Seschrock } 3882f8aaab3Seschrock 3892f8aaab3Seschrock slash = strrchr(strval, '/'); 3902f8aaab3Seschrock ASSERT(slash != NULL); 3912f8aaab3Seschrock 3922f8aaab3Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 3932f8aaab3Seschrock strcmp(slash, "/..") == 0) 3942f8aaab3Seschrock error = EINVAL; 3952f8aaab3Seschrock break; 396990b4856Slling } 397990b4856Slling 398990b4856Slling if (error) 399990b4856Slling break; 400990b4856Slling } 401990b4856Slling 402990b4856Slling if (!error && reset_bootfs) { 403990b4856Slling error = nvlist_remove(props, 404990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 405990b4856Slling 406990b4856Slling if (!error) { 407990b4856Slling error = nvlist_add_uint64(props, 408990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 409990b4856Slling } 410990b4856Slling } 411990b4856Slling 412990b4856Slling return (error); 413990b4856Slling } 414990b4856Slling 415990b4856Slling int 416990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 417990b4856Slling { 418990b4856Slling int error; 419990b4856Slling 420990b4856Slling if ((error = spa_prop_validate(spa, nvp)) != 0) 421990b4856Slling return (error); 422990b4856Slling 423990b4856Slling return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 424990b4856Slling 
spa, nvp, 3)); 425990b4856Slling } 426990b4856Slling 427990b4856Slling /* 428990b4856Slling * If the bootfs property value is dsobj, clear it. 429990b4856Slling */ 430990b4856Slling void 431990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 432990b4856Slling { 433990b4856Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 434990b4856Slling VERIFY(zap_remove(spa->spa_meta_objset, 435990b4856Slling spa->spa_pool_props_object, 436990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 437990b4856Slling spa->spa_bootfs = 0; 438990b4856Slling } 439990b4856Slling } 440990b4856Slling 441fa9e4066Sahrens /* 442fa9e4066Sahrens * ========================================================================== 443fa9e4066Sahrens * SPA state manipulation (open/create/destroy/import/export) 444fa9e4066Sahrens * ========================================================================== 445fa9e4066Sahrens */ 446fa9e4066Sahrens 447ea8dc4b6Seschrock static int 448ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b) 449ea8dc4b6Seschrock { 450ea8dc4b6Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 451ea8dc4b6Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 452ea8dc4b6Seschrock int ret; 453ea8dc4b6Seschrock 454ea8dc4b6Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 455ea8dc4b6Seschrock sizeof (zbookmark_t)); 456ea8dc4b6Seschrock 457ea8dc4b6Seschrock if (ret < 0) 458ea8dc4b6Seschrock return (-1); 459ea8dc4b6Seschrock else if (ret > 0) 460ea8dc4b6Seschrock return (1); 461ea8dc4b6Seschrock else 462ea8dc4b6Seschrock return (0); 463ea8dc4b6Seschrock } 464ea8dc4b6Seschrock 465ea8dc4b6Seschrock /* 466ea8dc4b6Seschrock * Utility function which retrieves copies of the current logs and 467ea8dc4b6Seschrock * re-initializes them in the process. 
468ea8dc4b6Seschrock */ 469ea8dc4b6Seschrock void 470ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 471ea8dc4b6Seschrock { 472ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 473ea8dc4b6Seschrock 474ea8dc4b6Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 475ea8dc4b6Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 476ea8dc4b6Seschrock 477ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 478ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 479ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 480ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 481ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 482ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 483ea8dc4b6Seschrock } 484ea8dc4b6Seschrock 485fa9e4066Sahrens /* 486fa9e4066Sahrens * Activate an uninitialized pool. 487fa9e4066Sahrens */ 488fa9e4066Sahrens static void 489*8ad4d6ddSJeff Bonwick spa_activate(spa_t *spa, int mode) 490fa9e4066Sahrens { 491fa9e4066Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 492fa9e4066Sahrens 493fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 494*8ad4d6ddSJeff Bonwick spa->spa_mode = mode; 495fa9e4066Sahrens 496fa9e4066Sahrens spa->spa_normal_class = metaslab_class_create(); 4978654d025Sperrin spa->spa_log_class = metaslab_class_create(); 498fa9e4066Sahrens 499e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 500e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 501e14bb325SJeff Bonwick spa->spa_zio_taskq[t][q] = taskq_create("spa_zio", 502e14bb325SJeff Bonwick zio_taskq_threads[t][q], maxclsyspri, 50, 503e14bb325SJeff Bonwick INT_MAX, TASKQ_PREPOPULATE); 504e14bb325SJeff Bonwick } 505fa9e4066Sahrens } 506fa9e4066Sahrens 507e14bb325SJeff Bonwick list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 508e14bb325SJeff Bonwick offsetof(vdev_t, vdev_config_dirty_node)); 509e14bb325SJeff Bonwick 
list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 510e14bb325SJeff Bonwick offsetof(vdev_t, vdev_state_dirty_node)); 511fa9e4066Sahrens 512fa9e4066Sahrens txg_list_create(&spa->spa_vdev_txg_list, 513fa9e4066Sahrens offsetof(struct vdev, vdev_txg_node)); 514ea8dc4b6Seschrock 515ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 516ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 517ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 518ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 519ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 520ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 521fa9e4066Sahrens } 522fa9e4066Sahrens 523fa9e4066Sahrens /* 524fa9e4066Sahrens * Opposite of spa_activate(). 525fa9e4066Sahrens */ 526fa9e4066Sahrens static void 527fa9e4066Sahrens spa_deactivate(spa_t *spa) 528fa9e4066Sahrens { 529fa9e4066Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 530fa9e4066Sahrens ASSERT(spa->spa_dsl_pool == NULL); 531fa9e4066Sahrens ASSERT(spa->spa_root_vdev == NULL); 532fa9e4066Sahrens 533fa9e4066Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 534fa9e4066Sahrens 535fa9e4066Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 536fa9e4066Sahrens 537e14bb325SJeff Bonwick list_destroy(&spa->spa_config_dirty_list); 538e14bb325SJeff Bonwick list_destroy(&spa->spa_state_dirty_list); 539fa9e4066Sahrens 540e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 541e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 542e14bb325SJeff Bonwick taskq_destroy(spa->spa_zio_taskq[t][q]); 543e14bb325SJeff Bonwick spa->spa_zio_taskq[t][q] = NULL; 544e14bb325SJeff Bonwick } 545fa9e4066Sahrens } 546fa9e4066Sahrens 547fa9e4066Sahrens metaslab_class_destroy(spa->spa_normal_class); 548fa9e4066Sahrens spa->spa_normal_class = NULL; 549fa9e4066Sahrens 5508654d025Sperrin metaslab_class_destroy(spa->spa_log_class); 5518654d025Sperrin spa->spa_log_class = NULL; 5528654d025Sperrin 
553ea8dc4b6Seschrock /* 554ea8dc4b6Seschrock * If this was part of an import or the open otherwise failed, we may 555ea8dc4b6Seschrock * still have errors left in the queues. Empty them just in case. 556ea8dc4b6Seschrock */ 557ea8dc4b6Seschrock spa_errlog_drain(spa); 558ea8dc4b6Seschrock 559ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_scrub); 560ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_last); 561ea8dc4b6Seschrock 562fa9e4066Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 563fa9e4066Sahrens } 564fa9e4066Sahrens 565fa9e4066Sahrens /* 566fa9e4066Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. This 567fa9e4066Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 568fa9e4066Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 569fa9e4066Sahrens * All vdev validation is done by the vdev_alloc() routine. 570fa9e4066Sahrens */ 57199653d4eSeschrock static int 57299653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 57399653d4eSeschrock uint_t id, int atype) 574fa9e4066Sahrens { 575fa9e4066Sahrens nvlist_t **child; 576fa9e4066Sahrens uint_t c, children; 57799653d4eSeschrock int error; 578fa9e4066Sahrens 57999653d4eSeschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 58099653d4eSeschrock return (error); 581fa9e4066Sahrens 58299653d4eSeschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 58399653d4eSeschrock return (0); 584fa9e4066Sahrens 585e14bb325SJeff Bonwick error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 586e14bb325SJeff Bonwick &child, &children); 587e14bb325SJeff Bonwick 588e14bb325SJeff Bonwick if (error == ENOENT) 589e14bb325SJeff Bonwick return (0); 590e14bb325SJeff Bonwick 591e14bb325SJeff Bonwick if (error) { 59299653d4eSeschrock vdev_free(*vdp); 59399653d4eSeschrock *vdp = NULL; 59499653d4eSeschrock return (EINVAL); 595fa9e4066Sahrens } 596fa9e4066Sahrens 597fa9e4066Sahrens for (c = 0; c < 
children; c++) { 59899653d4eSeschrock vdev_t *vd; 59999653d4eSeschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 60099653d4eSeschrock atype)) != 0) { 60199653d4eSeschrock vdev_free(*vdp); 60299653d4eSeschrock *vdp = NULL; 60399653d4eSeschrock return (error); 604fa9e4066Sahrens } 605fa9e4066Sahrens } 606fa9e4066Sahrens 60799653d4eSeschrock ASSERT(*vdp != NULL); 60899653d4eSeschrock 60999653d4eSeschrock return (0); 610fa9e4066Sahrens } 611fa9e4066Sahrens 612fa9e4066Sahrens /* 613fa9e4066Sahrens * Opposite of spa_load(). 614fa9e4066Sahrens */ 615fa9e4066Sahrens static void 616fa9e4066Sahrens spa_unload(spa_t *spa) 617fa9e4066Sahrens { 61899653d4eSeschrock int i; 61999653d4eSeschrock 620e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 621e14bb325SJeff Bonwick 622ea8dc4b6Seschrock /* 623ea8dc4b6Seschrock * Stop async tasks. 624ea8dc4b6Seschrock */ 625ea8dc4b6Seschrock spa_async_suspend(spa); 626ea8dc4b6Seschrock 627fa9e4066Sahrens /* 628fa9e4066Sahrens * Stop syncing. 629fa9e4066Sahrens */ 630fa9e4066Sahrens if (spa->spa_sync_on) { 631fa9e4066Sahrens txg_sync_stop(spa->spa_dsl_pool); 632fa9e4066Sahrens spa->spa_sync_on = B_FALSE; 633fa9e4066Sahrens } 634fa9e4066Sahrens 635fa9e4066Sahrens /* 636e14bb325SJeff Bonwick * Wait for any outstanding async I/O to complete. 637fa9e4066Sahrens */ 638e14bb325SJeff Bonwick mutex_enter(&spa->spa_async_root_lock); 639e14bb325SJeff Bonwick while (spa->spa_async_root_count != 0) 640e14bb325SJeff Bonwick cv_wait(&spa->spa_async_root_cv, &spa->spa_async_root_lock); 641e14bb325SJeff Bonwick mutex_exit(&spa->spa_async_root_lock); 642fa9e4066Sahrens 643fa9e4066Sahrens /* 644fa9e4066Sahrens * Close the dsl pool. 
645fa9e4066Sahrens */ 646fa9e4066Sahrens if (spa->spa_dsl_pool) { 647fa9e4066Sahrens dsl_pool_close(spa->spa_dsl_pool); 648fa9e4066Sahrens spa->spa_dsl_pool = NULL; 649fa9e4066Sahrens } 650fa9e4066Sahrens 651*8ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 652*8ad4d6ddSJeff Bonwick 653*8ad4d6ddSJeff Bonwick /* 654*8ad4d6ddSJeff Bonwick * Drop and purge level 2 cache 655*8ad4d6ddSJeff Bonwick */ 656*8ad4d6ddSJeff Bonwick spa_l2cache_drop(spa); 657*8ad4d6ddSJeff Bonwick 658fa9e4066Sahrens /* 659fa9e4066Sahrens * Close all vdevs. 660fa9e4066Sahrens */ 6610e34b6a7Sbonwick if (spa->spa_root_vdev) 662fa9e4066Sahrens vdev_free(spa->spa_root_vdev); 6630e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == NULL); 664ea8dc4b6Seschrock 665fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 666fa94a07fSbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 667fa94a07fSbrendan if (spa->spa_spares.sav_vdevs) { 668fa94a07fSbrendan kmem_free(spa->spa_spares.sav_vdevs, 669fa94a07fSbrendan spa->spa_spares.sav_count * sizeof (void *)); 670fa94a07fSbrendan spa->spa_spares.sav_vdevs = NULL; 67199653d4eSeschrock } 672fa94a07fSbrendan if (spa->spa_spares.sav_config) { 673fa94a07fSbrendan nvlist_free(spa->spa_spares.sav_config); 674fa94a07fSbrendan spa->spa_spares.sav_config = NULL; 675fa94a07fSbrendan } 6762ce8af81SEric Schrock spa->spa_spares.sav_count = 0; 677fa94a07fSbrendan 678fa94a07fSbrendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 679fa94a07fSbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 680fa94a07fSbrendan if (spa->spa_l2cache.sav_vdevs) { 681fa94a07fSbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 682fa94a07fSbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 683fa94a07fSbrendan spa->spa_l2cache.sav_vdevs = NULL; 684fa94a07fSbrendan } 685fa94a07fSbrendan if (spa->spa_l2cache.sav_config) { 686fa94a07fSbrendan nvlist_free(spa->spa_l2cache.sav_config); 687fa94a07fSbrendan spa->spa_l2cache.sav_config = NULL; 68899653d4eSeschrock } 6892ce8af81SEric 
Schrock spa->spa_l2cache.sav_count = 0; 69099653d4eSeschrock 691ea8dc4b6Seschrock spa->spa_async_suspended = 0; 692*8ad4d6ddSJeff Bonwick 693*8ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 694fa9e4066Sahrens } 695fa9e4066Sahrens 69699653d4eSeschrock /* 69799653d4eSeschrock * Load (or re-load) the current list of vdevs describing the active spares for 69899653d4eSeschrock * this pool. When this is called, we have some form of basic information in 699fa94a07fSbrendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 700fa94a07fSbrendan * then re-generate a more complete list including status information. 70199653d4eSeschrock */ 70299653d4eSeschrock static void 70399653d4eSeschrock spa_load_spares(spa_t *spa) 70499653d4eSeschrock { 70599653d4eSeschrock nvlist_t **spares; 70699653d4eSeschrock uint_t nspares; 70799653d4eSeschrock int i; 70839c23413Seschrock vdev_t *vd, *tvd; 70999653d4eSeschrock 710e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 711e14bb325SJeff Bonwick 71299653d4eSeschrock /* 71399653d4eSeschrock * First, close and free any existing spare vdevs. 
71499653d4eSeschrock */ 715fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 716fa94a07fSbrendan vd = spa->spa_spares.sav_vdevs[i]; 71739c23413Seschrock 71839c23413Seschrock /* Undo the call to spa_activate() below */ 719c5904d13Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 720c5904d13Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 72139c23413Seschrock spa_spare_remove(tvd); 72239c23413Seschrock vdev_close(vd); 72339c23413Seschrock vdev_free(vd); 72499653d4eSeschrock } 72539c23413Seschrock 726fa94a07fSbrendan if (spa->spa_spares.sav_vdevs) 727fa94a07fSbrendan kmem_free(spa->spa_spares.sav_vdevs, 728fa94a07fSbrendan spa->spa_spares.sav_count * sizeof (void *)); 72999653d4eSeschrock 730fa94a07fSbrendan if (spa->spa_spares.sav_config == NULL) 73199653d4eSeschrock nspares = 0; 73299653d4eSeschrock else 733fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 73499653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 73599653d4eSeschrock 736fa94a07fSbrendan spa->spa_spares.sav_count = (int)nspares; 737fa94a07fSbrendan spa->spa_spares.sav_vdevs = NULL; 73899653d4eSeschrock 73999653d4eSeschrock if (nspares == 0) 74099653d4eSeschrock return; 74199653d4eSeschrock 74299653d4eSeschrock /* 74399653d4eSeschrock * Construct the array of vdevs, opening them to get status in the 74439c23413Seschrock * process. For each spare, there is potentially two different vdev_t 74539c23413Seschrock * structures associated with it: one in the list of spares (used only 74639c23413Seschrock * for basic validation purposes) and one in the active vdev 74739c23413Seschrock * configuration (if it's spared in). During this phase we open and 74839c23413Seschrock * validate each vdev on the spare list. If the vdev also exists in the 74939c23413Seschrock * active configuration, then we also mark this vdev as an active spare. 
75099653d4eSeschrock */ 751fa94a07fSbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 752fa94a07fSbrendan KM_SLEEP); 753fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 75499653d4eSeschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 75599653d4eSeschrock VDEV_ALLOC_SPARE) == 0); 75699653d4eSeschrock ASSERT(vd != NULL); 75799653d4eSeschrock 758fa94a07fSbrendan spa->spa_spares.sav_vdevs[i] = vd; 75999653d4eSeschrock 760c5904d13Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 761c5904d13Seschrock B_FALSE)) != NULL) { 76239c23413Seschrock if (!tvd->vdev_isspare) 76339c23413Seschrock spa_spare_add(tvd); 76439c23413Seschrock 76539c23413Seschrock /* 76639c23413Seschrock * We only mark the spare active if we were successfully 76739c23413Seschrock * able to load the vdev. Otherwise, importing a pool 76839c23413Seschrock * with a bad active spare would result in strange 76939c23413Seschrock * behavior, because multiple pool would think the spare 77039c23413Seschrock * is actively in use. 77139c23413Seschrock * 77239c23413Seschrock * There is a vulnerability here to an equally bizarre 77339c23413Seschrock * circumstance, where a dead active spare is later 77439c23413Seschrock * brought back to life (onlined or otherwise). Given 77539c23413Seschrock * the rarity of this scenario, and the extra complexity 77639c23413Seschrock * it adds, we ignore the possibility. 
77739c23413Seschrock */ 77839c23413Seschrock if (!vdev_is_dead(tvd)) 77939c23413Seschrock spa_spare_activate(tvd); 78039c23413Seschrock } 78139c23413Seschrock 782e14bb325SJeff Bonwick vd->vdev_top = vd; 783e14bb325SJeff Bonwick 78499653d4eSeschrock if (vdev_open(vd) != 0) 78599653d4eSeschrock continue; 78699653d4eSeschrock 787fa94a07fSbrendan if (vdev_validate_aux(vd) == 0) 788fa94a07fSbrendan spa_spare_add(vd); 78999653d4eSeschrock } 79099653d4eSeschrock 79199653d4eSeschrock /* 79299653d4eSeschrock * Recompute the stashed list of spares, with status information 79399653d4eSeschrock * this time. 79499653d4eSeschrock */ 795fa94a07fSbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 79699653d4eSeschrock DATA_TYPE_NVLIST_ARRAY) == 0); 79799653d4eSeschrock 798fa94a07fSbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 799fa94a07fSbrendan KM_SLEEP); 800fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 801fa94a07fSbrendan spares[i] = vdev_config_generate(spa, 802fa94a07fSbrendan spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 803fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 804fa94a07fSbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 805fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 80699653d4eSeschrock nvlist_free(spares[i]); 807fa94a07fSbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 808fa94a07fSbrendan } 809fa94a07fSbrendan 810fa94a07fSbrendan /* 811fa94a07fSbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 812fa94a07fSbrendan * this pool. When this is called, we have some form of basic information in 813fa94a07fSbrendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 814fa94a07fSbrendan * then re-generate a more complete list including status information. 
815fa94a07fSbrendan * Devices which are already active have their details maintained, and are 816fa94a07fSbrendan * not re-opened. 817fa94a07fSbrendan */ 818fa94a07fSbrendan static void 819fa94a07fSbrendan spa_load_l2cache(spa_t *spa) 820fa94a07fSbrendan { 821fa94a07fSbrendan nvlist_t **l2cache; 822fa94a07fSbrendan uint_t nl2cache; 823fa94a07fSbrendan int i, j, oldnvdevs; 824c5904d13Seschrock uint64_t guid, size; 825fa94a07fSbrendan vdev_t *vd, **oldvdevs, **newvdevs; 826fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 827fa94a07fSbrendan 828e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 829e14bb325SJeff Bonwick 830fa94a07fSbrendan if (sav->sav_config != NULL) { 831fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 832fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 833fa94a07fSbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 834fa94a07fSbrendan } else { 835fa94a07fSbrendan nl2cache = 0; 836fa94a07fSbrendan } 837fa94a07fSbrendan 838fa94a07fSbrendan oldvdevs = sav->sav_vdevs; 839fa94a07fSbrendan oldnvdevs = sav->sav_count; 840fa94a07fSbrendan sav->sav_vdevs = NULL; 841fa94a07fSbrendan sav->sav_count = 0; 842fa94a07fSbrendan 843fa94a07fSbrendan /* 844fa94a07fSbrendan * Process new nvlist of vdevs. 845fa94a07fSbrendan */ 846fa94a07fSbrendan for (i = 0; i < nl2cache; i++) { 847fa94a07fSbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 848fa94a07fSbrendan &guid) == 0); 849fa94a07fSbrendan 850fa94a07fSbrendan newvdevs[i] = NULL; 851fa94a07fSbrendan for (j = 0; j < oldnvdevs; j++) { 852fa94a07fSbrendan vd = oldvdevs[j]; 853fa94a07fSbrendan if (vd != NULL && guid == vd->vdev_guid) { 854fa94a07fSbrendan /* 855fa94a07fSbrendan * Retain previous vdev for add/remove ops. 
856fa94a07fSbrendan */ 857fa94a07fSbrendan newvdevs[i] = vd; 858fa94a07fSbrendan oldvdevs[j] = NULL; 859fa94a07fSbrendan break; 860fa94a07fSbrendan } 861fa94a07fSbrendan } 862fa94a07fSbrendan 863fa94a07fSbrendan if (newvdevs[i] == NULL) { 864fa94a07fSbrendan /* 865fa94a07fSbrendan * Create new vdev 866fa94a07fSbrendan */ 867fa94a07fSbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 868fa94a07fSbrendan VDEV_ALLOC_L2CACHE) == 0); 869fa94a07fSbrendan ASSERT(vd != NULL); 870fa94a07fSbrendan newvdevs[i] = vd; 871fa94a07fSbrendan 872fa94a07fSbrendan /* 873fa94a07fSbrendan * Commit this vdev as an l2cache device, 874fa94a07fSbrendan * even if it fails to open. 875fa94a07fSbrendan */ 876fa94a07fSbrendan spa_l2cache_add(vd); 877fa94a07fSbrendan 878c5904d13Seschrock vd->vdev_top = vd; 879c5904d13Seschrock vd->vdev_aux = sav; 880c5904d13Seschrock 881c5904d13Seschrock spa_l2cache_activate(vd); 882c5904d13Seschrock 883fa94a07fSbrendan if (vdev_open(vd) != 0) 884fa94a07fSbrendan continue; 885fa94a07fSbrendan 886fa94a07fSbrendan (void) vdev_validate_aux(vd); 887fa94a07fSbrendan 888fa94a07fSbrendan if (!vdev_is_dead(vd)) { 889fa94a07fSbrendan size = vdev_get_rsize(vd); 890c5904d13Seschrock l2arc_add_vdev(spa, vd, 891c5904d13Seschrock VDEV_LABEL_START_SIZE, 892c5904d13Seschrock size - VDEV_LABEL_START_SIZE); 893fa94a07fSbrendan } 894fa94a07fSbrendan } 895fa94a07fSbrendan } 896fa94a07fSbrendan 897fa94a07fSbrendan /* 898fa94a07fSbrendan * Purge vdevs that were dropped 899fa94a07fSbrendan */ 900fa94a07fSbrendan for (i = 0; i < oldnvdevs; i++) { 901fa94a07fSbrendan uint64_t pool; 902fa94a07fSbrendan 903fa94a07fSbrendan vd = oldvdevs[i]; 904fa94a07fSbrendan if (vd != NULL) { 905*8ad4d6ddSJeff Bonwick if (spa_l2cache_exists(vd->vdev_guid, &pool) && 906*8ad4d6ddSJeff Bonwick pool != 0ULL && l2arc_vdev_present(vd)) 907fa94a07fSbrendan l2arc_remove_vdev(vd); 908fa94a07fSbrendan (void) vdev_close(vd); 909fa94a07fSbrendan spa_l2cache_remove(vd); 910fa94a07fSbrendan } 
911fa94a07fSbrendan } 912fa94a07fSbrendan 913fa94a07fSbrendan if (oldvdevs) 914fa94a07fSbrendan kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 915fa94a07fSbrendan 916fa94a07fSbrendan if (sav->sav_config == NULL) 917fa94a07fSbrendan goto out; 918fa94a07fSbrendan 919fa94a07fSbrendan sav->sav_vdevs = newvdevs; 920fa94a07fSbrendan sav->sav_count = (int)nl2cache; 921fa94a07fSbrendan 922fa94a07fSbrendan /* 923fa94a07fSbrendan * Recompute the stashed list of l2cache devices, with status 924fa94a07fSbrendan * information this time. 925fa94a07fSbrendan */ 926fa94a07fSbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 927fa94a07fSbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 928fa94a07fSbrendan 929fa94a07fSbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 930fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 931fa94a07fSbrendan l2cache[i] = vdev_config_generate(spa, 932fa94a07fSbrendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 933fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 934fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 935fa94a07fSbrendan out: 936fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 937fa94a07fSbrendan nvlist_free(l2cache[i]); 938fa94a07fSbrendan if (sav->sav_count) 939fa94a07fSbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 94099653d4eSeschrock } 94199653d4eSeschrock 94299653d4eSeschrock static int 94399653d4eSeschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 94499653d4eSeschrock { 94599653d4eSeschrock dmu_buf_t *db; 94699653d4eSeschrock char *packed = NULL; 94799653d4eSeschrock size_t nvsize = 0; 94899653d4eSeschrock int error; 94999653d4eSeschrock *value = NULL; 95099653d4eSeschrock 95199653d4eSeschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 95299653d4eSeschrock nvsize = *(uint64_t *)db->db_data; 95399653d4eSeschrock dmu_buf_rele(db, FTAG); 95499653d4eSeschrock 95599653d4eSeschrock packed = kmem_alloc(nvsize, 
KM_SLEEP); 95699653d4eSeschrock error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); 95799653d4eSeschrock if (error == 0) 95899653d4eSeschrock error = nvlist_unpack(packed, nvsize, value, 0); 95999653d4eSeschrock kmem_free(packed, nvsize); 96099653d4eSeschrock 96199653d4eSeschrock return (error); 96299653d4eSeschrock } 96399653d4eSeschrock 9643d7072f8Seschrock /* 9653d7072f8Seschrock * Checks to see if the given vdev could not be opened, in which case we post a 9663d7072f8Seschrock * sysevent to notify the autoreplace code that the device has been removed. 9673d7072f8Seschrock */ 9683d7072f8Seschrock static void 9693d7072f8Seschrock spa_check_removed(vdev_t *vd) 9703d7072f8Seschrock { 9713d7072f8Seschrock int c; 9723d7072f8Seschrock 9733d7072f8Seschrock for (c = 0; c < vd->vdev_children; c++) 9743d7072f8Seschrock spa_check_removed(vd->vdev_child[c]); 9753d7072f8Seschrock 9763d7072f8Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 9773d7072f8Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 9783d7072f8Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 9793d7072f8Seschrock } 9803d7072f8Seschrock } 9813d7072f8Seschrock 982b87f3af3Sperrin /* 983b87f3af3Sperrin * Check for missing log devices 984b87f3af3Sperrin */ 985b87f3af3Sperrin int 986b87f3af3Sperrin spa_check_logs(spa_t *spa) 987b87f3af3Sperrin { 988b87f3af3Sperrin switch (spa->spa_log_state) { 989b87f3af3Sperrin case SPA_LOG_MISSING: 990b87f3af3Sperrin /* need to recheck in case slog has been restored */ 991b87f3af3Sperrin case SPA_LOG_UNKNOWN: 992b87f3af3Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 993b87f3af3Sperrin DS_FIND_CHILDREN)) { 994b87f3af3Sperrin spa->spa_log_state = SPA_LOG_MISSING; 995b87f3af3Sperrin return (1); 996b87f3af3Sperrin } 997b87f3af3Sperrin break; 998b87f3af3Sperrin 999b87f3af3Sperrin case SPA_LOG_CLEAR: 1000b87f3af3Sperrin (void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL, 1001b87f3af3Sperrin 
DS_FIND_CHILDREN); 1002b87f3af3Sperrin break; 1003b87f3af3Sperrin } 1004b87f3af3Sperrin spa->spa_log_state = SPA_LOG_GOOD; 1005b87f3af3Sperrin return (0); 1006b87f3af3Sperrin } 1007b87f3af3Sperrin 1008fa9e4066Sahrens /* 1009fa9e4066Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 1010ea8dc4b6Seschrock * source of configuration information. 1011fa9e4066Sahrens */ 1012fa9e4066Sahrens static int 1013ea8dc4b6Seschrock spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1014fa9e4066Sahrens { 1015fa9e4066Sahrens int error = 0; 1016fa9e4066Sahrens nvlist_t *nvroot = NULL; 1017fa9e4066Sahrens vdev_t *rvd; 1018fa9e4066Sahrens uberblock_t *ub = &spa->spa_uberblock; 10190373e76bSbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1020fa9e4066Sahrens uint64_t pool_guid; 102199653d4eSeschrock uint64_t version; 10223d7072f8Seschrock uint64_t autoreplace = 0; 1023*8ad4d6ddSJeff Bonwick int orig_mode = spa->spa_mode; 1024b87f3af3Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1025fa9e4066Sahrens 1026*8ad4d6ddSJeff Bonwick /* 1027*8ad4d6ddSJeff Bonwick * If this is an untrusted config, access the pool in read-only mode. 1028*8ad4d6ddSJeff Bonwick * This prevents things like resilvering recently removed devices. 
1029*8ad4d6ddSJeff Bonwick */ 1030*8ad4d6ddSJeff Bonwick if (!mosconfig) 1031*8ad4d6ddSJeff Bonwick spa->spa_mode = FREAD; 1032*8ad4d6ddSJeff Bonwick 1033e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1034e14bb325SJeff Bonwick 1035ea8dc4b6Seschrock spa->spa_load_state = state; 10360373e76bSbonwick 1037fa9e4066Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1038a9926bf0Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1039ea8dc4b6Seschrock error = EINVAL; 1040ea8dc4b6Seschrock goto out; 1041ea8dc4b6Seschrock } 1042fa9e4066Sahrens 104399653d4eSeschrock /* 104499653d4eSeschrock * Versioning wasn't explicitly added to the label until later, so if 104599653d4eSeschrock * it's not present treat it as the initial version. 104699653d4eSeschrock */ 104799653d4eSeschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1048e7437265Sahrens version = SPA_VERSION_INITIAL; 104999653d4eSeschrock 1050a9926bf0Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1051a9926bf0Sbonwick &spa->spa_config_txg); 1052a9926bf0Sbonwick 10530373e76bSbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1054ea8dc4b6Seschrock spa_guid_exists(pool_guid, 0)) { 1055ea8dc4b6Seschrock error = EEXIST; 1056ea8dc4b6Seschrock goto out; 1057ea8dc4b6Seschrock } 1058fa9e4066Sahrens 1059b5989ec7Seschrock spa->spa_load_guid = pool_guid; 1060b5989ec7Seschrock 1061fa9e4066Sahrens /* 106299653d4eSeschrock * Parse the configuration into a vdev tree. We explicitly set the 106399653d4eSeschrock * value that will be returned by spa_version() since parsing the 106499653d4eSeschrock * configuration requires knowing the version number. 
1065fa9e4066Sahrens */ 1066e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 106799653d4eSeschrock spa->spa_ubsync.ub_version = version; 106899653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 1069e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1070fa9e4066Sahrens 107199653d4eSeschrock if (error != 0) 1072ea8dc4b6Seschrock goto out; 1073fa9e4066Sahrens 10740e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1075fa9e4066Sahrens ASSERT(spa_guid(spa) == pool_guid); 1076fa9e4066Sahrens 1077fa9e4066Sahrens /* 1078fa9e4066Sahrens * Try to open all vdevs, loading each label in the process. 1079fa9e4066Sahrens */ 1080e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 10810bf246f5Smc error = vdev_open(rvd); 1082e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 10830bf246f5Smc if (error != 0) 1084ea8dc4b6Seschrock goto out; 1085fa9e4066Sahrens 1086560e6e96Seschrock /* 1087560e6e96Seschrock * Validate the labels for all leaf vdevs. We need to grab the config 1088e14bb325SJeff Bonwick * lock because all label I/O is done with ZIO_FLAG_CONFIG_WRITER. 1089560e6e96Seschrock */ 1090*8ad4d6ddSJeff Bonwick if (mosconfig) { 1091*8ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1092*8ad4d6ddSJeff Bonwick error = vdev_validate(rvd); 1093*8ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1094*8ad4d6ddSJeff Bonwick if (error != 0) 1095*8ad4d6ddSJeff Bonwick goto out; 1096*8ad4d6ddSJeff Bonwick } 1097560e6e96Seschrock 1098560e6e96Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1099560e6e96Seschrock error = ENXIO; 1100560e6e96Seschrock goto out; 1101560e6e96Seschrock } 1102560e6e96Seschrock 1103fa9e4066Sahrens /* 1104fa9e4066Sahrens * Find the best uberblock. 
1105fa9e4066Sahrens */ 1106e14bb325SJeff Bonwick vdev_uberblock_load(NULL, rvd, ub); 1107fa9e4066Sahrens 1108fa9e4066Sahrens /* 1109fa9e4066Sahrens * If we weren't able to find a single valid uberblock, return failure. 1110fa9e4066Sahrens */ 1111fa9e4066Sahrens if (ub->ub_txg == 0) { 1112eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1113eaca9bbdSeschrock VDEV_AUX_CORRUPT_DATA); 1114ea8dc4b6Seschrock error = ENXIO; 1115ea8dc4b6Seschrock goto out; 1116ea8dc4b6Seschrock } 1117ea8dc4b6Seschrock 1118ea8dc4b6Seschrock /* 1119ea8dc4b6Seschrock * If the pool is newer than the code, we can't open it. 1120ea8dc4b6Seschrock */ 1121e7437265Sahrens if (ub->ub_version > SPA_VERSION) { 1122eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1123eaca9bbdSeschrock VDEV_AUX_VERSION_NEWER); 1124ea8dc4b6Seschrock error = ENOTSUP; 1125ea8dc4b6Seschrock goto out; 1126fa9e4066Sahrens } 1127fa9e4066Sahrens 1128fa9e4066Sahrens /* 1129fa9e4066Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1130fa9e4066Sahrens * incomplete configuration. 1131fa9e4066Sahrens */ 1132ecc2d604Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 1133ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1134ea8dc4b6Seschrock VDEV_AUX_BAD_GUID_SUM); 1135ea8dc4b6Seschrock error = ENXIO; 1136ea8dc4b6Seschrock goto out; 1137fa9e4066Sahrens } 1138fa9e4066Sahrens 1139fa9e4066Sahrens /* 1140fa9e4066Sahrens * Initialize internal SPA structures. 
1141fa9e4066Sahrens */ 1142fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1143fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 1144fa9e4066Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 1145ea8dc4b6Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 1146ea8dc4b6Seschrock if (error) { 1147ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1148ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1149ea8dc4b6Seschrock goto out; 1150ea8dc4b6Seschrock } 1151fa9e4066Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1152fa9e4066Sahrens 1153ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1154fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 1155ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 1156ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1157ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1158ea8dc4b6Seschrock error = EIO; 1159ea8dc4b6Seschrock goto out; 1160ea8dc4b6Seschrock } 1161fa9e4066Sahrens 1162fa9e4066Sahrens if (!mosconfig) { 116399653d4eSeschrock nvlist_t *newconfig; 116495173954Sek uint64_t hostid; 1165fa9e4066Sahrens 116699653d4eSeschrock if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { 1167ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1168ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1169ea8dc4b6Seschrock error = EIO; 1170ea8dc4b6Seschrock goto out; 1171ea8dc4b6Seschrock } 1172fa9e4066Sahrens 117377650510SLin Ling if (!spa_is_root(spa) && nvlist_lookup_uint64(newconfig, 117477650510SLin Ling ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 117595173954Sek char *hostname; 117695173954Sek unsigned long myhostid = 0; 117795173954Sek 117895173954Sek VERIFY(nvlist_lookup_string(newconfig, 117995173954Sek ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 118095173954Sek 118195173954Sek (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 118217194a52Slling if (hostid != 0 && myhostid != 0 && 118317194a52Slling (unsigned 
long)hostid != myhostid) { 118495173954Sek cmn_err(CE_WARN, "pool '%s' could not be " 118595173954Sek "loaded as it was last accessed by " 118677650510SLin Ling "another system (host: %s hostid: 0x%lx). " 118795173954Sek "See: http://www.sun.com/msg/ZFS-8000-EY", 1188e14bb325SJeff Bonwick spa_name(spa), hostname, 118995173954Sek (unsigned long)hostid); 119095173954Sek error = EBADF; 119195173954Sek goto out; 119295173954Sek } 119395173954Sek } 119495173954Sek 1195fa9e4066Sahrens spa_config_set(spa, newconfig); 1196fa9e4066Sahrens spa_unload(spa); 1197fa9e4066Sahrens spa_deactivate(spa); 1198*8ad4d6ddSJeff Bonwick spa_activate(spa, orig_mode); 1199fa9e4066Sahrens 1200ea8dc4b6Seschrock return (spa_load(spa, newconfig, state, B_TRUE)); 1201fa9e4066Sahrens } 1202fa9e4066Sahrens 1203ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1204fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 1205ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 1206ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1207ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1208ea8dc4b6Seschrock error = EIO; 1209ea8dc4b6Seschrock goto out; 1210ea8dc4b6Seschrock } 1211fa9e4066Sahrens 121299653d4eSeschrock /* 121399653d4eSeschrock * Load the bit that tells us to use the new accounting function 121499653d4eSeschrock * (raid-z deflation). If we have an older pool, this will not 121599653d4eSeschrock * be present. 
121699653d4eSeschrock */ 121799653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, 121899653d4eSeschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 121999653d4eSeschrock sizeof (uint64_t), 1, &spa->spa_deflate); 122099653d4eSeschrock if (error != 0 && error != ENOENT) { 122199653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 122299653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 122399653d4eSeschrock error = EIO; 122499653d4eSeschrock goto out; 122599653d4eSeschrock } 122699653d4eSeschrock 1227fa9e4066Sahrens /* 1228ea8dc4b6Seschrock * Load the persistent error log. If we have an older pool, this will 1229ea8dc4b6Seschrock * not be present. 1230fa9e4066Sahrens */ 1231ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1232ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 1233ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 1234d80c45e0Sbonwick if (error != 0 && error != ENOENT) { 1235ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1236ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1237ea8dc4b6Seschrock error = EIO; 1238ea8dc4b6Seschrock goto out; 1239ea8dc4b6Seschrock } 1240ea8dc4b6Seschrock 1241ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1242ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 1243ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 1244ea8dc4b6Seschrock if (error != 0 && error != ENOENT) { 1245ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1246ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1247ea8dc4b6Seschrock error = EIO; 1248ea8dc4b6Seschrock goto out; 1249ea8dc4b6Seschrock } 1250ea8dc4b6Seschrock 125106eeb2adSek /* 125206eeb2adSek * Load the history object. If we have an older pool, this 125306eeb2adSek * will not be present. 
125406eeb2adSek */ 125506eeb2adSek error = zap_lookup(spa->spa_meta_objset, 125606eeb2adSek DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 125706eeb2adSek sizeof (uint64_t), 1, &spa->spa_history); 125806eeb2adSek if (error != 0 && error != ENOENT) { 125906eeb2adSek vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 126006eeb2adSek VDEV_AUX_CORRUPT_DATA); 126106eeb2adSek error = EIO; 126206eeb2adSek goto out; 126306eeb2adSek } 126406eeb2adSek 126599653d4eSeschrock /* 126699653d4eSeschrock * Load any hot spares for this pool. 126799653d4eSeschrock */ 126899653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1269fa94a07fSbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 127099653d4eSeschrock if (error != 0 && error != ENOENT) { 127199653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 127299653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 127399653d4eSeschrock error = EIO; 127499653d4eSeschrock goto out; 127599653d4eSeschrock } 127699653d4eSeschrock if (error == 0) { 1277e7437265Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 1278fa94a07fSbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 1279fa94a07fSbrendan &spa->spa_spares.sav_config) != 0) { 128099653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 128199653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 128299653d4eSeschrock error = EIO; 128399653d4eSeschrock goto out; 128499653d4eSeschrock } 128599653d4eSeschrock 1286e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 128799653d4eSeschrock spa_load_spares(spa); 1288e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 128999653d4eSeschrock } 129099653d4eSeschrock 1291fa94a07fSbrendan /* 1292fa94a07fSbrendan * Load any level 2 ARC devices for this pool. 
1293fa94a07fSbrendan */ 1294fa94a07fSbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1295fa94a07fSbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 1296fa94a07fSbrendan &spa->spa_l2cache.sav_object); 1297fa94a07fSbrendan if (error != 0 && error != ENOENT) { 1298fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1299fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1300fa94a07fSbrendan error = EIO; 1301fa94a07fSbrendan goto out; 1302fa94a07fSbrendan } 1303fa94a07fSbrendan if (error == 0) { 1304fa94a07fSbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 1305fa94a07fSbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 1306fa94a07fSbrendan &spa->spa_l2cache.sav_config) != 0) { 1307fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, 1308fa94a07fSbrendan VDEV_STATE_CANT_OPEN, 1309fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1310fa94a07fSbrendan error = EIO; 1311fa94a07fSbrendan goto out; 1312fa94a07fSbrendan } 1313fa94a07fSbrendan 1314e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1315fa94a07fSbrendan spa_load_l2cache(spa); 1316e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1317fa94a07fSbrendan } 1318fa94a07fSbrendan 1319b87f3af3Sperrin if (spa_check_logs(spa)) { 1320b87f3af3Sperrin vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1321b87f3af3Sperrin VDEV_AUX_BAD_LOG); 1322b87f3af3Sperrin error = ENXIO; 1323b87f3af3Sperrin ereport = FM_EREPORT_ZFS_LOG_REPLAY; 1324b87f3af3Sperrin goto out; 1325b87f3af3Sperrin } 1326b87f3af3Sperrin 1327b87f3af3Sperrin 1328990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 1329ecd6cf80Smarks 1330b1b8ab34Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1331b1b8ab34Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 1332b1b8ab34Slling 1333b1b8ab34Slling if (error && error != ENOENT) { 1334b1b8ab34Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1335b1b8ab34Slling 
VDEV_AUX_CORRUPT_DATA); 1336b1b8ab34Slling error = EIO; 1337b1b8ab34Slling goto out; 1338b1b8ab34Slling } 1339b1b8ab34Slling 1340b1b8ab34Slling if (error == 0) { 1341b1b8ab34Slling (void) zap_lookup(spa->spa_meta_objset, 1342b1b8ab34Slling spa->spa_pool_props_object, 13433d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 1344b1b8ab34Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 13453d7072f8Seschrock (void) zap_lookup(spa->spa_meta_objset, 13463d7072f8Seschrock spa->spa_pool_props_object, 13473d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 13483d7072f8Seschrock sizeof (uint64_t), 1, &autoreplace); 1349ecd6cf80Smarks (void) zap_lookup(spa->spa_meta_objset, 1350ecd6cf80Smarks spa->spa_pool_props_object, 1351ecd6cf80Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 1352ecd6cf80Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 13530a4e9518Sgw (void) zap_lookup(spa->spa_meta_objset, 13540a4e9518Sgw spa->spa_pool_props_object, 13550a4e9518Sgw zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 13560a4e9518Sgw sizeof (uint64_t), 1, &spa->spa_failmode); 1357b1b8ab34Slling } 1358b1b8ab34Slling 13593d7072f8Seschrock /* 13603d7072f8Seschrock * If the 'autoreplace' property is set, then post a resource notifying 13613d7072f8Seschrock * the ZFS DE that it should not issue any faults for unopenable 13623d7072f8Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 13633d7072f8Seschrock * unopenable vdevs so that the normal autoreplace handler can take 13643d7072f8Seschrock * over. 13653d7072f8Seschrock */ 1366b01c3b58Seschrock if (autoreplace && state != SPA_LOAD_TRYIMPORT) 13673d7072f8Seschrock spa_check_removed(spa->spa_root_vdev); 13683d7072f8Seschrock 1369ea8dc4b6Seschrock /* 1370560e6e96Seschrock * Load the vdev state for all toplevel vdevs. 1371ea8dc4b6Seschrock */ 1372560e6e96Seschrock vdev_load(rvd); 13730373e76bSbonwick 1374fa9e4066Sahrens /* 1375fa9e4066Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 
1376fa9e4066Sahrens */ 1377e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1378fa9e4066Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 1379e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1380fa9e4066Sahrens 1381fa9e4066Sahrens /* 1382fa9e4066Sahrens * Check the state of the root vdev. If it can't be opened, it 1383fa9e4066Sahrens * indicates one or more toplevel vdevs are faulted. 1384fa9e4066Sahrens */ 1385ea8dc4b6Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1386ea8dc4b6Seschrock error = ENXIO; 1387ea8dc4b6Seschrock goto out; 1388ea8dc4b6Seschrock } 1389fa9e4066Sahrens 1390*8ad4d6ddSJeff Bonwick if (spa_writeable(spa)) { 13915dabedeeSbonwick dmu_tx_t *tx; 13920373e76bSbonwick int need_update = B_FALSE; 1393*8ad4d6ddSJeff Bonwick 1394*8ad4d6ddSJeff Bonwick ASSERT(state != SPA_LOAD_TRYIMPORT); 13955dabedeeSbonwick 13960373e76bSbonwick /* 13970373e76bSbonwick * Claim log blocks that haven't been committed yet. 13980373e76bSbonwick * This must all happen in a single txg. 13990373e76bSbonwick */ 14005dabedeeSbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1401fa9e4066Sahrens spa_first_txg(spa)); 1402e14bb325SJeff Bonwick (void) dmu_objset_find(spa_name(spa), 14030b69c2f0Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1404fa9e4066Sahrens dmu_tx_commit(tx); 1405fa9e4066Sahrens 1406fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 1407fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 1408fa9e4066Sahrens 1409fa9e4066Sahrens /* 1410fa9e4066Sahrens * Wait for all claims to sync. 1411fa9e4066Sahrens */ 1412fa9e4066Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 14130e34b6a7Sbonwick 14140e34b6a7Sbonwick /* 14150373e76bSbonwick * If the config cache is stale, or we have uninitialized 14160373e76bSbonwick * metaslabs (see spa_vdev_add()), then update the config. 
14170e34b6a7Sbonwick */ 14180373e76bSbonwick if (config_cache_txg != spa->spa_config_txg || 14190373e76bSbonwick state == SPA_LOAD_IMPORT) 14200373e76bSbonwick need_update = B_TRUE; 14210373e76bSbonwick 1422*8ad4d6ddSJeff Bonwick for (int c = 0; c < rvd->vdev_children; c++) 14230373e76bSbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 14240373e76bSbonwick need_update = B_TRUE; 14250e34b6a7Sbonwick 14260e34b6a7Sbonwick /* 14270373e76bSbonwick * Update the config cache asychronously in case we're the 14280373e76bSbonwick * root pool, in which case the config cache isn't writable yet. 14290e34b6a7Sbonwick */ 14300373e76bSbonwick if (need_update) 14310373e76bSbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 1432*8ad4d6ddSJeff Bonwick 1433*8ad4d6ddSJeff Bonwick /* 1434*8ad4d6ddSJeff Bonwick * Check all DTLs to see if anything needs resilvering. 1435*8ad4d6ddSJeff Bonwick */ 1436*8ad4d6ddSJeff Bonwick if (vdev_resilver_needed(rvd, NULL, NULL)) 1437*8ad4d6ddSJeff Bonwick spa_async_request(spa, SPA_ASYNC_RESILVER); 1438fa9e4066Sahrens } 1439fa9e4066Sahrens 1440ea8dc4b6Seschrock error = 0; 1441ea8dc4b6Seschrock out: 1442088f3894Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 144399653d4eSeschrock if (error && error != EBADF) 1444b87f3af3Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 1445ea8dc4b6Seschrock spa->spa_load_state = SPA_LOAD_NONE; 1446ea8dc4b6Seschrock spa->spa_ena = 0; 1447ea8dc4b6Seschrock 1448ea8dc4b6Seschrock return (error); 1449fa9e4066Sahrens } 1450fa9e4066Sahrens 1451fa9e4066Sahrens /* 1452fa9e4066Sahrens * Pool Open/Import 1453fa9e4066Sahrens * 1454fa9e4066Sahrens * The import case is identical to an open except that the configuration is sent 1455fa9e4066Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1456fa9e4066Sahrens * case of an open, the pool configuration will exist in the 14573d7072f8Seschrock * POOL_STATE_UNINITIALIZED state. 
1458fa9e4066Sahrens * 1459fa9e4066Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1460fa9e4066Sahrens * the same time open the pool, without having to keep around the spa_t in some 1461fa9e4066Sahrens * ambiguous state. 1462fa9e4066Sahrens */ 1463fa9e4066Sahrens static int 1464fa9e4066Sahrens spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) 1465fa9e4066Sahrens { 1466fa9e4066Sahrens spa_t *spa; 1467fa9e4066Sahrens int error; 1468fa9e4066Sahrens int locked = B_FALSE; 1469fa9e4066Sahrens 1470fa9e4066Sahrens *spapp = NULL; 1471fa9e4066Sahrens 1472fa9e4066Sahrens /* 1473fa9e4066Sahrens * As disgusting as this is, we need to support recursive calls to this 1474fa9e4066Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 1475fa9e4066Sahrens * up calling spa_open() again. The real fix is to figure out how to 1476fa9e4066Sahrens * avoid dsl_dir_open() calling this in the first place. 1477fa9e4066Sahrens */ 1478fa9e4066Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 1479fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 1480fa9e4066Sahrens locked = B_TRUE; 1481fa9e4066Sahrens } 1482fa9e4066Sahrens 1483fa9e4066Sahrens if ((spa = spa_lookup(pool)) == NULL) { 1484fa9e4066Sahrens if (locked) 1485fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1486fa9e4066Sahrens return (ENOENT); 1487fa9e4066Sahrens } 1488fa9e4066Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1489fa9e4066Sahrens 1490*8ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 1491fa9e4066Sahrens 14920373e76bSbonwick error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); 1493fa9e4066Sahrens 1494fa9e4066Sahrens if (error == EBADF) { 1495fa9e4066Sahrens /* 1496560e6e96Seschrock * If vdev_validate() returns failure (indicated by 1497560e6e96Seschrock * EBADF), it indicates that one of the vdevs indicates 1498560e6e96Seschrock * that the pool has been exported or destroyed. 
If 1499560e6e96Seschrock * this is the case, the config cache is out of sync and 1500560e6e96Seschrock * we should remove the pool from the namespace. 1501fa9e4066Sahrens */ 1502fa9e4066Sahrens spa_unload(spa); 1503fa9e4066Sahrens spa_deactivate(spa); 1504c5904d13Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 1505fa9e4066Sahrens spa_remove(spa); 1506fa9e4066Sahrens if (locked) 1507fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1508fa9e4066Sahrens return (ENOENT); 1509ea8dc4b6Seschrock } 1510ea8dc4b6Seschrock 1511ea8dc4b6Seschrock if (error) { 1512fa9e4066Sahrens /* 1513fa9e4066Sahrens * We can't open the pool, but we still have useful 1514fa9e4066Sahrens * information: the state of each vdev after the 1515fa9e4066Sahrens * attempted vdev_open(). Return this to the user. 1516fa9e4066Sahrens */ 1517e14bb325SJeff Bonwick if (config != NULL && spa->spa_root_vdev != NULL) 1518fa9e4066Sahrens *config = spa_config_generate(spa, NULL, -1ULL, 1519fa9e4066Sahrens B_TRUE); 1520fa9e4066Sahrens spa_unload(spa); 1521fa9e4066Sahrens spa_deactivate(spa); 1522ea8dc4b6Seschrock spa->spa_last_open_failed = B_TRUE; 1523fa9e4066Sahrens if (locked) 1524fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1525fa9e4066Sahrens *spapp = NULL; 1526fa9e4066Sahrens return (error); 1527ea8dc4b6Seschrock } else { 1528ea8dc4b6Seschrock spa->spa_last_open_failed = B_FALSE; 1529fa9e4066Sahrens } 1530fa9e4066Sahrens } 1531fa9e4066Sahrens 1532fa9e4066Sahrens spa_open_ref(spa, tag); 15333d7072f8Seschrock 1534fa9e4066Sahrens if (locked) 1535fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1536fa9e4066Sahrens 1537fa9e4066Sahrens *spapp = spa; 1538fa9e4066Sahrens 1539e14bb325SJeff Bonwick if (config != NULL) 1540fa9e4066Sahrens *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1541fa9e4066Sahrens 1542fa9e4066Sahrens return (0); 1543fa9e4066Sahrens } 1544fa9e4066Sahrens 1545fa9e4066Sahrens int 1546fa9e4066Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 1547fa9e4066Sahrens { 
1548fa9e4066Sahrens return (spa_open_common(name, spapp, tag, NULL)); 1549fa9e4066Sahrens } 1550fa9e4066Sahrens 1551ea8dc4b6Seschrock /* 1552ea8dc4b6Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 1553ea8dc4b6Seschrock * preventing it from being exported or destroyed. 1554ea8dc4b6Seschrock */ 1555ea8dc4b6Seschrock spa_t * 1556ea8dc4b6Seschrock spa_inject_addref(char *name) 1557ea8dc4b6Seschrock { 1558ea8dc4b6Seschrock spa_t *spa; 1559ea8dc4b6Seschrock 1560ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 1561ea8dc4b6Seschrock if ((spa = spa_lookup(name)) == NULL) { 1562ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 1563ea8dc4b6Seschrock return (NULL); 1564ea8dc4b6Seschrock } 1565ea8dc4b6Seschrock spa->spa_inject_ref++; 1566ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 1567ea8dc4b6Seschrock 1568ea8dc4b6Seschrock return (spa); 1569ea8dc4b6Seschrock } 1570ea8dc4b6Seschrock 1571ea8dc4b6Seschrock void 1572ea8dc4b6Seschrock spa_inject_delref(spa_t *spa) 1573ea8dc4b6Seschrock { 1574ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 1575ea8dc4b6Seschrock spa->spa_inject_ref--; 1576ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 1577ea8dc4b6Seschrock } 1578ea8dc4b6Seschrock 1579fa94a07fSbrendan /* 1580fa94a07fSbrendan * Add spares device information to the nvlist. 
1581fa94a07fSbrendan */ 158299653d4eSeschrock static void 158399653d4eSeschrock spa_add_spares(spa_t *spa, nvlist_t *config) 158499653d4eSeschrock { 158599653d4eSeschrock nvlist_t **spares; 158699653d4eSeschrock uint_t i, nspares; 158799653d4eSeschrock nvlist_t *nvroot; 158899653d4eSeschrock uint64_t guid; 158999653d4eSeschrock vdev_stat_t *vs; 159099653d4eSeschrock uint_t vsc; 159139c23413Seschrock uint64_t pool; 159299653d4eSeschrock 1593fa94a07fSbrendan if (spa->spa_spares.sav_count == 0) 159499653d4eSeschrock return; 159599653d4eSeschrock 159699653d4eSeschrock VERIFY(nvlist_lookup_nvlist(config, 159799653d4eSeschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 1598fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 159999653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 160099653d4eSeschrock if (nspares != 0) { 160199653d4eSeschrock VERIFY(nvlist_add_nvlist_array(nvroot, 160299653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 160399653d4eSeschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 160499653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 160599653d4eSeschrock 160699653d4eSeschrock /* 160799653d4eSeschrock * Go through and find any spares which have since been 160899653d4eSeschrock * repurposed as an active spare. If this is the case, update 160999653d4eSeschrock * their status appropriately. 
161099653d4eSeschrock */ 161199653d4eSeschrock for (i = 0; i < nspares; i++) { 161299653d4eSeschrock VERIFY(nvlist_lookup_uint64(spares[i], 161399653d4eSeschrock ZPOOL_CONFIG_GUID, &guid) == 0); 161489a89ebfSlling if (spa_spare_exists(guid, &pool, NULL) && 161589a89ebfSlling pool != 0ULL) { 161699653d4eSeschrock VERIFY(nvlist_lookup_uint64_array( 161799653d4eSeschrock spares[i], ZPOOL_CONFIG_STATS, 161899653d4eSeschrock (uint64_t **)&vs, &vsc) == 0); 161999653d4eSeschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 162099653d4eSeschrock vs->vs_aux = VDEV_AUX_SPARED; 162199653d4eSeschrock } 162299653d4eSeschrock } 162399653d4eSeschrock } 162499653d4eSeschrock } 162599653d4eSeschrock 1626fa94a07fSbrendan /* 1627fa94a07fSbrendan * Add l2cache device information to the nvlist, including vdev stats. 1628fa94a07fSbrendan */ 1629fa94a07fSbrendan static void 1630fa94a07fSbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 1631fa94a07fSbrendan { 1632fa94a07fSbrendan nvlist_t **l2cache; 1633fa94a07fSbrendan uint_t i, j, nl2cache; 1634fa94a07fSbrendan nvlist_t *nvroot; 1635fa94a07fSbrendan uint64_t guid; 1636fa94a07fSbrendan vdev_t *vd; 1637fa94a07fSbrendan vdev_stat_t *vs; 1638fa94a07fSbrendan uint_t vsc; 1639fa94a07fSbrendan 1640fa94a07fSbrendan if (spa->spa_l2cache.sav_count == 0) 1641fa94a07fSbrendan return; 1642fa94a07fSbrendan 1643e14bb325SJeff Bonwick spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 1644fa94a07fSbrendan 1645fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist(config, 1646fa94a07fSbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 1647fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 1648fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 1649fa94a07fSbrendan if (nl2cache != 0) { 1650fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 1651fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 1652fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 1653fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, 
&nl2cache) == 0); 1654fa94a07fSbrendan 1655fa94a07fSbrendan /* 1656fa94a07fSbrendan * Update level 2 cache device stats. 1657fa94a07fSbrendan */ 1658fa94a07fSbrendan 1659fa94a07fSbrendan for (i = 0; i < nl2cache; i++) { 1660fa94a07fSbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 1661fa94a07fSbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 1662fa94a07fSbrendan 1663fa94a07fSbrendan vd = NULL; 1664fa94a07fSbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 1665fa94a07fSbrendan if (guid == 1666fa94a07fSbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 1667fa94a07fSbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 1668fa94a07fSbrendan break; 1669fa94a07fSbrendan } 1670fa94a07fSbrendan } 1671fa94a07fSbrendan ASSERT(vd != NULL); 1672fa94a07fSbrendan 1673fa94a07fSbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 1674fa94a07fSbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 1675fa94a07fSbrendan vdev_get_stats(vd, vs); 1676fa94a07fSbrendan } 1677fa94a07fSbrendan } 1678fa94a07fSbrendan 1679e14bb325SJeff Bonwick spa_config_exit(spa, SCL_CONFIG, FTAG); 1680fa94a07fSbrendan } 1681fa94a07fSbrendan 1682fa9e4066Sahrens int 1683ea8dc4b6Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 1684fa9e4066Sahrens { 1685fa9e4066Sahrens int error; 1686fa9e4066Sahrens spa_t *spa; 1687fa9e4066Sahrens 1688fa9e4066Sahrens *config = NULL; 1689fa9e4066Sahrens error = spa_open_common(name, &spa, FTAG, config); 1690fa9e4066Sahrens 169199653d4eSeschrock if (spa && *config != NULL) { 1692ea8dc4b6Seschrock VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, 1693ea8dc4b6Seschrock spa_get_errlog_size(spa)) == 0); 1694ea8dc4b6Seschrock 1695e14bb325SJeff Bonwick if (spa_suspended(spa)) 1696e14bb325SJeff Bonwick VERIFY(nvlist_add_uint64(*config, 1697e14bb325SJeff Bonwick ZPOOL_CONFIG_SUSPENDED, spa->spa_failmode) == 0); 1698e14bb325SJeff Bonwick 169999653d4eSeschrock spa_add_spares(spa, *config); 1700fa94a07fSbrendan spa_add_l2cache(spa, *config); 
170199653d4eSeschrock } 170299653d4eSeschrock 1703ea8dc4b6Seschrock /* 1704ea8dc4b6Seschrock * We want to get the alternate root even for faulted pools, so we cheat 1705ea8dc4b6Seschrock * and call spa_lookup() directly. 1706ea8dc4b6Seschrock */ 1707ea8dc4b6Seschrock if (altroot) { 1708ea8dc4b6Seschrock if (spa == NULL) { 1709ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 1710ea8dc4b6Seschrock spa = spa_lookup(name); 1711ea8dc4b6Seschrock if (spa) 1712ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 1713ea8dc4b6Seschrock else 1714ea8dc4b6Seschrock altroot[0] = '\0'; 1715ea8dc4b6Seschrock spa = NULL; 1716ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 1717ea8dc4b6Seschrock } else { 1718ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 1719ea8dc4b6Seschrock } 1720ea8dc4b6Seschrock } 1721ea8dc4b6Seschrock 1722fa9e4066Sahrens if (spa != NULL) 1723fa9e4066Sahrens spa_close(spa, FTAG); 1724fa9e4066Sahrens 1725fa9e4066Sahrens return (error); 1726fa9e4066Sahrens } 1727fa9e4066Sahrens 172899653d4eSeschrock /* 1729fa94a07fSbrendan * Validate that the auxiliary device array is well formed. We must have an 1730fa94a07fSbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 1731fa94a07fSbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 1732fa94a07fSbrendan * specified, as long as they are well-formed. 
173399653d4eSeschrock */ 173499653d4eSeschrock static int 1735fa94a07fSbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 1736fa94a07fSbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 1737fa94a07fSbrendan vdev_labeltype_t label) 173899653d4eSeschrock { 1739fa94a07fSbrendan nvlist_t **dev; 1740fa94a07fSbrendan uint_t i, ndev; 174199653d4eSeschrock vdev_t *vd; 174299653d4eSeschrock int error; 174399653d4eSeschrock 1744e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1745e14bb325SJeff Bonwick 174699653d4eSeschrock /* 1747fa94a07fSbrendan * It's acceptable to have no devs specified. 174899653d4eSeschrock */ 1749fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 175099653d4eSeschrock return (0); 175199653d4eSeschrock 1752fa94a07fSbrendan if (ndev == 0) 175399653d4eSeschrock return (EINVAL); 175499653d4eSeschrock 175599653d4eSeschrock /* 1756fa94a07fSbrendan * Make sure the pool is formatted with a version that supports this 1757fa94a07fSbrendan * device type. 175899653d4eSeschrock */ 1759fa94a07fSbrendan if (spa_version(spa) < version) 176099653d4eSeschrock return (ENOTSUP); 176199653d4eSeschrock 176239c23413Seschrock /* 1763fa94a07fSbrendan * Set the pending device list so we correctly handle device in-use 176439c23413Seschrock * checking. 
176539c23413Seschrock */ 1766fa94a07fSbrendan sav->sav_pending = dev; 1767fa94a07fSbrendan sav->sav_npending = ndev; 176839c23413Seschrock 1769fa94a07fSbrendan for (i = 0; i < ndev; i++) { 1770fa94a07fSbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 177199653d4eSeschrock mode)) != 0) 177239c23413Seschrock goto out; 177399653d4eSeschrock 177499653d4eSeschrock if (!vd->vdev_ops->vdev_op_leaf) { 177599653d4eSeschrock vdev_free(vd); 177639c23413Seschrock error = EINVAL; 177739c23413Seschrock goto out; 177899653d4eSeschrock } 177999653d4eSeschrock 1780fa94a07fSbrendan /* 1781e14bb325SJeff Bonwick * The L2ARC currently only supports disk devices in 1782e14bb325SJeff Bonwick * kernel context. For user-level testing, we allow it. 1783fa94a07fSbrendan */ 1784e14bb325SJeff Bonwick #ifdef _KERNEL 1785fa94a07fSbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 1786fa94a07fSbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 1787fa94a07fSbrendan error = ENOTBLK; 1788fa94a07fSbrendan goto out; 1789fa94a07fSbrendan } 1790e14bb325SJeff Bonwick #endif 179199653d4eSeschrock vd->vdev_top = vd; 179299653d4eSeschrock 179339c23413Seschrock if ((error = vdev_open(vd)) == 0 && 1794fa94a07fSbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 1795fa94a07fSbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 179639c23413Seschrock vd->vdev_guid) == 0); 179739c23413Seschrock } 179899653d4eSeschrock 179999653d4eSeschrock vdev_free(vd); 180039c23413Seschrock 1801fa94a07fSbrendan if (error && 1802fa94a07fSbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 180339c23413Seschrock goto out; 180439c23413Seschrock else 180539c23413Seschrock error = 0; 180699653d4eSeschrock } 180799653d4eSeschrock 180839c23413Seschrock out: 1809fa94a07fSbrendan sav->sav_pending = NULL; 1810fa94a07fSbrendan sav->sav_npending = 0; 181139c23413Seschrock return (error); 181299653d4eSeschrock } 181399653d4eSeschrock 1814fa94a07fSbrendan static int 
1815fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 1816fa94a07fSbrendan { 1817fa94a07fSbrendan int error; 1818fa94a07fSbrendan 1819e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1820e14bb325SJeff Bonwick 1821fa94a07fSbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 1822fa94a07fSbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 1823fa94a07fSbrendan VDEV_LABEL_SPARE)) != 0) { 1824fa94a07fSbrendan return (error); 1825fa94a07fSbrendan } 1826fa94a07fSbrendan 1827fa94a07fSbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 1828fa94a07fSbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 1829fa94a07fSbrendan VDEV_LABEL_L2CACHE)); 1830fa94a07fSbrendan } 1831fa94a07fSbrendan 1832fa94a07fSbrendan static void 1833fa94a07fSbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 1834fa94a07fSbrendan const char *config) 1835fa94a07fSbrendan { 1836fa94a07fSbrendan int i; 1837fa94a07fSbrendan 1838fa94a07fSbrendan if (sav->sav_config != NULL) { 1839fa94a07fSbrendan nvlist_t **olddevs; 1840fa94a07fSbrendan uint_t oldndevs; 1841fa94a07fSbrendan nvlist_t **newdevs; 1842fa94a07fSbrendan 1843fa94a07fSbrendan /* 1844fa94a07fSbrendan * Generate new dev list by concatentating with the 1845fa94a07fSbrendan * current dev list. 
1846fa94a07fSbrendan */ 1847fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 1848fa94a07fSbrendan &olddevs, &oldndevs) == 0); 1849fa94a07fSbrendan 1850fa94a07fSbrendan newdevs = kmem_alloc(sizeof (void *) * 1851fa94a07fSbrendan (ndevs + oldndevs), KM_SLEEP); 1852fa94a07fSbrendan for (i = 0; i < oldndevs; i++) 1853fa94a07fSbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 1854fa94a07fSbrendan KM_SLEEP) == 0); 1855fa94a07fSbrendan for (i = 0; i < ndevs; i++) 1856fa94a07fSbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 1857fa94a07fSbrendan KM_SLEEP) == 0); 1858fa94a07fSbrendan 1859fa94a07fSbrendan VERIFY(nvlist_remove(sav->sav_config, config, 1860fa94a07fSbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 1861fa94a07fSbrendan 1862fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 1863fa94a07fSbrendan config, newdevs, ndevs + oldndevs) == 0); 1864fa94a07fSbrendan for (i = 0; i < oldndevs + ndevs; i++) 1865fa94a07fSbrendan nvlist_free(newdevs[i]); 1866fa94a07fSbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 1867fa94a07fSbrendan } else { 1868fa94a07fSbrendan /* 1869fa94a07fSbrendan * Generate a new dev list. 
1870fa94a07fSbrendan */ 1871fa94a07fSbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 1872fa94a07fSbrendan KM_SLEEP) == 0); 1873fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 1874fa94a07fSbrendan devs, ndevs) == 0); 1875fa94a07fSbrendan } 1876fa94a07fSbrendan } 1877fa94a07fSbrendan 1878fa94a07fSbrendan /* 1879fa94a07fSbrendan * Stop and drop level 2 ARC devices 1880fa94a07fSbrendan */ 1881fa94a07fSbrendan void 1882fa94a07fSbrendan spa_l2cache_drop(spa_t *spa) 1883fa94a07fSbrendan { 1884fa94a07fSbrendan vdev_t *vd; 1885fa94a07fSbrendan int i; 1886fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 1887fa94a07fSbrendan 1888fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) { 1889fa94a07fSbrendan uint64_t pool; 1890fa94a07fSbrendan 1891fa94a07fSbrendan vd = sav->sav_vdevs[i]; 1892fa94a07fSbrendan ASSERT(vd != NULL); 1893fa94a07fSbrendan 1894*8ad4d6ddSJeff Bonwick if (spa_l2cache_exists(vd->vdev_guid, &pool) && 1895*8ad4d6ddSJeff Bonwick pool != 0ULL && l2arc_vdev_present(vd)) 1896fa94a07fSbrendan l2arc_remove_vdev(vd); 1897fa94a07fSbrendan if (vd->vdev_isl2cache) 1898fa94a07fSbrendan spa_l2cache_remove(vd); 1899fa94a07fSbrendan vdev_clear_stats(vd); 1900fa94a07fSbrendan (void) vdev_close(vd); 1901fa94a07fSbrendan } 1902fa94a07fSbrendan } 1903fa94a07fSbrendan 1904fa9e4066Sahrens /* 1905fa9e4066Sahrens * Pool Creation 1906fa9e4066Sahrens */ 1907fa9e4066Sahrens int 1908990b4856Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 19090a48a24eStimh const char *history_str, nvlist_t *zplprops) 1910fa9e4066Sahrens { 1911fa9e4066Sahrens spa_t *spa; 1912990b4856Slling char *altroot = NULL; 19130373e76bSbonwick vdev_t *rvd; 1914fa9e4066Sahrens dsl_pool_t *dp; 1915fa9e4066Sahrens dmu_tx_t *tx; 191699653d4eSeschrock int c, error = 0; 1917fa9e4066Sahrens uint64_t txg = TXG_INITIAL; 1918fa94a07fSbrendan nvlist_t **spares, **l2cache; 1919fa94a07fSbrendan uint_t nspares, nl2cache; 1920990b4856Slling uint64_t version; 

	/*
	 * If this pool already exists, return failure.  spa_namespace_lock is
	 * taken here and released on every return path of this function.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.  The result of the altroot lookup
	 * is deliberately ignored; altroot stays NULL if it is not found.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	spa->spa_uberblock.ub_txg = txg - 1;

	/* Reject invalid properties, undoing the add/activate above. */
	if (props && (error = spa_prop_validate(spa, props))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/* Use the requested version if one was supplied, else the default. */
	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
	    &version) != 0)
		version = SPA_VERSION;
	ASSERT(version <= SPA_VERSION);
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	if (error == 0 && !zfs_allocatable_devs(nvroot))
		error = EINVAL;

	/*
	 * Each step runs only if the previous one succeeded; the top-level
	 * vdevs are initialized only once the aux devices have validated.
	 */
	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_aux(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (c = 0; c < rvd->vdev_children; c++)
			vdev_init(rvd->vdev_child[c], txg);
		vdev_config_dirty(rvd);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	/*
	 * Get the list of level 2 cache devices, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	/* Everything created below is part of the single transaction 'tx'. */
	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	/* Newly created pools with the right version are always deflated. */
	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
		spa->spa_deflate = TRUE;
		if (zap_add(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
			cmn_err(CE_PANIC, "failed to add deflate");
		}
	}

	/*
	 * Create the deferred-free bplist object.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset,
	    1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj,
	    ZIO_COMPRESS_OFF, tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bplist");
	}

	/*
	 * Create the pool's history object, if the pool version supports it.
	 */
	if (version >= SPA_VERSION_ZPOOL_HISTORY)
		spa_history_create_obj(spa, tx);

	/*
	 * Set pool properties: start from the defaults, then apply any
	 * caller-supplied properties within the same transaction.
	 */
	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
	if (props)
		spa_sync_props(spa, props, CRED(), tx);

	dmu_tx_commit(tx);

	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	/* Log the creating command only if the pool has a history object. */
	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);

	mutex_exit(&spa_namespace_lock);

	spa->spa_minref = refcount_count(&spa->spa_refcount);

	return (0);
}

/*
 * Import the given pool into the system.  We set up the necessary spa_t and
 * then call spa_load() to do the dirty work.
 *
 * 'isroot' marks a root pool import: an existing namespace entry of the same
 * name is replaced rather than rejected, and spa_load() is not told to trust
 * the supplied config as the MOS config.  'allowfaulted' lets a pool that
 * failed to load (other than with EINVAL) stay in the namespace with the
 * supplied config instead of being removed.
 *
 * Returns 0 on success, EEXIST if a non-root pool of this name is already
 * present, or the error from loading, aux-device validation or property
 * setting.
 */
static int
spa_import_common(const char *pool, nvlist_t *config, nvlist_t *props,
    boolean_t isroot, boolean_t allowfaulted)
{
	spa_t *spa;
	char *altroot = NULL;
	int error, loaderr;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure, unless this is a
	 * root pool import (handled below).
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		if (isroot) {
			/*
			 * Remove the existing root pool from the
			 * namespace so that we can replace it with
			 * the correct config we just read in.
			 */
			ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
			spa_remove(spa);
		} else {
			mutex_exit(&spa_namespace_lock);
			return (EEXIST);
		}
	}

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	if (allowfaulted)
		spa->spa_import_faulted = B_TRUE;
	spa->spa_is_root = isroot;

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig (unless this is a root pool) because
	 * the user-supplied config is actually the one to trust when
	 * doing an import.
	 *
	 * The load result is kept separately in 'loaderr' because 'error'
	 * may be overwritten by the validation and property steps below.
	 */
	loaderr = error = spa_load(spa, config, SPA_LOAD_IMPORT, !isroot);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity anymore,
	 * and conflicts with spa_has_spare().
	 */
	if (!isroot && spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (!isroot && spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	/* Validation is skipped entirely if the load already failed. */
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	/*
	 * Properties are applied only if everything so far succeeded and the
	 * pool is writeable; a failure there is handled like any other error.
	 */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		if (loaderr != 0 && loaderr != EINVAL && allowfaulted) {
			/*
			 * If we failed to load the pool, but 'allowfaulted' is
			 * set, then manually set the config as if the config
			 * passed in was specified in the cache file.
			 */
			error = 0;
			spa->spa_import_faulted = B_FALSE;
			if (spa->spa_config == NULL)
				spa->spa_config = spa_config_generate(spa,
				    NULL, -1ULL, B_TRUE);
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_FALSE, B_TRUE);
		} else {
			spa_unload(spa);
			spa_deactivate(spa);
			spa_remove(spa);
		}
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, isroot);
	}

	spa->spa_import_faulted = B_FALSE;
	mutex_exit(&spa_namespace_lock);

	return (0);
}

#ifdef _KERNEL
/*
 * Build a "root" vdev for a top level vdev read in from a rootpool
 * device label.
 */
static void
spa_build_rootpool_config(nvlist_t *config)
{
	nvlist_t *nvtop, *nvroot;
	uint64_t pgid;

	/*
	 * Add this top-level vdev to the child array.
2265e7cbe64fSgw */ 2266e7cbe64fSgw VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) 2267e7cbe64fSgw == 0); 2268e7cbe64fSgw VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) 2269e7cbe64fSgw == 0); 2270e7cbe64fSgw 2271e7cbe64fSgw /* 2272e7cbe64fSgw * Put this pool's top-level vdevs into a root vdev. 2273e7cbe64fSgw */ 2274e7cbe64fSgw VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2275e7cbe64fSgw VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) 2276e7cbe64fSgw == 0); 2277e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 2278e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 2279e7cbe64fSgw VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 2280e7cbe64fSgw &nvtop, 1) == 0); 2281e7cbe64fSgw 2282e7cbe64fSgw /* 2283e7cbe64fSgw * Replace the existing vdev_tree with the new root vdev in 2284e7cbe64fSgw * this pool's configuration (remove the old, add the new). 2285e7cbe64fSgw */ 2286e7cbe64fSgw VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 2287e7cbe64fSgw nvlist_free(nvroot); 2288e7cbe64fSgw } 2289e7cbe64fSgw 2290e7cbe64fSgw /* 2291e7cbe64fSgw * Get the root pool information from the root disk, then import the root pool 2292e7cbe64fSgw * during the system boot up time. 
2293e7cbe64fSgw */ 2294f940fbb1SLin Ling extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 2295e7cbe64fSgw 2296051aabe6Staylor int 2297051aabe6Staylor spa_check_rootconf(char *devpath, char *devid, nvlist_t **bestconf, 2298e7cbe64fSgw uint64_t *besttxg) 2299e7cbe64fSgw { 2300e7cbe64fSgw nvlist_t *config; 2301e7cbe64fSgw uint64_t txg; 2302f940fbb1SLin Ling int error; 2303e7cbe64fSgw 2304f940fbb1SLin Ling if (error = vdev_disk_read_rootlabel(devpath, devid, &config)) 2305f940fbb1SLin Ling return (error); 2306e7cbe64fSgw 2307e7cbe64fSgw VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 2308e7cbe64fSgw 2309051aabe6Staylor if (bestconf != NULL) 2310e7cbe64fSgw *bestconf = config; 2311f940fbb1SLin Ling else 2312f940fbb1SLin Ling nvlist_free(config); 2313051aabe6Staylor *besttxg = txg; 2314051aabe6Staylor return (0); 2315e7cbe64fSgw } 2316e7cbe64fSgw 2317e7cbe64fSgw boolean_t 2318e7cbe64fSgw spa_rootdev_validate(nvlist_t *nv) 2319e7cbe64fSgw { 2320e7cbe64fSgw uint64_t ival; 2321e7cbe64fSgw 2322e7cbe64fSgw if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || 2323e7cbe64fSgw nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || 2324e7cbe64fSgw nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) 2325e7cbe64fSgw return (B_FALSE); 2326e7cbe64fSgw 2327e7cbe64fSgw return (B_TRUE); 2328e7cbe64fSgw } 2329e7cbe64fSgw 2330051aabe6Staylor 2331051aabe6Staylor /* 2332051aabe6Staylor * Given the boot device's physical path or devid, check if the device 2333051aabe6Staylor * is in a valid state. If so, return the configuration from the vdev 2334051aabe6Staylor * label. 
2335051aabe6Staylor */ 2336051aabe6Staylor int 2337051aabe6Staylor spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf) 2338051aabe6Staylor { 2339051aabe6Staylor nvlist_t *conf = NULL; 2340051aabe6Staylor uint64_t txg = 0; 2341051aabe6Staylor nvlist_t *nvtop, **child; 2342051aabe6Staylor char *type; 2343051aabe6Staylor char *bootpath = NULL; 2344051aabe6Staylor uint_t children, c; 2345051aabe6Staylor char *tmp; 2346f940fbb1SLin Ling int error; 2347051aabe6Staylor 2348051aabe6Staylor if (devpath && ((tmp = strchr(devpath, ' ')) != NULL)) 2349051aabe6Staylor *tmp = '\0'; 2350f940fbb1SLin Ling if (error = spa_check_rootconf(devpath, devid, &conf, &txg)) { 2351051aabe6Staylor cmn_err(CE_NOTE, "error reading device label"); 2352f940fbb1SLin Ling return (error); 2353051aabe6Staylor } 2354051aabe6Staylor if (txg == 0) { 2355051aabe6Staylor cmn_err(CE_NOTE, "this device is detached"); 2356051aabe6Staylor nvlist_free(conf); 2357051aabe6Staylor return (EINVAL); 2358051aabe6Staylor } 2359051aabe6Staylor 2360051aabe6Staylor VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE, 2361051aabe6Staylor &nvtop) == 0); 2362051aabe6Staylor VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0); 2363051aabe6Staylor 2364051aabe6Staylor if (strcmp(type, VDEV_TYPE_DISK) == 0) { 2365051aabe6Staylor if (spa_rootdev_validate(nvtop)) { 2366051aabe6Staylor goto out; 2367051aabe6Staylor } else { 2368051aabe6Staylor nvlist_free(conf); 2369051aabe6Staylor return (EINVAL); 2370051aabe6Staylor } 2371051aabe6Staylor } 2372051aabe6Staylor 2373051aabe6Staylor ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0); 2374051aabe6Staylor 2375051aabe6Staylor VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN, 2376051aabe6Staylor &child, &children) == 0); 2377051aabe6Staylor 2378051aabe6Staylor /* 2379051aabe6Staylor * Go thru vdevs in the mirror to see if the given device 2380051aabe6Staylor * has the most recent txg. 
Only the device with the most 2381051aabe6Staylor * recent txg has valid information and should be booted. 2382051aabe6Staylor */ 2383051aabe6Staylor for (c = 0; c < children; c++) { 2384051aabe6Staylor char *cdevid, *cpath; 2385051aabe6Staylor uint64_t tmptxg; 2386051aabe6Staylor 2387051aabe6Staylor if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH, 2388051aabe6Staylor &cpath) != 0) 2389051aabe6Staylor return (EINVAL); 2390051aabe6Staylor if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_DEVID, 2391051aabe6Staylor &cdevid) != 0) 2392051aabe6Staylor return (EINVAL); 2393f4565e39SLin Ling if ((spa_check_rootconf(cpath, cdevid, NULL, 2394f4565e39SLin Ling &tmptxg) == 0) && (tmptxg > txg)) { 2395051aabe6Staylor txg = tmptxg; 2396051aabe6Staylor VERIFY(nvlist_lookup_string(child[c], 2397051aabe6Staylor ZPOOL_CONFIG_PATH, &bootpath) == 0); 2398051aabe6Staylor } 2399051aabe6Staylor } 2400051aabe6Staylor 2401051aabe6Staylor /* Does the best device match the one we've booted from? */ 2402051aabe6Staylor if (bootpath) { 2403051aabe6Staylor cmn_err(CE_NOTE, "try booting from '%s'", bootpath); 2404051aabe6Staylor return (EINVAL); 2405051aabe6Staylor } 2406051aabe6Staylor out: 2407051aabe6Staylor *bestconf = conf; 2408051aabe6Staylor return (0); 2409051aabe6Staylor } 2410051aabe6Staylor 2411e7cbe64fSgw /* 2412e7cbe64fSgw * Import a root pool. 2413e7cbe64fSgw * 2414051aabe6Staylor * For x86. devpath_list will consist of devid and/or physpath name of 2415051aabe6Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 2416051aabe6Staylor * The GRUB "findroot" command will return the vdev we should boot. 2417e7cbe64fSgw * 2418e7cbe64fSgw * For Sparc, devpath_list consists the physpath name of the booting device 2419e7cbe64fSgw * no matter the rootpool is a single device pool or a mirrored pool. 2420e7cbe64fSgw * e.g. 
2421e7cbe64fSgw * "/pci@1f,0/ide@d/disk@0,0:a" 2422e7cbe64fSgw */ 2423e7cbe64fSgw int 2424051aabe6Staylor spa_import_rootpool(char *devpath, char *devid) 2425e7cbe64fSgw { 2426e7cbe64fSgw nvlist_t *conf = NULL; 2427e7cbe64fSgw char *pname; 2428e7cbe64fSgw int error; 2429e7cbe64fSgw 2430e7cbe64fSgw /* 2431e7cbe64fSgw * Get the vdev pathname and configuation from the most 2432e7cbe64fSgw * recently updated vdev (highest txg). 2433e7cbe64fSgw */ 2434051aabe6Staylor if (error = spa_get_rootconf(devpath, devid, &conf)) 2435e7cbe64fSgw goto msg_out; 2436e7cbe64fSgw 2437e7cbe64fSgw /* 2438e7cbe64fSgw * Add type "root" vdev to the config. 2439e7cbe64fSgw */ 2440e7cbe64fSgw spa_build_rootpool_config(conf); 2441e7cbe64fSgw 2442e7cbe64fSgw VERIFY(nvlist_lookup_string(conf, ZPOOL_CONFIG_POOL_NAME, &pname) == 0); 2443e7cbe64fSgw 2444bf82a41bSeschrock /* 2445bf82a41bSeschrock * We specify 'allowfaulted' for this to be treated like spa_open() 2446bf82a41bSeschrock * instead of spa_import(). This prevents us from marking vdevs as 2447bf82a41bSeschrock * persistently unavailable, and generates FMA ereports as if it were a 2448bf82a41bSeschrock * pool open, not import. 2449bf82a41bSeschrock */ 2450bf82a41bSeschrock error = spa_import_common(pname, conf, NULL, B_TRUE, B_TRUE); 245100504c01SLin Ling ASSERT(error != EEXIST); 2452e7cbe64fSgw 2453e7cbe64fSgw nvlist_free(conf); 2454e7cbe64fSgw return (error); 2455e7cbe64fSgw 2456e7cbe64fSgw msg_out: 2457051aabe6Staylor cmn_err(CE_NOTE, "\n" 2458e7cbe64fSgw " *************************************************** \n" 2459e7cbe64fSgw " * This device is not bootable! * \n" 2460e7cbe64fSgw " * It is either offlined or detached or faulted. * \n" 2461e7cbe64fSgw " * Please try to boot from a different device. 
* \n" 2462051aabe6Staylor " *************************************************** "); 2463e7cbe64fSgw 2464e7cbe64fSgw return (error); 2465e7cbe64fSgw } 2466e7cbe64fSgw #endif 2467e7cbe64fSgw 2468e7cbe64fSgw /* 2469e7cbe64fSgw * Import a non-root pool into the system. 2470e7cbe64fSgw */ 2471e7cbe64fSgw int 2472e7cbe64fSgw spa_import(const char *pool, nvlist_t *config, nvlist_t *props) 2473e7cbe64fSgw { 2474c5904d13Seschrock return (spa_import_common(pool, config, props, B_FALSE, B_FALSE)); 2475e7cbe64fSgw } 2476e7cbe64fSgw 2477c5904d13Seschrock int 2478c5904d13Seschrock spa_import_faulted(const char *pool, nvlist_t *config, nvlist_t *props) 2479c5904d13Seschrock { 2480c5904d13Seschrock return (spa_import_common(pool, config, props, B_FALSE, B_TRUE)); 2481c5904d13Seschrock } 2482c5904d13Seschrock 2483c5904d13Seschrock 2484fa9e4066Sahrens /* 2485fa9e4066Sahrens * This (illegal) pool name is used when temporarily importing a spa_t in order 2486fa9e4066Sahrens * to get the vdev stats associated with the imported devices. 2487fa9e4066Sahrens */ 2488fa9e4066Sahrens #define TRYIMPORT_NAME "$import" 2489fa9e4066Sahrens 2490fa9e4066Sahrens nvlist_t * 2491fa9e4066Sahrens spa_tryimport(nvlist_t *tryconfig) 2492fa9e4066Sahrens { 2493fa9e4066Sahrens nvlist_t *config = NULL; 2494fa9e4066Sahrens char *poolname; 2495fa9e4066Sahrens spa_t *spa; 2496fa9e4066Sahrens uint64_t state; 2497fa9e4066Sahrens 2498fa9e4066Sahrens if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) 2499fa9e4066Sahrens return (NULL); 2500fa9e4066Sahrens 2501fa9e4066Sahrens if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) 2502fa9e4066Sahrens return (NULL); 2503fa9e4066Sahrens 2504fa9e4066Sahrens /* 25050373e76bSbonwick * Create and initialize the spa structure. 
2506fa9e4066Sahrens */ 25070373e76bSbonwick mutex_enter(&spa_namespace_lock); 25080373e76bSbonwick spa = spa_add(TRYIMPORT_NAME, NULL); 2509*8ad4d6ddSJeff Bonwick spa_activate(spa, FREAD); 2510fa9e4066Sahrens 2511fa9e4066Sahrens /* 25120373e76bSbonwick * Pass off the heavy lifting to spa_load(). 2513ecc2d604Sbonwick * Pass TRUE for mosconfig because the user-supplied config 2514ecc2d604Sbonwick * is actually the one to trust when doing an import. 2515fa9e4066Sahrens */ 2516ecc2d604Sbonwick (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); 2517fa9e4066Sahrens 2518fa9e4066Sahrens /* 2519fa9e4066Sahrens * If 'tryconfig' was at least parsable, return the current config. 2520fa9e4066Sahrens */ 2521fa9e4066Sahrens if (spa->spa_root_vdev != NULL) { 2522fa9e4066Sahrens config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2523fa9e4066Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 2524fa9e4066Sahrens poolname) == 0); 2525fa9e4066Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 2526fa9e4066Sahrens state) == 0); 252795173954Sek VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 252895173954Sek spa->spa_uberblock.ub_timestamp) == 0); 252999653d4eSeschrock 2530e7cbe64fSgw /* 2531e7cbe64fSgw * If the bootfs property exists on this pool then we 2532e7cbe64fSgw * copy it out so that external consumers can tell which 2533e7cbe64fSgw * pools are bootable. 2534e7cbe64fSgw */ 2535e7cbe64fSgw if (spa->spa_bootfs) { 2536e7cbe64fSgw char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2537e7cbe64fSgw 2538e7cbe64fSgw /* 2539e7cbe64fSgw * We have to play games with the name since the 2540e7cbe64fSgw * pool was opened as TRYIMPORT_NAME. 
2541e7cbe64fSgw */ 2542e14bb325SJeff Bonwick if (dsl_dsobj_to_dsname(spa_name(spa), 2543e7cbe64fSgw spa->spa_bootfs, tmpname) == 0) { 2544e7cbe64fSgw char *cp; 2545e7cbe64fSgw char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2546e7cbe64fSgw 2547e7cbe64fSgw cp = strchr(tmpname, '/'); 2548e7cbe64fSgw if (cp == NULL) { 2549e7cbe64fSgw (void) strlcpy(dsname, tmpname, 2550e7cbe64fSgw MAXPATHLEN); 2551e7cbe64fSgw } else { 2552e7cbe64fSgw (void) snprintf(dsname, MAXPATHLEN, 2553e7cbe64fSgw "%s/%s", poolname, ++cp); 2554e7cbe64fSgw } 2555e7cbe64fSgw VERIFY(nvlist_add_string(config, 2556e7cbe64fSgw ZPOOL_CONFIG_BOOTFS, dsname) == 0); 2557e7cbe64fSgw kmem_free(dsname, MAXPATHLEN); 2558e7cbe64fSgw } 2559e7cbe64fSgw kmem_free(tmpname, MAXPATHLEN); 2560e7cbe64fSgw } 2561e7cbe64fSgw 256299653d4eSeschrock /* 2563fa94a07fSbrendan * Add the list of hot spares and level 2 cache devices. 256499653d4eSeschrock */ 256599653d4eSeschrock spa_add_spares(spa, config); 2566fa94a07fSbrendan spa_add_l2cache(spa, config); 2567fa9e4066Sahrens } 2568fa9e4066Sahrens 2569fa9e4066Sahrens spa_unload(spa); 2570fa9e4066Sahrens spa_deactivate(spa); 2571fa9e4066Sahrens spa_remove(spa); 2572fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2573fa9e4066Sahrens 2574fa9e4066Sahrens return (config); 2575fa9e4066Sahrens } 2576fa9e4066Sahrens 2577fa9e4066Sahrens /* 2578fa9e4066Sahrens * Pool export/destroy 2579fa9e4066Sahrens * 2580fa9e4066Sahrens * The act of destroying or exporting a pool is very simple. We make sure there 2581fa9e4066Sahrens * is no more pending I/O and any references to the pool are gone. Then, we 2582fa9e4066Sahrens * update the pool state and sync all the labels to disk, removing the 2583394ab0cbSGeorge Wilson * configuration from the cache afterwards. If the 'hardforce' flag is set, then 2584394ab0cbSGeorge Wilson * we don't sync the labels or remove the configuration cache. 
2585fa9e4066Sahrens */ 2586fa9e4066Sahrens static int 258789a89ebfSlling spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, 2588394ab0cbSGeorge Wilson boolean_t force, boolean_t hardforce) 2589fa9e4066Sahrens { 2590fa9e4066Sahrens spa_t *spa; 2591fa9e4066Sahrens 259244cd46caSbillm if (oldconfig) 259344cd46caSbillm *oldconfig = NULL; 259444cd46caSbillm 2595*8ad4d6ddSJeff Bonwick if (!(spa_mode_global & FWRITE)) 2596fa9e4066Sahrens return (EROFS); 2597fa9e4066Sahrens 2598fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 2599fa9e4066Sahrens if ((spa = spa_lookup(pool)) == NULL) { 2600fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2601fa9e4066Sahrens return (ENOENT); 2602fa9e4066Sahrens } 2603fa9e4066Sahrens 2604ea8dc4b6Seschrock /* 2605ea8dc4b6Seschrock * Put a hold on the pool, drop the namespace lock, stop async tasks, 2606ea8dc4b6Seschrock * reacquire the namespace lock, and see if we can export. 2607ea8dc4b6Seschrock */ 2608ea8dc4b6Seschrock spa_open_ref(spa, FTAG); 2609ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2610ea8dc4b6Seschrock spa_async_suspend(spa); 2611ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 2612ea8dc4b6Seschrock spa_close(spa, FTAG); 2613ea8dc4b6Seschrock 2614fa9e4066Sahrens /* 2615fa9e4066Sahrens * The pool will be in core if it's openable, 2616fa9e4066Sahrens * in which case we can modify its state. 2617fa9e4066Sahrens */ 2618fa9e4066Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 2619fa9e4066Sahrens /* 2620fa9e4066Sahrens * Objsets may be open only because they're dirty, so we 2621fa9e4066Sahrens * have to force it to sync before checking spa_refcnt. 2622fa9e4066Sahrens */ 2623fa9e4066Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 2624fa9e4066Sahrens 2625ea8dc4b6Seschrock /* 2626ea8dc4b6Seschrock * A pool cannot be exported or destroyed if there are active 2627ea8dc4b6Seschrock * references. 
If we are resetting a pool, allow references by 2628ea8dc4b6Seschrock * fault injection handlers. 2629ea8dc4b6Seschrock */ 2630ea8dc4b6Seschrock if (!spa_refcount_zero(spa) || 2631ea8dc4b6Seschrock (spa->spa_inject_ref != 0 && 2632ea8dc4b6Seschrock new_state != POOL_STATE_UNINITIALIZED)) { 2633ea8dc4b6Seschrock spa_async_resume(spa); 2634fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2635fa9e4066Sahrens return (EBUSY); 2636fa9e4066Sahrens } 2637fa9e4066Sahrens 263889a89ebfSlling /* 263989a89ebfSlling * A pool cannot be exported if it has an active shared spare. 264089a89ebfSlling * This is to prevent other pools stealing the active spare 264189a89ebfSlling * from an exported pool. At user's own will, such pool can 264289a89ebfSlling * be forcedly exported. 264389a89ebfSlling */ 264489a89ebfSlling if (!force && new_state == POOL_STATE_EXPORTED && 264589a89ebfSlling spa_has_active_shared_spare(spa)) { 264689a89ebfSlling spa_async_resume(spa); 264789a89ebfSlling mutex_exit(&spa_namespace_lock); 264889a89ebfSlling return (EXDEV); 264989a89ebfSlling } 265089a89ebfSlling 2651fa9e4066Sahrens /* 2652fa9e4066Sahrens * We want this to be reflected on every label, 2653fa9e4066Sahrens * so mark them all dirty. spa_unload() will do the 2654fa9e4066Sahrens * final sync that pushes these changes out. 
2655fa9e4066Sahrens */ 2656394ab0cbSGeorge Wilson if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { 2657e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2658ea8dc4b6Seschrock spa->spa_state = new_state; 26590373e76bSbonwick spa->spa_final_txg = spa_last_synced_txg(spa) + 1; 2660ea8dc4b6Seschrock vdev_config_dirty(spa->spa_root_vdev); 2661e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2662ea8dc4b6Seschrock } 2663fa9e4066Sahrens } 2664fa9e4066Sahrens 26653d7072f8Seschrock spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 26663d7072f8Seschrock 2667fa9e4066Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 2668fa9e4066Sahrens spa_unload(spa); 2669fa9e4066Sahrens spa_deactivate(spa); 2670fa9e4066Sahrens } 2671fa9e4066Sahrens 267244cd46caSbillm if (oldconfig && spa->spa_config) 267344cd46caSbillm VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 267444cd46caSbillm 2675ea8dc4b6Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 2676394ab0cbSGeorge Wilson if (!hardforce) 2677394ab0cbSGeorge Wilson spa_config_sync(spa, B_TRUE, B_TRUE); 2678ea8dc4b6Seschrock spa_remove(spa); 2679ea8dc4b6Seschrock } 2680fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2681fa9e4066Sahrens 2682fa9e4066Sahrens return (0); 2683fa9e4066Sahrens } 2684fa9e4066Sahrens 2685fa9e4066Sahrens /* 2686fa9e4066Sahrens * Destroy a storage pool. 2687fa9e4066Sahrens */ 2688fa9e4066Sahrens int 2689fa9e4066Sahrens spa_destroy(char *pool) 2690fa9e4066Sahrens { 2691394ab0cbSGeorge Wilson return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, 2692394ab0cbSGeorge Wilson B_FALSE, B_FALSE)); 2693fa9e4066Sahrens } 2694fa9e4066Sahrens 2695fa9e4066Sahrens /* 2696fa9e4066Sahrens * Export a storage pool. 
2697fa9e4066Sahrens */ 2698fa9e4066Sahrens int 2699394ab0cbSGeorge Wilson spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, 2700394ab0cbSGeorge Wilson boolean_t hardforce) 2701fa9e4066Sahrens { 2702394ab0cbSGeorge Wilson return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, 2703394ab0cbSGeorge Wilson force, hardforce)); 2704fa9e4066Sahrens } 2705fa9e4066Sahrens 2706ea8dc4b6Seschrock /* 2707ea8dc4b6Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 2708ea8dc4b6Seschrock * from the namespace in any way. 2709ea8dc4b6Seschrock */ 2710ea8dc4b6Seschrock int 2711ea8dc4b6Seschrock spa_reset(char *pool) 2712ea8dc4b6Seschrock { 271389a89ebfSlling return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, 2714394ab0cbSGeorge Wilson B_FALSE, B_FALSE)); 2715ea8dc4b6Seschrock } 2716ea8dc4b6Seschrock 2717fa9e4066Sahrens /* 2718fa9e4066Sahrens * ========================================================================== 2719fa9e4066Sahrens * Device manipulation 2720fa9e4066Sahrens * ========================================================================== 2721fa9e4066Sahrens */ 2722fa9e4066Sahrens 2723fa9e4066Sahrens /* 27248654d025Sperrin * Add a device to a storage pool. 
2725fa9e4066Sahrens */ 2726fa9e4066Sahrens int 2727fa9e4066Sahrens spa_vdev_add(spa_t *spa, nvlist_t *nvroot) 2728fa9e4066Sahrens { 2729fa9e4066Sahrens uint64_t txg; 2730*8ad4d6ddSJeff Bonwick int error; 2731fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 27320e34b6a7Sbonwick vdev_t *vd, *tvd; 2733fa94a07fSbrendan nvlist_t **spares, **l2cache; 2734fa94a07fSbrendan uint_t nspares, nl2cache; 2735fa9e4066Sahrens 2736fa9e4066Sahrens txg = spa_vdev_enter(spa); 2737fa9e4066Sahrens 273899653d4eSeschrock if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 273999653d4eSeschrock VDEV_ALLOC_ADD)) != 0) 274099653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, error)); 2741fa9e4066Sahrens 2742e14bb325SJeff Bonwick spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ 274399653d4eSeschrock 2744fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 2745fa94a07fSbrendan &nspares) != 0) 274699653d4eSeschrock nspares = 0; 274799653d4eSeschrock 2748fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, 2749fa94a07fSbrendan &nl2cache) != 0) 2750fa94a07fSbrendan nl2cache = 0; 2751fa94a07fSbrendan 2752e14bb325SJeff Bonwick if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) 2753fa9e4066Sahrens return (spa_vdev_exit(spa, vd, txg, EINVAL)); 2754fa9e4066Sahrens 2755e14bb325SJeff Bonwick if (vd->vdev_children != 0 && 2756e14bb325SJeff Bonwick (error = vdev_create(vd, txg, B_FALSE)) != 0) 2757e14bb325SJeff Bonwick return (spa_vdev_exit(spa, vd, txg, error)); 275899653d4eSeschrock 275939c23413Seschrock /* 2760fa94a07fSbrendan * We must validate the spares and l2cache devices after checking the 2761fa94a07fSbrendan * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 
276239c23413Seschrock */ 2763e14bb325SJeff Bonwick if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) 276439c23413Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 276539c23413Seschrock 276639c23413Seschrock /* 276739c23413Seschrock * Transfer each new top-level vdev from vd to rvd. 276839c23413Seschrock */ 2769*8ad4d6ddSJeff Bonwick for (int c = 0; c < vd->vdev_children; c++) { 277039c23413Seschrock tvd = vd->vdev_child[c]; 277139c23413Seschrock vdev_remove_child(vd, tvd); 277239c23413Seschrock tvd->vdev_id = rvd->vdev_children; 277339c23413Seschrock vdev_add_child(rvd, tvd); 277439c23413Seschrock vdev_config_dirty(tvd); 277539c23413Seschrock } 277639c23413Seschrock 277799653d4eSeschrock if (nspares != 0) { 2778fa94a07fSbrendan spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, 2779fa94a07fSbrendan ZPOOL_CONFIG_SPARES); 278099653d4eSeschrock spa_load_spares(spa); 2781fa94a07fSbrendan spa->spa_spares.sav_sync = B_TRUE; 2782fa94a07fSbrendan } 2783fa94a07fSbrendan 2784fa94a07fSbrendan if (nl2cache != 0) { 2785fa94a07fSbrendan spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, 2786fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE); 2787fa94a07fSbrendan spa_load_l2cache(spa); 2788fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 2789fa9e4066Sahrens } 2790fa9e4066Sahrens 2791fa9e4066Sahrens /* 27920e34b6a7Sbonwick * We have to be careful when adding new vdevs to an existing pool. 27930e34b6a7Sbonwick * If other threads start allocating from these vdevs before we 27940e34b6a7Sbonwick * sync the config cache, and we lose power, then upon reboot we may 27950e34b6a7Sbonwick * fail to open the pool because there are DVAs that the config cache 27960e34b6a7Sbonwick * can't translate. Therefore, we first add the vdevs without 27970e34b6a7Sbonwick * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 27980373e76bSbonwick * and then let spa_config_update() initialize the new metaslabs. 
27990e34b6a7Sbonwick * 28000e34b6a7Sbonwick * spa_load() checks for added-but-not-initialized vdevs, so that 28010e34b6a7Sbonwick * if we lose power at any point in this sequence, the remaining 28020e34b6a7Sbonwick * steps will be completed the next time we load the pool. 28030e34b6a7Sbonwick */ 28040373e76bSbonwick (void) spa_vdev_exit(spa, vd, txg, 0); 28050e34b6a7Sbonwick 28060373e76bSbonwick mutex_enter(&spa_namespace_lock); 28070373e76bSbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 28080373e76bSbonwick mutex_exit(&spa_namespace_lock); 2809fa9e4066Sahrens 28100373e76bSbonwick return (0); 2811fa9e4066Sahrens } 2812fa9e4066Sahrens 2813fa9e4066Sahrens /* 2814fa9e4066Sahrens * Attach a device to a mirror. The arguments are the path to any device 2815fa9e4066Sahrens * in the mirror, and the nvroot for the new device. If the path specifies 2816fa9e4066Sahrens * a device that is not mirrored, we automatically insert the mirror vdev. 2817fa9e4066Sahrens * 2818fa9e4066Sahrens * If 'replacing' is specified, the new device is intended to replace the 2819fa9e4066Sahrens * existing device; in this case the two devices are made into their own 28203d7072f8Seschrock * mirror using the 'replacing' vdev, which is functionally identical to 2821fa9e4066Sahrens * the mirror vdev (it actually reuses all the same ops) but has a few 2822fa9e4066Sahrens * extra rules: you can't attach to it after it's been created, and upon 2823fa9e4066Sahrens * completion of resilvering, the first disk (the one being replaced) 2824fa9e4066Sahrens * is automatically detached. 
2825fa9e4066Sahrens */ 2826fa9e4066Sahrens int 2827ea8dc4b6Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 2828fa9e4066Sahrens { 2829fa9e4066Sahrens uint64_t txg, open_txg; 2830fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 2831fa9e4066Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 283299653d4eSeschrock vdev_ops_t *pvops; 28339b3f6b42SEric Kustarz dmu_tx_t *tx; 28349b3f6b42SEric Kustarz char *oldvdpath, *newvdpath; 28359b3f6b42SEric Kustarz int newvd_isspare; 28369b3f6b42SEric Kustarz int error; 2837fa9e4066Sahrens 2838fa9e4066Sahrens txg = spa_vdev_enter(spa); 2839fa9e4066Sahrens 2840c5904d13Seschrock oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 2841fa9e4066Sahrens 2842fa9e4066Sahrens if (oldvd == NULL) 2843fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 2844fa9e4066Sahrens 28450e34b6a7Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 28460e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 28470e34b6a7Sbonwick 2848fa9e4066Sahrens pvd = oldvd->vdev_parent; 2849fa9e4066Sahrens 285099653d4eSeschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 28513d7072f8Seschrock VDEV_ALLOC_ADD)) != 0) 28523d7072f8Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 28533d7072f8Seschrock 28543d7072f8Seschrock if (newrootvd->vdev_children != 1) 2855fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 2856fa9e4066Sahrens 2857fa9e4066Sahrens newvd = newrootvd->vdev_child[0]; 2858fa9e4066Sahrens 2859fa9e4066Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 2860fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 2861fa9e4066Sahrens 286299653d4eSeschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 2863fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 2864fa9e4066Sahrens 28658654d025Sperrin /* 28668654d025Sperrin * Spares can't replace logs 28678654d025Sperrin */ 2868ee0eb9f2SEric Schrock if (oldvd->vdev_top->vdev_islog && 
newvd->vdev_isspare) 28698654d025Sperrin return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 28708654d025Sperrin 287199653d4eSeschrock if (!replacing) { 287299653d4eSeschrock /* 287399653d4eSeschrock * For attach, the only allowable parent is a mirror or the root 287499653d4eSeschrock * vdev. 287599653d4eSeschrock */ 287699653d4eSeschrock if (pvd->vdev_ops != &vdev_mirror_ops && 287799653d4eSeschrock pvd->vdev_ops != &vdev_root_ops) 287899653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 287999653d4eSeschrock 288099653d4eSeschrock pvops = &vdev_mirror_ops; 288199653d4eSeschrock } else { 288299653d4eSeschrock /* 288399653d4eSeschrock * Active hot spares can only be replaced by inactive hot 288499653d4eSeschrock * spares. 288599653d4eSeschrock */ 288699653d4eSeschrock if (pvd->vdev_ops == &vdev_spare_ops && 288799653d4eSeschrock pvd->vdev_child[1] == oldvd && 288899653d4eSeschrock !spa_has_spare(spa, newvd->vdev_guid)) 288999653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 289099653d4eSeschrock 289199653d4eSeschrock /* 289299653d4eSeschrock * If the source is a hot spare, and the parent isn't already a 289399653d4eSeschrock * spare, then we want to create a new hot spare. Otherwise, we 289439c23413Seschrock * want to create a replacing vdev. The user is not allowed to 289539c23413Seschrock * attach to a spared vdev child unless the 'isspare' state is 289639c23413Seschrock * the same (spare replaces spare, non-spare replaces 289739c23413Seschrock * non-spare). 
289899653d4eSeschrock */ 289999653d4eSeschrock if (pvd->vdev_ops == &vdev_replacing_ops) 290099653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 290139c23413Seschrock else if (pvd->vdev_ops == &vdev_spare_ops && 290239c23413Seschrock newvd->vdev_isspare != oldvd->vdev_isspare) 290339c23413Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 290499653d4eSeschrock else if (pvd->vdev_ops != &vdev_spare_ops && 290599653d4eSeschrock newvd->vdev_isspare) 290699653d4eSeschrock pvops = &vdev_spare_ops; 290799653d4eSeschrock else 290899653d4eSeschrock pvops = &vdev_replacing_ops; 290999653d4eSeschrock } 291099653d4eSeschrock 29112a79c5feSlling /* 29122a79c5feSlling * Compare the new device size with the replaceable/attachable 29132a79c5feSlling * device size. 29142a79c5feSlling */ 29152a79c5feSlling if (newvd->vdev_psize < vdev_get_rsize(oldvd)) 2916fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 2917fa9e4066Sahrens 2918ecc2d604Sbonwick /* 2919ecc2d604Sbonwick * The new device cannot have a higher alignment requirement 2920ecc2d604Sbonwick * than the top-level vdev. 2921ecc2d604Sbonwick */ 2922ecc2d604Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 2923fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 2924fa9e4066Sahrens 2925fa9e4066Sahrens /* 2926fa9e4066Sahrens * If this is an in-place replacement, update oldvd's path and devid 2927fa9e4066Sahrens * to make it distinguishable from newvd, and unopenable from now on. 
2928fa9e4066Sahrens */ 2929fa9e4066Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 2930fa9e4066Sahrens spa_strfree(oldvd->vdev_path); 2931fa9e4066Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 2932fa9e4066Sahrens KM_SLEEP); 2933fa9e4066Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 2934fa9e4066Sahrens newvd->vdev_path, "old"); 2935fa9e4066Sahrens if (oldvd->vdev_devid != NULL) { 2936fa9e4066Sahrens spa_strfree(oldvd->vdev_devid); 2937fa9e4066Sahrens oldvd->vdev_devid = NULL; 2938fa9e4066Sahrens } 2939fa9e4066Sahrens } 2940fa9e4066Sahrens 2941fa9e4066Sahrens /* 294299653d4eSeschrock * If the parent is not a mirror, or if we're replacing, insert the new 294399653d4eSeschrock * mirror/replacing/spare vdev above oldvd. 2944fa9e4066Sahrens */ 2945fa9e4066Sahrens if (pvd->vdev_ops != pvops) 2946fa9e4066Sahrens pvd = vdev_add_parent(oldvd, pvops); 2947fa9e4066Sahrens 2948fa9e4066Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 2949fa9e4066Sahrens ASSERT(pvd->vdev_ops == pvops); 2950fa9e4066Sahrens ASSERT(oldvd->vdev_parent == pvd); 2951fa9e4066Sahrens 2952fa9e4066Sahrens /* 2953fa9e4066Sahrens * Extract the new device from its root and add it to pvd. 2954fa9e4066Sahrens */ 2955fa9e4066Sahrens vdev_remove_child(newrootvd, newvd); 2956fa9e4066Sahrens newvd->vdev_id = pvd->vdev_children; 2957fa9e4066Sahrens vdev_add_child(pvd, newvd); 2958fa9e4066Sahrens 2959ea8dc4b6Seschrock /* 2960ea8dc4b6Seschrock * If newvd is smaller than oldvd, but larger than its rsize, 2961ea8dc4b6Seschrock * the addition of newvd may have decreased our parent's asize. 
2962ea8dc4b6Seschrock */ 2963ea8dc4b6Seschrock pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); 2964ea8dc4b6Seschrock 2965fa9e4066Sahrens tvd = newvd->vdev_top; 2966fa9e4066Sahrens ASSERT(pvd->vdev_top == tvd); 2967fa9e4066Sahrens ASSERT(tvd->vdev_parent == rvd); 2968fa9e4066Sahrens 2969fa9e4066Sahrens vdev_config_dirty(tvd); 2970fa9e4066Sahrens 2971fa9e4066Sahrens /* 2972fa9e4066Sahrens * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 2973fa9e4066Sahrens * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 2974fa9e4066Sahrens */ 2975fa9e4066Sahrens open_txg = txg + TXG_CONCURRENT_STATES - 1; 2976fa9e4066Sahrens 2977*8ad4d6ddSJeff Bonwick vdev_dtl_dirty(newvd, DTL_MISSING, 2978*8ad4d6ddSJeff Bonwick TXG_INITIAL, open_txg - TXG_INITIAL + 1); 2979fa9e4066Sahrens 298039c23413Seschrock if (newvd->vdev_isspare) 298139c23413Seschrock spa_spare_activate(newvd); 2982e14bb325SJeff Bonwick oldvdpath = spa_strdup(oldvd->vdev_path); 2983e14bb325SJeff Bonwick newvdpath = spa_strdup(newvd->vdev_path); 29849b3f6b42SEric Kustarz newvd_isspare = newvd->vdev_isspare; 2985ea8dc4b6Seschrock 2986fa9e4066Sahrens /* 2987fa9e4066Sahrens * Mark newvd's DTL dirty in this txg. 2988fa9e4066Sahrens */ 2989ecc2d604Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 2990fa9e4066Sahrens 2991fa9e4066Sahrens (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 2992fa9e4066Sahrens 29939b3f6b42SEric Kustarz tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 29949b3f6b42SEric Kustarz if (dmu_tx_assign(tx, TXG_WAIT) == 0) { 29959b3f6b42SEric Kustarz spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, tx, 29969b3f6b42SEric Kustarz CRED(), "%s vdev=%s %s vdev=%s", 29979b3f6b42SEric Kustarz replacing && newvd_isspare ? "spare in" : 29989b3f6b42SEric Kustarz replacing ? "replace" : "attach", newvdpath, 29999b3f6b42SEric Kustarz replacing ? 
"for" : "to", oldvdpath); 30009b3f6b42SEric Kustarz dmu_tx_commit(tx); 30019b3f6b42SEric Kustarz } else { 30029b3f6b42SEric Kustarz dmu_tx_abort(tx); 30039b3f6b42SEric Kustarz } 30049b3f6b42SEric Kustarz 30059b3f6b42SEric Kustarz spa_strfree(oldvdpath); 30069b3f6b42SEric Kustarz spa_strfree(newvdpath); 30079b3f6b42SEric Kustarz 3008fa9e4066Sahrens /* 3009088f3894Sahrens * Kick off a resilver to update newvd. 3010fa9e4066Sahrens */ 3011088f3894Sahrens VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0); 3012fa9e4066Sahrens 3013fa9e4066Sahrens return (0); 3014fa9e4066Sahrens } 3015fa9e4066Sahrens 3016fa9e4066Sahrens /* 3017fa9e4066Sahrens * Detach a device from a mirror or replacing vdev. 3018fa9e4066Sahrens * If 'replace_done' is specified, only detach if the parent 3019fa9e4066Sahrens * is a replacing vdev. 3020fa9e4066Sahrens */ 3021fa9e4066Sahrens int 3022*8ad4d6ddSJeff Bonwick spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 3023fa9e4066Sahrens { 3024fa9e4066Sahrens uint64_t txg; 3025*8ad4d6ddSJeff Bonwick int error; 3026fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 3027fa9e4066Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 302899653d4eSeschrock boolean_t unspare = B_FALSE; 302999653d4eSeschrock uint64_t unspare_guid; 3030bf82a41bSeschrock size_t len; 3031fa9e4066Sahrens 3032fa9e4066Sahrens txg = spa_vdev_enter(spa); 3033fa9e4066Sahrens 3034c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3035fa9e4066Sahrens 3036fa9e4066Sahrens if (vd == NULL) 3037fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3038fa9e4066Sahrens 30390e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 30400e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 30410e34b6a7Sbonwick 3042fa9e4066Sahrens pvd = vd->vdev_parent; 3043fa9e4066Sahrens 3044*8ad4d6ddSJeff Bonwick /* 3045*8ad4d6ddSJeff Bonwick * If the parent/child relationship is not as expected, don't do it. 
3046*8ad4d6ddSJeff Bonwick * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 3047*8ad4d6ddSJeff Bonwick * vdev that's replacing B with C. The user's intent in replacing 3048*8ad4d6ddSJeff Bonwick * is to go from M(A,B) to M(A,C). If the user decides to cancel 3049*8ad4d6ddSJeff Bonwick * the replace by detaching C, the expected behavior is to end up 3050*8ad4d6ddSJeff Bonwick * M(A,B). But suppose that right after deciding to detach C, 3051*8ad4d6ddSJeff Bonwick * the replacement of B completes. We would have M(A,C), and then 3052*8ad4d6ddSJeff Bonwick * ask to detach C, which would leave us with just A -- not what 3053*8ad4d6ddSJeff Bonwick * the user wanted. To prevent this, we make sure that the 3054*8ad4d6ddSJeff Bonwick * parent/child relationship hasn't changed -- in this example, 3055*8ad4d6ddSJeff Bonwick * that C's parent is still the replacing vdev R. 3056*8ad4d6ddSJeff Bonwick */ 3057*8ad4d6ddSJeff Bonwick if (pvd->vdev_guid != pguid && pguid != 0) 3058*8ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3059*8ad4d6ddSJeff Bonwick 3060fa9e4066Sahrens /* 3061fa9e4066Sahrens * If replace_done is specified, only remove this device if it's 306299653d4eSeschrock * the first child of a replacing vdev. For the 'spare' vdev, either 306399653d4eSeschrock * disk can be removed. 
306499653d4eSeschrock */ 306599653d4eSeschrock if (replace_done) { 306699653d4eSeschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 306799653d4eSeschrock if (vd->vdev_id != 0) 306899653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 306999653d4eSeschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 307099653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 307199653d4eSeschrock } 307299653d4eSeschrock } 307399653d4eSeschrock 307499653d4eSeschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 3075e7437265Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 3076fa9e4066Sahrens 3077fa9e4066Sahrens /* 307899653d4eSeschrock * Only mirror, replacing, and spare vdevs support detach. 3079fa9e4066Sahrens */ 3080fa9e4066Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 308199653d4eSeschrock pvd->vdev_ops != &vdev_mirror_ops && 308299653d4eSeschrock pvd->vdev_ops != &vdev_spare_ops) 3083fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3084fa9e4066Sahrens 3085fa9e4066Sahrens /* 3086*8ad4d6ddSJeff Bonwick * If this device has the only valid copy of some data, 3087*8ad4d6ddSJeff Bonwick * we cannot safely detach it. 3088fa9e4066Sahrens */ 3089*8ad4d6ddSJeff Bonwick if (vdev_dtl_required(vd)) 3090fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3091fa9e4066Sahrens 3092*8ad4d6ddSJeff Bonwick ASSERT(pvd->vdev_children >= 2); 3093fa9e4066Sahrens 3094bf82a41bSeschrock /* 3095bf82a41bSeschrock * If we are detaching the second disk from a replacing vdev, then 3096bf82a41bSeschrock * check to see if we changed the original vdev's path to have "/old" 3097bf82a41bSeschrock * at the end in spa_vdev_attach(). If so, undo that change now. 
3098bf82a41bSeschrock */ 3099bf82a41bSeschrock if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 3100bf82a41bSeschrock pvd->vdev_child[0]->vdev_path != NULL && 3101bf82a41bSeschrock pvd->vdev_child[1]->vdev_path != NULL) { 3102bf82a41bSeschrock ASSERT(pvd->vdev_child[1] == vd); 3103bf82a41bSeschrock cvd = pvd->vdev_child[0]; 3104bf82a41bSeschrock len = strlen(vd->vdev_path); 3105bf82a41bSeschrock if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 3106bf82a41bSeschrock strcmp(cvd->vdev_path + len, "/old") == 0) { 3107bf82a41bSeschrock spa_strfree(cvd->vdev_path); 3108bf82a41bSeschrock cvd->vdev_path = spa_strdup(vd->vdev_path); 3109bf82a41bSeschrock } 3110bf82a41bSeschrock } 3111bf82a41bSeschrock 311299653d4eSeschrock /* 311399653d4eSeschrock * If we are detaching the original disk from a spare, then it implies 311499653d4eSeschrock * that the spare should become a real disk, and be removed from the 311599653d4eSeschrock * active spare list for the pool. 311699653d4eSeschrock */ 311799653d4eSeschrock if (pvd->vdev_ops == &vdev_spare_ops && 3118*8ad4d6ddSJeff Bonwick vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) 311999653d4eSeschrock unspare = B_TRUE; 312099653d4eSeschrock 3121fa9e4066Sahrens /* 3122fa9e4066Sahrens * Erase the disk labels so the disk can be used for other things. 3123fa9e4066Sahrens * This must be done after all other error cases are handled, 3124fa9e4066Sahrens * but before we disembowel vd (so we can still do I/O to it). 3125fa9e4066Sahrens * But if we can't do it, don't treat the error as fatal -- 3126fa9e4066Sahrens * it may be that the unwritability of the disk is the reason 3127fa9e4066Sahrens * it's being detached! 3128fa9e4066Sahrens */ 312939c23413Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3130fa9e4066Sahrens 3131fa9e4066Sahrens /* 3132fa9e4066Sahrens * Remove vd from its parent and compact the parent's children. 
3133fa9e4066Sahrens */ 3134fa9e4066Sahrens vdev_remove_child(pvd, vd); 3135fa9e4066Sahrens vdev_compact_children(pvd); 3136fa9e4066Sahrens 3137fa9e4066Sahrens /* 3138fa9e4066Sahrens * Remember one of the remaining children so we can get tvd below. 3139fa9e4066Sahrens */ 3140fa9e4066Sahrens cvd = pvd->vdev_child[0]; 3141fa9e4066Sahrens 314299653d4eSeschrock /* 314399653d4eSeschrock * If we need to remove the remaining child from the list of hot spares, 3144*8ad4d6ddSJeff Bonwick * do it now, marking the vdev as no longer a spare in the process. 3145*8ad4d6ddSJeff Bonwick * We must do this before vdev_remove_parent(), because that can 3146*8ad4d6ddSJeff Bonwick * change the GUID if it creates a new toplevel GUID. For a similar 3147*8ad4d6ddSJeff Bonwick * reason, we must remove the spare now, in the same txg as the detach; 3148*8ad4d6ddSJeff Bonwick * otherwise someone could attach a new sibling, change the GUID, and 3149*8ad4d6ddSJeff Bonwick * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 315099653d4eSeschrock */ 315199653d4eSeschrock if (unspare) { 315299653d4eSeschrock ASSERT(cvd->vdev_isspare); 315339c23413Seschrock spa_spare_remove(cvd); 315499653d4eSeschrock unspare_guid = cvd->vdev_guid; 3155*8ad4d6ddSJeff Bonwick (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 315699653d4eSeschrock } 315799653d4eSeschrock 3158fa9e4066Sahrens /* 3159fa9e4066Sahrens * If the parent mirror/replacing vdev only has one child, 3160fa9e4066Sahrens * the parent is no longer needed. Remove it from the tree. 3161fa9e4066Sahrens */ 3162fa9e4066Sahrens if (pvd->vdev_children == 1) 3163fa9e4066Sahrens vdev_remove_parent(cvd); 3164fa9e4066Sahrens 3165fa9e4066Sahrens /* 3166fa9e4066Sahrens * We don't set tvd until now because the parent we just removed 3167fa9e4066Sahrens * may have been the previous top-level vdev. 
3168fa9e4066Sahrens */ 3169fa9e4066Sahrens tvd = cvd->vdev_top; 3170fa9e4066Sahrens ASSERT(tvd->vdev_parent == rvd); 3171fa9e4066Sahrens 3172fa9e4066Sahrens /* 317339c23413Seschrock * Reevaluate the parent vdev state. 3174fa9e4066Sahrens */ 31753d7072f8Seschrock vdev_propagate_state(cvd); 3176fa9e4066Sahrens 3177fa9e4066Sahrens /* 317839c23413Seschrock * If the device we just detached was smaller than the others, it may be 317939c23413Seschrock * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init() 318039c23413Seschrock * can't fail because the existing metaslabs are already in core, so 318139c23413Seschrock * there's nothing to read from disk. 3182fa9e4066Sahrens */ 3183ecc2d604Sbonwick VERIFY(vdev_metaslab_init(tvd, txg) == 0); 3184fa9e4066Sahrens 3185fa9e4066Sahrens vdev_config_dirty(tvd); 3186fa9e4066Sahrens 3187fa9e4066Sahrens /* 318839c23413Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 318939c23413Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 319039c23413Seschrock * But first make sure we're not on any *other* txg's DTL list, to 319139c23413Seschrock * prevent vd from being accessed after it's freed. 3192fa9e4066Sahrens */ 3193*8ad4d6ddSJeff Bonwick for (int t = 0; t < TXG_SIZE; t++) 3194fa9e4066Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 3195ecc2d604Sbonwick vd->vdev_detached = B_TRUE; 3196ecc2d604Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 3197fa9e4066Sahrens 31983d7072f8Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 31993d7072f8Seschrock 320099653d4eSeschrock error = spa_vdev_exit(spa, vd, txg, 0); 320199653d4eSeschrock 320299653d4eSeschrock /* 320339c23413Seschrock * If this was the removal of the original device in a hot spare vdev, 320439c23413Seschrock * then we want to go through and remove the device from the hot spare 320539c23413Seschrock * list of every other pool. 
320699653d4eSeschrock */ 320799653d4eSeschrock if (unspare) { 3208*8ad4d6ddSJeff Bonwick spa_t *myspa = spa; 320999653d4eSeschrock spa = NULL; 321099653d4eSeschrock mutex_enter(&spa_namespace_lock); 321199653d4eSeschrock while ((spa = spa_next(spa)) != NULL) { 321299653d4eSeschrock if (spa->spa_state != POOL_STATE_ACTIVE) 321399653d4eSeschrock continue; 3214*8ad4d6ddSJeff Bonwick if (spa == myspa) 3215*8ad4d6ddSJeff Bonwick continue; 32169af0a4dfSJeff Bonwick spa_open_ref(spa, FTAG); 32179af0a4dfSJeff Bonwick mutex_exit(&spa_namespace_lock); 321899653d4eSeschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 32199af0a4dfSJeff Bonwick mutex_enter(&spa_namespace_lock); 32209af0a4dfSJeff Bonwick spa_close(spa, FTAG); 322199653d4eSeschrock } 322299653d4eSeschrock mutex_exit(&spa_namespace_lock); 322399653d4eSeschrock } 322499653d4eSeschrock 322599653d4eSeschrock return (error); 322699653d4eSeschrock } 322799653d4eSeschrock 3228e14bb325SJeff Bonwick static nvlist_t * 3229e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 323099653d4eSeschrock { 3231e14bb325SJeff Bonwick for (int i = 0; i < count; i++) { 3232e14bb325SJeff Bonwick uint64_t guid; 323399653d4eSeschrock 3234e14bb325SJeff Bonwick VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 3235e14bb325SJeff Bonwick &guid) == 0); 323699653d4eSeschrock 3237e14bb325SJeff Bonwick if (guid == target_guid) 3238e14bb325SJeff Bonwick return (nvpp[i]); 323999653d4eSeschrock } 324099653d4eSeschrock 3241e14bb325SJeff Bonwick return (NULL); 3242fa94a07fSbrendan } 3243fa94a07fSbrendan 3244e14bb325SJeff Bonwick static void 3245e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 3246e14bb325SJeff Bonwick nvlist_t *dev_to_remove) 3247fa94a07fSbrendan { 3248e14bb325SJeff Bonwick nvlist_t **newdev = NULL; 3249fa94a07fSbrendan 3250e14bb325SJeff Bonwick if (count > 1) 3251e14bb325SJeff Bonwick newdev = kmem_alloc((count - 1) * sizeof 
(void *), KM_SLEEP); 3252fa94a07fSbrendan 3253e14bb325SJeff Bonwick for (int i = 0, j = 0; i < count; i++) { 3254e14bb325SJeff Bonwick if (dev[i] == dev_to_remove) 3255e14bb325SJeff Bonwick continue; 3256e14bb325SJeff Bonwick VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 3257fa94a07fSbrendan } 3258fa94a07fSbrendan 3259e14bb325SJeff Bonwick VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 3260e14bb325SJeff Bonwick VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 3261fa94a07fSbrendan 3262e14bb325SJeff Bonwick for (int i = 0; i < count - 1; i++) 3263e14bb325SJeff Bonwick nvlist_free(newdev[i]); 3264fa94a07fSbrendan 3265e14bb325SJeff Bonwick if (count > 1) 3266e14bb325SJeff Bonwick kmem_free(newdev, (count - 1) * sizeof (void *)); 3267fa94a07fSbrendan } 3268fa94a07fSbrendan 3269fa94a07fSbrendan /* 3270fa94a07fSbrendan * Remove a device from the pool. Currently, this supports removing only hot 3271fa94a07fSbrendan * spares and level 2 ARC devices. 
 */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	nvlist_t **spares, **l2cache, *nv;
	uint_t nspares, nl2cache;
	uint64_t txg = 0;
	int error = 0;
	/*
	 * When spa_namespace_lock is already held on entry, the
	 * spa_vdev_enter()/spa_vdev_exit() pair below is skipped and the
	 * error is returned directly. spa_vdev_detach() calls this function
	 * from between its own spa_vdev_enter() and spa_vdev_exit().
	 */
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	if (!locked)
		txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.
		 */
		if (vd == NULL || unspare) {
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = EBUSY;
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = ENOTSUP;
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = ENOENT;
	}

	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}

/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * currently spared, so we can detach it.
 *
 * The subtree rooted at 'vd' is searched children-first. Returns the leaf
 * that should be detached, or NULL if there is none. As a side effect, the
 * vdev_unspare flag is cleared on a spare vdev's first child when that
 * spare's second child is returned.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;
	int c;

	for (c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
		oldvd = vd->vdev_child[0];
		newvd = vd->vdev_child[1];

		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 */
	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
		newvd = vd->vdev_child[0];
		oldvd = vd->vdev_child[1];

		if (newvd->vdev_unspare &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    !vdev_dtl_required(oldvd)) {
			newvd->vdev_unspare = 0;
			return (oldvd);
		}
	}

	return (NULL);
}

/*
 * Detach every device that spa_vdev_resilver_done_hunt() reports, one at a
 * time, until it finds none or a detach fails.
 *
 * The tree is searched with SCL_ALL held as writer. The guids of interest
 * are captured and the lock is dropped before each spa_vdev_detach() call,
 * so the early returns below leave with the lock already released.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 *
		 * NOTE(review): the guid captured below is that of
		 * ppvd->vdev_child[1], i.e. the second child of the spare
		 * vdev -- confirm which wording is intended.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			ASSERT(ppvd->vdev_children == 2);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Update the stored path for this vdev. Dirty the vdev configuration, relying
 * on spa_vdev_enter/exit() to synchronize the labels and cache.
 *
 * Returns 0 on success, ENOENT if 'guid' matches neither a vdev nor an entry
 * in the spares config, and ENOTSUP if the matching vdev is not a leaf.
 */
int
spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
{
	vdev_t *vd;
	uint64_t txg;

	txg = spa_vdev_enter(spa);

	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) {
		/*
		 * Determine if this is a reference to a hot spare device. If
		 * it is, update the path manually as there is no associated
		 * vdev_t that can be synced to disk.
		 */
		nvlist_t **spares;
		uint_t i, nspares;

		if (spa->spa_spares.sav_config != NULL) {
			VERIFY(nvlist_lookup_nvlist_array(
			    spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
			    &spares, &nspares) == 0);
			for (i = 0; i < nspares; i++) {
				uint64_t theguid;
				VERIFY(nvlist_lookup_uint64(spares[i],
				    ZPOOL_CONFIG_GUID, &theguid) == 0);
				if (theguid == guid) {
					VERIFY(nvlist_add_string(spares[i],
					    ZPOOL_CONFIG_PATH, newpath) == 0);
					spa_load_spares(spa);
					spa->spa_spares.sav_sync = B_TRUE;
					return (spa_vdev_exit(spa, NULL, txg,
					    0));
				}
			}
		}

		return (spa_vdev_exit(spa, NULL, txg, ENOENT));
	}

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	spa_strfree(vd->vdev_path);
	vd->vdev_path = spa_strdup(newpath);

	vdev_config_dirty(vd->vdev_top);

	return (spa_vdev_exit(spa, NULL, txg, 0));
}

/*
 * ==========================================================================
 * SPA Scrubbing
 * ==========================================================================
 */

/*
 * Start or cancel a scrub or resilver of the pool, as selected by 'type'.
 *
 * The caller must not hold SCL_ALL as writer (asserted). Returns ENOTSUP
 * for a type at or beyond POOL_SCRUB_TYPES, EBUSY if a full scrub is
 * requested while a resilver is in progress, EINVAL for any other
 * unhandled type, and otherwise the result of the dsl_pool scrub call.
 */
int
spa_scrub(spa_t *spa, pool_scrub_type_t type)
{
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);

	if ((uint_t)type >= POOL_SCRUB_TYPES)
		return (ENOTSUP);

	/*
	 * If a resilver was requested, but there is no DTL on a
	 * writeable leaf device, we have nothing to do.
	 */
	if (type == POOL_SCRUB_RESILVER &&
	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
		/* Still post the "resilver done" task before reporting success. */
		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
		return (0);
	}

	if (type == POOL_SCRUB_EVERYTHING &&
	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
	    spa->spa_dsl_pool->dp_scrub_isresilver)
		return (EBUSY);

	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
	} else if (type == POOL_SCRUB_NONE) {
		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
	} else {
		return (EINVAL);
	}
}

/*
 * ==========================================================================
 * SPA async task processing
 * ==========================================================================
 */

/*
 * Walk the subtree rooted at 'vd' and, for every vdev with
 * vdev_remove_wanted set, clear the flag, set the vdev's state to
 * VDEV_STATE_REMOVED, clear it via vdev_clear() and mark its top-level
 * vdev's state dirty.
 */
static void
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = 0;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);
		vdev_clear(spa, vd);
		vdev_state_dirty(vd->vdev_top);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}

/*
 * Walk the subtree rooted at 'vd' and reopen every vdev that has
 * vdev_probe_wanted set, clearing the flag first.
 */
static void
spa_async_probe(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_probe_wanted) {
		vd->vdev_probe_wanted = 0;
		vdev_reopen(vd);	/* vdev_open() does the actual probe */
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_probe(spa, vd->vdev_child[c]);
}

/*
 * Body of the per-pool async thread created by spa_async_dispatch().
 *
 * Takes a snapshot of the pending task bits and clears them under
 * spa_async_lock, performs each requested task in the fixed order below,
 * then clears spa_async_thread, wakes any waiters on spa_async_cv and
 * exits the thread.
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		mutex_enter(&spa_namespace_lock);
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		mutex_exit(&spa_namespace_lock);
	}

	/*
	 * See if any devices need to be marked REMOVED.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}

/*
 * Prevent new async threads from being dispatched (by raising the
 * suspend count) and wait until no async thread is running.
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Undo one spa_async_suspend() by lowering the suspend count. The count
 * must be non-zero on entry (asserted).
 */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Create the async thread if there are pending tasks, async processing is
 * not suspended, no async thread already exists, and rootdir is set and
 * not read-only.
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL && !vn_is_readonly(rootdir))
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}

void
spa_async_request(spa_t *spa, int task)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
3640ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 3641fa9e4066Sahrens } 3642fa9e4066Sahrens 3643fa9e4066Sahrens /* 3644fa9e4066Sahrens * ========================================================================== 3645fa9e4066Sahrens * SPA syncing routines 3646fa9e4066Sahrens * ========================================================================== 3647fa9e4066Sahrens */ 3648fa9e4066Sahrens 3649fa9e4066Sahrens static void 3650fa9e4066Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 3651fa9e4066Sahrens { 3652fa9e4066Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 3653fa9e4066Sahrens dmu_tx_t *tx; 3654fa9e4066Sahrens blkptr_t blk; 3655fa9e4066Sahrens uint64_t itor = 0; 3656fa9e4066Sahrens zio_t *zio; 3657fa9e4066Sahrens int error; 3658fa9e4066Sahrens uint8_t c = 1; 3659fa9e4066Sahrens 3660e14bb325SJeff Bonwick zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 3661fa9e4066Sahrens 3662e14bb325SJeff Bonwick while (bplist_iterate(bpl, &itor, &blk) == 0) { 3663e14bb325SJeff Bonwick ASSERT(blk.blk_birth < txg); 3664e14bb325SJeff Bonwick zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL, 3665e14bb325SJeff Bonwick ZIO_FLAG_MUSTSUCCEED)); 3666e14bb325SJeff Bonwick } 3667fa9e4066Sahrens 3668fa9e4066Sahrens error = zio_wait(zio); 3669fa9e4066Sahrens ASSERT3U(error, ==, 0); 3670fa9e4066Sahrens 3671fa9e4066Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 3672fa9e4066Sahrens bplist_vacate(bpl, tx); 3673fa9e4066Sahrens 3674fa9e4066Sahrens /* 3675fa9e4066Sahrens * Pre-dirty the first block so we sync to convergence faster. 3676fa9e4066Sahrens * (Usually only the first block is needed.) 
3677fa9e4066Sahrens */ 3678fa9e4066Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 3679fa9e4066Sahrens dmu_tx_commit(tx); 3680fa9e4066Sahrens } 3681fa9e4066Sahrens 3682fa9e4066Sahrens static void 368399653d4eSeschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 3684fa9e4066Sahrens { 3685fa9e4066Sahrens char *packed = NULL; 3686f7991ba4STim Haley size_t bufsize; 3687fa9e4066Sahrens size_t nvsize = 0; 3688fa9e4066Sahrens dmu_buf_t *db; 3689fa9e4066Sahrens 369099653d4eSeschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 3691fa9e4066Sahrens 3692f7991ba4STim Haley /* 3693f7991ba4STim Haley * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 3694f7991ba4STim Haley * information. This avoids the dbuf_will_dirty() path and 3695f7991ba4STim Haley * saves us a pre-read to get data we don't actually care about. 3696f7991ba4STim Haley */ 3697f7991ba4STim Haley bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 3698f7991ba4STim Haley packed = kmem_alloc(bufsize, KM_SLEEP); 3699fa9e4066Sahrens 370099653d4eSeschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 3701ea8dc4b6Seschrock KM_SLEEP) == 0); 3702f7991ba4STim Haley bzero(packed + nvsize, bufsize - nvsize); 3703fa9e4066Sahrens 3704f7991ba4STim Haley dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 3705fa9e4066Sahrens 3706f7991ba4STim Haley kmem_free(packed, bufsize); 3707fa9e4066Sahrens 370899653d4eSeschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 3709fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 3710fa9e4066Sahrens *(uint64_t *)db->db_data = nvsize; 3711ea8dc4b6Seschrock dmu_buf_rele(db, FTAG); 3712fa9e4066Sahrens } 3713fa9e4066Sahrens 371499653d4eSeschrock static void 3715fa94a07fSbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 3716fa94a07fSbrendan const char *config, const char *entry) 371799653d4eSeschrock { 371899653d4eSeschrock nvlist_t *nvroot; 3719fa94a07fSbrendan 
nvlist_t **list; 372099653d4eSeschrock int i; 372199653d4eSeschrock 3722fa94a07fSbrendan if (!sav->sav_sync) 372399653d4eSeschrock return; 372499653d4eSeschrock 372599653d4eSeschrock /* 3726fa94a07fSbrendan * Update the MOS nvlist describing the list of available devices. 3727fa94a07fSbrendan * spa_validate_aux() will have already made sure this nvlist is 37283d7072f8Seschrock * valid and the vdevs are labeled appropriately. 372999653d4eSeschrock */ 3730fa94a07fSbrendan if (sav->sav_object == 0) { 3731fa94a07fSbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 3732fa94a07fSbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 3733fa94a07fSbrendan sizeof (uint64_t), tx); 373499653d4eSeschrock VERIFY(zap_update(spa->spa_meta_objset, 3735fa94a07fSbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 3736fa94a07fSbrendan &sav->sav_object, tx) == 0); 373799653d4eSeschrock } 373899653d4eSeschrock 373999653d4eSeschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 3740fa94a07fSbrendan if (sav->sav_count == 0) { 3741fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 374299653d4eSeschrock } else { 3743fa94a07fSbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 3744fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 3745fa94a07fSbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 3746fa94a07fSbrendan B_FALSE, B_FALSE, B_TRUE); 3747fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 3748fa94a07fSbrendan sav->sav_count) == 0); 3749fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 3750fa94a07fSbrendan nvlist_free(list[i]); 3751fa94a07fSbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 375299653d4eSeschrock } 375399653d4eSeschrock 3754fa94a07fSbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 375506eeb2adSek nvlist_free(nvroot); 375699653d4eSeschrock 3757fa94a07fSbrendan sav->sav_sync = B_FALSE; 375899653d4eSeschrock } 
375999653d4eSeschrock 376099653d4eSeschrock static void 376199653d4eSeschrock spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 376299653d4eSeschrock { 376399653d4eSeschrock nvlist_t *config; 376499653d4eSeschrock 3765e14bb325SJeff Bonwick if (list_is_empty(&spa->spa_config_dirty_list)) 376699653d4eSeschrock return; 376799653d4eSeschrock 3768e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 3769e14bb325SJeff Bonwick 3770e14bb325SJeff Bonwick config = spa_config_generate(spa, spa->spa_root_vdev, 3771e14bb325SJeff Bonwick dmu_tx_get_txg(tx), B_FALSE); 3772e14bb325SJeff Bonwick 3773e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 377499653d4eSeschrock 377599653d4eSeschrock if (spa->spa_config_syncing) 377699653d4eSeschrock nvlist_free(spa->spa_config_syncing); 377799653d4eSeschrock spa->spa_config_syncing = config; 377899653d4eSeschrock 377999653d4eSeschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 378099653d4eSeschrock } 378199653d4eSeschrock 3782990b4856Slling /* 3783990b4856Slling * Set zpool properties. 
3784990b4856Slling */ 3785b1b8ab34Slling static void 3786ecd6cf80Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3787b1b8ab34Slling { 3788b1b8ab34Slling spa_t *spa = arg1; 3789b1b8ab34Slling objset_t *mos = spa->spa_meta_objset; 3790990b4856Slling nvlist_t *nvp = arg2; 3791990b4856Slling nvpair_t *elem; 37923d7072f8Seschrock uint64_t intval; 3793c5904d13Seschrock char *strval; 3794990b4856Slling zpool_prop_t prop; 3795990b4856Slling const char *propname; 3796990b4856Slling zprop_type_t proptype; 3797c5904d13Seschrock spa_config_dirent_t *dp; 3798b1b8ab34Slling 3799e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 3800e14bb325SJeff Bonwick 3801990b4856Slling elem = NULL; 3802990b4856Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 3803990b4856Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 3804990b4856Slling case ZPOOL_PROP_VERSION: 3805990b4856Slling /* 3806990b4856Slling * Only set version for non-zpool-creation cases 3807990b4856Slling * (set/import). spa_create() needs special care 3808990b4856Slling * for version setting. 3809990b4856Slling */ 3810990b4856Slling if (tx->tx_txg != TXG_INITIAL) { 3811990b4856Slling VERIFY(nvpair_value_uint64(elem, 3812990b4856Slling &intval) == 0); 3813990b4856Slling ASSERT(intval <= SPA_VERSION); 3814990b4856Slling ASSERT(intval >= spa_version(spa)); 3815990b4856Slling spa->spa_uberblock.ub_version = intval; 3816990b4856Slling vdev_config_dirty(spa->spa_root_vdev); 3817990b4856Slling } 3818ecd6cf80Smarks break; 3819990b4856Slling 3820990b4856Slling case ZPOOL_PROP_ALTROOT: 3821990b4856Slling /* 3822990b4856Slling * 'altroot' is a non-persistent property. It should 3823990b4856Slling * have been set temporarily at creation or import time. 
3824990b4856Slling */ 3825990b4856Slling ASSERT(spa->spa_root != NULL); 3826b1b8ab34Slling break; 38273d7072f8Seschrock 38282f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 3829990b4856Slling /* 38302f8aaab3Seschrock * 'cachefile' is a non-persistent property, but note 38312f8aaab3Seschrock * an async request that the config cache needs to be 38322f8aaab3Seschrock * udpated. 3833990b4856Slling */ 38342f8aaab3Seschrock VERIFY(nvpair_value_string(elem, &strval) == 0); 3835c5904d13Seschrock 3836e14bb325SJeff Bonwick dp = kmem_alloc(sizeof (spa_config_dirent_t), KM_SLEEP); 3837c5904d13Seschrock 3838c5904d13Seschrock if (strval[0] == '\0') 3839c5904d13Seschrock dp->scd_path = spa_strdup(spa_config_path); 3840c5904d13Seschrock else if (strcmp(strval, "none") == 0) 3841c5904d13Seschrock dp->scd_path = NULL; 3842c5904d13Seschrock else 3843c5904d13Seschrock dp->scd_path = spa_strdup(strval); 3844c5904d13Seschrock 3845c5904d13Seschrock list_insert_head(&spa->spa_config_list, dp); 38462f8aaab3Seschrock spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 38473d7072f8Seschrock break; 3848990b4856Slling default: 3849990b4856Slling /* 3850990b4856Slling * Set pool property values in the poolprops mos object. 
3851990b4856Slling */ 3852990b4856Slling if (spa->spa_pool_props_object == 0) { 3853990b4856Slling objset_t *mos = spa->spa_meta_objset; 3854990b4856Slling 3855990b4856Slling VERIFY((spa->spa_pool_props_object = 3856990b4856Slling zap_create(mos, DMU_OT_POOL_PROPS, 3857990b4856Slling DMU_OT_NONE, 0, tx)) > 0); 3858990b4856Slling 3859990b4856Slling VERIFY(zap_update(mos, 3860990b4856Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 3861990b4856Slling 8, 1, &spa->spa_pool_props_object, tx) 3862990b4856Slling == 0); 3863990b4856Slling } 3864990b4856Slling 3865990b4856Slling /* normalize the property name */ 3866990b4856Slling propname = zpool_prop_to_name(prop); 3867990b4856Slling proptype = zpool_prop_get_type(prop); 3868990b4856Slling 3869990b4856Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 3870990b4856Slling ASSERT(proptype == PROP_TYPE_STRING); 3871990b4856Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 3872990b4856Slling VERIFY(zap_update(mos, 3873990b4856Slling spa->spa_pool_props_object, propname, 3874990b4856Slling 1, strlen(strval) + 1, strval, tx) == 0); 3875990b4856Slling 3876990b4856Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 3877990b4856Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 3878990b4856Slling 3879990b4856Slling if (proptype == PROP_TYPE_INDEX) { 3880990b4856Slling const char *unused; 3881990b4856Slling VERIFY(zpool_prop_index_to_string( 3882990b4856Slling prop, intval, &unused) == 0); 3883990b4856Slling } 3884990b4856Slling VERIFY(zap_update(mos, 3885990b4856Slling spa->spa_pool_props_object, propname, 3886990b4856Slling 8, 1, &intval, tx) == 0); 3887990b4856Slling } else { 3888990b4856Slling ASSERT(0); /* not allowed */ 3889990b4856Slling } 3890990b4856Slling 38910a4e9518Sgw switch (prop) { 38920a4e9518Sgw case ZPOOL_PROP_DELEGATION: 3893990b4856Slling spa->spa_delegation = intval; 38940a4e9518Sgw break; 38950a4e9518Sgw case ZPOOL_PROP_BOOTFS: 3896990b4856Slling spa->spa_bootfs = intval; 38970a4e9518Sgw 
break; 38980a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 38990a4e9518Sgw spa->spa_failmode = intval; 39000a4e9518Sgw break; 39010a4e9518Sgw default: 39020a4e9518Sgw break; 39030a4e9518Sgw } 3904990b4856Slling } 3905990b4856Slling 3906990b4856Slling /* log internal history if this is not a zpool create */ 3907990b4856Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 3908990b4856Slling tx->tx_txg != TXG_INITIAL) { 3909990b4856Slling spa_history_internal_log(LOG_POOL_PROPSET, 3910990b4856Slling spa, tx, cr, "%s %lld %s", 3911e14bb325SJeff Bonwick nvpair_name(elem), intval, spa_name(spa)); 3912b1b8ab34Slling } 3913b1b8ab34Slling } 3914e14bb325SJeff Bonwick 3915e14bb325SJeff Bonwick mutex_exit(&spa->spa_props_lock); 3916b1b8ab34Slling } 3917b1b8ab34Slling 3918fa9e4066Sahrens /* 3919fa9e4066Sahrens * Sync the specified transaction group. New blocks may be dirtied as 3920fa9e4066Sahrens * part of the process, so we iterate until it converges. 3921fa9e4066Sahrens */ 3922fa9e4066Sahrens void 3923fa9e4066Sahrens spa_sync(spa_t *spa, uint64_t txg) 3924fa9e4066Sahrens { 3925fa9e4066Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 3926fa9e4066Sahrens objset_t *mos = spa->spa_meta_objset; 3927fa9e4066Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 39280373e76bSbonwick vdev_t *rvd = spa->spa_root_vdev; 3929fa9e4066Sahrens vdev_t *vd; 3930fa9e4066Sahrens dmu_tx_t *tx; 3931fa9e4066Sahrens int dirty_vdevs; 3932e14bb325SJeff Bonwick int error; 3933fa9e4066Sahrens 3934fa9e4066Sahrens /* 3935fa9e4066Sahrens * Lock out configuration changes. 3936fa9e4066Sahrens */ 3937e14bb325SJeff Bonwick spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3938fa9e4066Sahrens 3939fa9e4066Sahrens spa->spa_syncing_txg = txg; 3940fa9e4066Sahrens spa->spa_sync_pass = 0; 3941fa9e4066Sahrens 3942e14bb325SJeff Bonwick /* 3943e14bb325SJeff Bonwick * If there are any pending vdev state changes, convert them 3944e14bb325SJeff Bonwick * into config changes that go out with this transaction group. 
3945e14bb325SJeff Bonwick */ 3946e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 3947*8ad4d6ddSJeff Bonwick while (list_head(&spa->spa_state_dirty_list) != NULL) { 3948*8ad4d6ddSJeff Bonwick /* 3949*8ad4d6ddSJeff Bonwick * We need the write lock here because, for aux vdevs, 3950*8ad4d6ddSJeff Bonwick * calling vdev_config_dirty() modifies sav_config. 3951*8ad4d6ddSJeff Bonwick * This is ugly and will become unnecessary when we 3952*8ad4d6ddSJeff Bonwick * eliminate the aux vdev wart by integrating all vdevs 3953*8ad4d6ddSJeff Bonwick * into the root vdev tree. 3954*8ad4d6ddSJeff Bonwick */ 3955*8ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 3956*8ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); 3957*8ad4d6ddSJeff Bonwick while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 3958*8ad4d6ddSJeff Bonwick vdev_state_clean(vd); 3959*8ad4d6ddSJeff Bonwick vdev_config_dirty(vd); 3960*8ad4d6ddSJeff Bonwick } 3961*8ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 3962*8ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); 3963e14bb325SJeff Bonwick } 3964e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 3965e14bb325SJeff Bonwick 3966ea8dc4b6Seschrock VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); 3967fa9e4066Sahrens 396899653d4eSeschrock tx = dmu_tx_create_assigned(dp, txg); 396999653d4eSeschrock 397099653d4eSeschrock /* 3971e7437265Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 397299653d4eSeschrock * set spa_deflate if we have no raid-z vdevs. 
397399653d4eSeschrock */ 3974e7437265Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 3975e7437265Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 397699653d4eSeschrock int i; 397799653d4eSeschrock 397899653d4eSeschrock for (i = 0; i < rvd->vdev_children; i++) { 397999653d4eSeschrock vd = rvd->vdev_child[i]; 398099653d4eSeschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 398199653d4eSeschrock break; 398299653d4eSeschrock } 398399653d4eSeschrock if (i == rvd->vdev_children) { 398499653d4eSeschrock spa->spa_deflate = TRUE; 398599653d4eSeschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 398699653d4eSeschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 398799653d4eSeschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 398899653d4eSeschrock } 398999653d4eSeschrock } 399099653d4eSeschrock 3991088f3894Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 3992088f3894Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 3993088f3894Sahrens dsl_pool_create_origin(dp, tx); 3994088f3894Sahrens 3995088f3894Sahrens /* Keeping the origin open increases spa_minref */ 3996088f3894Sahrens spa->spa_minref += 3; 3997088f3894Sahrens } 3998088f3894Sahrens 3999088f3894Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 4000088f3894Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 4001088f3894Sahrens dsl_pool_upgrade_clones(dp, tx); 4002088f3894Sahrens } 4003088f3894Sahrens 4004fa9e4066Sahrens /* 4005fa9e4066Sahrens * If anything has changed in this txg, push the deferred frees 4006fa9e4066Sahrens * from the previous txg. If not, leave them alone so that we 4007fa9e4066Sahrens * don't generate work on an otherwise idle system. 
4008fa9e4066Sahrens */ 4009fa9e4066Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 40101615a317Sek !txg_list_empty(&dp->dp_dirty_dirs, txg) || 40111615a317Sek !txg_list_empty(&dp->dp_sync_tasks, txg)) 4012fa9e4066Sahrens spa_sync_deferred_frees(spa, txg); 4013fa9e4066Sahrens 4014fa9e4066Sahrens /* 4015fa9e4066Sahrens * Iterate to convergence. 4016fa9e4066Sahrens */ 4017fa9e4066Sahrens do { 4018fa9e4066Sahrens spa->spa_sync_pass++; 4019fa9e4066Sahrens 4020fa9e4066Sahrens spa_sync_config_object(spa, tx); 4021fa94a07fSbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 4022fa94a07fSbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 4023fa94a07fSbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 4024fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 4025ea8dc4b6Seschrock spa_errlog_sync(spa, txg); 4026fa9e4066Sahrens dsl_pool_sync(dp, txg); 4027fa9e4066Sahrens 4028fa9e4066Sahrens dirty_vdevs = 0; 4029fa9e4066Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { 4030fa9e4066Sahrens vdev_sync(vd, txg); 4031fa9e4066Sahrens dirty_vdevs++; 4032fa9e4066Sahrens } 4033fa9e4066Sahrens 4034fa9e4066Sahrens bplist_sync(bpl, tx); 4035fa9e4066Sahrens } while (dirty_vdevs); 4036fa9e4066Sahrens 4037fa9e4066Sahrens bplist_close(bpl); 4038fa9e4066Sahrens 4039fa9e4066Sahrens dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); 4040fa9e4066Sahrens 4041fa9e4066Sahrens /* 4042fa9e4066Sahrens * Rewrite the vdev configuration (which includes the uberblock) 4043fa9e4066Sahrens * to commit the transaction group. 40440373e76bSbonwick * 404517f17c2dSbonwick * If there are no dirty vdevs, we sync the uberblock to a few 404617f17c2dSbonwick * random top-level vdevs that are known to be visible in the 4047e14bb325SJeff Bonwick * config cache (see spa_vdev_add() for a complete description). 4048e14bb325SJeff Bonwick * If there *are* dirty vdevs, sync the uberblock to all vdevs. 
40490373e76bSbonwick */ 4050e14bb325SJeff Bonwick for (;;) { 4051e14bb325SJeff Bonwick /* 4052e14bb325SJeff Bonwick * We hold SCL_STATE to prevent vdev open/close/etc. 4053e14bb325SJeff Bonwick * while we're attempting to write the vdev labels. 4054e14bb325SJeff Bonwick */ 4055e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4056e14bb325SJeff Bonwick 4057e14bb325SJeff Bonwick if (list_is_empty(&spa->spa_config_dirty_list)) { 4058e14bb325SJeff Bonwick vdev_t *svd[SPA_DVAS_PER_BP]; 4059e14bb325SJeff Bonwick int svdcount = 0; 4060e14bb325SJeff Bonwick int children = rvd->vdev_children; 4061e14bb325SJeff Bonwick int c0 = spa_get_random(children); 4062e14bb325SJeff Bonwick int c; 4063e14bb325SJeff Bonwick 4064e14bb325SJeff Bonwick for (c = 0; c < children; c++) { 4065e14bb325SJeff Bonwick vd = rvd->vdev_child[(c0 + c) % children]; 4066e14bb325SJeff Bonwick if (vd->vdev_ms_array == 0 || vd->vdev_islog) 4067e14bb325SJeff Bonwick continue; 4068e14bb325SJeff Bonwick svd[svdcount++] = vd; 4069e14bb325SJeff Bonwick if (svdcount == SPA_DVAS_PER_BP) 4070e14bb325SJeff Bonwick break; 4071e14bb325SJeff Bonwick } 4072e14bb325SJeff Bonwick error = vdev_config_sync(svd, svdcount, txg); 4073e14bb325SJeff Bonwick } else { 4074e14bb325SJeff Bonwick error = vdev_config_sync(rvd->vdev_child, 4075e14bb325SJeff Bonwick rvd->vdev_children, txg); 40760373e76bSbonwick } 4077e14bb325SJeff Bonwick 4078e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 4079e14bb325SJeff Bonwick 4080e14bb325SJeff Bonwick if (error == 0) 4081e14bb325SJeff Bonwick break; 4082e14bb325SJeff Bonwick zio_suspend(spa, NULL); 4083e14bb325SJeff Bonwick zio_resume_wait(spa); 40840373e76bSbonwick } 408599653d4eSeschrock dmu_tx_commit(tx); 408699653d4eSeschrock 40870373e76bSbonwick /* 40880373e76bSbonwick * Clear the dirty config list. 
4089fa9e4066Sahrens */ 4090e14bb325SJeff Bonwick while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 40910373e76bSbonwick vdev_config_clean(vd); 40920373e76bSbonwick 40930373e76bSbonwick /* 40940373e76bSbonwick * Now that the new config has synced transactionally, 40950373e76bSbonwick * let it become visible to the config cache. 40960373e76bSbonwick */ 40970373e76bSbonwick if (spa->spa_config_syncing != NULL) { 40980373e76bSbonwick spa_config_set(spa, spa->spa_config_syncing); 40990373e76bSbonwick spa->spa_config_txg = txg; 41000373e76bSbonwick spa->spa_config_syncing = NULL; 41010373e76bSbonwick } 4102fa9e4066Sahrens 4103fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 4104fa9e4066Sahrens 4105fa9e4066Sahrens /* 4106fa9e4066Sahrens * Clean up the ZIL records for the synced txg. 4107fa9e4066Sahrens */ 4108fa9e4066Sahrens dsl_pool_zil_clean(dp); 4109fa9e4066Sahrens 4110fa9e4066Sahrens /* 4111fa9e4066Sahrens * Update usable space statistics. 4112fa9e4066Sahrens */ 4113fa9e4066Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4114fa9e4066Sahrens vdev_sync_done(vd, txg); 4115fa9e4066Sahrens 4116fa9e4066Sahrens /* 4117fa9e4066Sahrens * It had better be the case that we didn't dirty anything 411899653d4eSeschrock * since vdev_config_sync(). 4119fa9e4066Sahrens */ 4120fa9e4066Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4121fa9e4066Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4122fa9e4066Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 4123fa9e4066Sahrens ASSERT(bpl->bpl_queue == NULL); 4124fa9e4066Sahrens 4125e14bb325SJeff Bonwick spa_config_exit(spa, SCL_CONFIG, FTAG); 4126ea8dc4b6Seschrock 4127ea8dc4b6Seschrock /* 4128ea8dc4b6Seschrock * If any async tasks have been requested, kick them off. 4129ea8dc4b6Seschrock */ 4130ea8dc4b6Seschrock spa_async_dispatch(spa); 4131fa9e4066Sahrens } 4132fa9e4066Sahrens 4133fa9e4066Sahrens /* 4134fa9e4066Sahrens * Sync all pools. 
We don't want to hold the namespace lock across these 4135fa9e4066Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 4136fa9e4066Sahrens * sync. 4137fa9e4066Sahrens */ 4138fa9e4066Sahrens void 4139fa9e4066Sahrens spa_sync_allpools(void) 4140fa9e4066Sahrens { 4141fa9e4066Sahrens spa_t *spa = NULL; 4142fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4143fa9e4066Sahrens while ((spa = spa_next(spa)) != NULL) { 4144e14bb325SJeff Bonwick if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa)) 4145fa9e4066Sahrens continue; 4146fa9e4066Sahrens spa_open_ref(spa, FTAG); 4147fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4148fa9e4066Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 4149fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4150fa9e4066Sahrens spa_close(spa, FTAG); 4151fa9e4066Sahrens } 4152fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4153fa9e4066Sahrens } 4154fa9e4066Sahrens 4155fa9e4066Sahrens /* 4156fa9e4066Sahrens * ========================================================================== 4157fa9e4066Sahrens * Miscellaneous routines 4158fa9e4066Sahrens * ========================================================================== 4159fa9e4066Sahrens */ 4160fa9e4066Sahrens 4161fa9e4066Sahrens /* 4162fa9e4066Sahrens * Remove all pools in the system. 4163fa9e4066Sahrens */ 4164fa9e4066Sahrens void 4165fa9e4066Sahrens spa_evict_all(void) 4166fa9e4066Sahrens { 4167fa9e4066Sahrens spa_t *spa; 4168fa9e4066Sahrens 4169fa9e4066Sahrens /* 4170fa9e4066Sahrens * Remove all cached state. All pools should be closed now, 4171fa9e4066Sahrens * so every spa in the AVL tree should be unreferenced. 4172fa9e4066Sahrens */ 4173fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4174fa9e4066Sahrens while ((spa = spa_next(NULL)) != NULL) { 4175fa9e4066Sahrens /* 4176ea8dc4b6Seschrock * Stop async tasks. 
The async thread may need to detach 4177ea8dc4b6Seschrock * a device that's been replaced, which requires grabbing 4178ea8dc4b6Seschrock * spa_namespace_lock, so we must drop it here. 4179fa9e4066Sahrens */ 4180fa9e4066Sahrens spa_open_ref(spa, FTAG); 4181fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4182ea8dc4b6Seschrock spa_async_suspend(spa); 4183fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4184fa9e4066Sahrens spa_close(spa, FTAG); 4185fa9e4066Sahrens 4186fa9e4066Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4187fa9e4066Sahrens spa_unload(spa); 4188fa9e4066Sahrens spa_deactivate(spa); 4189fa9e4066Sahrens } 4190fa9e4066Sahrens spa_remove(spa); 4191fa9e4066Sahrens } 4192fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4193fa9e4066Sahrens } 4194ea8dc4b6Seschrock 4195ea8dc4b6Seschrock vdev_t * 4196c5904d13Seschrock spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t l2cache) 4197ea8dc4b6Seschrock { 4198c5904d13Seschrock vdev_t *vd; 4199c5904d13Seschrock int i; 4200c5904d13Seschrock 4201c5904d13Seschrock if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) 4202c5904d13Seschrock return (vd); 4203c5904d13Seschrock 4204c5904d13Seschrock if (l2cache) { 4205c5904d13Seschrock for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 4206c5904d13Seschrock vd = spa->spa_l2cache.sav_vdevs[i]; 4207c5904d13Seschrock if (vd->vdev_guid == guid) 4208c5904d13Seschrock return (vd); 4209c5904d13Seschrock } 4210c5904d13Seschrock } 4211c5904d13Seschrock 4212c5904d13Seschrock return (NULL); 4213ea8dc4b6Seschrock } 4214eaca9bbdSeschrock 4215eaca9bbdSeschrock void 4216990b4856Slling spa_upgrade(spa_t *spa, uint64_t version) 4217eaca9bbdSeschrock { 4218e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4219eaca9bbdSeschrock 4220eaca9bbdSeschrock /* 4221eaca9bbdSeschrock * This should only be called for a non-faulted pool, and since a 4222eaca9bbdSeschrock * future version would result in an unopenable pool, this shouldn't be 
4223eaca9bbdSeschrock * possible. 4224eaca9bbdSeschrock */ 4225e7437265Sahrens ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); 4226990b4856Slling ASSERT(version >= spa->spa_uberblock.ub_version); 4227eaca9bbdSeschrock 4228990b4856Slling spa->spa_uberblock.ub_version = version; 4229eaca9bbdSeschrock vdev_config_dirty(spa->spa_root_vdev); 4230eaca9bbdSeschrock 4231e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 423299653d4eSeschrock 423399653d4eSeschrock txg_wait_synced(spa_get_dsl(spa), 0); 423499653d4eSeschrock } 423599653d4eSeschrock 423699653d4eSeschrock boolean_t 423799653d4eSeschrock spa_has_spare(spa_t *spa, uint64_t guid) 423899653d4eSeschrock { 423999653d4eSeschrock int i; 424039c23413Seschrock uint64_t spareguid; 4241fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_spares; 424299653d4eSeschrock 4243fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4244fa94a07fSbrendan if (sav->sav_vdevs[i]->vdev_guid == guid) 424599653d4eSeschrock return (B_TRUE); 424699653d4eSeschrock 4247fa94a07fSbrendan for (i = 0; i < sav->sav_npending; i++) { 4248fa94a07fSbrendan if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 4249fa94a07fSbrendan &spareguid) == 0 && spareguid == guid) 425039c23413Seschrock return (B_TRUE); 425139c23413Seschrock } 425239c23413Seschrock 425399653d4eSeschrock return (B_FALSE); 4254eaca9bbdSeschrock } 4255b1b8ab34Slling 425689a89ebfSlling /* 425789a89ebfSlling * Check if a pool has an active shared spare device. 
425889a89ebfSlling * Note: reference count of an active spare is 2, as a spare and as a replace 425989a89ebfSlling */ 426089a89ebfSlling static boolean_t 426189a89ebfSlling spa_has_active_shared_spare(spa_t *spa) 426289a89ebfSlling { 426389a89ebfSlling int i, refcnt; 426489a89ebfSlling uint64_t pool; 426589a89ebfSlling spa_aux_vdev_t *sav = &spa->spa_spares; 426689a89ebfSlling 426789a89ebfSlling for (i = 0; i < sav->sav_count; i++) { 426889a89ebfSlling if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 426989a89ebfSlling &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 427089a89ebfSlling refcnt > 2) 427189a89ebfSlling return (B_TRUE); 427289a89ebfSlling } 427389a89ebfSlling 427489a89ebfSlling return (B_FALSE); 427589a89ebfSlling } 427689a89ebfSlling 42773d7072f8Seschrock /* 42783d7072f8Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 42793d7072f8Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 42803d7072f8Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 42813d7072f8Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 42823d7072f8Seschrock * or zdb as real changes. 
42833d7072f8Seschrock */ 42843d7072f8Seschrock void 42853d7072f8Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 42863d7072f8Seschrock { 42873d7072f8Seschrock #ifdef _KERNEL 42883d7072f8Seschrock sysevent_t *ev; 42893d7072f8Seschrock sysevent_attr_list_t *attr = NULL; 42903d7072f8Seschrock sysevent_value_t value; 42913d7072f8Seschrock sysevent_id_t eid; 42923d7072f8Seschrock 42933d7072f8Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 42943d7072f8Seschrock SE_SLEEP); 42953d7072f8Seschrock 42963d7072f8Seschrock value.value_type = SE_DATA_TYPE_STRING; 42973d7072f8Seschrock value.value.sv_string = spa_name(spa); 42983d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 42993d7072f8Seschrock goto done; 43003d7072f8Seschrock 43013d7072f8Seschrock value.value_type = SE_DATA_TYPE_UINT64; 43023d7072f8Seschrock value.value.sv_uint64 = spa_guid(spa); 43033d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 43043d7072f8Seschrock goto done; 43053d7072f8Seschrock 43063d7072f8Seschrock if (vd) { 43073d7072f8Seschrock value.value_type = SE_DATA_TYPE_UINT64; 43083d7072f8Seschrock value.value.sv_uint64 = vd->vdev_guid; 43093d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 43103d7072f8Seschrock SE_SLEEP) != 0) 43113d7072f8Seschrock goto done; 43123d7072f8Seschrock 43133d7072f8Seschrock if (vd->vdev_path) { 43143d7072f8Seschrock value.value_type = SE_DATA_TYPE_STRING; 43153d7072f8Seschrock value.value.sv_string = vd->vdev_path; 43163d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 43173d7072f8Seschrock &value, SE_SLEEP) != 0) 43183d7072f8Seschrock goto done; 43193d7072f8Seschrock } 43203d7072f8Seschrock } 43213d7072f8Seschrock 4322b01c3b58Seschrock if (sysevent_attach_attributes(ev, attr) != 0) 4323b01c3b58Seschrock goto done; 4324b01c3b58Seschrock attr = NULL; 4325b01c3b58Seschrock 43263d7072f8Seschrock (void) log_sysevent(ev, 
SE_SLEEP, &eid); 43273d7072f8Seschrock 43283d7072f8Seschrock done: 43293d7072f8Seschrock if (attr) 43303d7072f8Seschrock sysevent_free_attr(attr); 43313d7072f8Seschrock sysevent_free(ev); 43323d7072f8Seschrock #endif 43333d7072f8Seschrock } 4334