1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 2199653d4eSeschrock 22fa9e4066Sahrens /* 23379c004dSEric Schrock * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24fa9e4066Sahrens * Use is subject to license terms. 25fa9e4066Sahrens */ 26fa9e4066Sahrens 27fa9e4066Sahrens /* 28fa9e4066Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29fa9e4066Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30fa9e4066Sahrens * pool. 
31fa9e4066Sahrens */ 32fa9e4066Sahrens 33fa9e4066Sahrens #include <sys/zfs_context.h> 34ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h> 35fa9e4066Sahrens #include <sys/spa_impl.h> 36fa9e4066Sahrens #include <sys/zio.h> 37fa9e4066Sahrens #include <sys/zio_checksum.h> 38fa9e4066Sahrens #include <sys/zio_compress.h> 39fa9e4066Sahrens #include <sys/dmu.h> 40fa9e4066Sahrens #include <sys/dmu_tx.h> 41fa9e4066Sahrens #include <sys/zap.h> 42fa9e4066Sahrens #include <sys/zil.h> 43fa9e4066Sahrens #include <sys/vdev_impl.h> 44fa9e4066Sahrens #include <sys/metaslab.h> 45*88ecc943SGeorge Wilson #include <sys/metaslab_impl.h> 46fa9e4066Sahrens #include <sys/uberblock_impl.h> 47fa9e4066Sahrens #include <sys/txg.h> 48fa9e4066Sahrens #include <sys/avl.h> 49fa9e4066Sahrens #include <sys/dmu_traverse.h> 50b1b8ab34Slling #include <sys/dmu_objset.h> 51fa9e4066Sahrens #include <sys/unique.h> 52fa9e4066Sahrens #include <sys/dsl_pool.h> 53b1b8ab34Slling #include <sys/dsl_dataset.h> 54fa9e4066Sahrens #include <sys/dsl_dir.h> 55fa9e4066Sahrens #include <sys/dsl_prop.h> 56b1b8ab34Slling #include <sys/dsl_synctask.h> 57fa9e4066Sahrens #include <sys/fs/zfs.h> 58fa94a07fSbrendan #include <sys/arc.h> 59fa9e4066Sahrens #include <sys/callb.h> 6095173954Sek #include <sys/systeminfo.h> 6195173954Sek #include <sys/sunddi.h> 62e7cbe64fSgw #include <sys/spa_boot.h> 63573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h> 64fa9e4066Sahrens 655679c89fSjv #ifdef _KERNEL 665679c89fSjv #include <sys/zone.h> 675679c89fSjv #endif /* _KERNEL */ 685679c89fSjv 69990b4856Slling #include "zfs_prop.h" 70b7b97454Sperrin #include "zfs_comutil.h" 71990b4856Slling 722e0c549eSJonathan Adams enum zti_modes { 732e0c549eSJonathan Adams zti_mode_fixed, /* value is # of threads (min 1) */ 742e0c549eSJonathan Adams zti_mode_online_percent, /* value is % of online CPUs */ 752e0c549eSJonathan Adams zti_mode_tune, /* fill from zio_taskq_tune_* */ 762e0c549eSJonathan Adams zti_nmodes 77e14bb325SJeff Bonwick }; 78416e0cd8Sek 
792e0c549eSJonathan Adams #define ZTI_THREAD_FIX(n) { zti_mode_fixed, (n) } 802e0c549eSJonathan Adams #define ZTI_THREAD_PCT(n) { zti_mode_online_percent, (n) } 812e0c549eSJonathan Adams #define ZTI_THREAD_TUNE { zti_mode_tune, 0 } 822e0c549eSJonathan Adams 832e0c549eSJonathan Adams #define ZTI_THREAD_ONE ZTI_THREAD_FIX(1) 842e0c549eSJonathan Adams 852e0c549eSJonathan Adams typedef struct zio_taskq_info { 862e0c549eSJonathan Adams const char *zti_name; 872e0c549eSJonathan Adams struct { 882e0c549eSJonathan Adams enum zti_modes zti_mode; 892e0c549eSJonathan Adams uint_t zti_value; 902e0c549eSJonathan Adams } zti_nthreads[ZIO_TASKQ_TYPES]; 912e0c549eSJonathan Adams } zio_taskq_info_t; 922e0c549eSJonathan Adams 932e0c549eSJonathan Adams static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 942e0c549eSJonathan Adams "issue", "intr" 952e0c549eSJonathan Adams }; 962e0c549eSJonathan Adams 972e0c549eSJonathan Adams const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = { 982e0c549eSJonathan Adams /* ISSUE INTR */ 992e0c549eSJonathan Adams { "spa_zio_null", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1002e0c549eSJonathan Adams { "spa_zio_read", { ZTI_THREAD_FIX(8), ZTI_THREAD_TUNE } }, 1012e0c549eSJonathan Adams { "spa_zio_write", { ZTI_THREAD_TUNE, ZTI_THREAD_FIX(8) } }, 1022e0c549eSJonathan Adams { "spa_zio_free", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1032e0c549eSJonathan Adams { "spa_zio_claim", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1042e0c549eSJonathan Adams { "spa_zio_ioctl", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1052e0c549eSJonathan Adams }; 1062e0c549eSJonathan Adams 1072e0c549eSJonathan Adams enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent; 1082e0c549eSJonathan Adams uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */ 1092e0c549eSJonathan Adams 110990b4856Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 11189a89ebfSlling static boolean_t spa_has_active_shared_spare(spa_t *spa); 112990b4856Slling 
113990b4856Slling /* 114990b4856Slling * ========================================================================== 115990b4856Slling * SPA properties routines 116990b4856Slling * ========================================================================== 117990b4856Slling */ 118990b4856Slling 119990b4856Slling /* 120990b4856Slling * Add a (source=src, propname=propval) list to an nvlist. 121990b4856Slling */ 1229d82f4f6Slling static void 123990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 124990b4856Slling uint64_t intval, zprop_source_t src) 125990b4856Slling { 126990b4856Slling const char *propname = zpool_prop_to_name(prop); 127990b4856Slling nvlist_t *propval; 128990b4856Slling 1299d82f4f6Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1309d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 131990b4856Slling 1329d82f4f6Slling if (strval != NULL) 1339d82f4f6Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1349d82f4f6Slling else 1359d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 136990b4856Slling 1379d82f4f6Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 138990b4856Slling nvlist_free(propval); 139990b4856Slling } 140990b4856Slling 141990b4856Slling /* 142990b4856Slling * Get property values from the spa configuration. 
143990b4856Slling */ 1449d82f4f6Slling static void 145990b4856Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 146990b4856Slling { 147379c004dSEric Schrock uint64_t size; 148379c004dSEric Schrock uint64_t used; 149990b4856Slling uint64_t cap, version; 150990b4856Slling zprop_source_t src = ZPROP_SRC_NONE; 151c5904d13Seschrock spa_config_dirent_t *dp; 152990b4856Slling 153e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 154e14bb325SJeff Bonwick 155379c004dSEric Schrock if (spa->spa_root_vdev != NULL) { 156379c004dSEric Schrock size = spa_get_space(spa); 157379c004dSEric Schrock used = spa_get_alloc(spa); 158379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 159379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 160379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 161379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, 162379c004dSEric Schrock size - used, src); 163379c004dSEric Schrock 164379c004dSEric Schrock cap = (size == 0) ? 
0 : (used * 100 / size); 165379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 166379c004dSEric Schrock 167379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 168379c004dSEric Schrock spa->spa_root_vdev->vdev_state, src); 169379c004dSEric Schrock 170379c004dSEric Schrock version = spa_version(spa); 171379c004dSEric Schrock if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 172379c004dSEric Schrock src = ZPROP_SRC_DEFAULT; 173379c004dSEric Schrock else 174379c004dSEric Schrock src = ZPROP_SRC_LOCAL; 175379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 176379c004dSEric Schrock } 177990b4856Slling 1789d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 179990b4856Slling 1809d82f4f6Slling if (spa->spa_root != NULL) 1819d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1829d82f4f6Slling 0, ZPROP_SRC_LOCAL); 183990b4856Slling 184c5904d13Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 185c5904d13Seschrock if (dp->scd_path == NULL) { 1869d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 187c5904d13Seschrock "none", 0, ZPROP_SRC_LOCAL); 188c5904d13Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1899d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 190c5904d13Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1912f8aaab3Seschrock } 1922f8aaab3Seschrock } 193990b4856Slling } 194990b4856Slling 195990b4856Slling /* 196990b4856Slling * Get zpool property values. 
197990b4856Slling */ 198990b4856Slling int 199990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 200990b4856Slling { 201990b4856Slling zap_cursor_t zc; 202990b4856Slling zap_attribute_t za; 203990b4856Slling objset_t *mos = spa->spa_meta_objset; 204990b4856Slling int err; 205990b4856Slling 2069d82f4f6Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 207990b4856Slling 208e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 209e14bb325SJeff Bonwick 210990b4856Slling /* 211990b4856Slling * Get properties from the spa config. 212990b4856Slling */ 2139d82f4f6Slling spa_prop_get_config(spa, nvp); 214990b4856Slling 215990b4856Slling /* If no pool property object, no more prop to get. */ 216990b4856Slling if (spa->spa_pool_props_object == 0) { 217990b4856Slling mutex_exit(&spa->spa_props_lock); 218990b4856Slling return (0); 219990b4856Slling } 220990b4856Slling 221990b4856Slling /* 222990b4856Slling * Get properties from the MOS pool property object. 223990b4856Slling */ 224990b4856Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 225990b4856Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 226990b4856Slling zap_cursor_advance(&zc)) { 227990b4856Slling uint64_t intval = 0; 228990b4856Slling char *strval = NULL; 229990b4856Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 230990b4856Slling zpool_prop_t prop; 231990b4856Slling 232990b4856Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 233990b4856Slling continue; 234990b4856Slling 235990b4856Slling switch (za.za_integer_length) { 236990b4856Slling case 8: 237990b4856Slling /* integer property */ 238990b4856Slling if (za.za_first_integer != 239990b4856Slling zpool_prop_default_numeric(prop)) 240990b4856Slling src = ZPROP_SRC_LOCAL; 241990b4856Slling 242990b4856Slling if (prop == ZPOOL_PROP_BOOTFS) { 243990b4856Slling dsl_pool_t *dp; 244990b4856Slling dsl_dataset_t *ds = NULL; 245990b4856Slling 246990b4856Slling dp = spa_get_dsl(spa); 247990b4856Slling 
rw_enter(&dp->dp_config_rwlock, RW_READER); 248745cd3c5Smaybee if (err = dsl_dataset_hold_obj(dp, 249745cd3c5Smaybee za.za_first_integer, FTAG, &ds)) { 250990b4856Slling rw_exit(&dp->dp_config_rwlock); 251990b4856Slling break; 252990b4856Slling } 253990b4856Slling 254990b4856Slling strval = kmem_alloc( 255990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 256990b4856Slling KM_SLEEP); 257990b4856Slling dsl_dataset_name(ds, strval); 258745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 259990b4856Slling rw_exit(&dp->dp_config_rwlock); 260990b4856Slling } else { 261990b4856Slling strval = NULL; 262990b4856Slling intval = za.za_first_integer; 263990b4856Slling } 264990b4856Slling 2659d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 266990b4856Slling 267990b4856Slling if (strval != NULL) 268990b4856Slling kmem_free(strval, 269990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 270990b4856Slling 271990b4856Slling break; 272990b4856Slling 273990b4856Slling case 1: 274990b4856Slling /* string property */ 275990b4856Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 276990b4856Slling err = zap_lookup(mos, spa->spa_pool_props_object, 277990b4856Slling za.za_name, 1, za.za_num_integers, strval); 278990b4856Slling if (err) { 279990b4856Slling kmem_free(strval, za.za_num_integers); 280990b4856Slling break; 281990b4856Slling } 2829d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 283990b4856Slling kmem_free(strval, za.za_num_integers); 284990b4856Slling break; 285990b4856Slling 286990b4856Slling default: 287990b4856Slling break; 288990b4856Slling } 289990b4856Slling } 290990b4856Slling zap_cursor_fini(&zc); 291990b4856Slling mutex_exit(&spa->spa_props_lock); 292990b4856Slling out: 293990b4856Slling if (err && err != ENOENT) { 294990b4856Slling nvlist_free(*nvp); 2959d82f4f6Slling *nvp = NULL; 296990b4856Slling return (err); 297990b4856Slling } 298990b4856Slling 299990b4856Slling return (0); 300990b4856Slling } 301990b4856Slling 302990b4856Slling 
/* 303990b4856Slling * Validate the given pool properties nvlist and modify the list 304990b4856Slling * for the property values to be set. 305990b4856Slling */ 306990b4856Slling static int 307990b4856Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 308990b4856Slling { 309990b4856Slling nvpair_t *elem; 310990b4856Slling int error = 0, reset_bootfs = 0; 311990b4856Slling uint64_t objnum; 312990b4856Slling 313990b4856Slling elem = NULL; 314990b4856Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 315990b4856Slling zpool_prop_t prop; 316990b4856Slling char *propname, *strval; 317990b4856Slling uint64_t intval; 318990b4856Slling objset_t *os; 3192f8aaab3Seschrock char *slash; 320990b4856Slling 321990b4856Slling propname = nvpair_name(elem); 322990b4856Slling 323990b4856Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 324990b4856Slling return (EINVAL); 325990b4856Slling 326990b4856Slling switch (prop) { 327990b4856Slling case ZPOOL_PROP_VERSION: 328990b4856Slling error = nvpair_value_uint64(elem, &intval); 329990b4856Slling if (!error && 330990b4856Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 331990b4856Slling error = EINVAL; 332990b4856Slling break; 333990b4856Slling 334990b4856Slling case ZPOOL_PROP_DELEGATION: 335990b4856Slling case ZPOOL_PROP_AUTOREPLACE: 336d5b5bb25SRich Morris case ZPOOL_PROP_LISTSNAPS: 337573ca77eSGeorge Wilson case ZPOOL_PROP_AUTOEXPAND: 338990b4856Slling error = nvpair_value_uint64(elem, &intval); 339990b4856Slling if (!error && intval > 1) 340990b4856Slling error = EINVAL; 341990b4856Slling break; 342990b4856Slling 343990b4856Slling case ZPOOL_PROP_BOOTFS: 34425f89ee2SJeff Bonwick /* 34525f89ee2SJeff Bonwick * If the pool version is less than SPA_VERSION_BOOTFS, 34625f89ee2SJeff Bonwick * or the pool is still being created (version == 0), 34725f89ee2SJeff Bonwick * the bootfs property cannot be set. 
34825f89ee2SJeff Bonwick */ 349990b4856Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 350990b4856Slling error = ENOTSUP; 351990b4856Slling break; 352990b4856Slling } 353990b4856Slling 354990b4856Slling /* 35515e6edf1Sgw * Make sure the vdev config is bootable 356990b4856Slling */ 35715e6edf1Sgw if (!vdev_is_bootable(spa->spa_root_vdev)) { 358990b4856Slling error = ENOTSUP; 359990b4856Slling break; 360990b4856Slling } 361990b4856Slling 362990b4856Slling reset_bootfs = 1; 363990b4856Slling 364990b4856Slling error = nvpair_value_string(elem, &strval); 365990b4856Slling 366990b4856Slling if (!error) { 36715e6edf1Sgw uint64_t compress; 36815e6edf1Sgw 369990b4856Slling if (strval == NULL || strval[0] == '\0') { 370990b4856Slling objnum = zpool_prop_default_numeric( 371990b4856Slling ZPOOL_PROP_BOOTFS); 372990b4856Slling break; 373990b4856Slling } 374990b4856Slling 375503ad85cSMatthew Ahrens if (error = dmu_objset_hold(strval, FTAG, &os)) 376990b4856Slling break; 37715e6edf1Sgw 378503ad85cSMatthew Ahrens /* Must be ZPL and not gzip compressed. 
*/ 379503ad85cSMatthew Ahrens 380503ad85cSMatthew Ahrens if (dmu_objset_type(os) != DMU_OST_ZFS) { 381503ad85cSMatthew Ahrens error = ENOTSUP; 382503ad85cSMatthew Ahrens } else if ((error = dsl_prop_get_integer(strval, 38315e6edf1Sgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 38415e6edf1Sgw &compress, NULL)) == 0 && 38515e6edf1Sgw !BOOTFS_COMPRESS_VALID(compress)) { 38615e6edf1Sgw error = ENOTSUP; 38715e6edf1Sgw } else { 38815e6edf1Sgw objnum = dmu_objset_id(os); 38915e6edf1Sgw } 390503ad85cSMatthew Ahrens dmu_objset_rele(os, FTAG); 391990b4856Slling } 392990b4856Slling break; 393e14bb325SJeff Bonwick 3940a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 3950a4e9518Sgw error = nvpair_value_uint64(elem, &intval); 3960a4e9518Sgw if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 3970a4e9518Sgw intval > ZIO_FAILURE_MODE_PANIC)) 3980a4e9518Sgw error = EINVAL; 3990a4e9518Sgw 4000a4e9518Sgw /* 4010a4e9518Sgw * This is a special case which only occurs when 4020a4e9518Sgw * the pool has completely failed. This allows 4030a4e9518Sgw * the user to change the in-core failmode property 4040a4e9518Sgw * without syncing it out to disk (I/Os might 4050a4e9518Sgw * currently be blocked). We do this by returning 4060a4e9518Sgw * EIO to the caller (spa_prop_set) to trick it 4070a4e9518Sgw * into thinking we encountered a property validation 4080a4e9518Sgw * error. 
4090a4e9518Sgw */ 410e14bb325SJeff Bonwick if (!error && spa_suspended(spa)) { 4110a4e9518Sgw spa->spa_failmode = intval; 4120a4e9518Sgw error = EIO; 4130a4e9518Sgw } 4140a4e9518Sgw break; 4152f8aaab3Seschrock 4162f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 4172f8aaab3Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4182f8aaab3Seschrock break; 4192f8aaab3Seschrock 4202f8aaab3Seschrock if (strval[0] == '\0') 4212f8aaab3Seschrock break; 4222f8aaab3Seschrock 4232f8aaab3Seschrock if (strcmp(strval, "none") == 0) 4242f8aaab3Seschrock break; 4252f8aaab3Seschrock 4262f8aaab3Seschrock if (strval[0] != '/') { 4272f8aaab3Seschrock error = EINVAL; 4282f8aaab3Seschrock break; 4292f8aaab3Seschrock } 4302f8aaab3Seschrock 4312f8aaab3Seschrock slash = strrchr(strval, '/'); 4322f8aaab3Seschrock ASSERT(slash != NULL); 4332f8aaab3Seschrock 4342f8aaab3Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4352f8aaab3Seschrock strcmp(slash, "/..") == 0) 4362f8aaab3Seschrock error = EINVAL; 4372f8aaab3Seschrock break; 438990b4856Slling } 439990b4856Slling 440990b4856Slling if (error) 441990b4856Slling break; 442990b4856Slling } 443990b4856Slling 444990b4856Slling if (!error && reset_bootfs) { 445990b4856Slling error = nvlist_remove(props, 446990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 447990b4856Slling 448990b4856Slling if (!error) { 449990b4856Slling error = nvlist_add_uint64(props, 450990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 451990b4856Slling } 452990b4856Slling } 453990b4856Slling 454990b4856Slling return (error); 455990b4856Slling } 456990b4856Slling 457379c004dSEric Schrock void 458379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 459379c004dSEric Schrock { 460379c004dSEric Schrock char *cachefile; 461379c004dSEric Schrock spa_config_dirent_t *dp; 462379c004dSEric Schrock 463379c004dSEric Schrock if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 
464379c004dSEric Schrock &cachefile) != 0) 465379c004dSEric Schrock return; 466379c004dSEric Schrock 467379c004dSEric Schrock dp = kmem_alloc(sizeof (spa_config_dirent_t), 468379c004dSEric Schrock KM_SLEEP); 469379c004dSEric Schrock 470379c004dSEric Schrock if (cachefile[0] == '\0') 471379c004dSEric Schrock dp->scd_path = spa_strdup(spa_config_path); 472379c004dSEric Schrock else if (strcmp(cachefile, "none") == 0) 473379c004dSEric Schrock dp->scd_path = NULL; 474379c004dSEric Schrock else 475379c004dSEric Schrock dp->scd_path = spa_strdup(cachefile); 476379c004dSEric Schrock 477379c004dSEric Schrock list_insert_head(&spa->spa_config_list, dp); 478379c004dSEric Schrock if (need_sync) 479379c004dSEric Schrock spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 480379c004dSEric Schrock } 481379c004dSEric Schrock 482990b4856Slling int 483990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 484990b4856Slling { 485990b4856Slling int error; 486379c004dSEric Schrock nvpair_t *elem; 487379c004dSEric Schrock boolean_t need_sync = B_FALSE; 488379c004dSEric Schrock zpool_prop_t prop; 489990b4856Slling 490990b4856Slling if ((error = spa_prop_validate(spa, nvp)) != 0) 491990b4856Slling return (error); 492990b4856Slling 493379c004dSEric Schrock elem = NULL; 494379c004dSEric Schrock while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 495379c004dSEric Schrock if ((prop = zpool_name_to_prop( 496379c004dSEric Schrock nvpair_name(elem))) == ZPROP_INVAL) 497379c004dSEric Schrock return (EINVAL); 498379c004dSEric Schrock 499379c004dSEric Schrock if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 500379c004dSEric Schrock continue; 501379c004dSEric Schrock 502379c004dSEric Schrock need_sync = B_TRUE; 503379c004dSEric Schrock break; 504379c004dSEric Schrock } 505379c004dSEric Schrock 506379c004dSEric Schrock if (need_sync) 507379c004dSEric Schrock return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 508379c004dSEric Schrock spa, nvp, 3)); 509379c004dSEric 
Schrock else 510379c004dSEric Schrock return (0); 511990b4856Slling } 512990b4856Slling 513990b4856Slling /* 514990b4856Slling * If the bootfs property value is dsobj, clear it. 515990b4856Slling */ 516990b4856Slling void 517990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 518990b4856Slling { 519990b4856Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 520990b4856Slling VERIFY(zap_remove(spa->spa_meta_objset, 521990b4856Slling spa->spa_pool_props_object, 522990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 523990b4856Slling spa->spa_bootfs = 0; 524990b4856Slling } 525990b4856Slling } 526990b4856Slling 527fa9e4066Sahrens /* 528fa9e4066Sahrens * ========================================================================== 529fa9e4066Sahrens * SPA state manipulation (open/create/destroy/import/export) 530fa9e4066Sahrens * ========================================================================== 531fa9e4066Sahrens */ 532fa9e4066Sahrens 533ea8dc4b6Seschrock static int 534ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b) 535ea8dc4b6Seschrock { 536ea8dc4b6Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 537ea8dc4b6Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 538ea8dc4b6Seschrock int ret; 539ea8dc4b6Seschrock 540ea8dc4b6Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 541ea8dc4b6Seschrock sizeof (zbookmark_t)); 542ea8dc4b6Seschrock 543ea8dc4b6Seschrock if (ret < 0) 544ea8dc4b6Seschrock return (-1); 545ea8dc4b6Seschrock else if (ret > 0) 546ea8dc4b6Seschrock return (1); 547ea8dc4b6Seschrock else 548ea8dc4b6Seschrock return (0); 549ea8dc4b6Seschrock } 550ea8dc4b6Seschrock 551ea8dc4b6Seschrock /* 552ea8dc4b6Seschrock * Utility function which retrieves copies of the current logs and 553ea8dc4b6Seschrock * re-initializes them in the process. 
554ea8dc4b6Seschrock */ 555ea8dc4b6Seschrock void 556ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 557ea8dc4b6Seschrock { 558ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 559ea8dc4b6Seschrock 560ea8dc4b6Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 561ea8dc4b6Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 562ea8dc4b6Seschrock 563ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 564ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 565ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 566ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 567ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 568ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 569ea8dc4b6Seschrock } 570ea8dc4b6Seschrock 571fa9e4066Sahrens /* 572fa9e4066Sahrens * Activate an uninitialized pool. 573fa9e4066Sahrens */ 574fa9e4066Sahrens static void 5758ad4d6ddSJeff Bonwick spa_activate(spa_t *spa, int mode) 576fa9e4066Sahrens { 577fa9e4066Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 578fa9e4066Sahrens 579fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 5808ad4d6ddSJeff Bonwick spa->spa_mode = mode; 581fa9e4066Sahrens 582*88ecc943SGeorge Wilson spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 583*88ecc943SGeorge Wilson spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 584fa9e4066Sahrens 585e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 5862e0c549eSJonathan Adams const zio_taskq_info_t *ztip = &zio_taskqs[t]; 587e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 5882e0c549eSJonathan Adams enum zti_modes mode = ztip->zti_nthreads[q].zti_mode; 5892e0c549eSJonathan Adams uint_t value = ztip->zti_nthreads[q].zti_value; 5902e0c549eSJonathan Adams char name[32]; 5912e0c549eSJonathan Adams 5922e0c549eSJonathan Adams (void) snprintf(name, sizeof (name), 5932e0c549eSJonathan 
Adams "%s_%s", ztip->zti_name, zio_taskq_types[q]); 5942e0c549eSJonathan Adams 5952e0c549eSJonathan Adams if (mode == zti_mode_tune) { 5962e0c549eSJonathan Adams mode = zio_taskq_tune_mode; 5972e0c549eSJonathan Adams value = zio_taskq_tune_value; 5982e0c549eSJonathan Adams if (mode == zti_mode_tune) 5992e0c549eSJonathan Adams mode = zti_mode_online_percent; 6002e0c549eSJonathan Adams } 6012e0c549eSJonathan Adams 6022e0c549eSJonathan Adams switch (mode) { 6032e0c549eSJonathan Adams case zti_mode_fixed: 6042e0c549eSJonathan Adams ASSERT3U(value, >=, 1); 6052e0c549eSJonathan Adams value = MAX(value, 1); 6062e0c549eSJonathan Adams 6072e0c549eSJonathan Adams spa->spa_zio_taskq[t][q] = taskq_create(name, 6082e0c549eSJonathan Adams value, maxclsyspri, 50, INT_MAX, 6092e0c549eSJonathan Adams TASKQ_PREPOPULATE); 6102e0c549eSJonathan Adams break; 6112e0c549eSJonathan Adams 6122e0c549eSJonathan Adams case zti_mode_online_percent: 6132e0c549eSJonathan Adams spa->spa_zio_taskq[t][q] = taskq_create(name, 6142e0c549eSJonathan Adams value, maxclsyspri, 50, INT_MAX, 6152e0c549eSJonathan Adams TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 6162e0c549eSJonathan Adams break; 6172e0c549eSJonathan Adams 6182e0c549eSJonathan Adams case zti_mode_tune: 6192e0c549eSJonathan Adams default: 6202e0c549eSJonathan Adams panic("unrecognized mode for " 6212e0c549eSJonathan Adams "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) " 6222e0c549eSJonathan Adams "in spa_activate()", 6232e0c549eSJonathan Adams t, q, mode, value); 6242e0c549eSJonathan Adams break; 6252e0c549eSJonathan Adams } 626e14bb325SJeff Bonwick } 627fa9e4066Sahrens } 628fa9e4066Sahrens 629e14bb325SJeff Bonwick list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 630e14bb325SJeff Bonwick offsetof(vdev_t, vdev_config_dirty_node)); 631e14bb325SJeff Bonwick list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 632e14bb325SJeff Bonwick offsetof(vdev_t, vdev_state_dirty_node)); 633fa9e4066Sahrens 634fa9e4066Sahrens 
txg_list_create(&spa->spa_vdev_txg_list, 635fa9e4066Sahrens offsetof(struct vdev, vdev_txg_node)); 636ea8dc4b6Seschrock 637ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 638ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 639ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 640ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 641ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 642ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 643fa9e4066Sahrens } 644fa9e4066Sahrens 645fa9e4066Sahrens /* 646fa9e4066Sahrens * Opposite of spa_activate(). 647fa9e4066Sahrens */ 648fa9e4066Sahrens static void 649fa9e4066Sahrens spa_deactivate(spa_t *spa) 650fa9e4066Sahrens { 651fa9e4066Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 652fa9e4066Sahrens ASSERT(spa->spa_dsl_pool == NULL); 653fa9e4066Sahrens ASSERT(spa->spa_root_vdev == NULL); 65425f89ee2SJeff Bonwick ASSERT(spa->spa_async_zio_root == NULL); 655fa9e4066Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 656fa9e4066Sahrens 657fa9e4066Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 658fa9e4066Sahrens 659e14bb325SJeff Bonwick list_destroy(&spa->spa_config_dirty_list); 660e14bb325SJeff Bonwick list_destroy(&spa->spa_state_dirty_list); 661fa9e4066Sahrens 662e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 663e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 664e14bb325SJeff Bonwick taskq_destroy(spa->spa_zio_taskq[t][q]); 665e14bb325SJeff Bonwick spa->spa_zio_taskq[t][q] = NULL; 666e14bb325SJeff Bonwick } 667fa9e4066Sahrens } 668fa9e4066Sahrens 669fa9e4066Sahrens metaslab_class_destroy(spa->spa_normal_class); 670fa9e4066Sahrens spa->spa_normal_class = NULL; 671fa9e4066Sahrens 6728654d025Sperrin metaslab_class_destroy(spa->spa_log_class); 6738654d025Sperrin spa->spa_log_class = NULL; 6748654d025Sperrin 675ea8dc4b6Seschrock /* 676ea8dc4b6Seschrock * If this was part of an import or the open otherwise failed, we may 
677ea8dc4b6Seschrock * still have errors left in the queues. Empty them just in case. 678ea8dc4b6Seschrock */ 679ea8dc4b6Seschrock spa_errlog_drain(spa); 680ea8dc4b6Seschrock 681ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_scrub); 682ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_last); 683ea8dc4b6Seschrock 684fa9e4066Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 685fa9e4066Sahrens } 686fa9e4066Sahrens 687fa9e4066Sahrens /* 688fa9e4066Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. This 689fa9e4066Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 690fa9e4066Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 691fa9e4066Sahrens * All vdev validation is done by the vdev_alloc() routine. 692fa9e4066Sahrens */ 69399653d4eSeschrock static int 69499653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 69599653d4eSeschrock uint_t id, int atype) 696fa9e4066Sahrens { 697fa9e4066Sahrens nvlist_t **child; 698573ca77eSGeorge Wilson uint_t children; 69999653d4eSeschrock int error; 700fa9e4066Sahrens 70199653d4eSeschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 70299653d4eSeschrock return (error); 703fa9e4066Sahrens 70499653d4eSeschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 70599653d4eSeschrock return (0); 706fa9e4066Sahrens 707e14bb325SJeff Bonwick error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 708e14bb325SJeff Bonwick &child, &children); 709e14bb325SJeff Bonwick 710e14bb325SJeff Bonwick if (error == ENOENT) 711e14bb325SJeff Bonwick return (0); 712e14bb325SJeff Bonwick 713e14bb325SJeff Bonwick if (error) { 71499653d4eSeschrock vdev_free(*vdp); 71599653d4eSeschrock *vdp = NULL; 71699653d4eSeschrock return (EINVAL); 717fa9e4066Sahrens } 718fa9e4066Sahrens 719573ca77eSGeorge Wilson for (int c = 0; c < children; c++) { 72099653d4eSeschrock vdev_t *vd; 72199653d4eSeschrock if ((error = 
spa_config_parse(spa, &vd, child[c], *vdp, c, 72299653d4eSeschrock atype)) != 0) { 72399653d4eSeschrock vdev_free(*vdp); 72499653d4eSeschrock *vdp = NULL; 72599653d4eSeschrock return (error); 726fa9e4066Sahrens } 727fa9e4066Sahrens } 728fa9e4066Sahrens 72999653d4eSeschrock ASSERT(*vdp != NULL); 73099653d4eSeschrock 73199653d4eSeschrock return (0); 732fa9e4066Sahrens } 733fa9e4066Sahrens 734fa9e4066Sahrens /* 735fa9e4066Sahrens * Opposite of spa_load(). 736fa9e4066Sahrens */ 737fa9e4066Sahrens static void 738fa9e4066Sahrens spa_unload(spa_t *spa) 739fa9e4066Sahrens { 74099653d4eSeschrock int i; 74199653d4eSeschrock 742e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 743e14bb325SJeff Bonwick 744ea8dc4b6Seschrock /* 745ea8dc4b6Seschrock * Stop async tasks. 746ea8dc4b6Seschrock */ 747ea8dc4b6Seschrock spa_async_suspend(spa); 748ea8dc4b6Seschrock 749fa9e4066Sahrens /* 750fa9e4066Sahrens * Stop syncing. 751fa9e4066Sahrens */ 752fa9e4066Sahrens if (spa->spa_sync_on) { 753fa9e4066Sahrens txg_sync_stop(spa->spa_dsl_pool); 754fa9e4066Sahrens spa->spa_sync_on = B_FALSE; 755fa9e4066Sahrens } 756fa9e4066Sahrens 757fa9e4066Sahrens /* 758e14bb325SJeff Bonwick * Wait for any outstanding async I/O to complete. 759fa9e4066Sahrens */ 76054d692b7SGeorge Wilson if (spa->spa_async_zio_root != NULL) { 76154d692b7SGeorge Wilson (void) zio_wait(spa->spa_async_zio_root); 76254d692b7SGeorge Wilson spa->spa_async_zio_root = NULL; 76354d692b7SGeorge Wilson } 764fa9e4066Sahrens 765fa9e4066Sahrens /* 766fa9e4066Sahrens * Close the dsl pool. 
767fa9e4066Sahrens */ 768fa9e4066Sahrens if (spa->spa_dsl_pool) { 769fa9e4066Sahrens dsl_pool_close(spa->spa_dsl_pool); 770fa9e4066Sahrens spa->spa_dsl_pool = NULL; 771fa9e4066Sahrens } 772fa9e4066Sahrens 7738ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 7748ad4d6ddSJeff Bonwick 7758ad4d6ddSJeff Bonwick /* 7768ad4d6ddSJeff Bonwick * Drop and purge level 2 cache 7778ad4d6ddSJeff Bonwick */ 7788ad4d6ddSJeff Bonwick spa_l2cache_drop(spa); 7798ad4d6ddSJeff Bonwick 780fa9e4066Sahrens /* 781fa9e4066Sahrens * Close all vdevs. 782fa9e4066Sahrens */ 7830e34b6a7Sbonwick if (spa->spa_root_vdev) 784fa9e4066Sahrens vdev_free(spa->spa_root_vdev); 7850e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == NULL); 786ea8dc4b6Seschrock 787fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 788fa94a07fSbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 789fa94a07fSbrendan if (spa->spa_spares.sav_vdevs) { 790fa94a07fSbrendan kmem_free(spa->spa_spares.sav_vdevs, 791fa94a07fSbrendan spa->spa_spares.sav_count * sizeof (void *)); 792fa94a07fSbrendan spa->spa_spares.sav_vdevs = NULL; 79399653d4eSeschrock } 794fa94a07fSbrendan if (spa->spa_spares.sav_config) { 795fa94a07fSbrendan nvlist_free(spa->spa_spares.sav_config); 796fa94a07fSbrendan spa->spa_spares.sav_config = NULL; 797fa94a07fSbrendan } 7982ce8af81SEric Schrock spa->spa_spares.sav_count = 0; 799fa94a07fSbrendan 800fa94a07fSbrendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 801fa94a07fSbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 802fa94a07fSbrendan if (spa->spa_l2cache.sav_vdevs) { 803fa94a07fSbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 804fa94a07fSbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 805fa94a07fSbrendan spa->spa_l2cache.sav_vdevs = NULL; 806fa94a07fSbrendan } 807fa94a07fSbrendan if (spa->spa_l2cache.sav_config) { 808fa94a07fSbrendan nvlist_free(spa->spa_l2cache.sav_config); 809fa94a07fSbrendan spa->spa_l2cache.sav_config = NULL; 81099653d4eSeschrock } 8112ce8af81SEric Schrock 
spa->spa_l2cache.sav_count = 0; 81299653d4eSeschrock 813ea8dc4b6Seschrock spa->spa_async_suspended = 0; 8148ad4d6ddSJeff Bonwick 8158ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 816fa9e4066Sahrens } 817fa9e4066Sahrens 81899653d4eSeschrock /* 81999653d4eSeschrock * Load (or re-load) the current list of vdevs describing the active spares for 82099653d4eSeschrock * this pool. When this is called, we have some form of basic information in 821fa94a07fSbrendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 822fa94a07fSbrendan * then re-generate a more complete list including status information. 82399653d4eSeschrock */ 82499653d4eSeschrock static void 82599653d4eSeschrock spa_load_spares(spa_t *spa) 82699653d4eSeschrock { 82799653d4eSeschrock nvlist_t **spares; 82899653d4eSeschrock uint_t nspares; 82999653d4eSeschrock int i; 83039c23413Seschrock vdev_t *vd, *tvd; 83199653d4eSeschrock 832e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 833e14bb325SJeff Bonwick 83499653d4eSeschrock /* 83599653d4eSeschrock * First, close and free any existing spare vdevs. 
83699653d4eSeschrock */ 837fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 838fa94a07fSbrendan vd = spa->spa_spares.sav_vdevs[i]; 83939c23413Seschrock 84039c23413Seschrock /* Undo the call to spa_activate() below */ 841c5904d13Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 842c5904d13Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 84339c23413Seschrock spa_spare_remove(tvd); 84439c23413Seschrock vdev_close(vd); 84539c23413Seschrock vdev_free(vd); 84699653d4eSeschrock } 84739c23413Seschrock 848fa94a07fSbrendan if (spa->spa_spares.sav_vdevs) 849fa94a07fSbrendan kmem_free(spa->spa_spares.sav_vdevs, 850fa94a07fSbrendan spa->spa_spares.sav_count * sizeof (void *)); 85199653d4eSeschrock 852fa94a07fSbrendan if (spa->spa_spares.sav_config == NULL) 85399653d4eSeschrock nspares = 0; 85499653d4eSeschrock else 855fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 85699653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 85799653d4eSeschrock 858fa94a07fSbrendan spa->spa_spares.sav_count = (int)nspares; 859fa94a07fSbrendan spa->spa_spares.sav_vdevs = NULL; 86099653d4eSeschrock 86199653d4eSeschrock if (nspares == 0) 86299653d4eSeschrock return; 86399653d4eSeschrock 86499653d4eSeschrock /* 86599653d4eSeschrock * Construct the array of vdevs, opening them to get status in the 86639c23413Seschrock * process. For each spare, there is potentially two different vdev_t 86739c23413Seschrock * structures associated with it: one in the list of spares (used only 86839c23413Seschrock * for basic validation purposes) and one in the active vdev 86939c23413Seschrock * configuration (if it's spared in). During this phase we open and 87039c23413Seschrock * validate each vdev on the spare list. If the vdev also exists in the 87139c23413Seschrock * active configuration, then we also mark this vdev as an active spare. 
87299653d4eSeschrock */ 873fa94a07fSbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 874fa94a07fSbrendan KM_SLEEP); 875fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 87699653d4eSeschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 87799653d4eSeschrock VDEV_ALLOC_SPARE) == 0); 87899653d4eSeschrock ASSERT(vd != NULL); 87999653d4eSeschrock 880fa94a07fSbrendan spa->spa_spares.sav_vdevs[i] = vd; 88199653d4eSeschrock 882c5904d13Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 883c5904d13Seschrock B_FALSE)) != NULL) { 88439c23413Seschrock if (!tvd->vdev_isspare) 88539c23413Seschrock spa_spare_add(tvd); 88639c23413Seschrock 88739c23413Seschrock /* 88839c23413Seschrock * We only mark the spare active if we were successfully 88939c23413Seschrock * able to load the vdev. Otherwise, importing a pool 89039c23413Seschrock * with a bad active spare would result in strange 89139c23413Seschrock * behavior, because multiple pool would think the spare 89239c23413Seschrock * is actively in use. 89339c23413Seschrock * 89439c23413Seschrock * There is a vulnerability here to an equally bizarre 89539c23413Seschrock * circumstance, where a dead active spare is later 89639c23413Seschrock * brought back to life (onlined or otherwise). Given 89739c23413Seschrock * the rarity of this scenario, and the extra complexity 89839c23413Seschrock * it adds, we ignore the possibility. 
89939c23413Seschrock */ 90039c23413Seschrock if (!vdev_is_dead(tvd)) 90139c23413Seschrock spa_spare_activate(tvd); 90239c23413Seschrock } 90339c23413Seschrock 904e14bb325SJeff Bonwick vd->vdev_top = vd; 9056809eb4eSEric Schrock vd->vdev_aux = &spa->spa_spares; 906e14bb325SJeff Bonwick 90799653d4eSeschrock if (vdev_open(vd) != 0) 90899653d4eSeschrock continue; 90999653d4eSeschrock 910fa94a07fSbrendan if (vdev_validate_aux(vd) == 0) 911fa94a07fSbrendan spa_spare_add(vd); 91299653d4eSeschrock } 91399653d4eSeschrock 91499653d4eSeschrock /* 91599653d4eSeschrock * Recompute the stashed list of spares, with status information 91699653d4eSeschrock * this time. 91799653d4eSeschrock */ 918fa94a07fSbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 91999653d4eSeschrock DATA_TYPE_NVLIST_ARRAY) == 0); 92099653d4eSeschrock 921fa94a07fSbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 922fa94a07fSbrendan KM_SLEEP); 923fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 924fa94a07fSbrendan spares[i] = vdev_config_generate(spa, 925fa94a07fSbrendan spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 926fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 927fa94a07fSbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 928fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 92999653d4eSeschrock nvlist_free(spares[i]); 930fa94a07fSbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 931fa94a07fSbrendan } 932fa94a07fSbrendan 933fa94a07fSbrendan /* 934fa94a07fSbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 935fa94a07fSbrendan * this pool. When this is called, we have some form of basic information in 936fa94a07fSbrendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 937fa94a07fSbrendan * then re-generate a more complete list including status information. 
938fa94a07fSbrendan * Devices which are already active have their details maintained, and are 939fa94a07fSbrendan * not re-opened. 940fa94a07fSbrendan */ 941fa94a07fSbrendan static void 942fa94a07fSbrendan spa_load_l2cache(spa_t *spa) 943fa94a07fSbrendan { 944fa94a07fSbrendan nvlist_t **l2cache; 945fa94a07fSbrendan uint_t nl2cache; 946fa94a07fSbrendan int i, j, oldnvdevs; 947573ca77eSGeorge Wilson uint64_t guid; 948fa94a07fSbrendan vdev_t *vd, **oldvdevs, **newvdevs; 949fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 950fa94a07fSbrendan 951e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 952e14bb325SJeff Bonwick 953fa94a07fSbrendan if (sav->sav_config != NULL) { 954fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 955fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 956fa94a07fSbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 957fa94a07fSbrendan } else { 958fa94a07fSbrendan nl2cache = 0; 959fa94a07fSbrendan } 960fa94a07fSbrendan 961fa94a07fSbrendan oldvdevs = sav->sav_vdevs; 962fa94a07fSbrendan oldnvdevs = sav->sav_count; 963fa94a07fSbrendan sav->sav_vdevs = NULL; 964fa94a07fSbrendan sav->sav_count = 0; 965fa94a07fSbrendan 966fa94a07fSbrendan /* 967fa94a07fSbrendan * Process new nvlist of vdevs. 968fa94a07fSbrendan */ 969fa94a07fSbrendan for (i = 0; i < nl2cache; i++) { 970fa94a07fSbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 971fa94a07fSbrendan &guid) == 0); 972fa94a07fSbrendan 973fa94a07fSbrendan newvdevs[i] = NULL; 974fa94a07fSbrendan for (j = 0; j < oldnvdevs; j++) { 975fa94a07fSbrendan vd = oldvdevs[j]; 976fa94a07fSbrendan if (vd != NULL && guid == vd->vdev_guid) { 977fa94a07fSbrendan /* 978fa94a07fSbrendan * Retain previous vdev for add/remove ops. 
979fa94a07fSbrendan */ 980fa94a07fSbrendan newvdevs[i] = vd; 981fa94a07fSbrendan oldvdevs[j] = NULL; 982fa94a07fSbrendan break; 983fa94a07fSbrendan } 984fa94a07fSbrendan } 985fa94a07fSbrendan 986fa94a07fSbrendan if (newvdevs[i] == NULL) { 987fa94a07fSbrendan /* 988fa94a07fSbrendan * Create new vdev 989fa94a07fSbrendan */ 990fa94a07fSbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 991fa94a07fSbrendan VDEV_ALLOC_L2CACHE) == 0); 992fa94a07fSbrendan ASSERT(vd != NULL); 993fa94a07fSbrendan newvdevs[i] = vd; 994fa94a07fSbrendan 995fa94a07fSbrendan /* 996fa94a07fSbrendan * Commit this vdev as an l2cache device, 997fa94a07fSbrendan * even if it fails to open. 998fa94a07fSbrendan */ 999fa94a07fSbrendan spa_l2cache_add(vd); 1000fa94a07fSbrendan 1001c5904d13Seschrock vd->vdev_top = vd; 1002c5904d13Seschrock vd->vdev_aux = sav; 1003c5904d13Seschrock 1004c5904d13Seschrock spa_l2cache_activate(vd); 1005c5904d13Seschrock 1006fa94a07fSbrendan if (vdev_open(vd) != 0) 1007fa94a07fSbrendan continue; 1008fa94a07fSbrendan 1009fa94a07fSbrendan (void) vdev_validate_aux(vd); 1010fa94a07fSbrendan 1011573ca77eSGeorge Wilson if (!vdev_is_dead(vd)) 1012573ca77eSGeorge Wilson l2arc_add_vdev(spa, vd); 1013fa94a07fSbrendan } 1014fa94a07fSbrendan } 1015fa94a07fSbrendan 1016fa94a07fSbrendan /* 1017fa94a07fSbrendan * Purge vdevs that were dropped 1018fa94a07fSbrendan */ 1019fa94a07fSbrendan for (i = 0; i < oldnvdevs; i++) { 1020fa94a07fSbrendan uint64_t pool; 1021fa94a07fSbrendan 1022fa94a07fSbrendan vd = oldvdevs[i]; 1023fa94a07fSbrendan if (vd != NULL) { 10248ad4d6ddSJeff Bonwick if (spa_l2cache_exists(vd->vdev_guid, &pool) && 10258ad4d6ddSJeff Bonwick pool != 0ULL && l2arc_vdev_present(vd)) 1026fa94a07fSbrendan l2arc_remove_vdev(vd); 1027fa94a07fSbrendan (void) vdev_close(vd); 1028fa94a07fSbrendan spa_l2cache_remove(vd); 1029fa94a07fSbrendan } 1030fa94a07fSbrendan } 1031fa94a07fSbrendan 1032fa94a07fSbrendan if (oldvdevs) 1033fa94a07fSbrendan kmem_free(oldvdevs, oldnvdevs * sizeof 
(void *)); 1034fa94a07fSbrendan 1035fa94a07fSbrendan if (sav->sav_config == NULL) 1036fa94a07fSbrendan goto out; 1037fa94a07fSbrendan 1038fa94a07fSbrendan sav->sav_vdevs = newvdevs; 1039fa94a07fSbrendan sav->sav_count = (int)nl2cache; 1040fa94a07fSbrendan 1041fa94a07fSbrendan /* 1042fa94a07fSbrendan * Recompute the stashed list of l2cache devices, with status 1043fa94a07fSbrendan * information this time. 1044fa94a07fSbrendan */ 1045fa94a07fSbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 1046fa94a07fSbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 1047fa94a07fSbrendan 1048fa94a07fSbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 1049fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 1050fa94a07fSbrendan l2cache[i] = vdev_config_generate(spa, 1051fa94a07fSbrendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 1052fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 1053fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 1054fa94a07fSbrendan out: 1055fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 1056fa94a07fSbrendan nvlist_free(l2cache[i]); 1057fa94a07fSbrendan if (sav->sav_count) 1058fa94a07fSbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 105999653d4eSeschrock } 106099653d4eSeschrock 106199653d4eSeschrock static int 106299653d4eSeschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 106399653d4eSeschrock { 106499653d4eSeschrock dmu_buf_t *db; 106599653d4eSeschrock char *packed = NULL; 106699653d4eSeschrock size_t nvsize = 0; 106799653d4eSeschrock int error; 106899653d4eSeschrock *value = NULL; 106999653d4eSeschrock 107099653d4eSeschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 107199653d4eSeschrock nvsize = *(uint64_t *)db->db_data; 107299653d4eSeschrock dmu_buf_rele(db, FTAG); 107399653d4eSeschrock 107499653d4eSeschrock packed = kmem_alloc(nvsize, KM_SLEEP); 10757bfdf011SNeil Perrin error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, 
packed, 10767bfdf011SNeil Perrin DMU_READ_PREFETCH); 107799653d4eSeschrock if (error == 0) 107899653d4eSeschrock error = nvlist_unpack(packed, nvsize, value, 0); 107999653d4eSeschrock kmem_free(packed, nvsize); 108099653d4eSeschrock 108199653d4eSeschrock return (error); 108299653d4eSeschrock } 108399653d4eSeschrock 10843d7072f8Seschrock /* 10853d7072f8Seschrock * Checks to see if the given vdev could not be opened, in which case we post a 10863d7072f8Seschrock * sysevent to notify the autoreplace code that the device has been removed. 10873d7072f8Seschrock */ 10883d7072f8Seschrock static void 10893d7072f8Seschrock spa_check_removed(vdev_t *vd) 10903d7072f8Seschrock { 1091573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) 10923d7072f8Seschrock spa_check_removed(vd->vdev_child[c]); 10933d7072f8Seschrock 10943d7072f8Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 10953d7072f8Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 10963d7072f8Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 10973d7072f8Seschrock } 10983d7072f8Seschrock } 10993d7072f8Seschrock 1100e6ca193dSGeorge Wilson /* 1101e6ca193dSGeorge Wilson * Load the slog device state from the config object since it's possible 1102e6ca193dSGeorge Wilson * that the label does not contain the most up-to-date information. 1103e6ca193dSGeorge Wilson */ 1104e6ca193dSGeorge Wilson void 1105*88ecc943SGeorge Wilson spa_load_log_state(spa_t *spa, nvlist_t *nv) 1106e6ca193dSGeorge Wilson { 1107*88ecc943SGeorge Wilson vdev_t *ovd, *rvd = spa->spa_root_vdev; 1108e6ca193dSGeorge Wilson 1109*88ecc943SGeorge Wilson /* 1110*88ecc943SGeorge Wilson * Load the original root vdev tree from the passed config. 
1111*88ecc943SGeorge Wilson */ 1112*88ecc943SGeorge Wilson spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1113*88ecc943SGeorge Wilson VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); 1114e6ca193dSGeorge Wilson 1115*88ecc943SGeorge Wilson for (int c = 0; c < rvd->vdev_children; c++) { 1116*88ecc943SGeorge Wilson vdev_t *cvd = rvd->vdev_child[c]; 1117*88ecc943SGeorge Wilson if (cvd->vdev_islog) 1118*88ecc943SGeorge Wilson vdev_load_log_state(cvd, ovd->vdev_child[c]); 1119e6ca193dSGeorge Wilson } 1120*88ecc943SGeorge Wilson vdev_free(ovd); 1121*88ecc943SGeorge Wilson spa_config_exit(spa, SCL_ALL, FTAG); 1122e6ca193dSGeorge Wilson } 1123e6ca193dSGeorge Wilson 1124b87f3af3Sperrin /* 1125b87f3af3Sperrin * Check for missing log devices 1126b87f3af3Sperrin */ 1127b87f3af3Sperrin int 1128b87f3af3Sperrin spa_check_logs(spa_t *spa) 1129b87f3af3Sperrin { 1130b87f3af3Sperrin switch (spa->spa_log_state) { 1131b87f3af3Sperrin case SPA_LOG_MISSING: 1132b87f3af3Sperrin /* need to recheck in case slog has been restored */ 1133b87f3af3Sperrin case SPA_LOG_UNKNOWN: 1134b87f3af3Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 1135b87f3af3Sperrin DS_FIND_CHILDREN)) { 1136b87f3af3Sperrin spa->spa_log_state = SPA_LOG_MISSING; 1137b87f3af3Sperrin return (1); 1138b87f3af3Sperrin } 1139b87f3af3Sperrin break; 1140b87f3af3Sperrin } 1141b87f3af3Sperrin return (0); 1142b87f3af3Sperrin } 1143b87f3af3Sperrin 1144fa9e4066Sahrens /* 1145fa9e4066Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 1146ea8dc4b6Seschrock * source of configuration information. 
1147fa9e4066Sahrens */ 1148fa9e4066Sahrens static int 1149ea8dc4b6Seschrock spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1150fa9e4066Sahrens { 1151fa9e4066Sahrens int error = 0; 1152*88ecc943SGeorge Wilson nvlist_t *nvconfig, *nvroot = NULL; 1153fa9e4066Sahrens vdev_t *rvd; 1154fa9e4066Sahrens uberblock_t *ub = &spa->spa_uberblock; 11550373e76bSbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1156fa9e4066Sahrens uint64_t pool_guid; 115799653d4eSeschrock uint64_t version; 11583d7072f8Seschrock uint64_t autoreplace = 0; 11598ad4d6ddSJeff Bonwick int orig_mode = spa->spa_mode; 1160b87f3af3Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1161fa9e4066Sahrens 11628ad4d6ddSJeff Bonwick /* 11638ad4d6ddSJeff Bonwick * If this is an untrusted config, access the pool in read-only mode. 11648ad4d6ddSJeff Bonwick * This prevents things like resilvering recently removed devices. 11658ad4d6ddSJeff Bonwick */ 11668ad4d6ddSJeff Bonwick if (!mosconfig) 11678ad4d6ddSJeff Bonwick spa->spa_mode = FREAD; 11688ad4d6ddSJeff Bonwick 1169e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1170e14bb325SJeff Bonwick 1171ea8dc4b6Seschrock spa->spa_load_state = state; 11720373e76bSbonwick 1173fa9e4066Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1174a9926bf0Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1175ea8dc4b6Seschrock error = EINVAL; 1176ea8dc4b6Seschrock goto out; 1177ea8dc4b6Seschrock } 1178fa9e4066Sahrens 117999653d4eSeschrock /* 118099653d4eSeschrock * Versioning wasn't explicitly added to the label until later, so if 118199653d4eSeschrock * it's not present treat it as the initial version. 
118299653d4eSeschrock */ 118399653d4eSeschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1184e7437265Sahrens version = SPA_VERSION_INITIAL; 118599653d4eSeschrock 1186a9926bf0Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1187a9926bf0Sbonwick &spa->spa_config_txg); 1188a9926bf0Sbonwick 11890373e76bSbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1190ea8dc4b6Seschrock spa_guid_exists(pool_guid, 0)) { 1191ea8dc4b6Seschrock error = EEXIST; 1192ea8dc4b6Seschrock goto out; 1193ea8dc4b6Seschrock } 1194fa9e4066Sahrens 1195b5989ec7Seschrock spa->spa_load_guid = pool_guid; 1196b5989ec7Seschrock 119754d692b7SGeorge Wilson /* 119854d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 119954d692b7SGeorge Wilson */ 120025f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 120125f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 120254d692b7SGeorge Wilson 1203fa9e4066Sahrens /* 120499653d4eSeschrock * Parse the configuration into a vdev tree. We explicitly set the 120599653d4eSeschrock * value that will be returned by spa_version() since parsing the 120699653d4eSeschrock * configuration requires knowing the version number. 1207fa9e4066Sahrens */ 1208e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 120999653d4eSeschrock spa->spa_ubsync.ub_version = version; 121099653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 1211e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1212fa9e4066Sahrens 121399653d4eSeschrock if (error != 0) 1214ea8dc4b6Seschrock goto out; 1215fa9e4066Sahrens 12160e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1217fa9e4066Sahrens ASSERT(spa_guid(spa) == pool_guid); 1218fa9e4066Sahrens 1219fa9e4066Sahrens /* 1220fa9e4066Sahrens * Try to open all vdevs, loading each label in the process. 
1221fa9e4066Sahrens */ 1222e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12230bf246f5Smc error = vdev_open(rvd); 1224e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 12250bf246f5Smc if (error != 0) 1226ea8dc4b6Seschrock goto out; 1227fa9e4066Sahrens 1228560e6e96Seschrock /* 122977e3a39cSMark J Musante * We need to validate the vdev labels against the configuration that 123077e3a39cSMark J Musante * we have in hand, which is dependent on the setting of mosconfig. If 123177e3a39cSMark J Musante * mosconfig is true then we're validating the vdev labels based on 123277e3a39cSMark J Musante * that config. Otherwise, we're validating against the cached config 123377e3a39cSMark J Musante * (zpool.cache) that was read when we loaded the zfs module, and then 123477e3a39cSMark J Musante * later we will recursively call spa_load() and validate against 123577e3a39cSMark J Musante * the vdev config. 1236560e6e96Seschrock */ 123777e3a39cSMark J Musante spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 123877e3a39cSMark J Musante error = vdev_validate(rvd); 123977e3a39cSMark J Musante spa_config_exit(spa, SCL_ALL, FTAG); 124077e3a39cSMark J Musante if (error != 0) 124177e3a39cSMark J Musante goto out; 1242560e6e96Seschrock 1243560e6e96Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1244560e6e96Seschrock error = ENXIO; 1245560e6e96Seschrock goto out; 1246560e6e96Seschrock } 1247560e6e96Seschrock 1248fa9e4066Sahrens /* 1249fa9e4066Sahrens * Find the best uberblock. 1250fa9e4066Sahrens */ 1251e14bb325SJeff Bonwick vdev_uberblock_load(NULL, rvd, ub); 1252fa9e4066Sahrens 1253fa9e4066Sahrens /* 1254fa9e4066Sahrens * If we weren't able to find a single valid uberblock, return failure. 
1255fa9e4066Sahrens */ 1256fa9e4066Sahrens if (ub->ub_txg == 0) { 1257eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1258eaca9bbdSeschrock VDEV_AUX_CORRUPT_DATA); 1259ea8dc4b6Seschrock error = ENXIO; 1260ea8dc4b6Seschrock goto out; 1261ea8dc4b6Seschrock } 1262ea8dc4b6Seschrock 1263ea8dc4b6Seschrock /* 1264ea8dc4b6Seschrock * If the pool is newer than the code, we can't open it. 1265ea8dc4b6Seschrock */ 1266e7437265Sahrens if (ub->ub_version > SPA_VERSION) { 1267eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1268eaca9bbdSeschrock VDEV_AUX_VERSION_NEWER); 1269ea8dc4b6Seschrock error = ENOTSUP; 1270ea8dc4b6Seschrock goto out; 1271fa9e4066Sahrens } 1272fa9e4066Sahrens 1273fa9e4066Sahrens /* 1274fa9e4066Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1275fa9e4066Sahrens * incomplete configuration. 1276fa9e4066Sahrens */ 1277ecc2d604Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 1278ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1279ea8dc4b6Seschrock VDEV_AUX_BAD_GUID_SUM); 1280ea8dc4b6Seschrock error = ENXIO; 1281ea8dc4b6Seschrock goto out; 1282fa9e4066Sahrens } 1283fa9e4066Sahrens 1284fa9e4066Sahrens /* 1285fa9e4066Sahrens * Initialize internal SPA structures. 
1286fa9e4066Sahrens */ 1287fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1288fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 1289fa9e4066Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 1290ea8dc4b6Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 1291ea8dc4b6Seschrock if (error) { 1292ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1293ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1294ea8dc4b6Seschrock goto out; 1295ea8dc4b6Seschrock } 1296fa9e4066Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1297fa9e4066Sahrens 1298ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1299fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 1300ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 1301ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1302ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1303ea8dc4b6Seschrock error = EIO; 1304ea8dc4b6Seschrock goto out; 1305ea8dc4b6Seschrock } 1306fa9e4066Sahrens 1307*88ecc943SGeorge Wilson if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 1308*88ecc943SGeorge Wilson vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1309*88ecc943SGeorge Wilson VDEV_AUX_CORRUPT_DATA); 1310*88ecc943SGeorge Wilson error = EIO; 1311*88ecc943SGeorge Wilson goto out; 1312*88ecc943SGeorge Wilson } 1313*88ecc943SGeorge Wilson 1314fa9e4066Sahrens if (!mosconfig) { 131595173954Sek uint64_t hostid; 1316fa9e4066Sahrens 1317*88ecc943SGeorge Wilson if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 131877650510SLin Ling ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 131995173954Sek char *hostname; 132095173954Sek unsigned long myhostid = 0; 132195173954Sek 1322*88ecc943SGeorge Wilson VERIFY(nvlist_lookup_string(nvconfig, 132395173954Sek ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 132495173954Sek 13255679c89fSjv #ifdef _KERNEL 13265679c89fSjv myhostid = zone_get_hostid(NULL); 13275679c89fSjv #else /* _KERNEL */ 13285679c89fSjv /* 
13295679c89fSjv * We're emulating the system's hostid in userland, so 13305679c89fSjv * we can't use zone_get_hostid(). 13315679c89fSjv */ 133295173954Sek (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 13335679c89fSjv #endif /* _KERNEL */ 133417194a52Slling if (hostid != 0 && myhostid != 0 && 13355679c89fSjv hostid != myhostid) { 133695173954Sek cmn_err(CE_WARN, "pool '%s' could not be " 133795173954Sek "loaded as it was last accessed by " 133877650510SLin Ling "another system (host: %s hostid: 0x%lx). " 133995173954Sek "See: http://www.sun.com/msg/ZFS-8000-EY", 1340e14bb325SJeff Bonwick spa_name(spa), hostname, 134195173954Sek (unsigned long)hostid); 134295173954Sek error = EBADF; 134395173954Sek goto out; 134495173954Sek } 134595173954Sek } 134695173954Sek 1347*88ecc943SGeorge Wilson spa_config_set(spa, nvconfig); 1348fa9e4066Sahrens spa_unload(spa); 1349fa9e4066Sahrens spa_deactivate(spa); 13508ad4d6ddSJeff Bonwick spa_activate(spa, orig_mode); 1351fa9e4066Sahrens 1352*88ecc943SGeorge Wilson return (spa_load(spa, nvconfig, state, B_TRUE)); 1353fa9e4066Sahrens } 1354fa9e4066Sahrens 1355ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1356fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 1357ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 1358ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1359ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1360ea8dc4b6Seschrock error = EIO; 1361ea8dc4b6Seschrock goto out; 1362ea8dc4b6Seschrock } 1363fa9e4066Sahrens 136499653d4eSeschrock /* 136599653d4eSeschrock * Load the bit that tells us to use the new accounting function 136699653d4eSeschrock * (raid-z deflation). If we have an older pool, this will not 136799653d4eSeschrock * be present. 
136899653d4eSeschrock */ 136999653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, 137099653d4eSeschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 137199653d4eSeschrock sizeof (uint64_t), 1, &spa->spa_deflate); 137299653d4eSeschrock if (error != 0 && error != ENOENT) { 137399653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 137499653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 137599653d4eSeschrock error = EIO; 137699653d4eSeschrock goto out; 137799653d4eSeschrock } 137899653d4eSeschrock 1379fa9e4066Sahrens /* 1380ea8dc4b6Seschrock * Load the persistent error log. If we have an older pool, this will 1381ea8dc4b6Seschrock * not be present. 1382fa9e4066Sahrens */ 1383ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1384ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 1385ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 1386d80c45e0Sbonwick if (error != 0 && error != ENOENT) { 1387ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1388ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1389ea8dc4b6Seschrock error = EIO; 1390ea8dc4b6Seschrock goto out; 1391ea8dc4b6Seschrock } 1392ea8dc4b6Seschrock 1393ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1394ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 1395ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 1396ea8dc4b6Seschrock if (error != 0 && error != ENOENT) { 1397ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1398ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1399ea8dc4b6Seschrock error = EIO; 1400ea8dc4b6Seschrock goto out; 1401ea8dc4b6Seschrock } 1402ea8dc4b6Seschrock 140306eeb2adSek /* 140406eeb2adSek * Load the history object. If we have an older pool, this 140506eeb2adSek * will not be present. 
140606eeb2adSek */ 140706eeb2adSek error = zap_lookup(spa->spa_meta_objset, 140806eeb2adSek DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 140906eeb2adSek sizeof (uint64_t), 1, &spa->spa_history); 141006eeb2adSek if (error != 0 && error != ENOENT) { 141106eeb2adSek vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 141206eeb2adSek VDEV_AUX_CORRUPT_DATA); 141306eeb2adSek error = EIO; 141406eeb2adSek goto out; 141506eeb2adSek } 141606eeb2adSek 141799653d4eSeschrock /* 141899653d4eSeschrock * Load any hot spares for this pool. 141999653d4eSeschrock */ 142099653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1421fa94a07fSbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 142299653d4eSeschrock if (error != 0 && error != ENOENT) { 142399653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 142499653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 142599653d4eSeschrock error = EIO; 142699653d4eSeschrock goto out; 142799653d4eSeschrock } 142899653d4eSeschrock if (error == 0) { 1429e7437265Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 1430fa94a07fSbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 1431fa94a07fSbrendan &spa->spa_spares.sav_config) != 0) { 143299653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 143399653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 143499653d4eSeschrock error = EIO; 143599653d4eSeschrock goto out; 143699653d4eSeschrock } 143799653d4eSeschrock 1438e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 143999653d4eSeschrock spa_load_spares(spa); 1440e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 144199653d4eSeschrock } 144299653d4eSeschrock 1443fa94a07fSbrendan /* 1444fa94a07fSbrendan * Load any level 2 ARC devices for this pool. 
1445fa94a07fSbrendan */ 1446fa94a07fSbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1447fa94a07fSbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 1448fa94a07fSbrendan &spa->spa_l2cache.sav_object); 1449fa94a07fSbrendan if (error != 0 && error != ENOENT) { 1450fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1451fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1452fa94a07fSbrendan error = EIO; 1453fa94a07fSbrendan goto out; 1454fa94a07fSbrendan } 1455fa94a07fSbrendan if (error == 0) { 1456fa94a07fSbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 1457fa94a07fSbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 1458fa94a07fSbrendan &spa->spa_l2cache.sav_config) != 0) { 1459fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, 1460fa94a07fSbrendan VDEV_STATE_CANT_OPEN, 1461fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1462fa94a07fSbrendan error = EIO; 1463fa94a07fSbrendan goto out; 1464fa94a07fSbrendan } 1465fa94a07fSbrendan 1466e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1467fa94a07fSbrendan spa_load_l2cache(spa); 1468e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1469fa94a07fSbrendan } 1470fa94a07fSbrendan 1471*88ecc943SGeorge Wilson VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, 1472*88ecc943SGeorge Wilson &nvroot) == 0); 1473*88ecc943SGeorge Wilson spa_load_log_state(spa, nvroot); 1474*88ecc943SGeorge Wilson nvlist_free(nvconfig); 1475e6ca193dSGeorge Wilson 1476b87f3af3Sperrin if (spa_check_logs(spa)) { 1477b87f3af3Sperrin vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1478b87f3af3Sperrin VDEV_AUX_BAD_LOG); 1479b87f3af3Sperrin error = ENXIO; 1480b87f3af3Sperrin ereport = FM_EREPORT_ZFS_LOG_REPLAY; 1481b87f3af3Sperrin goto out; 1482b87f3af3Sperrin } 1483b87f3af3Sperrin 1484b87f3af3Sperrin 1485990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 1486ecd6cf80Smarks 1487b1b8ab34Slling error = zap_lookup(spa->spa_meta_objset, 
DMU_POOL_DIRECTORY_OBJECT, 1488b1b8ab34Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 1489b1b8ab34Slling 1490b1b8ab34Slling if (error && error != ENOENT) { 1491b1b8ab34Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1492b1b8ab34Slling VDEV_AUX_CORRUPT_DATA); 1493b1b8ab34Slling error = EIO; 1494b1b8ab34Slling goto out; 1495b1b8ab34Slling } 1496b1b8ab34Slling 1497b1b8ab34Slling if (error == 0) { 1498b1b8ab34Slling (void) zap_lookup(spa->spa_meta_objset, 1499b1b8ab34Slling spa->spa_pool_props_object, 15003d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 1501b1b8ab34Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 15023d7072f8Seschrock (void) zap_lookup(spa->spa_meta_objset, 15033d7072f8Seschrock spa->spa_pool_props_object, 15043d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 15053d7072f8Seschrock sizeof (uint64_t), 1, &autoreplace); 1506ecd6cf80Smarks (void) zap_lookup(spa->spa_meta_objset, 1507ecd6cf80Smarks spa->spa_pool_props_object, 1508ecd6cf80Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 1509ecd6cf80Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 15100a4e9518Sgw (void) zap_lookup(spa->spa_meta_objset, 15110a4e9518Sgw spa->spa_pool_props_object, 15120a4e9518Sgw zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 15130a4e9518Sgw sizeof (uint64_t), 1, &spa->spa_failmode); 1514573ca77eSGeorge Wilson (void) zap_lookup(spa->spa_meta_objset, 1515573ca77eSGeorge Wilson spa->spa_pool_props_object, 1516573ca77eSGeorge Wilson zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 1517573ca77eSGeorge Wilson sizeof (uint64_t), 1, &spa->spa_autoexpand); 1518b1b8ab34Slling } 1519b1b8ab34Slling 15203d7072f8Seschrock /* 15213d7072f8Seschrock * If the 'autoreplace' property is set, then post a resource notifying 15223d7072f8Seschrock * the ZFS DE that it should not issue any faults for unopenable 15233d7072f8Seschrock * devices. 
We also iterate over the vdevs, and post a sysevent for any 15243d7072f8Seschrock * unopenable vdevs so that the normal autoreplace handler can take 15253d7072f8Seschrock * over. 15263d7072f8Seschrock */ 1527b01c3b58Seschrock if (autoreplace && state != SPA_LOAD_TRYIMPORT) 15283d7072f8Seschrock spa_check_removed(spa->spa_root_vdev); 15293d7072f8Seschrock 1530ea8dc4b6Seschrock /* 1531560e6e96Seschrock * Load the vdev state for all toplevel vdevs. 1532ea8dc4b6Seschrock */ 1533560e6e96Seschrock vdev_load(rvd); 15340373e76bSbonwick 1535fa9e4066Sahrens /* 1536fa9e4066Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1537fa9e4066Sahrens */ 1538e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1539fa9e4066Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 1540e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1541fa9e4066Sahrens 1542fa9e4066Sahrens /* 1543fa9e4066Sahrens * Check the state of the root vdev. If it can't be opened, it 1544fa9e4066Sahrens * indicates one or more toplevel vdevs are faulted. 1545fa9e4066Sahrens */ 1546ea8dc4b6Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1547ea8dc4b6Seschrock error = ENXIO; 1548ea8dc4b6Seschrock goto out; 1549ea8dc4b6Seschrock } 1550fa9e4066Sahrens 15518ad4d6ddSJeff Bonwick if (spa_writeable(spa)) { 15525dabedeeSbonwick dmu_tx_t *tx; 15530373e76bSbonwick int need_update = B_FALSE; 15548ad4d6ddSJeff Bonwick 15558ad4d6ddSJeff Bonwick ASSERT(state != SPA_LOAD_TRYIMPORT); 15565dabedeeSbonwick 15570373e76bSbonwick /* 15580373e76bSbonwick * Claim log blocks that haven't been committed yet. 15590373e76bSbonwick * This must all happen in a single txg. 
15600373e76bSbonwick */ 15615dabedeeSbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1562fa9e4066Sahrens spa_first_txg(spa)); 1563e14bb325SJeff Bonwick (void) dmu_objset_find(spa_name(spa), 15640b69c2f0Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1565fa9e4066Sahrens dmu_tx_commit(tx); 1566fa9e4066Sahrens 1567e6ca193dSGeorge Wilson spa->spa_log_state = SPA_LOG_GOOD; 1568fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 1569fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 1570fa9e4066Sahrens 1571fa9e4066Sahrens /* 1572fa9e4066Sahrens * Wait for all claims to sync. 1573fa9e4066Sahrens */ 1574fa9e4066Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 15750e34b6a7Sbonwick 15760e34b6a7Sbonwick /* 15770373e76bSbonwick * If the config cache is stale, or we have uninitialized 15780373e76bSbonwick * metaslabs (see spa_vdev_add()), then update the config. 1579bc758434SLin Ling * 1580bc758434SLin Ling * If spa_load_verbatim is true, trust the current 1581bc758434SLin Ling * in-core spa_config and update the disk labels. 15820e34b6a7Sbonwick */ 15830373e76bSbonwick if (config_cache_txg != spa->spa_config_txg || 1584bc758434SLin Ling state == SPA_LOAD_IMPORT || spa->spa_load_verbatim) 15850373e76bSbonwick need_update = B_TRUE; 15860373e76bSbonwick 15878ad4d6ddSJeff Bonwick for (int c = 0; c < rvd->vdev_children; c++) 15880373e76bSbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 15890373e76bSbonwick need_update = B_TRUE; 15900e34b6a7Sbonwick 15910e34b6a7Sbonwick /* 15920373e76bSbonwick * Update the config cache asychronously in case we're the 15930373e76bSbonwick * root pool, in which case the config cache isn't writable yet. 15940e34b6a7Sbonwick */ 15950373e76bSbonwick if (need_update) 15960373e76bSbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 15978ad4d6ddSJeff Bonwick 15988ad4d6ddSJeff Bonwick /* 15998ad4d6ddSJeff Bonwick * Check all DTLs to see if anything needs resilvering. 
16008ad4d6ddSJeff Bonwick */ 16018ad4d6ddSJeff Bonwick if (vdev_resilver_needed(rvd, NULL, NULL)) 16028ad4d6ddSJeff Bonwick spa_async_request(spa, SPA_ASYNC_RESILVER); 1603503ad85cSMatthew Ahrens 1604503ad85cSMatthew Ahrens /* 1605503ad85cSMatthew Ahrens * Delete any inconsistent datasets. 1606503ad85cSMatthew Ahrens */ 1607503ad85cSMatthew Ahrens (void) dmu_objset_find(spa_name(spa), 1608503ad85cSMatthew Ahrens dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 1609ca45db41SChris Kirby 1610ca45db41SChris Kirby /* 1611ca45db41SChris Kirby * Clean up any stale temporary dataset userrefs. 1612ca45db41SChris Kirby */ 1613ca45db41SChris Kirby dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 1614fa9e4066Sahrens } 1615fa9e4066Sahrens 1616ea8dc4b6Seschrock error = 0; 1617ea8dc4b6Seschrock out: 1618088f3894Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 161999653d4eSeschrock if (error && error != EBADF) 1620b87f3af3Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 1621ea8dc4b6Seschrock spa->spa_load_state = SPA_LOAD_NONE; 1622ea8dc4b6Seschrock spa->spa_ena = 0; 1623ea8dc4b6Seschrock 1624ea8dc4b6Seschrock return (error); 1625fa9e4066Sahrens } 1626fa9e4066Sahrens 1627fa9e4066Sahrens /* 1628fa9e4066Sahrens * Pool Open/Import 1629fa9e4066Sahrens * 1630fa9e4066Sahrens * The import case is identical to an open except that the configuration is sent 1631fa9e4066Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1632fa9e4066Sahrens * case of an open, the pool configuration will exist in the 16333d7072f8Seschrock * POOL_STATE_UNINITIALIZED state. 1634fa9e4066Sahrens * 1635fa9e4066Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1636fa9e4066Sahrens * the same time open the pool, without having to keep around the spa_t in some 1637fa9e4066Sahrens * ambiguous state. 
1638fa9e4066Sahrens */ 1639fa9e4066Sahrens static int 1640fa9e4066Sahrens spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) 1641fa9e4066Sahrens { 1642fa9e4066Sahrens spa_t *spa; 1643fa9e4066Sahrens int error; 1644fa9e4066Sahrens int locked = B_FALSE; 1645fa9e4066Sahrens 1646fa9e4066Sahrens *spapp = NULL; 1647fa9e4066Sahrens 1648fa9e4066Sahrens /* 1649fa9e4066Sahrens * As disgusting as this is, we need to support recursive calls to this 1650fa9e4066Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 1651fa9e4066Sahrens * up calling spa_open() again. The real fix is to figure out how to 1652fa9e4066Sahrens * avoid dsl_dir_open() calling this in the first place. 1653fa9e4066Sahrens */ 1654fa9e4066Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 1655fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 1656fa9e4066Sahrens locked = B_TRUE; 1657fa9e4066Sahrens } 1658fa9e4066Sahrens 1659fa9e4066Sahrens if ((spa = spa_lookup(pool)) == NULL) { 1660fa9e4066Sahrens if (locked) 1661fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1662fa9e4066Sahrens return (ENOENT); 1663fa9e4066Sahrens } 1664fa9e4066Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1665fa9e4066Sahrens 16668ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 1667fa9e4066Sahrens 16680373e76bSbonwick error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); 1669fa9e4066Sahrens 1670fa9e4066Sahrens if (error == EBADF) { 1671fa9e4066Sahrens /* 1672560e6e96Seschrock * If vdev_validate() returns failure (indicated by 1673560e6e96Seschrock * EBADF), it indicates that one of the vdevs indicates 1674560e6e96Seschrock * that the pool has been exported or destroyed. If 1675560e6e96Seschrock * this is the case, the config cache is out of sync and 1676560e6e96Seschrock * we should remove the pool from the namespace. 
1677fa9e4066Sahrens */ 1678fa9e4066Sahrens spa_unload(spa); 1679fa9e4066Sahrens spa_deactivate(spa); 1680c5904d13Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 1681fa9e4066Sahrens spa_remove(spa); 1682fa9e4066Sahrens if (locked) 1683fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1684fa9e4066Sahrens return (ENOENT); 1685ea8dc4b6Seschrock } 1686ea8dc4b6Seschrock 1687ea8dc4b6Seschrock if (error) { 1688fa9e4066Sahrens /* 1689fa9e4066Sahrens * We can't open the pool, but we still have useful 1690fa9e4066Sahrens * information: the state of each vdev after the 1691fa9e4066Sahrens * attempted vdev_open(). Return this to the user. 1692fa9e4066Sahrens */ 1693e14bb325SJeff Bonwick if (config != NULL && spa->spa_root_vdev != NULL) 1694fa9e4066Sahrens *config = spa_config_generate(spa, NULL, -1ULL, 1695fa9e4066Sahrens B_TRUE); 1696fa9e4066Sahrens spa_unload(spa); 1697fa9e4066Sahrens spa_deactivate(spa); 1698ea8dc4b6Seschrock spa->spa_last_open_failed = B_TRUE; 1699fa9e4066Sahrens if (locked) 1700fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1701fa9e4066Sahrens *spapp = NULL; 1702fa9e4066Sahrens return (error); 1703ea8dc4b6Seschrock } else { 1704ea8dc4b6Seschrock spa->spa_last_open_failed = B_FALSE; 1705fa9e4066Sahrens } 1706fa9e4066Sahrens } 1707fa9e4066Sahrens 1708fa9e4066Sahrens spa_open_ref(spa, tag); 17093d7072f8Seschrock 1710fa9e4066Sahrens if (locked) 1711fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1712fa9e4066Sahrens 1713fa9e4066Sahrens *spapp = spa; 1714fa9e4066Sahrens 1715e14bb325SJeff Bonwick if (config != NULL) 1716fa9e4066Sahrens *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1717fa9e4066Sahrens 1718fa9e4066Sahrens return (0); 1719fa9e4066Sahrens } 1720fa9e4066Sahrens 1721fa9e4066Sahrens int 1722fa9e4066Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 1723fa9e4066Sahrens { 1724fa9e4066Sahrens return (spa_open_common(name, spapp, tag, NULL)); 1725fa9e4066Sahrens } 1726fa9e4066Sahrens 1727ea8dc4b6Seschrock /* 1728ea8dc4b6Seschrock * 
Lookup the given spa_t, incrementing the inject count in the process, 1729ea8dc4b6Seschrock * preventing it from being exported or destroyed. 1730ea8dc4b6Seschrock */ 1731ea8dc4b6Seschrock spa_t * 1732ea8dc4b6Seschrock spa_inject_addref(char *name) 1733ea8dc4b6Seschrock { 1734ea8dc4b6Seschrock spa_t *spa; 1735ea8dc4b6Seschrock 1736ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 1737ea8dc4b6Seschrock if ((spa = spa_lookup(name)) == NULL) { 1738ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 1739ea8dc4b6Seschrock return (NULL); 1740ea8dc4b6Seschrock } 1741ea8dc4b6Seschrock spa->spa_inject_ref++; 1742ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 1743ea8dc4b6Seschrock 1744ea8dc4b6Seschrock return (spa); 1745ea8dc4b6Seschrock } 1746ea8dc4b6Seschrock 1747ea8dc4b6Seschrock void 1748ea8dc4b6Seschrock spa_inject_delref(spa_t *spa) 1749ea8dc4b6Seschrock { 1750ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 1751ea8dc4b6Seschrock spa->spa_inject_ref--; 1752ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 1753ea8dc4b6Seschrock } 1754ea8dc4b6Seschrock 1755fa94a07fSbrendan /* 1756fa94a07fSbrendan * Add spares device information to the nvlist. 
1757fa94a07fSbrendan */ 175899653d4eSeschrock static void 175999653d4eSeschrock spa_add_spares(spa_t *spa, nvlist_t *config) 176099653d4eSeschrock { 176199653d4eSeschrock nvlist_t **spares; 176299653d4eSeschrock uint_t i, nspares; 176399653d4eSeschrock nvlist_t *nvroot; 176499653d4eSeschrock uint64_t guid; 176599653d4eSeschrock vdev_stat_t *vs; 176699653d4eSeschrock uint_t vsc; 176739c23413Seschrock uint64_t pool; 176899653d4eSeschrock 17696809eb4eSEric Schrock ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 17706809eb4eSEric Schrock 1771fa94a07fSbrendan if (spa->spa_spares.sav_count == 0) 177299653d4eSeschrock return; 177399653d4eSeschrock 177499653d4eSeschrock VERIFY(nvlist_lookup_nvlist(config, 177599653d4eSeschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 1776fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 177799653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 177899653d4eSeschrock if (nspares != 0) { 177999653d4eSeschrock VERIFY(nvlist_add_nvlist_array(nvroot, 178099653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 178199653d4eSeschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 178299653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 178399653d4eSeschrock 178499653d4eSeschrock /* 178599653d4eSeschrock * Go through and find any spares which have since been 178699653d4eSeschrock * repurposed as an active spare. If this is the case, update 178799653d4eSeschrock * their status appropriately. 
178899653d4eSeschrock */ 178999653d4eSeschrock for (i = 0; i < nspares; i++) { 179099653d4eSeschrock VERIFY(nvlist_lookup_uint64(spares[i], 179199653d4eSeschrock ZPOOL_CONFIG_GUID, &guid) == 0); 179289a89ebfSlling if (spa_spare_exists(guid, &pool, NULL) && 179389a89ebfSlling pool != 0ULL) { 179499653d4eSeschrock VERIFY(nvlist_lookup_uint64_array( 179599653d4eSeschrock spares[i], ZPOOL_CONFIG_STATS, 179699653d4eSeschrock (uint64_t **)&vs, &vsc) == 0); 179799653d4eSeschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 179899653d4eSeschrock vs->vs_aux = VDEV_AUX_SPARED; 179999653d4eSeschrock } 180099653d4eSeschrock } 180199653d4eSeschrock } 180299653d4eSeschrock } 180399653d4eSeschrock 1804fa94a07fSbrendan /* 1805fa94a07fSbrendan * Add l2cache device information to the nvlist, including vdev stats. 1806fa94a07fSbrendan */ 1807fa94a07fSbrendan static void 1808fa94a07fSbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 1809fa94a07fSbrendan { 1810fa94a07fSbrendan nvlist_t **l2cache; 1811fa94a07fSbrendan uint_t i, j, nl2cache; 1812fa94a07fSbrendan nvlist_t *nvroot; 1813fa94a07fSbrendan uint64_t guid; 1814fa94a07fSbrendan vdev_t *vd; 1815fa94a07fSbrendan vdev_stat_t *vs; 1816fa94a07fSbrendan uint_t vsc; 1817fa94a07fSbrendan 18186809eb4eSEric Schrock ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 18196809eb4eSEric Schrock 1820fa94a07fSbrendan if (spa->spa_l2cache.sav_count == 0) 1821fa94a07fSbrendan return; 1822fa94a07fSbrendan 1823fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist(config, 1824fa94a07fSbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 1825fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 1826fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 1827fa94a07fSbrendan if (nl2cache != 0) { 1828fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 1829fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 1830fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 1831fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, 
&l2cache, &nl2cache) == 0); 1832fa94a07fSbrendan 1833fa94a07fSbrendan /* 1834fa94a07fSbrendan * Update level 2 cache device stats. 1835fa94a07fSbrendan */ 1836fa94a07fSbrendan 1837fa94a07fSbrendan for (i = 0; i < nl2cache; i++) { 1838fa94a07fSbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 1839fa94a07fSbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 1840fa94a07fSbrendan 1841fa94a07fSbrendan vd = NULL; 1842fa94a07fSbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 1843fa94a07fSbrendan if (guid == 1844fa94a07fSbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 1845fa94a07fSbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 1846fa94a07fSbrendan break; 1847fa94a07fSbrendan } 1848fa94a07fSbrendan } 1849fa94a07fSbrendan ASSERT(vd != NULL); 1850fa94a07fSbrendan 1851fa94a07fSbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 1852fa94a07fSbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 1853fa94a07fSbrendan vdev_get_stats(vd, vs); 1854fa94a07fSbrendan } 1855fa94a07fSbrendan } 1856fa94a07fSbrendan } 1857fa94a07fSbrendan 1858fa9e4066Sahrens int 1859ea8dc4b6Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 1860fa9e4066Sahrens { 1861fa9e4066Sahrens int error; 1862fa9e4066Sahrens spa_t *spa; 1863fa9e4066Sahrens 1864fa9e4066Sahrens *config = NULL; 1865fa9e4066Sahrens error = spa_open_common(name, &spa, FTAG, config); 1866fa9e4066Sahrens 18676809eb4eSEric Schrock if (spa != NULL) { 18686809eb4eSEric Schrock /* 18696809eb4eSEric Schrock * This still leaves a window of inconsistency where the spares 18706809eb4eSEric Schrock * or l2cache devices could change and the config would be 18716809eb4eSEric Schrock * self-inconsistent. 
18726809eb4eSEric Schrock */ 18736809eb4eSEric Schrock spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 1874ea8dc4b6Seschrock 18756809eb4eSEric Schrock if (*config != NULL) { 1876e14bb325SJeff Bonwick VERIFY(nvlist_add_uint64(*config, 18776809eb4eSEric Schrock ZPOOL_CONFIG_ERRCOUNT, 18786809eb4eSEric Schrock spa_get_errlog_size(spa)) == 0); 1879e14bb325SJeff Bonwick 18806809eb4eSEric Schrock if (spa_suspended(spa)) 18816809eb4eSEric Schrock VERIFY(nvlist_add_uint64(*config, 18826809eb4eSEric Schrock ZPOOL_CONFIG_SUSPENDED, 18836809eb4eSEric Schrock spa->spa_failmode) == 0); 18846809eb4eSEric Schrock 18856809eb4eSEric Schrock spa_add_spares(spa, *config); 18866809eb4eSEric Schrock spa_add_l2cache(spa, *config); 18876809eb4eSEric Schrock } 188899653d4eSeschrock } 188999653d4eSeschrock 1890ea8dc4b6Seschrock /* 1891ea8dc4b6Seschrock * We want to get the alternate root even for faulted pools, so we cheat 1892ea8dc4b6Seschrock * and call spa_lookup() directly. 1893ea8dc4b6Seschrock */ 1894ea8dc4b6Seschrock if (altroot) { 1895ea8dc4b6Seschrock if (spa == NULL) { 1896ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 1897ea8dc4b6Seschrock spa = spa_lookup(name); 1898ea8dc4b6Seschrock if (spa) 1899ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 1900ea8dc4b6Seschrock else 1901ea8dc4b6Seschrock altroot[0] = '\0'; 1902ea8dc4b6Seschrock spa = NULL; 1903ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 1904ea8dc4b6Seschrock } else { 1905ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 1906ea8dc4b6Seschrock } 1907ea8dc4b6Seschrock } 1908ea8dc4b6Seschrock 19096809eb4eSEric Schrock if (spa != NULL) { 19106809eb4eSEric Schrock spa_config_exit(spa, SCL_CONFIG, FTAG); 1911fa9e4066Sahrens spa_close(spa, FTAG); 19126809eb4eSEric Schrock } 1913fa9e4066Sahrens 1914fa9e4066Sahrens return (error); 1915fa9e4066Sahrens } 1916fa9e4066Sahrens 191799653d4eSeschrock /* 1918fa94a07fSbrendan * Validate that the auxiliary device array is well formed. 
We must have an 1919fa94a07fSbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 1920fa94a07fSbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 1921fa94a07fSbrendan * specified, as long as they are well-formed. 192299653d4eSeschrock */ 192399653d4eSeschrock static int 1924fa94a07fSbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 1925fa94a07fSbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 1926fa94a07fSbrendan vdev_labeltype_t label) 192799653d4eSeschrock { 1928fa94a07fSbrendan nvlist_t **dev; 1929fa94a07fSbrendan uint_t i, ndev; 193099653d4eSeschrock vdev_t *vd; 193199653d4eSeschrock int error; 193299653d4eSeschrock 1933e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1934e14bb325SJeff Bonwick 193599653d4eSeschrock /* 1936fa94a07fSbrendan * It's acceptable to have no devs specified. 193799653d4eSeschrock */ 1938fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 193999653d4eSeschrock return (0); 194099653d4eSeschrock 1941fa94a07fSbrendan if (ndev == 0) 194299653d4eSeschrock return (EINVAL); 194399653d4eSeschrock 194499653d4eSeschrock /* 1945fa94a07fSbrendan * Make sure the pool is formatted with a version that supports this 1946fa94a07fSbrendan * device type. 194799653d4eSeschrock */ 1948fa94a07fSbrendan if (spa_version(spa) < version) 194999653d4eSeschrock return (ENOTSUP); 195099653d4eSeschrock 195139c23413Seschrock /* 1952fa94a07fSbrendan * Set the pending device list so we correctly handle device in-use 195339c23413Seschrock * checking. 
195439c23413Seschrock */ 1955fa94a07fSbrendan sav->sav_pending = dev; 1956fa94a07fSbrendan sav->sav_npending = ndev; 195739c23413Seschrock 1958fa94a07fSbrendan for (i = 0; i < ndev; i++) { 1959fa94a07fSbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 196099653d4eSeschrock mode)) != 0) 196139c23413Seschrock goto out; 196299653d4eSeschrock 196399653d4eSeschrock if (!vd->vdev_ops->vdev_op_leaf) { 196499653d4eSeschrock vdev_free(vd); 196539c23413Seschrock error = EINVAL; 196639c23413Seschrock goto out; 196799653d4eSeschrock } 196899653d4eSeschrock 1969fa94a07fSbrendan /* 1970e14bb325SJeff Bonwick * The L2ARC currently only supports disk devices in 1971e14bb325SJeff Bonwick * kernel context. For user-level testing, we allow it. 1972fa94a07fSbrendan */ 1973e14bb325SJeff Bonwick #ifdef _KERNEL 1974fa94a07fSbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 1975fa94a07fSbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 1976fa94a07fSbrendan error = ENOTBLK; 1977fa94a07fSbrendan goto out; 1978fa94a07fSbrendan } 1979e14bb325SJeff Bonwick #endif 198099653d4eSeschrock vd->vdev_top = vd; 198199653d4eSeschrock 198239c23413Seschrock if ((error = vdev_open(vd)) == 0 && 1983fa94a07fSbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 1984fa94a07fSbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 198539c23413Seschrock vd->vdev_guid) == 0); 198639c23413Seschrock } 198799653d4eSeschrock 198899653d4eSeschrock vdev_free(vd); 198939c23413Seschrock 1990fa94a07fSbrendan if (error && 1991fa94a07fSbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 199239c23413Seschrock goto out; 199339c23413Seschrock else 199439c23413Seschrock error = 0; 199599653d4eSeschrock } 199699653d4eSeschrock 199739c23413Seschrock out: 1998fa94a07fSbrendan sav->sav_pending = NULL; 1999fa94a07fSbrendan sav->sav_npending = 0; 200039c23413Seschrock return (error); 200199653d4eSeschrock } 200299653d4eSeschrock 2003fa94a07fSbrendan static int 
2004fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 2005fa94a07fSbrendan { 2006fa94a07fSbrendan int error; 2007fa94a07fSbrendan 2008e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 2009e14bb325SJeff Bonwick 2010fa94a07fSbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 2011fa94a07fSbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 2012fa94a07fSbrendan VDEV_LABEL_SPARE)) != 0) { 2013fa94a07fSbrendan return (error); 2014fa94a07fSbrendan } 2015fa94a07fSbrendan 2016fa94a07fSbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 2017fa94a07fSbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 2018fa94a07fSbrendan VDEV_LABEL_L2CACHE)); 2019fa94a07fSbrendan } 2020fa94a07fSbrendan 2021fa94a07fSbrendan static void 2022fa94a07fSbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 2023fa94a07fSbrendan const char *config) 2024fa94a07fSbrendan { 2025fa94a07fSbrendan int i; 2026fa94a07fSbrendan 2027fa94a07fSbrendan if (sav->sav_config != NULL) { 2028fa94a07fSbrendan nvlist_t **olddevs; 2029fa94a07fSbrendan uint_t oldndevs; 2030fa94a07fSbrendan nvlist_t **newdevs; 2031fa94a07fSbrendan 2032fa94a07fSbrendan /* 2033fa94a07fSbrendan * Generate new dev list by concatentating with the 2034fa94a07fSbrendan * current dev list. 
2035fa94a07fSbrendan */ 2036fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 2037fa94a07fSbrendan &olddevs, &oldndevs) == 0); 2038fa94a07fSbrendan 2039fa94a07fSbrendan newdevs = kmem_alloc(sizeof (void *) * 2040fa94a07fSbrendan (ndevs + oldndevs), KM_SLEEP); 2041fa94a07fSbrendan for (i = 0; i < oldndevs; i++) 2042fa94a07fSbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 2043fa94a07fSbrendan KM_SLEEP) == 0); 2044fa94a07fSbrendan for (i = 0; i < ndevs; i++) 2045fa94a07fSbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 2046fa94a07fSbrendan KM_SLEEP) == 0); 2047fa94a07fSbrendan 2048fa94a07fSbrendan VERIFY(nvlist_remove(sav->sav_config, config, 2049fa94a07fSbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 2050fa94a07fSbrendan 2051fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 2052fa94a07fSbrendan config, newdevs, ndevs + oldndevs) == 0); 2053fa94a07fSbrendan for (i = 0; i < oldndevs + ndevs; i++) 2054fa94a07fSbrendan nvlist_free(newdevs[i]); 2055fa94a07fSbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 2056fa94a07fSbrendan } else { 2057fa94a07fSbrendan /* 2058fa94a07fSbrendan * Generate a new dev list. 
2059fa94a07fSbrendan */ 2060fa94a07fSbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 2061fa94a07fSbrendan KM_SLEEP) == 0); 2062fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 2063fa94a07fSbrendan devs, ndevs) == 0); 2064fa94a07fSbrendan } 2065fa94a07fSbrendan } 2066fa94a07fSbrendan 2067fa94a07fSbrendan /* 2068fa94a07fSbrendan * Stop and drop level 2 ARC devices 2069fa94a07fSbrendan */ 2070fa94a07fSbrendan void 2071fa94a07fSbrendan spa_l2cache_drop(spa_t *spa) 2072fa94a07fSbrendan { 2073fa94a07fSbrendan vdev_t *vd; 2074fa94a07fSbrendan int i; 2075fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 2076fa94a07fSbrendan 2077fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) { 2078fa94a07fSbrendan uint64_t pool; 2079fa94a07fSbrendan 2080fa94a07fSbrendan vd = sav->sav_vdevs[i]; 2081fa94a07fSbrendan ASSERT(vd != NULL); 2082fa94a07fSbrendan 20838ad4d6ddSJeff Bonwick if (spa_l2cache_exists(vd->vdev_guid, &pool) && 20848ad4d6ddSJeff Bonwick pool != 0ULL && l2arc_vdev_present(vd)) 2085fa94a07fSbrendan l2arc_remove_vdev(vd); 2086fa94a07fSbrendan if (vd->vdev_isl2cache) 2087fa94a07fSbrendan spa_l2cache_remove(vd); 2088fa94a07fSbrendan vdev_clear_stats(vd); 2089fa94a07fSbrendan (void) vdev_close(vd); 2090fa94a07fSbrendan } 2091fa94a07fSbrendan } 2092fa94a07fSbrendan 2093fa9e4066Sahrens /* 2094fa9e4066Sahrens * Pool Creation 2095fa9e4066Sahrens */ 2096fa9e4066Sahrens int 2097990b4856Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 20980a48a24eStimh const char *history_str, nvlist_t *zplprops) 2099fa9e4066Sahrens { 2100fa9e4066Sahrens spa_t *spa; 2101990b4856Slling char *altroot = NULL; 21020373e76bSbonwick vdev_t *rvd; 2103fa9e4066Sahrens dsl_pool_t *dp; 2104fa9e4066Sahrens dmu_tx_t *tx; 2105573ca77eSGeorge Wilson int error = 0; 2106fa9e4066Sahrens uint64_t txg = TXG_INITIAL; 2107fa94a07fSbrendan nvlist_t **spares, **l2cache; 2108fa94a07fSbrendan uint_t nspares, nl2cache; 2109990b4856Slling uint64_t version; 
2110fa9e4066Sahrens 2111fa9e4066Sahrens /* 2112fa9e4066Sahrens * If this pool already exists, return failure. 2113fa9e4066Sahrens */ 2114fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 2115fa9e4066Sahrens if (spa_lookup(pool) != NULL) { 2116fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2117fa9e4066Sahrens return (EEXIST); 2118fa9e4066Sahrens } 2119fa9e4066Sahrens 2120fa9e4066Sahrens /* 2121fa9e4066Sahrens * Allocate a new spa_t structure. 2122fa9e4066Sahrens */ 2123990b4856Slling (void) nvlist_lookup_string(props, 2124990b4856Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 21250373e76bSbonwick spa = spa_add(pool, altroot); 21268ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 2127fa9e4066Sahrens 2128fa9e4066Sahrens spa->spa_uberblock.ub_txg = txg - 1; 2129990b4856Slling 2130990b4856Slling if (props && (error = spa_prop_validate(spa, props))) { 2131990b4856Slling spa_deactivate(spa); 2132990b4856Slling spa_remove(spa); 2133c5904d13Seschrock mutex_exit(&spa_namespace_lock); 2134990b4856Slling return (error); 2135990b4856Slling } 2136990b4856Slling 2137990b4856Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 2138990b4856Slling &version) != 0) 2139990b4856Slling version = SPA_VERSION; 2140990b4856Slling ASSERT(version <= SPA_VERSION); 2141990b4856Slling spa->spa_uberblock.ub_version = version; 2142fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 2143fa9e4066Sahrens 214454d692b7SGeorge Wilson /* 214554d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 214654d692b7SGeorge Wilson */ 214725f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 214825f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 214954d692b7SGeorge Wilson 21500373e76bSbonwick /* 21510373e76bSbonwick * Create the root vdev. 
21520373e76bSbonwick */ 2153e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21540373e76bSbonwick 215599653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 21560373e76bSbonwick 215799653d4eSeschrock ASSERT(error != 0 || rvd != NULL); 215899653d4eSeschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 21590373e76bSbonwick 2160b7b97454Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 21610373e76bSbonwick error = EINVAL; 216299653d4eSeschrock 216399653d4eSeschrock if (error == 0 && 216499653d4eSeschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 2165fa94a07fSbrendan (error = spa_validate_aux(spa, nvroot, txg, 216699653d4eSeschrock VDEV_ALLOC_ADD)) == 0) { 2167573ca77eSGeorge Wilson for (int c = 0; c < rvd->vdev_children; c++) { 2168573ca77eSGeorge Wilson vdev_metaslab_set_size(rvd->vdev_child[c]); 2169573ca77eSGeorge Wilson vdev_expand(rvd->vdev_child[c], txg); 2170573ca77eSGeorge Wilson } 21710373e76bSbonwick } 21720373e76bSbonwick 2173e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2174fa9e4066Sahrens 217599653d4eSeschrock if (error != 0) { 2176fa9e4066Sahrens spa_unload(spa); 2177fa9e4066Sahrens spa_deactivate(spa); 2178fa9e4066Sahrens spa_remove(spa); 2179fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2180fa9e4066Sahrens return (error); 2181fa9e4066Sahrens } 2182fa9e4066Sahrens 218399653d4eSeschrock /* 218499653d4eSeschrock * Get the list of spares, if specified. 
218599653d4eSeschrock */ 218699653d4eSeschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 218799653d4eSeschrock &spares, &nspares) == 0) { 2188fa94a07fSbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 218999653d4eSeschrock KM_SLEEP) == 0); 2190fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 219199653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 2192e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 219399653d4eSeschrock spa_load_spares(spa); 2194e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2195fa94a07fSbrendan spa->spa_spares.sav_sync = B_TRUE; 2196fa94a07fSbrendan } 2197fa94a07fSbrendan 2198fa94a07fSbrendan /* 2199fa94a07fSbrendan * Get the list of level 2 cache devices, if specified. 2200fa94a07fSbrendan */ 2201fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 2202fa94a07fSbrendan &l2cache, &nl2cache) == 0) { 2203fa94a07fSbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 2204fa94a07fSbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 2205fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 2206fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2207e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2208fa94a07fSbrendan spa_load_l2cache(spa); 2209e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2210fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 221199653d4eSeschrock } 221299653d4eSeschrock 22130a48a24eStimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2214fa9e4066Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2215fa9e4066Sahrens 2216fa9e4066Sahrens tx = dmu_tx_create_assigned(dp, txg); 2217fa9e4066Sahrens 2218fa9e4066Sahrens /* 2219fa9e4066Sahrens * Create the pool config object. 
2220fa9e4066Sahrens */ 2221fa9e4066Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 2222f7991ba4STim Haley DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2223fa9e4066Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2224fa9e4066Sahrens 2225ea8dc4b6Seschrock if (zap_add(spa->spa_meta_objset, 2226fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 2227ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 2228ea8dc4b6Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 2229ea8dc4b6Seschrock } 2230fa9e4066Sahrens 2231990b4856Slling /* Newly created pools with the right version are always deflated. */ 2232990b4856Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 2233990b4856Slling spa->spa_deflate = TRUE; 2234990b4856Slling if (zap_add(spa->spa_meta_objset, 2235990b4856Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 2236990b4856Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 2237990b4856Slling cmn_err(CE_PANIC, "failed to add deflate"); 2238990b4856Slling } 223999653d4eSeschrock } 224099653d4eSeschrock 2241fa9e4066Sahrens /* 2242fa9e4066Sahrens * Create the deferred-free bplist object. Turn off compression 2243fa9e4066Sahrens * because sync-to-convergence takes longer if the blocksize 2244fa9e4066Sahrens * keeps changing. 
2245fa9e4066Sahrens */ 2246fa9e4066Sahrens spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, 2247fa9e4066Sahrens 1 << 14, tx); 2248fa9e4066Sahrens dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 2249fa9e4066Sahrens ZIO_COMPRESS_OFF, tx); 2250fa9e4066Sahrens 2251ea8dc4b6Seschrock if (zap_add(spa->spa_meta_objset, 2252fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 2253ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { 2254ea8dc4b6Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 2255ea8dc4b6Seschrock } 2256fa9e4066Sahrens 225706eeb2adSek /* 225806eeb2adSek * Create the pool's history object. 225906eeb2adSek */ 2260990b4856Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 2261990b4856Slling spa_history_create_obj(spa, tx); 2262990b4856Slling 2263990b4856Slling /* 2264990b4856Slling * Set pool properties. 2265990b4856Slling */ 2266990b4856Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 2267990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 22680a4e9518Sgw spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 2269573ca77eSGeorge Wilson spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 2270379c004dSEric Schrock if (props != NULL) { 2271379c004dSEric Schrock spa_configfile_set(spa, props, B_FALSE); 2272990b4856Slling spa_sync_props(spa, props, CRED(), tx); 2273379c004dSEric Schrock } 227406eeb2adSek 2275fa9e4066Sahrens dmu_tx_commit(tx); 2276fa9e4066Sahrens 2277fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 2278fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 2279fa9e4066Sahrens 2280fa9e4066Sahrens /* 2281fa9e4066Sahrens * We explicitly wait for the first transaction to complete so that our 2282fa9e4066Sahrens * bean counters are appropriately updated. 
2283fa9e4066Sahrens */ 2284fa9e4066Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2285fa9e4066Sahrens 2286c5904d13Seschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2287fa9e4066Sahrens 2288990b4856Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 2289228975ccSek (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 2290c8e1f6d2SMark J Musante spa_history_log_version(spa, LOG_POOL_CREATE); 2291228975ccSek 2292088f3894Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 2293088f3894Sahrens 2294daaa36a7SGeorge Wilson mutex_exit(&spa_namespace_lock); 2295daaa36a7SGeorge Wilson 2296fa9e4066Sahrens return (0); 2297fa9e4066Sahrens } 2298fa9e4066Sahrens 2299e7cbe64fSgw #ifdef _KERNEL 2300e7cbe64fSgw /* 230121ecdf64SLin Ling * Get the root pool information from the root disk, then import the root pool 230221ecdf64SLin Ling * during the system boot up time. 2303e7cbe64fSgw */ 230421ecdf64SLin Ling extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 230521ecdf64SLin Ling 230621ecdf64SLin Ling static nvlist_t * 230721ecdf64SLin Ling spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 2308e7cbe64fSgw { 230921ecdf64SLin Ling nvlist_t *config; 2310e7cbe64fSgw nvlist_t *nvtop, *nvroot; 2311e7cbe64fSgw uint64_t pgid; 2312e7cbe64fSgw 231321ecdf64SLin Ling if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 231421ecdf64SLin Ling return (NULL); 231521ecdf64SLin Ling 2316e7cbe64fSgw /* 2317e7cbe64fSgw * Add this top-level vdev to the child array. 2318e7cbe64fSgw */ 231921ecdf64SLin Ling VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 232021ecdf64SLin Ling &nvtop) == 0); 232121ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 232221ecdf64SLin Ling &pgid) == 0); 232321ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 2324e7cbe64fSgw 2325e7cbe64fSgw /* 2326e7cbe64fSgw * Put this pool's top-level vdevs into a root vdev. 
2327e7cbe64fSgw */ 2328e7cbe64fSgw VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 232921ecdf64SLin Ling VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 233021ecdf64SLin Ling VDEV_TYPE_ROOT) == 0); 2331e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 2332e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 2333e7cbe64fSgw VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 2334e7cbe64fSgw &nvtop, 1) == 0); 2335e7cbe64fSgw 2336e7cbe64fSgw /* 2337e7cbe64fSgw * Replace the existing vdev_tree with the new root vdev in 2338e7cbe64fSgw * this pool's configuration (remove the old, add the new). 2339e7cbe64fSgw */ 2340e7cbe64fSgw VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 2341e7cbe64fSgw nvlist_free(nvroot); 234221ecdf64SLin Ling return (config); 2343e7cbe64fSgw } 2344e7cbe64fSgw 2345e7cbe64fSgw /* 234621ecdf64SLin Ling * Walk the vdev tree and see if we can find a device with "better" 234721ecdf64SLin Ling * configuration. A configuration is "better" if the label on that 234821ecdf64SLin Ling * device has a more recent txg. 
2349051aabe6Staylor */ 235021ecdf64SLin Ling static void 235121ecdf64SLin Ling spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 2352051aabe6Staylor { 2353573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) 235421ecdf64SLin Ling spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 2355051aabe6Staylor 235621ecdf64SLin Ling if (vd->vdev_ops->vdev_op_leaf) { 235721ecdf64SLin Ling nvlist_t *label; 235821ecdf64SLin Ling uint64_t label_txg; 2359051aabe6Staylor 236021ecdf64SLin Ling if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 236121ecdf64SLin Ling &label) != 0) 236221ecdf64SLin Ling return; 2363051aabe6Staylor 236421ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 236521ecdf64SLin Ling &label_txg) == 0); 2366051aabe6Staylor 236721ecdf64SLin Ling /* 236821ecdf64SLin Ling * Do we have a better boot device? 236921ecdf64SLin Ling */ 237021ecdf64SLin Ling if (label_txg > *txg) { 237121ecdf64SLin Ling *txg = label_txg; 237221ecdf64SLin Ling *avd = vd; 2373051aabe6Staylor } 237421ecdf64SLin Ling nvlist_free(label); 2375051aabe6Staylor } 2376051aabe6Staylor } 2377051aabe6Staylor 2378e7cbe64fSgw /* 2379e7cbe64fSgw * Import a root pool. 2380e7cbe64fSgw * 2381051aabe6Staylor * For x86. devpath_list will consist of devid and/or physpath name of 2382051aabe6Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 2383051aabe6Staylor * The GRUB "findroot" command will return the vdev we should boot. 2384e7cbe64fSgw * 2385e7cbe64fSgw * For Sparc, devpath_list consists the physpath name of the booting device 2386e7cbe64fSgw * no matter the rootpool is a single device pool or a mirrored pool. 2387e7cbe64fSgw * e.g. 
* "/pci@1f,0/ide@d/disk@0,0:a"
 */
/*
 * NOTE(review): called with no locks held; takes spa_namespace_lock for the
 * duration.  Returns 0 on success, or an errno: EIO if the boot label cannot
 * be read, ENOENT if the boot vdev is not in the generated config, EINVAL if
 * a better (newer-txg) device exists or the boot device is a spared-out leaf.
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	if ((config = spa_generate_rootconf(devpath, devid, &guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, NULL);
	spa->spa_is_root = B_TRUE;
	/* Trust the label config verbatim rather than rediscovering it. */
	spa->spa_load_verbatim = B_TRUE;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->vdev_child[1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/*
	 * Common cleanup: free the temporary vdev tree built from the label
	 * config (under the config lock), then release the namespace lock
	 * and the label config itself.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}

#endif

/*
 * Take a pool and insert it into the namespace as if it had been loaded at
 * boot.
*/
/*
 * NOTE(review): the supplied config is duplicated into spa_config unchanged
 * (spa_load_verbatim is set); no spa_load() is performed here.  Returns
 * EEXIST if a pool with this name is already in the namespace, 0 otherwise.
 * History logging happens after spa_namespace_lock is dropped.
 */
int
spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;

	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* An altroot property, if present, overrides the pool's root path. */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);

	spa->spa_load_verbatim = B_TRUE;

	VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/* Persist the new pool into the config cache. */
	spa_config_sync(spa, B_FALSE, B_TRUE);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}

/*
 * Import a non-root pool into the system.
*/
/*
 * NOTE(review): spa_namespace_lock is held across the entire import.  On any
 * failure the partially constructed spa is unloaded, deactivated and removed
 * before returning.  Async tasks are suspended until the pool is known to be
 * healthy, then resumed.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	/* Validate auxiliary (spare/l2cache) devices only if the load worked. */
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		/* Tear down everything built so far and report the error. */
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}


/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
*/
#define	TRYIMPORT_NAME	"$import"

/*
 * NOTE(review): activates the pool read-only (FREAD) under the reserved
 * TRYIMPORT_NAME, generates a config, then fully unloads and removes it
 * again.  Presumably the caller frees the returned nvlist — verify against
 * callers.  Returns NULL if 'tryconfig' lacks a pool name or state.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Replace the leading "$import" component of
				 * the dataset name with the real pool name.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple.  We make sure
 * there is no more pending I/O and any references to the pool are gone.
 * Then, we update the pool state and sync all the labels to disk, removing
 * the configuration from the cache afterwards.  If the 'hardforce' flag is
 * set, then we don't sync the labels or remove the configuration cache.
*/
/*
 * Common worker for spa_destroy(), spa_export() and spa_reset().
 *
 *   pool       - name of the pool to transition
 *   new_state  - target state (POOL_STATE_DESTROYED / EXPORTED /
 *                UNINITIALIZED for a reset)
 *   oldconfig  - if non-NULL, receives a dup of the pool's config
 *   force      - allow export despite an active shared spare
 *   hardforce  - skip the final label sync and config-cache update
 *
 * Returns 0, or EROFS (read-only mode), ENOENT (no such pool),
 * EBUSY (active references), EXDEV (active shared spare without force).
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool.  With 'force', the user may export
		 * such a pool anyway.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the (now final) config back to the caller if requested. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
*/
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	/* Thin wrapper: 'force' and 'hardforce' pass straight through. */
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 */
int
spa_reset(char *pool)
{
	/* POOL_STATE_UNINITIALIZED requests an unload without removal. */
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add a device to a storage pool.
2909fa9e4066Sahrens */ 2910fa9e4066Sahrens int 2911fa9e4066Sahrens spa_vdev_add(spa_t *spa, nvlist_t *nvroot) 2912fa9e4066Sahrens { 2913*88ecc943SGeorge Wilson uint64_t txg, id; 29148ad4d6ddSJeff Bonwick int error; 2915fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 29160e34b6a7Sbonwick vdev_t *vd, *tvd; 2917fa94a07fSbrendan nvlist_t **spares, **l2cache; 2918fa94a07fSbrendan uint_t nspares, nl2cache; 2919fa9e4066Sahrens 2920fa9e4066Sahrens txg = spa_vdev_enter(spa); 2921fa9e4066Sahrens 292299653d4eSeschrock if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 292399653d4eSeschrock VDEV_ALLOC_ADD)) != 0) 292499653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, error)); 2925fa9e4066Sahrens 2926e14bb325SJeff Bonwick spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ 292799653d4eSeschrock 2928fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 2929fa94a07fSbrendan &nspares) != 0) 293099653d4eSeschrock nspares = 0; 293199653d4eSeschrock 2932fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, 2933fa94a07fSbrendan &nl2cache) != 0) 2934fa94a07fSbrendan nl2cache = 0; 2935fa94a07fSbrendan 2936e14bb325SJeff Bonwick if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) 2937fa9e4066Sahrens return (spa_vdev_exit(spa, vd, txg, EINVAL)); 2938fa9e4066Sahrens 2939e14bb325SJeff Bonwick if (vd->vdev_children != 0 && 2940e14bb325SJeff Bonwick (error = vdev_create(vd, txg, B_FALSE)) != 0) 2941e14bb325SJeff Bonwick return (spa_vdev_exit(spa, vd, txg, error)); 294299653d4eSeschrock 294339c23413Seschrock /* 2944fa94a07fSbrendan * We must validate the spares and l2cache devices after checking the 2945fa94a07fSbrendan * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 
294639c23413Seschrock */ 2947e14bb325SJeff Bonwick if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) 294839c23413Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 294939c23413Seschrock 295039c23413Seschrock /* 295139c23413Seschrock * Transfer each new top-level vdev from vd to rvd. 295239c23413Seschrock */ 29538ad4d6ddSJeff Bonwick for (int c = 0; c < vd->vdev_children; c++) { 2954*88ecc943SGeorge Wilson 2955*88ecc943SGeorge Wilson /* 2956*88ecc943SGeorge Wilson * Set the vdev id to the first hole, if one exists. 2957*88ecc943SGeorge Wilson */ 2958*88ecc943SGeorge Wilson for (id = 0; id < rvd->vdev_children; id++) { 2959*88ecc943SGeorge Wilson if (rvd->vdev_child[id]->vdev_ishole) { 2960*88ecc943SGeorge Wilson vdev_free(rvd->vdev_child[id]); 2961*88ecc943SGeorge Wilson break; 2962*88ecc943SGeorge Wilson } 2963*88ecc943SGeorge Wilson } 296439c23413Seschrock tvd = vd->vdev_child[c]; 296539c23413Seschrock vdev_remove_child(vd, tvd); 2966*88ecc943SGeorge Wilson tvd->vdev_id = id; 296739c23413Seschrock vdev_add_child(rvd, tvd); 296839c23413Seschrock vdev_config_dirty(tvd); 296939c23413Seschrock } 297039c23413Seschrock 297199653d4eSeschrock if (nspares != 0) { 2972fa94a07fSbrendan spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, 2973fa94a07fSbrendan ZPOOL_CONFIG_SPARES); 297499653d4eSeschrock spa_load_spares(spa); 2975fa94a07fSbrendan spa->spa_spares.sav_sync = B_TRUE; 2976fa94a07fSbrendan } 2977fa94a07fSbrendan 2978fa94a07fSbrendan if (nl2cache != 0) { 2979fa94a07fSbrendan spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, 2980fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE); 2981fa94a07fSbrendan spa_load_l2cache(spa); 2982fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 2983fa9e4066Sahrens } 2984fa9e4066Sahrens 2985fa9e4066Sahrens /* 29860e34b6a7Sbonwick * We have to be careful when adding new vdevs to an existing pool. 
29870e34b6a7Sbonwick * If other threads start allocating from these vdevs before we 29880e34b6a7Sbonwick * sync the config cache, and we lose power, then upon reboot we may 29890e34b6a7Sbonwick * fail to open the pool because there are DVAs that the config cache 29900e34b6a7Sbonwick * can't translate. Therefore, we first add the vdevs without 29910e34b6a7Sbonwick * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 29920373e76bSbonwick * and then let spa_config_update() initialize the new metaslabs. 29930e34b6a7Sbonwick * 29940e34b6a7Sbonwick * spa_load() checks for added-but-not-initialized vdevs, so that 29950e34b6a7Sbonwick * if we lose power at any point in this sequence, the remaining 29960e34b6a7Sbonwick * steps will be completed the next time we load the pool. 29970e34b6a7Sbonwick */ 29980373e76bSbonwick (void) spa_vdev_exit(spa, vd, txg, 0); 29990e34b6a7Sbonwick 30000373e76bSbonwick mutex_enter(&spa_namespace_lock); 30010373e76bSbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 30020373e76bSbonwick mutex_exit(&spa_namespace_lock); 3003fa9e4066Sahrens 30040373e76bSbonwick return (0); 3005fa9e4066Sahrens } 3006fa9e4066Sahrens 3007fa9e4066Sahrens /* 3008fa9e4066Sahrens * Attach a device to a mirror. The arguments are the path to any device 3009fa9e4066Sahrens * in the mirror, and the nvroot for the new device. If the path specifies 3010fa9e4066Sahrens * a device that is not mirrored, we automatically insert the mirror vdev. 
3011fa9e4066Sahrens * 3012fa9e4066Sahrens * If 'replacing' is specified, the new device is intended to replace the 3013fa9e4066Sahrens * existing device; in this case the two devices are made into their own 30143d7072f8Seschrock * mirror using the 'replacing' vdev, which is functionally identical to 3015fa9e4066Sahrens * the mirror vdev (it actually reuses all the same ops) but has a few 3016fa9e4066Sahrens * extra rules: you can't attach to it after it's been created, and upon 3017fa9e4066Sahrens * completion of resilvering, the first disk (the one being replaced) 3018fa9e4066Sahrens * is automatically detached. 3019fa9e4066Sahrens */ 3020fa9e4066Sahrens int 3021ea8dc4b6Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 3022fa9e4066Sahrens { 3023fa9e4066Sahrens uint64_t txg, open_txg; 3024fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 3025fa9e4066Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 302699653d4eSeschrock vdev_ops_t *pvops; 30279b3f6b42SEric Kustarz char *oldvdpath, *newvdpath; 30289b3f6b42SEric Kustarz int newvd_isspare; 30299b3f6b42SEric Kustarz int error; 3030fa9e4066Sahrens 3031fa9e4066Sahrens txg = spa_vdev_enter(spa); 3032fa9e4066Sahrens 3033c5904d13Seschrock oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 3034fa9e4066Sahrens 3035fa9e4066Sahrens if (oldvd == NULL) 3036fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3037fa9e4066Sahrens 30380e34b6a7Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 30390e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 30400e34b6a7Sbonwick 3041fa9e4066Sahrens pvd = oldvd->vdev_parent; 3042fa9e4066Sahrens 304399653d4eSeschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 30443d7072f8Seschrock VDEV_ALLOC_ADD)) != 0) 30453d7072f8Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 30463d7072f8Seschrock 30473d7072f8Seschrock if (newrootvd->vdev_children != 1) 3048fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, 
EINVAL)); 3049fa9e4066Sahrens 3050fa9e4066Sahrens newvd = newrootvd->vdev_child[0]; 3051fa9e4066Sahrens 3052fa9e4066Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 3053fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 3054fa9e4066Sahrens 305599653d4eSeschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 3056fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 3057fa9e4066Sahrens 30588654d025Sperrin /* 30598654d025Sperrin * Spares can't replace logs 30608654d025Sperrin */ 3061ee0eb9f2SEric Schrock if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) 30628654d025Sperrin return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 30638654d025Sperrin 306499653d4eSeschrock if (!replacing) { 306599653d4eSeschrock /* 306699653d4eSeschrock * For attach, the only allowable parent is a mirror or the root 306799653d4eSeschrock * vdev. 306899653d4eSeschrock */ 306999653d4eSeschrock if (pvd->vdev_ops != &vdev_mirror_ops && 307099653d4eSeschrock pvd->vdev_ops != &vdev_root_ops) 307199653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 307299653d4eSeschrock 307399653d4eSeschrock pvops = &vdev_mirror_ops; 307499653d4eSeschrock } else { 307599653d4eSeschrock /* 307699653d4eSeschrock * Active hot spares can only be replaced by inactive hot 307799653d4eSeschrock * spares. 307899653d4eSeschrock */ 307999653d4eSeschrock if (pvd->vdev_ops == &vdev_spare_ops && 308099653d4eSeschrock pvd->vdev_child[1] == oldvd && 308199653d4eSeschrock !spa_has_spare(spa, newvd->vdev_guid)) 308299653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 308399653d4eSeschrock 308499653d4eSeschrock /* 308599653d4eSeschrock * If the source is a hot spare, and the parent isn't already a 308699653d4eSeschrock * spare, then we want to create a new hot spare. Otherwise, we 308739c23413Seschrock * want to create a replacing vdev. 
The user is not allowed to 308839c23413Seschrock * attach to a spared vdev child unless the 'isspare' state is 308939c23413Seschrock * the same (spare replaces spare, non-spare replaces 309039c23413Seschrock * non-spare). 309199653d4eSeschrock */ 309299653d4eSeschrock if (pvd->vdev_ops == &vdev_replacing_ops) 309399653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 309439c23413Seschrock else if (pvd->vdev_ops == &vdev_spare_ops && 309539c23413Seschrock newvd->vdev_isspare != oldvd->vdev_isspare) 309639c23413Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 309799653d4eSeschrock else if (pvd->vdev_ops != &vdev_spare_ops && 309899653d4eSeschrock newvd->vdev_isspare) 309999653d4eSeschrock pvops = &vdev_spare_ops; 310099653d4eSeschrock else 310199653d4eSeschrock pvops = &vdev_replacing_ops; 310299653d4eSeschrock } 310399653d4eSeschrock 31042a79c5feSlling /* 3105573ca77eSGeorge Wilson * Make sure the new device is big enough. 31062a79c5feSlling */ 3107573ca77eSGeorge Wilson if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) 3108fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 3109fa9e4066Sahrens 3110ecc2d604Sbonwick /* 3111ecc2d604Sbonwick * The new device cannot have a higher alignment requirement 3112ecc2d604Sbonwick * than the top-level vdev. 3113ecc2d604Sbonwick */ 3114ecc2d604Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 3115fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 3116fa9e4066Sahrens 3117fa9e4066Sahrens /* 3118fa9e4066Sahrens * If this is an in-place replacement, update oldvd's path and devid 3119fa9e4066Sahrens * to make it distinguishable from newvd, and unopenable from now on. 
3120fa9e4066Sahrens */ 3121fa9e4066Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 3122fa9e4066Sahrens spa_strfree(oldvd->vdev_path); 3123fa9e4066Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 3124fa9e4066Sahrens KM_SLEEP); 3125fa9e4066Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 3126fa9e4066Sahrens newvd->vdev_path, "old"); 3127fa9e4066Sahrens if (oldvd->vdev_devid != NULL) { 3128fa9e4066Sahrens spa_strfree(oldvd->vdev_devid); 3129fa9e4066Sahrens oldvd->vdev_devid = NULL; 3130fa9e4066Sahrens } 3131fa9e4066Sahrens } 3132fa9e4066Sahrens 3133fa9e4066Sahrens /* 313499653d4eSeschrock * If the parent is not a mirror, or if we're replacing, insert the new 313599653d4eSeschrock * mirror/replacing/spare vdev above oldvd. 3136fa9e4066Sahrens */ 3137fa9e4066Sahrens if (pvd->vdev_ops != pvops) 3138fa9e4066Sahrens pvd = vdev_add_parent(oldvd, pvops); 3139fa9e4066Sahrens 3140fa9e4066Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 3141fa9e4066Sahrens ASSERT(pvd->vdev_ops == pvops); 3142fa9e4066Sahrens ASSERT(oldvd->vdev_parent == pvd); 3143fa9e4066Sahrens 3144fa9e4066Sahrens /* 3145fa9e4066Sahrens * Extract the new device from its root and add it to pvd. 3146fa9e4066Sahrens */ 3147fa9e4066Sahrens vdev_remove_child(newrootvd, newvd); 3148fa9e4066Sahrens newvd->vdev_id = pvd->vdev_children; 3149*88ecc943SGeorge Wilson newvd->vdev_crtxg = oldvd->vdev_crtxg; 3150fa9e4066Sahrens vdev_add_child(pvd, newvd); 3151fa9e4066Sahrens 3152fa9e4066Sahrens tvd = newvd->vdev_top; 3153fa9e4066Sahrens ASSERT(pvd->vdev_top == tvd); 3154fa9e4066Sahrens ASSERT(tvd->vdev_parent == rvd); 3155fa9e4066Sahrens 3156fa9e4066Sahrens vdev_config_dirty(tvd); 3157fa9e4066Sahrens 3158fa9e4066Sahrens /* 3159fa9e4066Sahrens * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 3160fa9e4066Sahrens * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 
3161fa9e4066Sahrens */ 3162fa9e4066Sahrens open_txg = txg + TXG_CONCURRENT_STATES - 1; 3163fa9e4066Sahrens 31648ad4d6ddSJeff Bonwick vdev_dtl_dirty(newvd, DTL_MISSING, 31658ad4d6ddSJeff Bonwick TXG_INITIAL, open_txg - TXG_INITIAL + 1); 3166fa9e4066Sahrens 31676809eb4eSEric Schrock if (newvd->vdev_isspare) { 316839c23413Seschrock spa_spare_activate(newvd); 31696809eb4eSEric Schrock spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); 31706809eb4eSEric Schrock } 31716809eb4eSEric Schrock 3172e14bb325SJeff Bonwick oldvdpath = spa_strdup(oldvd->vdev_path); 3173e14bb325SJeff Bonwick newvdpath = spa_strdup(newvd->vdev_path); 31749b3f6b42SEric Kustarz newvd_isspare = newvd->vdev_isspare; 3175ea8dc4b6Seschrock 3176fa9e4066Sahrens /* 3177fa9e4066Sahrens * Mark newvd's DTL dirty in this txg. 3178fa9e4066Sahrens */ 3179ecc2d604Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 3180fa9e4066Sahrens 3181fa9e4066Sahrens (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 3182fa9e4066Sahrens 3183c8e1f6d2SMark J Musante spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL, 3184c8e1f6d2SMark J Musante CRED(), "%s vdev=%s %s vdev=%s", 3185c8e1f6d2SMark J Musante replacing && newvd_isspare ? "spare in" : 3186c8e1f6d2SMark J Musante replacing ? "replace" : "attach", newvdpath, 3187c8e1f6d2SMark J Musante replacing ? "for" : "to", oldvdpath); 31889b3f6b42SEric Kustarz 31899b3f6b42SEric Kustarz spa_strfree(oldvdpath); 31909b3f6b42SEric Kustarz spa_strfree(newvdpath); 31919b3f6b42SEric Kustarz 3192fa9e4066Sahrens /* 3193088f3894Sahrens * Kick off a resilver to update newvd. 3194fa9e4066Sahrens */ 3195088f3894Sahrens VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0); 3196fa9e4066Sahrens 3197fa9e4066Sahrens return (0); 3198fa9e4066Sahrens } 3199fa9e4066Sahrens 3200fa9e4066Sahrens /* 3201fa9e4066Sahrens * Detach a device from a mirror or replacing vdev. 3202fa9e4066Sahrens * If 'replace_done' is specified, only detach if the parent 3203fa9e4066Sahrens * is a replacing vdev. 
3204fa9e4066Sahrens */ 3205fa9e4066Sahrens int 32068ad4d6ddSJeff Bonwick spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 3207fa9e4066Sahrens { 3208fa9e4066Sahrens uint64_t txg; 32098ad4d6ddSJeff Bonwick int error; 3210fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 3211fa9e4066Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 321299653d4eSeschrock boolean_t unspare = B_FALSE; 321399653d4eSeschrock uint64_t unspare_guid; 3214bf82a41bSeschrock size_t len; 3215fa9e4066Sahrens 3216fa9e4066Sahrens txg = spa_vdev_enter(spa); 3217fa9e4066Sahrens 3218c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3219fa9e4066Sahrens 3220fa9e4066Sahrens if (vd == NULL) 3221fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3222fa9e4066Sahrens 32230e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 32240e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 32250e34b6a7Sbonwick 3226fa9e4066Sahrens pvd = vd->vdev_parent; 3227fa9e4066Sahrens 32288ad4d6ddSJeff Bonwick /* 32298ad4d6ddSJeff Bonwick * If the parent/child relationship is not as expected, don't do it. 32308ad4d6ddSJeff Bonwick * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 32318ad4d6ddSJeff Bonwick * vdev that's replacing B with C. The user's intent in replacing 32328ad4d6ddSJeff Bonwick * is to go from M(A,B) to M(A,C). If the user decides to cancel 32338ad4d6ddSJeff Bonwick * the replace by detaching C, the expected behavior is to end up 32348ad4d6ddSJeff Bonwick * M(A,B). But suppose that right after deciding to detach C, 32358ad4d6ddSJeff Bonwick * the replacement of B completes. We would have M(A,C), and then 32368ad4d6ddSJeff Bonwick * ask to detach C, which would leave us with just A -- not what 32378ad4d6ddSJeff Bonwick * the user wanted. To prevent this, we make sure that the 32388ad4d6ddSJeff Bonwick * parent/child relationship hasn't changed -- in this example, 32398ad4d6ddSJeff Bonwick * that C's parent is still the replacing vdev R. 
32408ad4d6ddSJeff Bonwick */ 32418ad4d6ddSJeff Bonwick if (pvd->vdev_guid != pguid && pguid != 0) 32428ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 32438ad4d6ddSJeff Bonwick 3244fa9e4066Sahrens /* 3245fa9e4066Sahrens * If replace_done is specified, only remove this device if it's 324699653d4eSeschrock * the first child of a replacing vdev. For the 'spare' vdev, either 324799653d4eSeschrock * disk can be removed. 324899653d4eSeschrock */ 324999653d4eSeschrock if (replace_done) { 325099653d4eSeschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 325199653d4eSeschrock if (vd->vdev_id != 0) 325299653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 325399653d4eSeschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 325499653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 325599653d4eSeschrock } 325699653d4eSeschrock } 325799653d4eSeschrock 325899653d4eSeschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 3259e7437265Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 3260fa9e4066Sahrens 3261fa9e4066Sahrens /* 326299653d4eSeschrock * Only mirror, replacing, and spare vdevs support detach. 3263fa9e4066Sahrens */ 3264fa9e4066Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 326599653d4eSeschrock pvd->vdev_ops != &vdev_mirror_ops && 326699653d4eSeschrock pvd->vdev_ops != &vdev_spare_ops) 3267fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3268fa9e4066Sahrens 3269fa9e4066Sahrens /* 32708ad4d6ddSJeff Bonwick * If this device has the only valid copy of some data, 32718ad4d6ddSJeff Bonwick * we cannot safely detach it. 
3272fa9e4066Sahrens */ 32738ad4d6ddSJeff Bonwick if (vdev_dtl_required(vd)) 3274fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3275fa9e4066Sahrens 32768ad4d6ddSJeff Bonwick ASSERT(pvd->vdev_children >= 2); 3277fa9e4066Sahrens 3278bf82a41bSeschrock /* 3279bf82a41bSeschrock * If we are detaching the second disk from a replacing vdev, then 3280bf82a41bSeschrock * check to see if we changed the original vdev's path to have "/old" 3281bf82a41bSeschrock * at the end in spa_vdev_attach(). If so, undo that change now. 3282bf82a41bSeschrock */ 3283bf82a41bSeschrock if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 3284bf82a41bSeschrock pvd->vdev_child[0]->vdev_path != NULL && 3285bf82a41bSeschrock pvd->vdev_child[1]->vdev_path != NULL) { 3286bf82a41bSeschrock ASSERT(pvd->vdev_child[1] == vd); 3287bf82a41bSeschrock cvd = pvd->vdev_child[0]; 3288bf82a41bSeschrock len = strlen(vd->vdev_path); 3289bf82a41bSeschrock if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 3290bf82a41bSeschrock strcmp(cvd->vdev_path + len, "/old") == 0) { 3291bf82a41bSeschrock spa_strfree(cvd->vdev_path); 3292bf82a41bSeschrock cvd->vdev_path = spa_strdup(vd->vdev_path); 3293bf82a41bSeschrock } 3294bf82a41bSeschrock } 3295bf82a41bSeschrock 329699653d4eSeschrock /* 329799653d4eSeschrock * If we are detaching the original disk from a spare, then it implies 329899653d4eSeschrock * that the spare should become a real disk, and be removed from the 329999653d4eSeschrock * active spare list for the pool. 330099653d4eSeschrock */ 330199653d4eSeschrock if (pvd->vdev_ops == &vdev_spare_ops && 33028ad4d6ddSJeff Bonwick vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) 330399653d4eSeschrock unspare = B_TRUE; 330499653d4eSeschrock 3305fa9e4066Sahrens /* 3306fa9e4066Sahrens * Erase the disk labels so the disk can be used for other things. 
3307fa9e4066Sahrens * This must be done after all other error cases are handled, 3308fa9e4066Sahrens * but before we disembowel vd (so we can still do I/O to it). 3309fa9e4066Sahrens * But if we can't do it, don't treat the error as fatal -- 3310fa9e4066Sahrens * it may be that the unwritability of the disk is the reason 3311fa9e4066Sahrens * it's being detached! 3312fa9e4066Sahrens */ 331339c23413Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3314fa9e4066Sahrens 3315fa9e4066Sahrens /* 3316fa9e4066Sahrens * Remove vd from its parent and compact the parent's children. 3317fa9e4066Sahrens */ 3318fa9e4066Sahrens vdev_remove_child(pvd, vd); 3319fa9e4066Sahrens vdev_compact_children(pvd); 3320fa9e4066Sahrens 3321fa9e4066Sahrens /* 3322fa9e4066Sahrens * Remember one of the remaining children so we can get tvd below. 3323fa9e4066Sahrens */ 3324fa9e4066Sahrens cvd = pvd->vdev_child[0]; 3325fa9e4066Sahrens 332699653d4eSeschrock /* 332799653d4eSeschrock * If we need to remove the remaining child from the list of hot spares, 33288ad4d6ddSJeff Bonwick * do it now, marking the vdev as no longer a spare in the process. 33298ad4d6ddSJeff Bonwick * We must do this before vdev_remove_parent(), because that can 33308ad4d6ddSJeff Bonwick * change the GUID if it creates a new toplevel GUID. For a similar 33318ad4d6ddSJeff Bonwick * reason, we must remove the spare now, in the same txg as the detach; 33328ad4d6ddSJeff Bonwick * otherwise someone could attach a new sibling, change the GUID, and 33338ad4d6ddSJeff Bonwick * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
333499653d4eSeschrock */ 333599653d4eSeschrock if (unspare) { 333699653d4eSeschrock ASSERT(cvd->vdev_isspare); 333739c23413Seschrock spa_spare_remove(cvd); 333899653d4eSeschrock unspare_guid = cvd->vdev_guid; 33398ad4d6ddSJeff Bonwick (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 334099653d4eSeschrock } 334199653d4eSeschrock 3342fa9e4066Sahrens /* 3343fa9e4066Sahrens * If the parent mirror/replacing vdev only has one child, 3344fa9e4066Sahrens * the parent is no longer needed. Remove it from the tree. 3345fa9e4066Sahrens */ 3346fa9e4066Sahrens if (pvd->vdev_children == 1) 3347fa9e4066Sahrens vdev_remove_parent(cvd); 3348fa9e4066Sahrens 3349fa9e4066Sahrens /* 3350fa9e4066Sahrens * We don't set tvd until now because the parent we just removed 3351fa9e4066Sahrens * may have been the previous top-level vdev. 3352fa9e4066Sahrens */ 3353fa9e4066Sahrens tvd = cvd->vdev_top; 3354fa9e4066Sahrens ASSERT(tvd->vdev_parent == rvd); 3355fa9e4066Sahrens 3356fa9e4066Sahrens /* 335739c23413Seschrock * Reevaluate the parent vdev state. 3358fa9e4066Sahrens */ 33593d7072f8Seschrock vdev_propagate_state(cvd); 3360fa9e4066Sahrens 3361fa9e4066Sahrens /* 3362573ca77eSGeorge Wilson * If the 'autoexpand' property is set on the pool then automatically 3363573ca77eSGeorge Wilson * try to expand the size of the pool. For example if the device we 3364573ca77eSGeorge Wilson * just detached was smaller than the others, it may be possible to 3365573ca77eSGeorge Wilson * add metaslabs (i.e. grow the pool). We need to reopen the vdev 3366573ca77eSGeorge Wilson * first so that we can obtain the updated sizes of the leaf vdevs. 3367fa9e4066Sahrens */ 3368573ca77eSGeorge Wilson if (spa->spa_autoexpand) { 3369573ca77eSGeorge Wilson vdev_reopen(tvd); 3370573ca77eSGeorge Wilson vdev_expand(tvd, txg); 3371573ca77eSGeorge Wilson } 3372fa9e4066Sahrens 3373fa9e4066Sahrens vdev_config_dirty(tvd); 3374fa9e4066Sahrens 3375fa9e4066Sahrens /* 337639c23413Seschrock * Mark vd's DTL as dirty in this txg. 
vdev_dtl_sync() will see that 337739c23413Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 337839c23413Seschrock * But first make sure we're not on any *other* txg's DTL list, to 337939c23413Seschrock * prevent vd from being accessed after it's freed. 3380fa9e4066Sahrens */ 33818ad4d6ddSJeff Bonwick for (int t = 0; t < TXG_SIZE; t++) 3382fa9e4066Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 3383ecc2d604Sbonwick vd->vdev_detached = B_TRUE; 3384ecc2d604Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 3385fa9e4066Sahrens 33863d7072f8Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 33873d7072f8Seschrock 338899653d4eSeschrock error = spa_vdev_exit(spa, vd, txg, 0); 338999653d4eSeschrock 339099653d4eSeschrock /* 339139c23413Seschrock * If this was the removal of the original device in a hot spare vdev, 339239c23413Seschrock * then we want to go through and remove the device from the hot spare 339339c23413Seschrock * list of every other pool. 
339499653d4eSeschrock */ 339599653d4eSeschrock if (unspare) { 33968ad4d6ddSJeff Bonwick spa_t *myspa = spa; 339799653d4eSeschrock spa = NULL; 339899653d4eSeschrock mutex_enter(&spa_namespace_lock); 339999653d4eSeschrock while ((spa = spa_next(spa)) != NULL) { 340099653d4eSeschrock if (spa->spa_state != POOL_STATE_ACTIVE) 340199653d4eSeschrock continue; 34028ad4d6ddSJeff Bonwick if (spa == myspa) 34038ad4d6ddSJeff Bonwick continue; 34049af0a4dfSJeff Bonwick spa_open_ref(spa, FTAG); 34059af0a4dfSJeff Bonwick mutex_exit(&spa_namespace_lock); 340699653d4eSeschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 34079af0a4dfSJeff Bonwick mutex_enter(&spa_namespace_lock); 34089af0a4dfSJeff Bonwick spa_close(spa, FTAG); 340999653d4eSeschrock } 341099653d4eSeschrock mutex_exit(&spa_namespace_lock); 341199653d4eSeschrock } 341299653d4eSeschrock 341399653d4eSeschrock return (error); 341499653d4eSeschrock } 341599653d4eSeschrock 3416e14bb325SJeff Bonwick static nvlist_t * 3417e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 341899653d4eSeschrock { 3419e14bb325SJeff Bonwick for (int i = 0; i < count; i++) { 3420e14bb325SJeff Bonwick uint64_t guid; 342199653d4eSeschrock 3422e14bb325SJeff Bonwick VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 3423e14bb325SJeff Bonwick &guid) == 0); 342499653d4eSeschrock 3425e14bb325SJeff Bonwick if (guid == target_guid) 3426e14bb325SJeff Bonwick return (nvpp[i]); 342799653d4eSeschrock } 342899653d4eSeschrock 3429e14bb325SJeff Bonwick return (NULL); 3430fa94a07fSbrendan } 3431fa94a07fSbrendan 3432e14bb325SJeff Bonwick static void 3433e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 3434e14bb325SJeff Bonwick nvlist_t *dev_to_remove) 3435fa94a07fSbrendan { 3436e14bb325SJeff Bonwick nvlist_t **newdev = NULL; 3437fa94a07fSbrendan 3438e14bb325SJeff Bonwick if (count > 1) 3439e14bb325SJeff Bonwick newdev = kmem_alloc((count - 1) * sizeof (void 
*), KM_SLEEP); 3440fa94a07fSbrendan 3441e14bb325SJeff Bonwick for (int i = 0, j = 0; i < count; i++) { 3442e14bb325SJeff Bonwick if (dev[i] == dev_to_remove) 3443e14bb325SJeff Bonwick continue; 3444e14bb325SJeff Bonwick VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 3445fa94a07fSbrendan } 3446fa94a07fSbrendan 3447e14bb325SJeff Bonwick VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 3448e14bb325SJeff Bonwick VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 3449fa94a07fSbrendan 3450e14bb325SJeff Bonwick for (int i = 0; i < count - 1; i++) 3451e14bb325SJeff Bonwick nvlist_free(newdev[i]); 3452fa94a07fSbrendan 3453e14bb325SJeff Bonwick if (count > 1) 3454e14bb325SJeff Bonwick kmem_free(newdev, (count - 1) * sizeof (void *)); 3455fa94a07fSbrendan } 3456fa94a07fSbrendan 3457*88ecc943SGeorge Wilson /* 3458*88ecc943SGeorge Wilson * Removing a device from the vdev namespace requires several steps 3459*88ecc943SGeorge Wilson * and can take a significant amount of time. As a result we use 3460*88ecc943SGeorge Wilson * the spa_vdev_config_[enter/exit] functions which allow us to 3461*88ecc943SGeorge Wilson * grab and release the spa_config_lock while still holding the namespace 3462*88ecc943SGeorge Wilson * lock. During each step the configuration is synced out. 3463*88ecc943SGeorge Wilson */ 3464*88ecc943SGeorge Wilson 3465*88ecc943SGeorge Wilson /* 3466*88ecc943SGeorge Wilson * Initial phase of device removal - stop future allocations from this device. 
3467*88ecc943SGeorge Wilson */ 3468*88ecc943SGeorge Wilson void 3469*88ecc943SGeorge Wilson spa_vdev_remove_start(spa_t *spa, vdev_t *vd) 3470*88ecc943SGeorge Wilson { 3471*88ecc943SGeorge Wilson metaslab_group_t *mg = vd->vdev_mg; 3472*88ecc943SGeorge Wilson 3473*88ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 3474*88ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3475*88ecc943SGeorge Wilson 3476*88ecc943SGeorge Wilson /* 3477*88ecc943SGeorge Wilson * Remove our vdev from the allocatable vdevs 3478*88ecc943SGeorge Wilson */ 3479*88ecc943SGeorge Wilson if (mg) 3480*88ecc943SGeorge Wilson metaslab_class_remove(mg->mg_class, mg); 3481*88ecc943SGeorge Wilson } 3482*88ecc943SGeorge Wilson 3483*88ecc943SGeorge Wilson /* 3484*88ecc943SGeorge Wilson * Evacuate the device. 3485*88ecc943SGeorge Wilson */ 3486*88ecc943SGeorge Wilson int 3487*88ecc943SGeorge Wilson spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 3488*88ecc943SGeorge Wilson { 3489*88ecc943SGeorge Wilson uint64_t txg; 3490*88ecc943SGeorge Wilson int error; 3491*88ecc943SGeorge Wilson 3492*88ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 3493*88ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 3494*88ecc943SGeorge Wilson 3495*88ecc943SGeorge Wilson /* 3496*88ecc943SGeorge Wilson * Evacuate the device. We don't hold the config lock as writer 3497*88ecc943SGeorge Wilson * since we need to do I/O but we do keep the 3498*88ecc943SGeorge Wilson * spa_namespace_lock held. Once this completes the device 3499*88ecc943SGeorge Wilson * should no longer have any blocks allocated on it. 3500*88ecc943SGeorge Wilson */ 3501*88ecc943SGeorge Wilson if (vd->vdev_islog) { 3502*88ecc943SGeorge Wilson /* 3503*88ecc943SGeorge Wilson * Evacuate the device. 
3504*88ecc943SGeorge Wilson */ 3505*88ecc943SGeorge Wilson if (error = dmu_objset_find(spa_name(spa), 3506*88ecc943SGeorge Wilson zil_vdev_offline, NULL, DS_FIND_CHILDREN)) { 3507*88ecc943SGeorge Wilson uint64_t txg; 3508*88ecc943SGeorge Wilson 3509*88ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 3510*88ecc943SGeorge Wilson metaslab_class_add(spa->spa_log_class, 3511*88ecc943SGeorge Wilson vd->vdev_mg); 3512*88ecc943SGeorge Wilson return (spa_vdev_exit(spa, NULL, txg, error)); 3513*88ecc943SGeorge Wilson } 3514*88ecc943SGeorge Wilson txg_wait_synced(spa_get_dsl(spa), 0); 3515*88ecc943SGeorge Wilson } 3516*88ecc943SGeorge Wilson 3517*88ecc943SGeorge Wilson /* 3518*88ecc943SGeorge Wilson * Remove any remaining MOS metadata associated with the device. 3519*88ecc943SGeorge Wilson */ 3520*88ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 3521*88ecc943SGeorge Wilson vd->vdev_removing = B_TRUE; 3522*88ecc943SGeorge Wilson vdev_dirty(vd, 0, NULL, txg); 3523*88ecc943SGeorge Wilson vdev_config_dirty(vd); 3524*88ecc943SGeorge Wilson spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 3525*88ecc943SGeorge Wilson 3526*88ecc943SGeorge Wilson return (0); 3527*88ecc943SGeorge Wilson } 3528*88ecc943SGeorge Wilson 3529*88ecc943SGeorge Wilson /* 3530*88ecc943SGeorge Wilson * Complete the removal by cleaning up the namespace. 
3531*88ecc943SGeorge Wilson */ 3532*88ecc943SGeorge Wilson void 3533*88ecc943SGeorge Wilson spa_vdev_remove_done(spa_t *spa, vdev_t *vd) 3534*88ecc943SGeorge Wilson { 3535*88ecc943SGeorge Wilson vdev_t *rvd = spa->spa_root_vdev; 3536*88ecc943SGeorge Wilson metaslab_group_t *mg = vd->vdev_mg; 3537*88ecc943SGeorge Wilson uint64_t id = vd->vdev_id; 3538*88ecc943SGeorge Wilson boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 3539*88ecc943SGeorge Wilson 3540*88ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 3541*88ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3542*88ecc943SGeorge Wilson 3543*88ecc943SGeorge Wilson (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3544*88ecc943SGeorge Wilson vdev_free(vd); 3545*88ecc943SGeorge Wilson 3546*88ecc943SGeorge Wilson /* 3547*88ecc943SGeorge Wilson * It's possible that another thread is trying todo a spa_vdev_add() 3548*88ecc943SGeorge Wilson * at the same time we're trying remove it. As a result the 3549*88ecc943SGeorge Wilson * added vdev may not have initialized its metaslabs yet. 3550*88ecc943SGeorge Wilson */ 3551*88ecc943SGeorge Wilson if (mg != NULL) 3552*88ecc943SGeorge Wilson metaslab_group_destroy(mg); 3553*88ecc943SGeorge Wilson 3554*88ecc943SGeorge Wilson if (last_vdev) { 3555*88ecc943SGeorge Wilson vdev_compact_children(rvd); 3556*88ecc943SGeorge Wilson } else { 3557*88ecc943SGeorge Wilson vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 3558*88ecc943SGeorge Wilson vdev_add_child(rvd, vd); 3559*88ecc943SGeorge Wilson } 3560*88ecc943SGeorge Wilson vdev_config_dirty(rvd); 3561*88ecc943SGeorge Wilson 3562*88ecc943SGeorge Wilson /* 3563*88ecc943SGeorge Wilson * Reassess the health of our root vdev. 3564*88ecc943SGeorge Wilson */ 3565*88ecc943SGeorge Wilson vdev_reopen(rvd); 3566*88ecc943SGeorge Wilson } 3567*88ecc943SGeorge Wilson 3568fa94a07fSbrendan /* 3569fa94a07fSbrendan * Remove a device from the pool. 
Currently, this supports removing only hot 3570*88ecc943SGeorge Wilson * spares, slogs, and level 2 ARC devices. 3571fa94a07fSbrendan */ 3572fa94a07fSbrendan int 3573fa94a07fSbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 3574fa94a07fSbrendan { 3575fa94a07fSbrendan vdev_t *vd; 3576e14bb325SJeff Bonwick nvlist_t **spares, **l2cache, *nv; 35778ad4d6ddSJeff Bonwick uint64_t txg = 0; 3578*88ecc943SGeorge Wilson uint_t nspares, nl2cache; 3579fa94a07fSbrendan int error = 0; 35808ad4d6ddSJeff Bonwick boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 3581fa94a07fSbrendan 35828ad4d6ddSJeff Bonwick if (!locked) 35838ad4d6ddSJeff Bonwick txg = spa_vdev_enter(spa); 3584fa94a07fSbrendan 3585c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3586fa94a07fSbrendan 3587fa94a07fSbrendan if (spa->spa_spares.sav_vdevs != NULL && 3588fa94a07fSbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 3589e14bb325SJeff Bonwick ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 3590e14bb325SJeff Bonwick (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 3591e14bb325SJeff Bonwick /* 3592e14bb325SJeff Bonwick * Only remove the hot spare if it's not currently in use 3593e14bb325SJeff Bonwick * in this pool. 
3594e14bb325SJeff Bonwick */ 3595e14bb325SJeff Bonwick if (vd == NULL || unspare) { 3596e14bb325SJeff Bonwick spa_vdev_remove_aux(spa->spa_spares.sav_config, 3597e14bb325SJeff Bonwick ZPOOL_CONFIG_SPARES, spares, nspares, nv); 3598e14bb325SJeff Bonwick spa_load_spares(spa); 3599e14bb325SJeff Bonwick spa->spa_spares.sav_sync = B_TRUE; 3600e14bb325SJeff Bonwick } else { 3601e14bb325SJeff Bonwick error = EBUSY; 3602e14bb325SJeff Bonwick } 3603e14bb325SJeff Bonwick } else if (spa->spa_l2cache.sav_vdevs != NULL && 3604fa94a07fSbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 3605e14bb325SJeff Bonwick ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 3606e14bb325SJeff Bonwick (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 3607e14bb325SJeff Bonwick /* 3608e14bb325SJeff Bonwick * Cache devices can always be removed. 3609e14bb325SJeff Bonwick */ 3610e14bb325SJeff Bonwick spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 3611e14bb325SJeff Bonwick ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 3612fa94a07fSbrendan spa_load_l2cache(spa); 3613fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 3614*88ecc943SGeorge Wilson } else if (vd != NULL && vd->vdev_islog) { 3615*88ecc943SGeorge Wilson ASSERT(!locked); 3616*88ecc943SGeorge Wilson 3617*88ecc943SGeorge Wilson /* 3618*88ecc943SGeorge Wilson * XXX - Once we have bp-rewrite this should 3619*88ecc943SGeorge Wilson * become the common case. 3620*88ecc943SGeorge Wilson */ 3621*88ecc943SGeorge Wilson 3622*88ecc943SGeorge Wilson /* 3623*88ecc943SGeorge Wilson * 1. Stop allocations 3624*88ecc943SGeorge Wilson * 2. Evacuate the device (i.e. kill off stubby and 3625*88ecc943SGeorge Wilson * metadata) and wait for it to complete (i.e. sync). 3626*88ecc943SGeorge Wilson * 3. Cleanup the vdev namespace. 
3627*88ecc943SGeorge Wilson */ 3628*88ecc943SGeorge Wilson spa_vdev_remove_start(spa, vd); 3629*88ecc943SGeorge Wilson 3630*88ecc943SGeorge Wilson spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 3631*88ecc943SGeorge Wilson if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0) 3632*88ecc943SGeorge Wilson return (error); 3633*88ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 3634*88ecc943SGeorge Wilson 3635*88ecc943SGeorge Wilson spa_vdev_remove_done(spa, vd); 3636*88ecc943SGeorge Wilson 3637e14bb325SJeff Bonwick } else if (vd != NULL) { 3638e14bb325SJeff Bonwick /* 3639e14bb325SJeff Bonwick * Normal vdevs cannot be removed (yet). 3640e14bb325SJeff Bonwick */ 3641e14bb325SJeff Bonwick error = ENOTSUP; 3642e14bb325SJeff Bonwick } else { 3643e14bb325SJeff Bonwick /* 3644e14bb325SJeff Bonwick * There is no vdev of any kind with the specified guid. 3645e14bb325SJeff Bonwick */ 3646e14bb325SJeff Bonwick error = ENOENT; 3647fa94a07fSbrendan } 364899653d4eSeschrock 36498ad4d6ddSJeff Bonwick if (!locked) 36508ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, error)); 36518ad4d6ddSJeff Bonwick 36528ad4d6ddSJeff Bonwick return (error); 3653fa9e4066Sahrens } 3654fa9e4066Sahrens 3655fa9e4066Sahrens /* 36563d7072f8Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's 36573d7072f8Seschrock * current spared, so we can detach it. 3658fa9e4066Sahrens */ 3659ea8dc4b6Seschrock static vdev_t * 36603d7072f8Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd) 3661fa9e4066Sahrens { 3662ea8dc4b6Seschrock vdev_t *newvd, *oldvd; 3663fa9e4066Sahrens 3664573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) { 36653d7072f8Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 3666ea8dc4b6Seschrock if (oldvd != NULL) 3667ea8dc4b6Seschrock return (oldvd); 3668ea8dc4b6Seschrock } 3669fa9e4066Sahrens 36703d7072f8Seschrock /* 36713d7072f8Seschrock * Check for a completed replacement. 
36723d7072f8Seschrock */ 3673fa9e4066Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 3674ea8dc4b6Seschrock oldvd = vd->vdev_child[0]; 3675ea8dc4b6Seschrock newvd = vd->vdev_child[1]; 3676ea8dc4b6Seschrock 36778ad4d6ddSJeff Bonwick if (vdev_dtl_empty(newvd, DTL_MISSING) && 36788ad4d6ddSJeff Bonwick !vdev_dtl_required(oldvd)) 3679ea8dc4b6Seschrock return (oldvd); 3680fa9e4066Sahrens } 3681ea8dc4b6Seschrock 36823d7072f8Seschrock /* 36833d7072f8Seschrock * Check for a completed resilver with the 'unspare' flag set. 36843d7072f8Seschrock */ 36853d7072f8Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 36863d7072f8Seschrock newvd = vd->vdev_child[0]; 36873d7072f8Seschrock oldvd = vd->vdev_child[1]; 36883d7072f8Seschrock 36893d7072f8Seschrock if (newvd->vdev_unspare && 36908ad4d6ddSJeff Bonwick vdev_dtl_empty(newvd, DTL_MISSING) && 36918ad4d6ddSJeff Bonwick !vdev_dtl_required(oldvd)) { 36923d7072f8Seschrock newvd->vdev_unspare = 0; 36933d7072f8Seschrock return (oldvd); 36943d7072f8Seschrock } 36953d7072f8Seschrock } 36963d7072f8Seschrock 3697ea8dc4b6Seschrock return (NULL); 3698fa9e4066Sahrens } 3699fa9e4066Sahrens 3700ea8dc4b6Seschrock static void 37013d7072f8Seschrock spa_vdev_resilver_done(spa_t *spa) 3702fa9e4066Sahrens { 37038ad4d6ddSJeff Bonwick vdev_t *vd, *pvd, *ppvd; 37048ad4d6ddSJeff Bonwick uint64_t guid, sguid, pguid, ppguid; 3705ea8dc4b6Seschrock 37068ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3707ea8dc4b6Seschrock 37083d7072f8Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 37098ad4d6ddSJeff Bonwick pvd = vd->vdev_parent; 37108ad4d6ddSJeff Bonwick ppvd = pvd->vdev_parent; 3711ea8dc4b6Seschrock guid = vd->vdev_guid; 37128ad4d6ddSJeff Bonwick pguid = pvd->vdev_guid; 37138ad4d6ddSJeff Bonwick ppguid = ppvd->vdev_guid; 37148ad4d6ddSJeff Bonwick sguid = 0; 371599653d4eSeschrock /* 371699653d4eSeschrock * If we have just finished replacing a hot 
spared device, then 371799653d4eSeschrock * we need to detach the parent's first child (the original hot 371899653d4eSeschrock * spare) as well. 371999653d4eSeschrock */ 37208ad4d6ddSJeff Bonwick if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) { 372199653d4eSeschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 37228ad4d6ddSJeff Bonwick ASSERT(ppvd->vdev_children == 2); 37238ad4d6ddSJeff Bonwick sguid = ppvd->vdev_child[1]->vdev_guid; 372499653d4eSeschrock } 37258ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 37268ad4d6ddSJeff Bonwick if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 3727ea8dc4b6Seschrock return; 37288ad4d6ddSJeff Bonwick if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 372999653d4eSeschrock return; 37308ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3731fa9e4066Sahrens } 3732fa9e4066Sahrens 37338ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 3734fa9e4066Sahrens } 3735fa9e4066Sahrens 3736c67d9675Seschrock /* 37376809eb4eSEric Schrock * Update the stored path or FRU for this vdev. Dirty the vdev configuration, 37386809eb4eSEric Schrock * relying on spa_vdev_enter/exit() to synchronize the labels and cache. 
3739c67d9675Seschrock */ 3740c67d9675Seschrock int 37416809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 37426809eb4eSEric Schrock boolean_t ispath) 3743c67d9675Seschrock { 3744c5904d13Seschrock vdev_t *vd; 3745c67d9675Seschrock uint64_t txg; 3746c67d9675Seschrock 3747c67d9675Seschrock txg = spa_vdev_enter(spa); 3748c67d9675Seschrock 37496809eb4eSEric Schrock if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 3750fa94a07fSbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 3751c67d9675Seschrock 37520e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 37530e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 37540e34b6a7Sbonwick 37556809eb4eSEric Schrock if (ispath) { 37566809eb4eSEric Schrock spa_strfree(vd->vdev_path); 37576809eb4eSEric Schrock vd->vdev_path = spa_strdup(value); 37586809eb4eSEric Schrock } else { 37596809eb4eSEric Schrock if (vd->vdev_fru != NULL) 37606809eb4eSEric Schrock spa_strfree(vd->vdev_fru); 37616809eb4eSEric Schrock vd->vdev_fru = spa_strdup(value); 37626809eb4eSEric Schrock } 3763c67d9675Seschrock 3764c67d9675Seschrock vdev_config_dirty(vd->vdev_top); 3765c67d9675Seschrock 3766c67d9675Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 3767c67d9675Seschrock } 3768c67d9675Seschrock 37696809eb4eSEric Schrock int 37706809eb4eSEric Schrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 37716809eb4eSEric Schrock { 37726809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 37736809eb4eSEric Schrock } 37746809eb4eSEric Schrock 37756809eb4eSEric Schrock int 37766809eb4eSEric Schrock spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 37776809eb4eSEric Schrock { 37786809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 37796809eb4eSEric Schrock } 37806809eb4eSEric Schrock 3781fa9e4066Sahrens /* 3782fa9e4066Sahrens * ========================================================================== 3783fa9e4066Sahrens 
* SPA Scrubbing 3784fa9e4066Sahrens * ========================================================================== 3785fa9e4066Sahrens */ 3786fa9e4066Sahrens 3787ea8dc4b6Seschrock int 3788088f3894Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type) 3789fa9e4066Sahrens { 3790e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 3791bb8b5132Sek 3792fa9e4066Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 3793fa9e4066Sahrens return (ENOTSUP); 3794fa9e4066Sahrens 3795fa9e4066Sahrens /* 3796088f3894Sahrens * If a resilver was requested, but there is no DTL on a 3797088f3894Sahrens * writeable leaf device, we have nothing to do. 3798fa9e4066Sahrens */ 3799088f3894Sahrens if (type == POOL_SCRUB_RESILVER && 3800088f3894Sahrens !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 3801088f3894Sahrens spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 3802ea8dc4b6Seschrock return (0); 3803ea8dc4b6Seschrock } 3804fa9e4066Sahrens 3805088f3894Sahrens if (type == POOL_SCRUB_EVERYTHING && 3806088f3894Sahrens spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE && 3807088f3894Sahrens spa->spa_dsl_pool->dp_scrub_isresilver) 3808088f3894Sahrens return (EBUSY); 3809fa9e4066Sahrens 3810088f3894Sahrens if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) { 3811088f3894Sahrens return (dsl_pool_scrub_clean(spa->spa_dsl_pool)); 3812088f3894Sahrens } else if (type == POOL_SCRUB_NONE) { 3813088f3894Sahrens return (dsl_pool_scrub_cancel(spa->spa_dsl_pool)); 3814ea8dc4b6Seschrock } else { 3815088f3894Sahrens return (EINVAL); 3816fa9e4066Sahrens } 3817fa9e4066Sahrens } 3818fa9e4066Sahrens 3819ea8dc4b6Seschrock /* 3820ea8dc4b6Seschrock * ========================================================================== 3821ea8dc4b6Seschrock * SPA async task processing 3822ea8dc4b6Seschrock * ========================================================================== 3823ea8dc4b6Seschrock */ 3824ea8dc4b6Seschrock 3825ea8dc4b6Seschrock static void 38263d7072f8Seschrock 
spa_async_remove(spa_t *spa, vdev_t *vd) 3827fa9e4066Sahrens { 382849cf58c0SBrendan Gregg - Sun Microsystems if (vd->vdev_remove_wanted) { 382949cf58c0SBrendan Gregg - Sun Microsystems vd->vdev_remove_wanted = 0; 383049cf58c0SBrendan Gregg - Sun Microsystems vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 38311d713200SEric Schrock 38321d713200SEric Schrock /* 38331d713200SEric Schrock * We want to clear the stats, but we don't want to do a full 38341d713200SEric Schrock * vdev_clear() as that will cause us to throw away 38351d713200SEric Schrock * degraded/faulted state as well as attempt to reopen the 38361d713200SEric Schrock * device, all of which is a waste. 38371d713200SEric Schrock */ 38381d713200SEric Schrock vd->vdev_stat.vs_read_errors = 0; 38391d713200SEric Schrock vd->vdev_stat.vs_write_errors = 0; 38401d713200SEric Schrock vd->vdev_stat.vs_checksum_errors = 0; 38411d713200SEric Schrock 3842e14bb325SJeff Bonwick vdev_state_dirty(vd->vdev_top); 3843ea8dc4b6Seschrock } 384449cf58c0SBrendan Gregg - Sun Microsystems 3845e14bb325SJeff Bonwick for (int c = 0; c < vd->vdev_children; c++) 384649cf58c0SBrendan Gregg - Sun Microsystems spa_async_remove(spa, vd->vdev_child[c]); 3847ea8dc4b6Seschrock } 3848fa9e4066Sahrens 3849e14bb325SJeff Bonwick static void 3850e14bb325SJeff Bonwick spa_async_probe(spa_t *spa, vdev_t *vd) 3851e14bb325SJeff Bonwick { 3852e14bb325SJeff Bonwick if (vd->vdev_probe_wanted) { 3853e14bb325SJeff Bonwick vd->vdev_probe_wanted = 0; 3854e14bb325SJeff Bonwick vdev_reopen(vd); /* vdev_open() does the actual probe */ 3855e14bb325SJeff Bonwick } 3856e14bb325SJeff Bonwick 3857e14bb325SJeff Bonwick for (int c = 0; c < vd->vdev_children; c++) 3858e14bb325SJeff Bonwick spa_async_probe(spa, vd->vdev_child[c]); 3859e14bb325SJeff Bonwick } 3860e14bb325SJeff Bonwick 3861573ca77eSGeorge Wilson static void 3862573ca77eSGeorge Wilson spa_async_autoexpand(spa_t *spa, vdev_t *vd) 3863573ca77eSGeorge Wilson { 3864573ca77eSGeorge Wilson 
sysevent_id_t eid; 3865573ca77eSGeorge Wilson nvlist_t *attr; 3866573ca77eSGeorge Wilson char *physpath; 3867573ca77eSGeorge Wilson 3868573ca77eSGeorge Wilson if (!spa->spa_autoexpand) 3869573ca77eSGeorge Wilson return; 3870573ca77eSGeorge Wilson 3871573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) { 3872573ca77eSGeorge Wilson vdev_t *cvd = vd->vdev_child[c]; 3873573ca77eSGeorge Wilson spa_async_autoexpand(spa, cvd); 3874573ca77eSGeorge Wilson } 3875573ca77eSGeorge Wilson 3876573ca77eSGeorge Wilson if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) 3877573ca77eSGeorge Wilson return; 3878573ca77eSGeorge Wilson 3879573ca77eSGeorge Wilson physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 3880573ca77eSGeorge Wilson (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); 3881573ca77eSGeorge Wilson 3882573ca77eSGeorge Wilson VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 3883573ca77eSGeorge Wilson VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 3884573ca77eSGeorge Wilson 3885573ca77eSGeorge Wilson (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 3886573ca77eSGeorge Wilson ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 3887573ca77eSGeorge Wilson 3888573ca77eSGeorge Wilson nvlist_free(attr); 3889573ca77eSGeorge Wilson kmem_free(physpath, MAXPATHLEN); 3890573ca77eSGeorge Wilson } 3891573ca77eSGeorge Wilson 3892ea8dc4b6Seschrock static void 3893ea8dc4b6Seschrock spa_async_thread(spa_t *spa) 3894ea8dc4b6Seschrock { 3895e14bb325SJeff Bonwick int tasks; 3896ea8dc4b6Seschrock 3897ea8dc4b6Seschrock ASSERT(spa->spa_sync_on); 3898ea8dc4b6Seschrock 3899ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 3900ea8dc4b6Seschrock tasks = spa->spa_async_tasks; 3901ea8dc4b6Seschrock spa->spa_async_tasks = 0; 3902ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 3903ea8dc4b6Seschrock 39040373e76bSbonwick /* 39050373e76bSbonwick * See if the config needs to be updated. 
39060373e76bSbonwick */ 39070373e76bSbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 3908573ca77eSGeorge Wilson uint64_t oldsz, space_update; 3909573ca77eSGeorge Wilson 39100373e76bSbonwick mutex_enter(&spa_namespace_lock); 3911573ca77eSGeorge Wilson oldsz = spa_get_space(spa); 39120373e76bSbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 3913573ca77eSGeorge Wilson space_update = spa_get_space(spa) - oldsz; 39140373e76bSbonwick mutex_exit(&spa_namespace_lock); 3915573ca77eSGeorge Wilson 3916573ca77eSGeorge Wilson /* 3917573ca77eSGeorge Wilson * If the pool grew as a result of the config update, 3918573ca77eSGeorge Wilson * then log an internal history event. 3919573ca77eSGeorge Wilson */ 3920573ca77eSGeorge Wilson if (space_update) { 3921c8e1f6d2SMark J Musante spa_history_internal_log(LOG_POOL_VDEV_ONLINE, 3922c8e1f6d2SMark J Musante spa, NULL, CRED(), 3923c8e1f6d2SMark J Musante "pool '%s' size: %llu(+%llu)", 3924c8e1f6d2SMark J Musante spa_name(spa), spa_get_space(spa), 3925c8e1f6d2SMark J Musante space_update); 3926573ca77eSGeorge Wilson } 39270373e76bSbonwick } 39280373e76bSbonwick 3929ea8dc4b6Seschrock /* 39303d7072f8Seschrock * See if any devices need to be marked REMOVED. 
3931ea8dc4b6Seschrock */ 3932e14bb325SJeff Bonwick if (tasks & SPA_ASYNC_REMOVE) { 3933e14bb325SJeff Bonwick spa_vdev_state_enter(spa); 39343d7072f8Seschrock spa_async_remove(spa, spa->spa_root_vdev); 3935e14bb325SJeff Bonwick for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 393649cf58c0SBrendan Gregg - Sun Microsystems spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 3937e14bb325SJeff Bonwick for (int i = 0; i < spa->spa_spares.sav_count; i++) 393849cf58c0SBrendan Gregg - Sun Microsystems spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 3939e14bb325SJeff Bonwick (void) spa_vdev_state_exit(spa, NULL, 0); 3940e14bb325SJeff Bonwick } 3941e14bb325SJeff Bonwick 3942573ca77eSGeorge Wilson if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { 3943573ca77eSGeorge Wilson spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3944573ca77eSGeorge Wilson spa_async_autoexpand(spa, spa->spa_root_vdev); 3945573ca77eSGeorge Wilson spa_config_exit(spa, SCL_CONFIG, FTAG); 3946573ca77eSGeorge Wilson } 3947573ca77eSGeorge Wilson 3948e14bb325SJeff Bonwick /* 3949e14bb325SJeff Bonwick * See if any devices need to be probed. 3950e14bb325SJeff Bonwick */ 3951e14bb325SJeff Bonwick if (tasks & SPA_ASYNC_PROBE) { 3952e14bb325SJeff Bonwick spa_vdev_state_enter(spa); 3953e14bb325SJeff Bonwick spa_async_probe(spa, spa->spa_root_vdev); 3954e14bb325SJeff Bonwick (void) spa_vdev_state_exit(spa, NULL, 0); 39553d7072f8Seschrock } 3956ea8dc4b6Seschrock 3957ea8dc4b6Seschrock /* 3958ea8dc4b6Seschrock * If any devices are done replacing, detach them. 3959ea8dc4b6Seschrock */ 39603d7072f8Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 39613d7072f8Seschrock spa_vdev_resilver_done(spa); 3962fa9e4066Sahrens 3963ea8dc4b6Seschrock /* 3964ea8dc4b6Seschrock * Kick off a resilver. 
3965ea8dc4b6Seschrock */ 3966088f3894Sahrens if (tasks & SPA_ASYNC_RESILVER) 3967088f3894Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0); 3968ea8dc4b6Seschrock 3969ea8dc4b6Seschrock /* 3970ea8dc4b6Seschrock * Let the world know that we're done. 3971ea8dc4b6Seschrock */ 3972ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 3973ea8dc4b6Seschrock spa->spa_async_thread = NULL; 3974ea8dc4b6Seschrock cv_broadcast(&spa->spa_async_cv); 3975ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 3976ea8dc4b6Seschrock thread_exit(); 3977ea8dc4b6Seschrock } 3978ea8dc4b6Seschrock 3979ea8dc4b6Seschrock void 3980ea8dc4b6Seschrock spa_async_suspend(spa_t *spa) 3981ea8dc4b6Seschrock { 3982ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 3983ea8dc4b6Seschrock spa->spa_async_suspended++; 3984ea8dc4b6Seschrock while (spa->spa_async_thread != NULL) 3985ea8dc4b6Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 3986ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 3987ea8dc4b6Seschrock } 3988ea8dc4b6Seschrock 3989ea8dc4b6Seschrock void 3990ea8dc4b6Seschrock spa_async_resume(spa_t *spa) 3991ea8dc4b6Seschrock { 3992ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 3993ea8dc4b6Seschrock ASSERT(spa->spa_async_suspended != 0); 3994ea8dc4b6Seschrock spa->spa_async_suspended--; 3995ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 3996ea8dc4b6Seschrock } 3997ea8dc4b6Seschrock 3998ea8dc4b6Seschrock static void 3999ea8dc4b6Seschrock spa_async_dispatch(spa_t *spa) 4000ea8dc4b6Seschrock { 4001ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4002ea8dc4b6Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 40030373e76bSbonwick spa->spa_async_thread == NULL && 40040373e76bSbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 4005ea8dc4b6Seschrock spa->spa_async_thread = thread_create(NULL, 0, 4006ea8dc4b6Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 4007ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4008ea8dc4b6Seschrock } 
4009ea8dc4b6Seschrock 4010ea8dc4b6Seschrock void 4011ea8dc4b6Seschrock spa_async_request(spa_t *spa, int task) 4012ea8dc4b6Seschrock { 4013ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4014ea8dc4b6Seschrock spa->spa_async_tasks |= task; 4015ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4016fa9e4066Sahrens } 4017fa9e4066Sahrens 4018fa9e4066Sahrens /* 4019fa9e4066Sahrens * ========================================================================== 4020fa9e4066Sahrens * SPA syncing routines 4021fa9e4066Sahrens * ========================================================================== 4022fa9e4066Sahrens */ 4023fa9e4066Sahrens 4024fa9e4066Sahrens static void 4025fa9e4066Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 4026fa9e4066Sahrens { 4027fa9e4066Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 4028fa9e4066Sahrens dmu_tx_t *tx; 4029fa9e4066Sahrens blkptr_t blk; 4030fa9e4066Sahrens uint64_t itor = 0; 4031fa9e4066Sahrens zio_t *zio; 4032fa9e4066Sahrens int error; 4033fa9e4066Sahrens uint8_t c = 1; 4034fa9e4066Sahrens 4035e14bb325SJeff Bonwick zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 4036fa9e4066Sahrens 4037e14bb325SJeff Bonwick while (bplist_iterate(bpl, &itor, &blk) == 0) { 4038e14bb325SJeff Bonwick ASSERT(blk.blk_birth < txg); 4039e14bb325SJeff Bonwick zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL, 4040e14bb325SJeff Bonwick ZIO_FLAG_MUSTSUCCEED)); 4041e14bb325SJeff Bonwick } 4042fa9e4066Sahrens 4043fa9e4066Sahrens error = zio_wait(zio); 4044fa9e4066Sahrens ASSERT3U(error, ==, 0); 4045fa9e4066Sahrens 4046fa9e4066Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 4047fa9e4066Sahrens bplist_vacate(bpl, tx); 4048fa9e4066Sahrens 4049fa9e4066Sahrens /* 4050fa9e4066Sahrens * Pre-dirty the first block so we sync to convergence faster. 4051fa9e4066Sahrens * (Usually only the first block is needed.) 
4052fa9e4066Sahrens */ 4053fa9e4066Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 4054fa9e4066Sahrens dmu_tx_commit(tx); 4055fa9e4066Sahrens } 4056fa9e4066Sahrens 4057fa9e4066Sahrens static void 405899653d4eSeschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 4059fa9e4066Sahrens { 4060fa9e4066Sahrens char *packed = NULL; 4061f7991ba4STim Haley size_t bufsize; 4062fa9e4066Sahrens size_t nvsize = 0; 4063fa9e4066Sahrens dmu_buf_t *db; 4064fa9e4066Sahrens 406599653d4eSeschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 4066fa9e4066Sahrens 4067f7991ba4STim Haley /* 4068f7991ba4STim Haley * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 4069f7991ba4STim Haley * information. This avoids the dbuf_will_dirty() path and 4070f7991ba4STim Haley * saves us a pre-read to get data we don't actually care about. 4071f7991ba4STim Haley */ 4072f7991ba4STim Haley bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 4073f7991ba4STim Haley packed = kmem_alloc(bufsize, KM_SLEEP); 4074fa9e4066Sahrens 407599653d4eSeschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 4076ea8dc4b6Seschrock KM_SLEEP) == 0); 4077f7991ba4STim Haley bzero(packed + nvsize, bufsize - nvsize); 4078fa9e4066Sahrens 4079f7991ba4STim Haley dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 4080fa9e4066Sahrens 4081f7991ba4STim Haley kmem_free(packed, bufsize); 4082fa9e4066Sahrens 408399653d4eSeschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 4084fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 4085fa9e4066Sahrens *(uint64_t *)db->db_data = nvsize; 4086ea8dc4b6Seschrock dmu_buf_rele(db, FTAG); 4087fa9e4066Sahrens } 4088fa9e4066Sahrens 408999653d4eSeschrock static void 4090fa94a07fSbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 4091fa94a07fSbrendan const char *config, const char *entry) 409299653d4eSeschrock { 409399653d4eSeschrock nvlist_t *nvroot; 4094fa94a07fSbrendan 
nvlist_t **list; 409599653d4eSeschrock int i; 409699653d4eSeschrock 4097fa94a07fSbrendan if (!sav->sav_sync) 409899653d4eSeschrock return; 409999653d4eSeschrock 410099653d4eSeschrock /* 4101fa94a07fSbrendan * Update the MOS nvlist describing the list of available devices. 4102fa94a07fSbrendan * spa_validate_aux() will have already made sure this nvlist is 41033d7072f8Seschrock * valid and the vdevs are labeled appropriately. 410499653d4eSeschrock */ 4105fa94a07fSbrendan if (sav->sav_object == 0) { 4106fa94a07fSbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 4107fa94a07fSbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 4108fa94a07fSbrendan sizeof (uint64_t), tx); 410999653d4eSeschrock VERIFY(zap_update(spa->spa_meta_objset, 4110fa94a07fSbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 4111fa94a07fSbrendan &sav->sav_object, tx) == 0); 411299653d4eSeschrock } 411399653d4eSeschrock 411499653d4eSeschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 4115fa94a07fSbrendan if (sav->sav_count == 0) { 4116fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 411799653d4eSeschrock } else { 4118fa94a07fSbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 4119fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4120fa94a07fSbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 4121fa94a07fSbrendan B_FALSE, B_FALSE, B_TRUE); 4122fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 4123fa94a07fSbrendan sav->sav_count) == 0); 4124fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4125fa94a07fSbrendan nvlist_free(list[i]); 4126fa94a07fSbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 412799653d4eSeschrock } 412899653d4eSeschrock 4129fa94a07fSbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 413006eeb2adSek nvlist_free(nvroot); 413199653d4eSeschrock 4132fa94a07fSbrendan sav->sav_sync = B_FALSE; 413399653d4eSeschrock } 
413499653d4eSeschrock 413599653d4eSeschrock static void 413699653d4eSeschrock spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 413799653d4eSeschrock { 413899653d4eSeschrock nvlist_t *config; 413999653d4eSeschrock 4140e14bb325SJeff Bonwick if (list_is_empty(&spa->spa_config_dirty_list)) 414199653d4eSeschrock return; 414299653d4eSeschrock 4143e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4144e14bb325SJeff Bonwick 4145e14bb325SJeff Bonwick config = spa_config_generate(spa, spa->spa_root_vdev, 4146e14bb325SJeff Bonwick dmu_tx_get_txg(tx), B_FALSE); 4147e14bb325SJeff Bonwick 4148e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 414999653d4eSeschrock 415099653d4eSeschrock if (spa->spa_config_syncing) 415199653d4eSeschrock nvlist_free(spa->spa_config_syncing); 415299653d4eSeschrock spa->spa_config_syncing = config; 415399653d4eSeschrock 415499653d4eSeschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 415599653d4eSeschrock } 415699653d4eSeschrock 4157990b4856Slling /* 4158990b4856Slling * Set zpool properties. 
4159990b4856Slling */ 4160b1b8ab34Slling static void 4161ecd6cf80Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 4162b1b8ab34Slling { 4163b1b8ab34Slling spa_t *spa = arg1; 4164b1b8ab34Slling objset_t *mos = spa->spa_meta_objset; 4165990b4856Slling nvlist_t *nvp = arg2; 4166990b4856Slling nvpair_t *elem; 41673d7072f8Seschrock uint64_t intval; 4168c5904d13Seschrock char *strval; 4169990b4856Slling zpool_prop_t prop; 4170990b4856Slling const char *propname; 4171990b4856Slling zprop_type_t proptype; 4172b1b8ab34Slling 4173e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 4174e14bb325SJeff Bonwick 4175990b4856Slling elem = NULL; 4176990b4856Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 4177990b4856Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 4178990b4856Slling case ZPOOL_PROP_VERSION: 4179990b4856Slling /* 4180990b4856Slling * Only set version for non-zpool-creation cases 4181990b4856Slling * (set/import). spa_create() needs special care 4182990b4856Slling * for version setting. 4183990b4856Slling */ 4184990b4856Slling if (tx->tx_txg != TXG_INITIAL) { 4185990b4856Slling VERIFY(nvpair_value_uint64(elem, 4186990b4856Slling &intval) == 0); 4187990b4856Slling ASSERT(intval <= SPA_VERSION); 4188990b4856Slling ASSERT(intval >= spa_version(spa)); 4189990b4856Slling spa->spa_uberblock.ub_version = intval; 4190990b4856Slling vdev_config_dirty(spa->spa_root_vdev); 4191990b4856Slling } 4192ecd6cf80Smarks break; 4193990b4856Slling 4194990b4856Slling case ZPOOL_PROP_ALTROOT: 4195990b4856Slling /* 4196990b4856Slling * 'altroot' is a non-persistent property. It should 4197990b4856Slling * have been set temporarily at creation or import time. 4198990b4856Slling */ 4199990b4856Slling ASSERT(spa->spa_root != NULL); 4200b1b8ab34Slling break; 42013d7072f8Seschrock 42022f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 4203990b4856Slling /* 4204379c004dSEric Schrock * 'cachefile' is also a non-persisitent property. 
4205990b4856Slling */ 42063d7072f8Seschrock break; 4207990b4856Slling default: 4208990b4856Slling /* 4209990b4856Slling * Set pool property values in the poolprops mos object. 4210990b4856Slling */ 4211990b4856Slling if (spa->spa_pool_props_object == 0) { 4212990b4856Slling objset_t *mos = spa->spa_meta_objset; 4213990b4856Slling 4214990b4856Slling VERIFY((spa->spa_pool_props_object = 4215990b4856Slling zap_create(mos, DMU_OT_POOL_PROPS, 4216990b4856Slling DMU_OT_NONE, 0, tx)) > 0); 4217990b4856Slling 4218990b4856Slling VERIFY(zap_update(mos, 4219990b4856Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 4220990b4856Slling 8, 1, &spa->spa_pool_props_object, tx) 4221990b4856Slling == 0); 4222990b4856Slling } 4223990b4856Slling 4224990b4856Slling /* normalize the property name */ 4225990b4856Slling propname = zpool_prop_to_name(prop); 4226990b4856Slling proptype = zpool_prop_get_type(prop); 4227990b4856Slling 4228990b4856Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 4229990b4856Slling ASSERT(proptype == PROP_TYPE_STRING); 4230990b4856Slling VERIFY(nvpair_value_string(elem, &strval) == 0); 4231990b4856Slling VERIFY(zap_update(mos, 4232990b4856Slling spa->spa_pool_props_object, propname, 4233990b4856Slling 1, strlen(strval) + 1, strval, tx) == 0); 4234990b4856Slling 4235990b4856Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 4236990b4856Slling VERIFY(nvpair_value_uint64(elem, &intval) == 0); 4237990b4856Slling 4238990b4856Slling if (proptype == PROP_TYPE_INDEX) { 4239990b4856Slling const char *unused; 4240990b4856Slling VERIFY(zpool_prop_index_to_string( 4241990b4856Slling prop, intval, &unused) == 0); 4242990b4856Slling } 4243990b4856Slling VERIFY(zap_update(mos, 4244990b4856Slling spa->spa_pool_props_object, propname, 4245990b4856Slling 8, 1, &intval, tx) == 0); 4246990b4856Slling } else { 4247990b4856Slling ASSERT(0); /* not allowed */ 4248990b4856Slling } 4249990b4856Slling 42500a4e9518Sgw switch (prop) { 42510a4e9518Sgw case ZPOOL_PROP_DELEGATION: 
4252990b4856Slling spa->spa_delegation = intval; 42530a4e9518Sgw break; 42540a4e9518Sgw case ZPOOL_PROP_BOOTFS: 4255990b4856Slling spa->spa_bootfs = intval; 42560a4e9518Sgw break; 42570a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 42580a4e9518Sgw spa->spa_failmode = intval; 42590a4e9518Sgw break; 4260573ca77eSGeorge Wilson case ZPOOL_PROP_AUTOEXPAND: 4261573ca77eSGeorge Wilson spa->spa_autoexpand = intval; 4262573ca77eSGeorge Wilson spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); 4263573ca77eSGeorge Wilson break; 42640a4e9518Sgw default: 42650a4e9518Sgw break; 42660a4e9518Sgw } 4267990b4856Slling } 4268990b4856Slling 4269990b4856Slling /* log internal history if this is not a zpool create */ 4270990b4856Slling if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 4271990b4856Slling tx->tx_txg != TXG_INITIAL) { 4272990b4856Slling spa_history_internal_log(LOG_POOL_PROPSET, 4273990b4856Slling spa, tx, cr, "%s %lld %s", 4274e14bb325SJeff Bonwick nvpair_name(elem), intval, spa_name(spa)); 4275b1b8ab34Slling } 4276b1b8ab34Slling } 4277e14bb325SJeff Bonwick 4278e14bb325SJeff Bonwick mutex_exit(&spa->spa_props_lock); 4279b1b8ab34Slling } 4280b1b8ab34Slling 4281fa9e4066Sahrens /* 4282fa9e4066Sahrens * Sync the specified transaction group. New blocks may be dirtied as 4283fa9e4066Sahrens * part of the process, so we iterate until it converges. 4284fa9e4066Sahrens */ 4285fa9e4066Sahrens void 4286fa9e4066Sahrens spa_sync(spa_t *spa, uint64_t txg) 4287fa9e4066Sahrens { 4288fa9e4066Sahrens dsl_pool_t *dp = spa->spa_dsl_pool; 4289fa9e4066Sahrens objset_t *mos = spa->spa_meta_objset; 4290fa9e4066Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 42910373e76bSbonwick vdev_t *rvd = spa->spa_root_vdev; 4292fa9e4066Sahrens vdev_t *vd; 4293fa9e4066Sahrens dmu_tx_t *tx; 4294fa9e4066Sahrens int dirty_vdevs; 4295e14bb325SJeff Bonwick int error; 4296fa9e4066Sahrens 4297fa9e4066Sahrens /* 4298fa9e4066Sahrens * Lock out configuration changes. 
4299fa9e4066Sahrens */ 4300e14bb325SJeff Bonwick spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4301fa9e4066Sahrens 4302fa9e4066Sahrens spa->spa_syncing_txg = txg; 4303fa9e4066Sahrens spa->spa_sync_pass = 0; 4304fa9e4066Sahrens 4305e14bb325SJeff Bonwick /* 4306e14bb325SJeff Bonwick * If there are any pending vdev state changes, convert them 4307e14bb325SJeff Bonwick * into config changes that go out with this transaction group. 4308e14bb325SJeff Bonwick */ 4309e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 43108ad4d6ddSJeff Bonwick while (list_head(&spa->spa_state_dirty_list) != NULL) { 43118ad4d6ddSJeff Bonwick /* 43128ad4d6ddSJeff Bonwick * We need the write lock here because, for aux vdevs, 43138ad4d6ddSJeff Bonwick * calling vdev_config_dirty() modifies sav_config. 43148ad4d6ddSJeff Bonwick * This is ugly and will become unnecessary when we 43158ad4d6ddSJeff Bonwick * eliminate the aux vdev wart by integrating all vdevs 43168ad4d6ddSJeff Bonwick * into the root vdev tree. 
43178ad4d6ddSJeff Bonwick */ 43188ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 43198ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); 43208ad4d6ddSJeff Bonwick while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 43218ad4d6ddSJeff Bonwick vdev_state_clean(vd); 43228ad4d6ddSJeff Bonwick vdev_config_dirty(vd); 43238ad4d6ddSJeff Bonwick } 43248ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); 43258ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); 4326e14bb325SJeff Bonwick } 4327e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 4328e14bb325SJeff Bonwick 4329ea8dc4b6Seschrock VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); 4330fa9e4066Sahrens 433199653d4eSeschrock tx = dmu_tx_create_assigned(dp, txg); 433299653d4eSeschrock 433399653d4eSeschrock /* 4334e7437265Sahrens * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 433599653d4eSeschrock * set spa_deflate if we have no raid-z vdevs. 
433699653d4eSeschrock */ 4337e7437265Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 4338e7437265Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 433999653d4eSeschrock int i; 434099653d4eSeschrock 434199653d4eSeschrock for (i = 0; i < rvd->vdev_children; i++) { 434299653d4eSeschrock vd = rvd->vdev_child[i]; 434399653d4eSeschrock if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 434499653d4eSeschrock break; 434599653d4eSeschrock } 434699653d4eSeschrock if (i == rvd->vdev_children) { 434799653d4eSeschrock spa->spa_deflate = TRUE; 434899653d4eSeschrock VERIFY(0 == zap_add(spa->spa_meta_objset, 434999653d4eSeschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 435099653d4eSeschrock sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 435199653d4eSeschrock } 435299653d4eSeschrock } 435399653d4eSeschrock 4354088f3894Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 4355088f3894Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 4356088f3894Sahrens dsl_pool_create_origin(dp, tx); 4357088f3894Sahrens 4358088f3894Sahrens /* Keeping the origin open increases spa_minref */ 4359088f3894Sahrens spa->spa_minref += 3; 4360088f3894Sahrens } 4361088f3894Sahrens 4362088f3894Sahrens if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 4363088f3894Sahrens spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 4364088f3894Sahrens dsl_pool_upgrade_clones(dp, tx); 4365088f3894Sahrens } 4366088f3894Sahrens 4367fa9e4066Sahrens /* 4368fa9e4066Sahrens * If anything has changed in this txg, push the deferred frees 4369fa9e4066Sahrens * from the previous txg. If not, leave them alone so that we 4370fa9e4066Sahrens * don't generate work on an otherwise idle system. 
4371fa9e4066Sahrens */ 4372fa9e4066Sahrens if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 43731615a317Sek !txg_list_empty(&dp->dp_dirty_dirs, txg) || 43741615a317Sek !txg_list_empty(&dp->dp_sync_tasks, txg)) 4375fa9e4066Sahrens spa_sync_deferred_frees(spa, txg); 4376fa9e4066Sahrens 4377fa9e4066Sahrens /* 4378fa9e4066Sahrens * Iterate to convergence. 4379fa9e4066Sahrens */ 4380fa9e4066Sahrens do { 4381fa9e4066Sahrens spa->spa_sync_pass++; 4382fa9e4066Sahrens 4383fa9e4066Sahrens spa_sync_config_object(spa, tx); 4384fa94a07fSbrendan spa_sync_aux_dev(spa, &spa->spa_spares, tx, 4385fa94a07fSbrendan ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 4386fa94a07fSbrendan spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 4387fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 4388ea8dc4b6Seschrock spa_errlog_sync(spa, txg); 4389fa9e4066Sahrens dsl_pool_sync(dp, txg); 4390fa9e4066Sahrens 4391fa9e4066Sahrens dirty_vdevs = 0; 4392fa9e4066Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { 4393fa9e4066Sahrens vdev_sync(vd, txg); 4394fa9e4066Sahrens dirty_vdevs++; 4395fa9e4066Sahrens } 4396fa9e4066Sahrens 4397fa9e4066Sahrens bplist_sync(bpl, tx); 4398fa9e4066Sahrens } while (dirty_vdevs); 4399fa9e4066Sahrens 4400fa9e4066Sahrens bplist_close(bpl); 4401fa9e4066Sahrens 4402fa9e4066Sahrens dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); 4403fa9e4066Sahrens 4404fa9e4066Sahrens /* 4405fa9e4066Sahrens * Rewrite the vdev configuration (which includes the uberblock) 4406fa9e4066Sahrens * to commit the transaction group. 44070373e76bSbonwick * 440817f17c2dSbonwick * If there are no dirty vdevs, we sync the uberblock to a few 440917f17c2dSbonwick * random top-level vdevs that are known to be visible in the 4410e14bb325SJeff Bonwick * config cache (see spa_vdev_add() for a complete description). 4411e14bb325SJeff Bonwick * If there *are* dirty vdevs, sync the uberblock to all vdevs. 
44120373e76bSbonwick */ 4413e14bb325SJeff Bonwick for (;;) { 4414e14bb325SJeff Bonwick /* 4415e14bb325SJeff Bonwick * We hold SCL_STATE to prevent vdev open/close/etc. 4416e14bb325SJeff Bonwick * while we're attempting to write the vdev labels. 4417e14bb325SJeff Bonwick */ 4418e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4419e14bb325SJeff Bonwick 4420e14bb325SJeff Bonwick if (list_is_empty(&spa->spa_config_dirty_list)) { 4421e14bb325SJeff Bonwick vdev_t *svd[SPA_DVAS_PER_BP]; 4422e14bb325SJeff Bonwick int svdcount = 0; 4423e14bb325SJeff Bonwick int children = rvd->vdev_children; 4424e14bb325SJeff Bonwick int c0 = spa_get_random(children); 4425e14bb325SJeff Bonwick 4426573ca77eSGeorge Wilson for (int c = 0; c < children; c++) { 4427e14bb325SJeff Bonwick vd = rvd->vdev_child[(c0 + c) % children]; 4428e14bb325SJeff Bonwick if (vd->vdev_ms_array == 0 || vd->vdev_islog) 4429e14bb325SJeff Bonwick continue; 4430e14bb325SJeff Bonwick svd[svdcount++] = vd; 4431e14bb325SJeff Bonwick if (svdcount == SPA_DVAS_PER_BP) 4432e14bb325SJeff Bonwick break; 4433e14bb325SJeff Bonwick } 44348956713aSEric Schrock error = vdev_config_sync(svd, svdcount, txg, B_FALSE); 44358956713aSEric Schrock if (error != 0) 44368956713aSEric Schrock error = vdev_config_sync(svd, svdcount, txg, 44378956713aSEric Schrock B_TRUE); 4438e14bb325SJeff Bonwick } else { 4439e14bb325SJeff Bonwick error = vdev_config_sync(rvd->vdev_child, 44408956713aSEric Schrock rvd->vdev_children, txg, B_FALSE); 44418956713aSEric Schrock if (error != 0) 44428956713aSEric Schrock error = vdev_config_sync(rvd->vdev_child, 44438956713aSEric Schrock rvd->vdev_children, txg, B_TRUE); 44440373e76bSbonwick } 4445e14bb325SJeff Bonwick 4446e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 4447e14bb325SJeff Bonwick 4448e14bb325SJeff Bonwick if (error == 0) 4449e14bb325SJeff Bonwick break; 4450e14bb325SJeff Bonwick zio_suspend(spa, NULL); 4451e14bb325SJeff Bonwick zio_resume_wait(spa); 
44520373e76bSbonwick } 445399653d4eSeschrock dmu_tx_commit(tx); 445499653d4eSeschrock 44550373e76bSbonwick /* 44560373e76bSbonwick * Clear the dirty config list. 4457fa9e4066Sahrens */ 4458e14bb325SJeff Bonwick while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 44590373e76bSbonwick vdev_config_clean(vd); 44600373e76bSbonwick 44610373e76bSbonwick /* 44620373e76bSbonwick * Now that the new config has synced transactionally, 44630373e76bSbonwick * let it become visible to the config cache. 44640373e76bSbonwick */ 44650373e76bSbonwick if (spa->spa_config_syncing != NULL) { 44660373e76bSbonwick spa_config_set(spa, spa->spa_config_syncing); 44670373e76bSbonwick spa->spa_config_txg = txg; 44680373e76bSbonwick spa->spa_config_syncing = NULL; 44690373e76bSbonwick } 4470fa9e4066Sahrens 4471fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 4472fa9e4066Sahrens 4473fa9e4066Sahrens /* 4474fa9e4066Sahrens * Clean up the ZIL records for the synced txg. 4475fa9e4066Sahrens */ 4476fa9e4066Sahrens dsl_pool_zil_clean(dp); 4477fa9e4066Sahrens 4478fa9e4066Sahrens /* 4479fa9e4066Sahrens * Update usable space statistics. 4480fa9e4066Sahrens */ 4481fa9e4066Sahrens while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4482fa9e4066Sahrens vdev_sync_done(vd, txg); 4483fa9e4066Sahrens 4484fa9e4066Sahrens /* 4485fa9e4066Sahrens * It had better be the case that we didn't dirty anything 448699653d4eSeschrock * since vdev_config_sync(). 4487fa9e4066Sahrens */ 4488fa9e4066Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4489fa9e4066Sahrens ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4490fa9e4066Sahrens ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 4491fa9e4066Sahrens ASSERT(bpl->bpl_queue == NULL); 4492fa9e4066Sahrens 4493e14bb325SJeff Bonwick spa_config_exit(spa, SCL_CONFIG, FTAG); 4494ea8dc4b6Seschrock 4495ea8dc4b6Seschrock /* 4496ea8dc4b6Seschrock * If any async tasks have been requested, kick them off. 
4497ea8dc4b6Seschrock */ 4498ea8dc4b6Seschrock spa_async_dispatch(spa); 4499fa9e4066Sahrens } 4500fa9e4066Sahrens 4501fa9e4066Sahrens /* 4502fa9e4066Sahrens * Sync all pools. We don't want to hold the namespace lock across these 4503fa9e4066Sahrens * operations, so we take a reference on the spa_t and drop the lock during the 4504fa9e4066Sahrens * sync. 4505fa9e4066Sahrens */ 4506fa9e4066Sahrens void 4507fa9e4066Sahrens spa_sync_allpools(void) 4508fa9e4066Sahrens { 4509fa9e4066Sahrens spa_t *spa = NULL; 4510fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4511fa9e4066Sahrens while ((spa = spa_next(spa)) != NULL) { 4512e14bb325SJeff Bonwick if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa)) 4513fa9e4066Sahrens continue; 4514fa9e4066Sahrens spa_open_ref(spa, FTAG); 4515fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4516fa9e4066Sahrens txg_wait_synced(spa_get_dsl(spa), 0); 4517fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4518fa9e4066Sahrens spa_close(spa, FTAG); 4519fa9e4066Sahrens } 4520fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4521fa9e4066Sahrens } 4522fa9e4066Sahrens 4523fa9e4066Sahrens /* 4524fa9e4066Sahrens * ========================================================================== 4525fa9e4066Sahrens * Miscellaneous routines 4526fa9e4066Sahrens * ========================================================================== 4527fa9e4066Sahrens */ 4528fa9e4066Sahrens 4529fa9e4066Sahrens /* 4530fa9e4066Sahrens * Remove all pools in the system. 4531fa9e4066Sahrens */ 4532fa9e4066Sahrens void 4533fa9e4066Sahrens spa_evict_all(void) 4534fa9e4066Sahrens { 4535fa9e4066Sahrens spa_t *spa; 4536fa9e4066Sahrens 4537fa9e4066Sahrens /* 4538fa9e4066Sahrens * Remove all cached state. All pools should be closed now, 4539fa9e4066Sahrens * so every spa in the AVL tree should be unreferenced. 
4540fa9e4066Sahrens */ 4541fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4542fa9e4066Sahrens while ((spa = spa_next(NULL)) != NULL) { 4543fa9e4066Sahrens /* 4544ea8dc4b6Seschrock * Stop async tasks. The async thread may need to detach 4545ea8dc4b6Seschrock * a device that's been replaced, which requires grabbing 4546ea8dc4b6Seschrock * spa_namespace_lock, so we must drop it here. 4547fa9e4066Sahrens */ 4548fa9e4066Sahrens spa_open_ref(spa, FTAG); 4549fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4550ea8dc4b6Seschrock spa_async_suspend(spa); 4551fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4552fa9e4066Sahrens spa_close(spa, FTAG); 4553fa9e4066Sahrens 4554fa9e4066Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4555fa9e4066Sahrens spa_unload(spa); 4556fa9e4066Sahrens spa_deactivate(spa); 4557fa9e4066Sahrens } 4558fa9e4066Sahrens spa_remove(spa); 4559fa9e4066Sahrens } 4560fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4561fa9e4066Sahrens } 4562ea8dc4b6Seschrock 4563ea8dc4b6Seschrock vdev_t * 45646809eb4eSEric Schrock spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) 4565ea8dc4b6Seschrock { 4566c5904d13Seschrock vdev_t *vd; 4567c5904d13Seschrock int i; 4568c5904d13Seschrock 4569c5904d13Seschrock if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) 4570c5904d13Seschrock return (vd); 4571c5904d13Seschrock 45726809eb4eSEric Schrock if (aux) { 4573c5904d13Seschrock for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 4574c5904d13Seschrock vd = spa->spa_l2cache.sav_vdevs[i]; 45756809eb4eSEric Schrock if (vd->vdev_guid == guid) 45766809eb4eSEric Schrock return (vd); 45776809eb4eSEric Schrock } 45786809eb4eSEric Schrock 45796809eb4eSEric Schrock for (i = 0; i < spa->spa_spares.sav_count; i++) { 45806809eb4eSEric Schrock vd = spa->spa_spares.sav_vdevs[i]; 4581c5904d13Seschrock if (vd->vdev_guid == guid) 4582c5904d13Seschrock return (vd); 4583c5904d13Seschrock } 4584c5904d13Seschrock } 4585c5904d13Seschrock 4586c5904d13Seschrock 
return (NULL); 4587ea8dc4b6Seschrock } 4588eaca9bbdSeschrock 4589eaca9bbdSeschrock void 4590990b4856Slling spa_upgrade(spa_t *spa, uint64_t version) 4591eaca9bbdSeschrock { 4592e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4593eaca9bbdSeschrock 4594eaca9bbdSeschrock /* 4595eaca9bbdSeschrock * This should only be called for a non-faulted pool, and since a 4596eaca9bbdSeschrock * future version would result in an unopenable pool, this shouldn't be 4597eaca9bbdSeschrock * possible. 4598eaca9bbdSeschrock */ 4599e7437265Sahrens ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); 4600990b4856Slling ASSERT(version >= spa->spa_uberblock.ub_version); 4601eaca9bbdSeschrock 4602990b4856Slling spa->spa_uberblock.ub_version = version; 4603eaca9bbdSeschrock vdev_config_dirty(spa->spa_root_vdev); 4604eaca9bbdSeschrock 4605e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 460699653d4eSeschrock 460799653d4eSeschrock txg_wait_synced(spa_get_dsl(spa), 0); 460899653d4eSeschrock } 460999653d4eSeschrock 461099653d4eSeschrock boolean_t 461199653d4eSeschrock spa_has_spare(spa_t *spa, uint64_t guid) 461299653d4eSeschrock { 461399653d4eSeschrock int i; 461439c23413Seschrock uint64_t spareguid; 4615fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_spares; 461699653d4eSeschrock 4617fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4618fa94a07fSbrendan if (sav->sav_vdevs[i]->vdev_guid == guid) 461999653d4eSeschrock return (B_TRUE); 462099653d4eSeschrock 4621fa94a07fSbrendan for (i = 0; i < sav->sav_npending; i++) { 4622fa94a07fSbrendan if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 4623fa94a07fSbrendan &spareguid) == 0 && spareguid == guid) 462439c23413Seschrock return (B_TRUE); 462539c23413Seschrock } 462639c23413Seschrock 462799653d4eSeschrock return (B_FALSE); 4628eaca9bbdSeschrock } 4629b1b8ab34Slling 463089a89ebfSlling /* 463189a89ebfSlling * Check if a pool has an active shared spare device. 
463289a89ebfSlling * Note: reference count of an active spare is 2, as a spare and as a replace 463389a89ebfSlling */ 463489a89ebfSlling static boolean_t 463589a89ebfSlling spa_has_active_shared_spare(spa_t *spa) 463689a89ebfSlling { 463789a89ebfSlling int i, refcnt; 463889a89ebfSlling uint64_t pool; 463989a89ebfSlling spa_aux_vdev_t *sav = &spa->spa_spares; 464089a89ebfSlling 464189a89ebfSlling for (i = 0; i < sav->sav_count; i++) { 464289a89ebfSlling if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 464389a89ebfSlling &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 464489a89ebfSlling refcnt > 2) 464589a89ebfSlling return (B_TRUE); 464689a89ebfSlling } 464789a89ebfSlling 464889a89ebfSlling return (B_FALSE); 464989a89ebfSlling } 465089a89ebfSlling 46513d7072f8Seschrock /* 46523d7072f8Seschrock * Post a sysevent corresponding to the given event. The 'name' must be one of 46533d7072f8Seschrock * the event definitions in sys/sysevent/eventdefs.h. The payload will be 46543d7072f8Seschrock * filled in from the spa and (optionally) the vdev. This doesn't do anything 46553d7072f8Seschrock * in the userland libzpool, as we don't want consumers to misinterpret ztest 46563d7072f8Seschrock * or zdb as real changes. 
46573d7072f8Seschrock */ 46583d7072f8Seschrock void 46593d7072f8Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 46603d7072f8Seschrock { 46613d7072f8Seschrock #ifdef _KERNEL 46623d7072f8Seschrock sysevent_t *ev; 46633d7072f8Seschrock sysevent_attr_list_t *attr = NULL; 46643d7072f8Seschrock sysevent_value_t value; 46653d7072f8Seschrock sysevent_id_t eid; 46663d7072f8Seschrock 46673d7072f8Seschrock ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 46683d7072f8Seschrock SE_SLEEP); 46693d7072f8Seschrock 46703d7072f8Seschrock value.value_type = SE_DATA_TYPE_STRING; 46713d7072f8Seschrock value.value.sv_string = spa_name(spa); 46723d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 46733d7072f8Seschrock goto done; 46743d7072f8Seschrock 46753d7072f8Seschrock value.value_type = SE_DATA_TYPE_UINT64; 46763d7072f8Seschrock value.value.sv_uint64 = spa_guid(spa); 46773d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 46783d7072f8Seschrock goto done; 46793d7072f8Seschrock 46803d7072f8Seschrock if (vd) { 46813d7072f8Seschrock value.value_type = SE_DATA_TYPE_UINT64; 46823d7072f8Seschrock value.value.sv_uint64 = vd->vdev_guid; 46833d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 46843d7072f8Seschrock SE_SLEEP) != 0) 46853d7072f8Seschrock goto done; 46863d7072f8Seschrock 46873d7072f8Seschrock if (vd->vdev_path) { 46883d7072f8Seschrock value.value_type = SE_DATA_TYPE_STRING; 46893d7072f8Seschrock value.value.sv_string = vd->vdev_path; 46903d7072f8Seschrock if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 46913d7072f8Seschrock &value, SE_SLEEP) != 0) 46923d7072f8Seschrock goto done; 46933d7072f8Seschrock } 46943d7072f8Seschrock } 46953d7072f8Seschrock 4696b01c3b58Seschrock if (sysevent_attach_attributes(ev, attr) != 0) 4697b01c3b58Seschrock goto done; 4698b01c3b58Seschrock attr = NULL; 4699b01c3b58Seschrock 47003d7072f8Seschrock (void) log_sysevent(ev, 
SE_SLEEP, &eid); 47013d7072f8Seschrock 47023d7072f8Seschrock done: 47033d7072f8Seschrock if (attr) 47043d7072f8Seschrock sysevent_free_attr(attr); 47053d7072f8Seschrock sysevent_free(ev); 47063d7072f8Seschrock #endif 47073d7072f8Seschrock } 4708