1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 2199653d4eSeschrock 22fa9e4066Sahrens /* 23379c004dSEric Schrock * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24fa9e4066Sahrens * Use is subject to license terms. 25fa9e4066Sahrens */ 26fa9e4066Sahrens 27fa9e4066Sahrens /* 28fa9e4066Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29fa9e4066Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30fa9e4066Sahrens * pool. 
31fa9e4066Sahrens */ 32fa9e4066Sahrens 33fa9e4066Sahrens #include <sys/zfs_context.h> 34ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h> 35fa9e4066Sahrens #include <sys/spa_impl.h> 36fa9e4066Sahrens #include <sys/zio.h> 37fa9e4066Sahrens #include <sys/zio_checksum.h> 38fa9e4066Sahrens #include <sys/zio_compress.h> 39fa9e4066Sahrens #include <sys/dmu.h> 40fa9e4066Sahrens #include <sys/dmu_tx.h> 41fa9e4066Sahrens #include <sys/zap.h> 42fa9e4066Sahrens #include <sys/zil.h> 43fa9e4066Sahrens #include <sys/vdev_impl.h> 44fa9e4066Sahrens #include <sys/metaslab.h> 4588ecc943SGeorge Wilson #include <sys/metaslab_impl.h> 46fa9e4066Sahrens #include <sys/uberblock_impl.h> 47fa9e4066Sahrens #include <sys/txg.h> 48fa9e4066Sahrens #include <sys/avl.h> 49fa9e4066Sahrens #include <sys/dmu_traverse.h> 50b1b8ab34Slling #include <sys/dmu_objset.h> 51fa9e4066Sahrens #include <sys/unique.h> 52fa9e4066Sahrens #include <sys/dsl_pool.h> 53b1b8ab34Slling #include <sys/dsl_dataset.h> 54fa9e4066Sahrens #include <sys/dsl_dir.h> 55fa9e4066Sahrens #include <sys/dsl_prop.h> 56b1b8ab34Slling #include <sys/dsl_synctask.h> 57fa9e4066Sahrens #include <sys/fs/zfs.h> 58fa94a07fSbrendan #include <sys/arc.h> 59fa9e4066Sahrens #include <sys/callb.h> 6095173954Sek #include <sys/systeminfo.h> 6195173954Sek #include <sys/sunddi.h> 62e7cbe64fSgw #include <sys/spa_boot.h> 63573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h> 64fa9e4066Sahrens 655679c89fSjv #ifdef _KERNEL 665679c89fSjv #include <sys/zone.h> 67*dedec472SJack Meng #include <sys/bootprops.h> 685679c89fSjv #endif /* _KERNEL */ 695679c89fSjv 70990b4856Slling #include "zfs_prop.h" 71b7b97454Sperrin #include "zfs_comutil.h" 72990b4856Slling 732e0c549eSJonathan Adams enum zti_modes { 742e0c549eSJonathan Adams zti_mode_fixed, /* value is # of threads (min 1) */ 752e0c549eSJonathan Adams zti_mode_online_percent, /* value is % of online CPUs */ 762e0c549eSJonathan Adams zti_mode_tune, /* fill from zio_taskq_tune_* */ 772e0c549eSJonathan Adams zti_nmodes 
78e14bb325SJeff Bonwick }; 79416e0cd8Sek 802e0c549eSJonathan Adams #define ZTI_THREAD_FIX(n) { zti_mode_fixed, (n) } 812e0c549eSJonathan Adams #define ZTI_THREAD_PCT(n) { zti_mode_online_percent, (n) } 822e0c549eSJonathan Adams #define ZTI_THREAD_TUNE { zti_mode_tune, 0 } 832e0c549eSJonathan Adams 842e0c549eSJonathan Adams #define ZTI_THREAD_ONE ZTI_THREAD_FIX(1) 852e0c549eSJonathan Adams 862e0c549eSJonathan Adams typedef struct zio_taskq_info { 872e0c549eSJonathan Adams const char *zti_name; 882e0c549eSJonathan Adams struct { 892e0c549eSJonathan Adams enum zti_modes zti_mode; 902e0c549eSJonathan Adams uint_t zti_value; 912e0c549eSJonathan Adams } zti_nthreads[ZIO_TASKQ_TYPES]; 922e0c549eSJonathan Adams } zio_taskq_info_t; 932e0c549eSJonathan Adams 942e0c549eSJonathan Adams static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 952e0c549eSJonathan Adams "issue", "intr" 962e0c549eSJonathan Adams }; 972e0c549eSJonathan Adams 982e0c549eSJonathan Adams const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = { 992e0c549eSJonathan Adams /* ISSUE INTR */ 1002e0c549eSJonathan Adams { "spa_zio_null", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1012e0c549eSJonathan Adams { "spa_zio_read", { ZTI_THREAD_FIX(8), ZTI_THREAD_TUNE } }, 1022e0c549eSJonathan Adams { "spa_zio_write", { ZTI_THREAD_TUNE, ZTI_THREAD_FIX(8) } }, 1032e0c549eSJonathan Adams { "spa_zio_free", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1042e0c549eSJonathan Adams { "spa_zio_claim", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1052e0c549eSJonathan Adams { "spa_zio_ioctl", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1062e0c549eSJonathan Adams }; 1072e0c549eSJonathan Adams 1082e0c549eSJonathan Adams enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent; 1092e0c549eSJonathan Adams uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */ 1102e0c549eSJonathan Adams 111990b4856Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 11289a89ebfSlling static boolean_t 
spa_has_active_shared_spare(spa_t *spa); 113990b4856Slling 114990b4856Slling /* 115990b4856Slling * ========================================================================== 116990b4856Slling * SPA properties routines 117990b4856Slling * ========================================================================== 118990b4856Slling */ 119990b4856Slling 120990b4856Slling /* 121990b4856Slling * Add a (source=src, propname=propval) list to an nvlist. 122990b4856Slling */ 1239d82f4f6Slling static void 124990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 125990b4856Slling uint64_t intval, zprop_source_t src) 126990b4856Slling { 127990b4856Slling const char *propname = zpool_prop_to_name(prop); 128990b4856Slling nvlist_t *propval; 129990b4856Slling 1309d82f4f6Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1319d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 132990b4856Slling 1339d82f4f6Slling if (strval != NULL) 1349d82f4f6Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1359d82f4f6Slling else 1369d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 137990b4856Slling 1389d82f4f6Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 139990b4856Slling nvlist_free(propval); 140990b4856Slling } 141990b4856Slling 142990b4856Slling /* 143990b4856Slling * Get property values from the spa configuration. 
144990b4856Slling */ 1459d82f4f6Slling static void 146990b4856Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 147990b4856Slling { 148379c004dSEric Schrock uint64_t size; 149379c004dSEric Schrock uint64_t used; 150990b4856Slling uint64_t cap, version; 151990b4856Slling zprop_source_t src = ZPROP_SRC_NONE; 152c5904d13Seschrock spa_config_dirent_t *dp; 153990b4856Slling 154e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 155e14bb325SJeff Bonwick 156379c004dSEric Schrock if (spa->spa_root_vdev != NULL) { 157379c004dSEric Schrock size = spa_get_space(spa); 158379c004dSEric Schrock used = spa_get_alloc(spa); 159379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 160379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 161379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 162379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, 163379c004dSEric Schrock size - used, src); 164379c004dSEric Schrock 165379c004dSEric Schrock cap = (size == 0) ? 
0 : (used * 100 / size); 166379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 167379c004dSEric Schrock 168379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 169379c004dSEric Schrock spa->spa_root_vdev->vdev_state, src); 170379c004dSEric Schrock 171379c004dSEric Schrock version = spa_version(spa); 172379c004dSEric Schrock if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 173379c004dSEric Schrock src = ZPROP_SRC_DEFAULT; 174379c004dSEric Schrock else 175379c004dSEric Schrock src = ZPROP_SRC_LOCAL; 176379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 177379c004dSEric Schrock } 178990b4856Slling 1799d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 180990b4856Slling 1819d82f4f6Slling if (spa->spa_root != NULL) 1829d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1839d82f4f6Slling 0, ZPROP_SRC_LOCAL); 184990b4856Slling 185c5904d13Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 186c5904d13Seschrock if (dp->scd_path == NULL) { 1879d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 188c5904d13Seschrock "none", 0, ZPROP_SRC_LOCAL); 189c5904d13Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1909d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 191c5904d13Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1922f8aaab3Seschrock } 1932f8aaab3Seschrock } 194990b4856Slling } 195990b4856Slling 196990b4856Slling /* 197990b4856Slling * Get zpool property values. 
198990b4856Slling */ 199990b4856Slling int 200990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 201990b4856Slling { 202990b4856Slling zap_cursor_t zc; 203990b4856Slling zap_attribute_t za; 204990b4856Slling objset_t *mos = spa->spa_meta_objset; 205990b4856Slling int err; 206990b4856Slling 2079d82f4f6Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 208990b4856Slling 209e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 210e14bb325SJeff Bonwick 211990b4856Slling /* 212990b4856Slling * Get properties from the spa config. 213990b4856Slling */ 2149d82f4f6Slling spa_prop_get_config(spa, nvp); 215990b4856Slling 216990b4856Slling /* If no pool property object, no more prop to get. */ 217990b4856Slling if (spa->spa_pool_props_object == 0) { 218990b4856Slling mutex_exit(&spa->spa_props_lock); 219990b4856Slling return (0); 220990b4856Slling } 221990b4856Slling 222990b4856Slling /* 223990b4856Slling * Get properties from the MOS pool property object. 224990b4856Slling */ 225990b4856Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 226990b4856Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 227990b4856Slling zap_cursor_advance(&zc)) { 228990b4856Slling uint64_t intval = 0; 229990b4856Slling char *strval = NULL; 230990b4856Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 231990b4856Slling zpool_prop_t prop; 232990b4856Slling 233990b4856Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 234990b4856Slling continue; 235990b4856Slling 236990b4856Slling switch (za.za_integer_length) { 237990b4856Slling case 8: 238990b4856Slling /* integer property */ 239990b4856Slling if (za.za_first_integer != 240990b4856Slling zpool_prop_default_numeric(prop)) 241990b4856Slling src = ZPROP_SRC_LOCAL; 242990b4856Slling 243990b4856Slling if (prop == ZPOOL_PROP_BOOTFS) { 244990b4856Slling dsl_pool_t *dp; 245990b4856Slling dsl_dataset_t *ds = NULL; 246990b4856Slling 247990b4856Slling dp = spa_get_dsl(spa); 248990b4856Slling 
rw_enter(&dp->dp_config_rwlock, RW_READER); 249745cd3c5Smaybee if (err = dsl_dataset_hold_obj(dp, 250745cd3c5Smaybee za.za_first_integer, FTAG, &ds)) { 251990b4856Slling rw_exit(&dp->dp_config_rwlock); 252990b4856Slling break; 253990b4856Slling } 254990b4856Slling 255990b4856Slling strval = kmem_alloc( 256990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 257990b4856Slling KM_SLEEP); 258990b4856Slling dsl_dataset_name(ds, strval); 259745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 260990b4856Slling rw_exit(&dp->dp_config_rwlock); 261990b4856Slling } else { 262990b4856Slling strval = NULL; 263990b4856Slling intval = za.za_first_integer; 264990b4856Slling } 265990b4856Slling 2669d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 267990b4856Slling 268990b4856Slling if (strval != NULL) 269990b4856Slling kmem_free(strval, 270990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 271990b4856Slling 272990b4856Slling break; 273990b4856Slling 274990b4856Slling case 1: 275990b4856Slling /* string property */ 276990b4856Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 277990b4856Slling err = zap_lookup(mos, spa->spa_pool_props_object, 278990b4856Slling za.za_name, 1, za.za_num_integers, strval); 279990b4856Slling if (err) { 280990b4856Slling kmem_free(strval, za.za_num_integers); 281990b4856Slling break; 282990b4856Slling } 2839d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 284990b4856Slling kmem_free(strval, za.za_num_integers); 285990b4856Slling break; 286990b4856Slling 287990b4856Slling default: 288990b4856Slling break; 289990b4856Slling } 290990b4856Slling } 291990b4856Slling zap_cursor_fini(&zc); 292990b4856Slling mutex_exit(&spa->spa_props_lock); 293990b4856Slling out: 294990b4856Slling if (err && err != ENOENT) { 295990b4856Slling nvlist_free(*nvp); 2969d82f4f6Slling *nvp = NULL; 297990b4856Slling return (err); 298990b4856Slling } 299990b4856Slling 300990b4856Slling return (0); 301990b4856Slling } 302990b4856Slling 303990b4856Slling 
/* 304990b4856Slling * Validate the given pool properties nvlist and modify the list 305990b4856Slling * for the property values to be set. 306990b4856Slling */ 307990b4856Slling static int 308990b4856Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 309990b4856Slling { 310990b4856Slling nvpair_t *elem; 311990b4856Slling int error = 0, reset_bootfs = 0; 312990b4856Slling uint64_t objnum; 313990b4856Slling 314990b4856Slling elem = NULL; 315990b4856Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 316990b4856Slling zpool_prop_t prop; 317990b4856Slling char *propname, *strval; 318990b4856Slling uint64_t intval; 319990b4856Slling objset_t *os; 3202f8aaab3Seschrock char *slash; 321990b4856Slling 322990b4856Slling propname = nvpair_name(elem); 323990b4856Slling 324990b4856Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 325990b4856Slling return (EINVAL); 326990b4856Slling 327990b4856Slling switch (prop) { 328990b4856Slling case ZPOOL_PROP_VERSION: 329990b4856Slling error = nvpair_value_uint64(elem, &intval); 330990b4856Slling if (!error && 331990b4856Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 332990b4856Slling error = EINVAL; 333990b4856Slling break; 334990b4856Slling 335990b4856Slling case ZPOOL_PROP_DELEGATION: 336990b4856Slling case ZPOOL_PROP_AUTOREPLACE: 337d5b5bb25SRich Morris case ZPOOL_PROP_LISTSNAPS: 338573ca77eSGeorge Wilson case ZPOOL_PROP_AUTOEXPAND: 339990b4856Slling error = nvpair_value_uint64(elem, &intval); 340990b4856Slling if (!error && intval > 1) 341990b4856Slling error = EINVAL; 342990b4856Slling break; 343990b4856Slling 344990b4856Slling case ZPOOL_PROP_BOOTFS: 34525f89ee2SJeff Bonwick /* 34625f89ee2SJeff Bonwick * If the pool version is less than SPA_VERSION_BOOTFS, 34725f89ee2SJeff Bonwick * or the pool is still being created (version == 0), 34825f89ee2SJeff Bonwick * the bootfs property cannot be set. 
34925f89ee2SJeff Bonwick */ 350990b4856Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 351990b4856Slling error = ENOTSUP; 352990b4856Slling break; 353990b4856Slling } 354990b4856Slling 355990b4856Slling /* 35615e6edf1Sgw * Make sure the vdev config is bootable 357990b4856Slling */ 35815e6edf1Sgw if (!vdev_is_bootable(spa->spa_root_vdev)) { 359990b4856Slling error = ENOTSUP; 360990b4856Slling break; 361990b4856Slling } 362990b4856Slling 363990b4856Slling reset_bootfs = 1; 364990b4856Slling 365990b4856Slling error = nvpair_value_string(elem, &strval); 366990b4856Slling 367990b4856Slling if (!error) { 36815e6edf1Sgw uint64_t compress; 36915e6edf1Sgw 370990b4856Slling if (strval == NULL || strval[0] == '\0') { 371990b4856Slling objnum = zpool_prop_default_numeric( 372990b4856Slling ZPOOL_PROP_BOOTFS); 373990b4856Slling break; 374990b4856Slling } 375990b4856Slling 376503ad85cSMatthew Ahrens if (error = dmu_objset_hold(strval, FTAG, &os)) 377990b4856Slling break; 37815e6edf1Sgw 379503ad85cSMatthew Ahrens /* Must be ZPL and not gzip compressed. 
*/ 380503ad85cSMatthew Ahrens 381503ad85cSMatthew Ahrens if (dmu_objset_type(os) != DMU_OST_ZFS) { 382503ad85cSMatthew Ahrens error = ENOTSUP; 383503ad85cSMatthew Ahrens } else if ((error = dsl_prop_get_integer(strval, 38415e6edf1Sgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 38515e6edf1Sgw &compress, NULL)) == 0 && 38615e6edf1Sgw !BOOTFS_COMPRESS_VALID(compress)) { 38715e6edf1Sgw error = ENOTSUP; 38815e6edf1Sgw } else { 38915e6edf1Sgw objnum = dmu_objset_id(os); 39015e6edf1Sgw } 391503ad85cSMatthew Ahrens dmu_objset_rele(os, FTAG); 392990b4856Slling } 393990b4856Slling break; 394e14bb325SJeff Bonwick 3950a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 3960a4e9518Sgw error = nvpair_value_uint64(elem, &intval); 3970a4e9518Sgw if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 3980a4e9518Sgw intval > ZIO_FAILURE_MODE_PANIC)) 3990a4e9518Sgw error = EINVAL; 4000a4e9518Sgw 4010a4e9518Sgw /* 4020a4e9518Sgw * This is a special case which only occurs when 4030a4e9518Sgw * the pool has completely failed. This allows 4040a4e9518Sgw * the user to change the in-core failmode property 4050a4e9518Sgw * without syncing it out to disk (I/Os might 4060a4e9518Sgw * currently be blocked). We do this by returning 4070a4e9518Sgw * EIO to the caller (spa_prop_set) to trick it 4080a4e9518Sgw * into thinking we encountered a property validation 4090a4e9518Sgw * error. 
4100a4e9518Sgw */ 411e14bb325SJeff Bonwick if (!error && spa_suspended(spa)) { 4120a4e9518Sgw spa->spa_failmode = intval; 4130a4e9518Sgw error = EIO; 4140a4e9518Sgw } 4150a4e9518Sgw break; 4162f8aaab3Seschrock 4172f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 4182f8aaab3Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4192f8aaab3Seschrock break; 4202f8aaab3Seschrock 4212f8aaab3Seschrock if (strval[0] == '\0') 4222f8aaab3Seschrock break; 4232f8aaab3Seschrock 4242f8aaab3Seschrock if (strcmp(strval, "none") == 0) 4252f8aaab3Seschrock break; 4262f8aaab3Seschrock 4272f8aaab3Seschrock if (strval[0] != '/') { 4282f8aaab3Seschrock error = EINVAL; 4292f8aaab3Seschrock break; 4302f8aaab3Seschrock } 4312f8aaab3Seschrock 4322f8aaab3Seschrock slash = strrchr(strval, '/'); 4332f8aaab3Seschrock ASSERT(slash != NULL); 4342f8aaab3Seschrock 4352f8aaab3Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4362f8aaab3Seschrock strcmp(slash, "/..") == 0) 4372f8aaab3Seschrock error = EINVAL; 4382f8aaab3Seschrock break; 439990b4856Slling } 440990b4856Slling 441990b4856Slling if (error) 442990b4856Slling break; 443990b4856Slling } 444990b4856Slling 445990b4856Slling if (!error && reset_bootfs) { 446990b4856Slling error = nvlist_remove(props, 447990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 448990b4856Slling 449990b4856Slling if (!error) { 450990b4856Slling error = nvlist_add_uint64(props, 451990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 452990b4856Slling } 453990b4856Slling } 454990b4856Slling 455990b4856Slling return (error); 456990b4856Slling } 457990b4856Slling 458379c004dSEric Schrock void 459379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 460379c004dSEric Schrock { 461379c004dSEric Schrock char *cachefile; 462379c004dSEric Schrock spa_config_dirent_t *dp; 463379c004dSEric Schrock 464379c004dSEric Schrock if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 
465379c004dSEric Schrock &cachefile) != 0) 466379c004dSEric Schrock return; 467379c004dSEric Schrock 468379c004dSEric Schrock dp = kmem_alloc(sizeof (spa_config_dirent_t), 469379c004dSEric Schrock KM_SLEEP); 470379c004dSEric Schrock 471379c004dSEric Schrock if (cachefile[0] == '\0') 472379c004dSEric Schrock dp->scd_path = spa_strdup(spa_config_path); 473379c004dSEric Schrock else if (strcmp(cachefile, "none") == 0) 474379c004dSEric Schrock dp->scd_path = NULL; 475379c004dSEric Schrock else 476379c004dSEric Schrock dp->scd_path = spa_strdup(cachefile); 477379c004dSEric Schrock 478379c004dSEric Schrock list_insert_head(&spa->spa_config_list, dp); 479379c004dSEric Schrock if (need_sync) 480379c004dSEric Schrock spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 481379c004dSEric Schrock } 482379c004dSEric Schrock 483990b4856Slling int 484990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 485990b4856Slling { 486990b4856Slling int error; 487379c004dSEric Schrock nvpair_t *elem; 488379c004dSEric Schrock boolean_t need_sync = B_FALSE; 489379c004dSEric Schrock zpool_prop_t prop; 490990b4856Slling 491990b4856Slling if ((error = spa_prop_validate(spa, nvp)) != 0) 492990b4856Slling return (error); 493990b4856Slling 494379c004dSEric Schrock elem = NULL; 495379c004dSEric Schrock while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 496379c004dSEric Schrock if ((prop = zpool_name_to_prop( 497379c004dSEric Schrock nvpair_name(elem))) == ZPROP_INVAL) 498379c004dSEric Schrock return (EINVAL); 499379c004dSEric Schrock 500379c004dSEric Schrock if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 501379c004dSEric Schrock continue; 502379c004dSEric Schrock 503379c004dSEric Schrock need_sync = B_TRUE; 504379c004dSEric Schrock break; 505379c004dSEric Schrock } 506379c004dSEric Schrock 507379c004dSEric Schrock if (need_sync) 508379c004dSEric Schrock return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 509379c004dSEric Schrock spa, nvp, 3)); 510379c004dSEric 
Schrock else 511379c004dSEric Schrock return (0); 512990b4856Slling } 513990b4856Slling 514990b4856Slling /* 515990b4856Slling * If the bootfs property value is dsobj, clear it. 516990b4856Slling */ 517990b4856Slling void 518990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 519990b4856Slling { 520990b4856Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 521990b4856Slling VERIFY(zap_remove(spa->spa_meta_objset, 522990b4856Slling spa->spa_pool_props_object, 523990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 524990b4856Slling spa->spa_bootfs = 0; 525990b4856Slling } 526990b4856Slling } 527990b4856Slling 528fa9e4066Sahrens /* 529fa9e4066Sahrens * ========================================================================== 530fa9e4066Sahrens * SPA state manipulation (open/create/destroy/import/export) 531fa9e4066Sahrens * ========================================================================== 532fa9e4066Sahrens */ 533fa9e4066Sahrens 534ea8dc4b6Seschrock static int 535ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b) 536ea8dc4b6Seschrock { 537ea8dc4b6Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 538ea8dc4b6Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 539ea8dc4b6Seschrock int ret; 540ea8dc4b6Seschrock 541ea8dc4b6Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 542ea8dc4b6Seschrock sizeof (zbookmark_t)); 543ea8dc4b6Seschrock 544ea8dc4b6Seschrock if (ret < 0) 545ea8dc4b6Seschrock return (-1); 546ea8dc4b6Seschrock else if (ret > 0) 547ea8dc4b6Seschrock return (1); 548ea8dc4b6Seschrock else 549ea8dc4b6Seschrock return (0); 550ea8dc4b6Seschrock } 551ea8dc4b6Seschrock 552ea8dc4b6Seschrock /* 553ea8dc4b6Seschrock * Utility function which retrieves copies of the current logs and 554ea8dc4b6Seschrock * re-initializes them in the process. 
555ea8dc4b6Seschrock */ 556ea8dc4b6Seschrock void 557ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 558ea8dc4b6Seschrock { 559ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 560ea8dc4b6Seschrock 561ea8dc4b6Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 562ea8dc4b6Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 563ea8dc4b6Seschrock 564ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 565ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 566ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 567ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 568ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 569ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 570ea8dc4b6Seschrock } 571ea8dc4b6Seschrock 572fa9e4066Sahrens /* 573fa9e4066Sahrens * Activate an uninitialized pool. 574fa9e4066Sahrens */ 575fa9e4066Sahrens static void 5768ad4d6ddSJeff Bonwick spa_activate(spa_t *spa, int mode) 577fa9e4066Sahrens { 578fa9e4066Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 579fa9e4066Sahrens 580fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 5818ad4d6ddSJeff Bonwick spa->spa_mode = mode; 582fa9e4066Sahrens 58388ecc943SGeorge Wilson spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 58488ecc943SGeorge Wilson spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 585fa9e4066Sahrens 586e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 5872e0c549eSJonathan Adams const zio_taskq_info_t *ztip = &zio_taskqs[t]; 588e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 5892e0c549eSJonathan Adams enum zti_modes mode = ztip->zti_nthreads[q].zti_mode; 5902e0c549eSJonathan Adams uint_t value = ztip->zti_nthreads[q].zti_value; 5912e0c549eSJonathan Adams char name[32]; 5922e0c549eSJonathan Adams 5932e0c549eSJonathan Adams (void) snprintf(name, sizeof (name), 5942e0c549eSJonathan Adams 
"%s_%s", ztip->zti_name, zio_taskq_types[q]); 5952e0c549eSJonathan Adams 5962e0c549eSJonathan Adams if (mode == zti_mode_tune) { 5972e0c549eSJonathan Adams mode = zio_taskq_tune_mode; 5982e0c549eSJonathan Adams value = zio_taskq_tune_value; 5992e0c549eSJonathan Adams if (mode == zti_mode_tune) 6002e0c549eSJonathan Adams mode = zti_mode_online_percent; 6012e0c549eSJonathan Adams } 6022e0c549eSJonathan Adams 6032e0c549eSJonathan Adams switch (mode) { 6042e0c549eSJonathan Adams case zti_mode_fixed: 6052e0c549eSJonathan Adams ASSERT3U(value, >=, 1); 6062e0c549eSJonathan Adams value = MAX(value, 1); 6072e0c549eSJonathan Adams 6082e0c549eSJonathan Adams spa->spa_zio_taskq[t][q] = taskq_create(name, 6092e0c549eSJonathan Adams value, maxclsyspri, 50, INT_MAX, 6102e0c549eSJonathan Adams TASKQ_PREPOPULATE); 6112e0c549eSJonathan Adams break; 6122e0c549eSJonathan Adams 6132e0c549eSJonathan Adams case zti_mode_online_percent: 6142e0c549eSJonathan Adams spa->spa_zio_taskq[t][q] = taskq_create(name, 6152e0c549eSJonathan Adams value, maxclsyspri, 50, INT_MAX, 6162e0c549eSJonathan Adams TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 6172e0c549eSJonathan Adams break; 6182e0c549eSJonathan Adams 6192e0c549eSJonathan Adams case zti_mode_tune: 6202e0c549eSJonathan Adams default: 6212e0c549eSJonathan Adams panic("unrecognized mode for " 6222e0c549eSJonathan Adams "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) " 6232e0c549eSJonathan Adams "in spa_activate()", 6242e0c549eSJonathan Adams t, q, mode, value); 6252e0c549eSJonathan Adams break; 6262e0c549eSJonathan Adams } 627e14bb325SJeff Bonwick } 628fa9e4066Sahrens } 629fa9e4066Sahrens 630e14bb325SJeff Bonwick list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 631e14bb325SJeff Bonwick offsetof(vdev_t, vdev_config_dirty_node)); 632e14bb325SJeff Bonwick list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 633e14bb325SJeff Bonwick offsetof(vdev_t, vdev_state_dirty_node)); 634fa9e4066Sahrens 635fa9e4066Sahrens 
txg_list_create(&spa->spa_vdev_txg_list, 636fa9e4066Sahrens offsetof(struct vdev, vdev_txg_node)); 637ea8dc4b6Seschrock 638ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 639ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 640ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 641ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 642ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 643ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 644fa9e4066Sahrens } 645fa9e4066Sahrens 646fa9e4066Sahrens /* 647fa9e4066Sahrens * Opposite of spa_activate(). 648fa9e4066Sahrens */ 649fa9e4066Sahrens static void 650fa9e4066Sahrens spa_deactivate(spa_t *spa) 651fa9e4066Sahrens { 652fa9e4066Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 653fa9e4066Sahrens ASSERT(spa->spa_dsl_pool == NULL); 654fa9e4066Sahrens ASSERT(spa->spa_root_vdev == NULL); 65525f89ee2SJeff Bonwick ASSERT(spa->spa_async_zio_root == NULL); 656fa9e4066Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 657fa9e4066Sahrens 658fa9e4066Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 659fa9e4066Sahrens 660e14bb325SJeff Bonwick list_destroy(&spa->spa_config_dirty_list); 661e14bb325SJeff Bonwick list_destroy(&spa->spa_state_dirty_list); 662fa9e4066Sahrens 663e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 664e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 665e14bb325SJeff Bonwick taskq_destroy(spa->spa_zio_taskq[t][q]); 666e14bb325SJeff Bonwick spa->spa_zio_taskq[t][q] = NULL; 667e14bb325SJeff Bonwick } 668fa9e4066Sahrens } 669fa9e4066Sahrens 670fa9e4066Sahrens metaslab_class_destroy(spa->spa_normal_class); 671fa9e4066Sahrens spa->spa_normal_class = NULL; 672fa9e4066Sahrens 6738654d025Sperrin metaslab_class_destroy(spa->spa_log_class); 6748654d025Sperrin spa->spa_log_class = NULL; 6758654d025Sperrin 676ea8dc4b6Seschrock /* 677ea8dc4b6Seschrock * If this was part of an import or the open otherwise failed, we may 
678ea8dc4b6Seschrock * still have errors left in the queues. Empty them just in case. 679ea8dc4b6Seschrock */ 680ea8dc4b6Seschrock spa_errlog_drain(spa); 681ea8dc4b6Seschrock 682ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_scrub); 683ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_last); 684ea8dc4b6Seschrock 685fa9e4066Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 686fa9e4066Sahrens } 687fa9e4066Sahrens 688fa9e4066Sahrens /* 689fa9e4066Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. This 690fa9e4066Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 691fa9e4066Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 692fa9e4066Sahrens * All vdev validation is done by the vdev_alloc() routine. 693fa9e4066Sahrens */ 69499653d4eSeschrock static int 69599653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 69699653d4eSeschrock uint_t id, int atype) 697fa9e4066Sahrens { 698fa9e4066Sahrens nvlist_t **child; 699573ca77eSGeorge Wilson uint_t children; 70099653d4eSeschrock int error; 701fa9e4066Sahrens 70299653d4eSeschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 70399653d4eSeschrock return (error); 704fa9e4066Sahrens 70599653d4eSeschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 70699653d4eSeschrock return (0); 707fa9e4066Sahrens 708e14bb325SJeff Bonwick error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 709e14bb325SJeff Bonwick &child, &children); 710e14bb325SJeff Bonwick 711e14bb325SJeff Bonwick if (error == ENOENT) 712e14bb325SJeff Bonwick return (0); 713e14bb325SJeff Bonwick 714e14bb325SJeff Bonwick if (error) { 71599653d4eSeschrock vdev_free(*vdp); 71699653d4eSeschrock *vdp = NULL; 71799653d4eSeschrock return (EINVAL); 718fa9e4066Sahrens } 719fa9e4066Sahrens 720573ca77eSGeorge Wilson for (int c = 0; c < children; c++) { 72199653d4eSeschrock vdev_t *vd; 72299653d4eSeschrock if ((error = 
spa_config_parse(spa, &vd, child[c], *vdp, c, 72399653d4eSeschrock atype)) != 0) { 72499653d4eSeschrock vdev_free(*vdp); 72599653d4eSeschrock *vdp = NULL; 72699653d4eSeschrock return (error); 727fa9e4066Sahrens } 728fa9e4066Sahrens } 729fa9e4066Sahrens 73099653d4eSeschrock ASSERT(*vdp != NULL); 73199653d4eSeschrock 73299653d4eSeschrock return (0); 733fa9e4066Sahrens } 734fa9e4066Sahrens 735fa9e4066Sahrens /* 736fa9e4066Sahrens * Opposite of spa_load(). 737fa9e4066Sahrens */ 738fa9e4066Sahrens static void 739fa9e4066Sahrens spa_unload(spa_t *spa) 740fa9e4066Sahrens { 74199653d4eSeschrock int i; 74299653d4eSeschrock 743e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 744e14bb325SJeff Bonwick 745ea8dc4b6Seschrock /* 746ea8dc4b6Seschrock * Stop async tasks. 747ea8dc4b6Seschrock */ 748ea8dc4b6Seschrock spa_async_suspend(spa); 749ea8dc4b6Seschrock 750fa9e4066Sahrens /* 751fa9e4066Sahrens * Stop syncing. 752fa9e4066Sahrens */ 753fa9e4066Sahrens if (spa->spa_sync_on) { 754fa9e4066Sahrens txg_sync_stop(spa->spa_dsl_pool); 755fa9e4066Sahrens spa->spa_sync_on = B_FALSE; 756fa9e4066Sahrens } 757fa9e4066Sahrens 758fa9e4066Sahrens /* 759e14bb325SJeff Bonwick * Wait for any outstanding async I/O to complete. 760fa9e4066Sahrens */ 76154d692b7SGeorge Wilson if (spa->spa_async_zio_root != NULL) { 76254d692b7SGeorge Wilson (void) zio_wait(spa->spa_async_zio_root); 76354d692b7SGeorge Wilson spa->spa_async_zio_root = NULL; 76454d692b7SGeorge Wilson } 765fa9e4066Sahrens 766fa9e4066Sahrens /* 767fa9e4066Sahrens * Close the dsl pool. 
 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/* The remaining teardown is done with all config locks held. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/* Free the hot-spare vdevs, their array, and their saved config. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}
	spa->spa_spares.sav_count = 0;

	/* Same teardown for the level 2 ARC devices. */
	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}
	spa->spa_l2cache.sav_count = 0;

	spa->spa_async_suspended = 0;

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool. When this is called, we have some form of basic information in
 * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * First, close and free any existing spare vdevs.
 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL && tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	/* Nothing more to do if the pool has no configured spares. */
	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process. For each spare, there is potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in). During this phase we open and
	 * validate each vdev on the spare list. If the vdev also exists in the
	 * active configuration, then we also mark this vdev as an active spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev. Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pool would think the spare
			 * is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise). Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		vd->vdev_top = vd;
		vd->vdev_aux = &spa->spa_spares;

		if (vdev_open(vd) != 0)
			continue;

		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	/* The generated nvlists were copied into sav_config; free our copies. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}

/*
 * Load (or re-load) the current list of vdevs describing the active l2cache for
 * this pool. When this is called, we have some form of basic information in
 * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 * Devices which are already active have their details maintained, and are
 * not re-opened.
 */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		nl2cache = 0;
	}

	/* Detach the old array; retained entries are moved into newvdevs. */
	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			vd->vdev_top = vd;
			vd->vdev_aux = sav;

			spa_l2cache_activate(vd);

			if (vdev_open(vd) != 0)
				continue;

			(void) vdev_validate_aux(vd);

			/* Only healthy devices are handed to the L2ARC. */
			if (!vdev_is_dead(vd))
				l2arc_add_vdev(spa, vd);
		}
	}

	/*
	 * Purge vdevs that were dropped
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL && l2arc_vdev_present(vd))
				l2arc_remove_vdev(vd);
			(void) vdev_close(vd);
			spa_l2cache_remove(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	/* With no saved config there is nothing to regenerate. */
	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	/* sav_count is 0 on the early-out path, so these are no-ops there. */
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}

/*
 * Read the packed nvlist stored in MOS object 'obj' (whose size is kept in
 * the object's bonus buffer) and unpack it into *value.  Returns 0 on
 * success; on failure *value remains NULL.  The caller is responsible for
 * freeing the unpacked nvlist.
 */
static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize,
    packed,
	    DMU_READ_PREFETCH);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}

/*
 * Checks to see if the given vdev could not be opened, in which case we post a
 * sysevent to notify the autoreplace code that the device has been removed.
 */
static void
spa_check_removed(vdev_t *vd)
{
	/* Walk the whole subtree; only dead leaves generate events. */
	for (int c = 0; c < vd->vdev_children; c++)
		spa_check_removed(vd->vdev_child[c]);

	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
		zfs_post_autoreplace(vd->vdev_spa, vd);
		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
	}
}

/*
 * Load the slog device state from the config object since it's possible
 * that the label does not contain the most up-to-date information.
 *
 * NOTE(review): this indexes ovd->vdev_child[c] by the live tree's child
 * index, so it assumes 'nv' has the same top-level child ordering as
 * spa_root_vdev -- confirm with the callers that pass the MOS config.
 */
void
spa_load_log_state(spa_t *spa, nvlist_t *nv)
{
	vdev_t *ovd, *rvd = spa->spa_root_vdev;

	/*
	 * Load the original root vdev tree from the passed config.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);

	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *cvd = rvd->vdev_child[c];
		if (cvd->vdev_islog)
			vdev_load_log_state(cvd, ovd->vdev_child[c]);
	}
	vdev_free(ovd);
	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Check for missing log devices
 */
int
spa_check_logs(spa_t *spa)
{
	switch (spa->spa_log_state) {
	case SPA_LOG_MISSING:
		/* need to recheck in case slog has been restored */
		/* FALLTHROUGH */
	case SPA_LOG_UNKNOWN:
		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
		    DS_FIND_CHILDREN)) {
			spa->spa_log_state = SPA_LOG_MISSING;
			return (1);
		}
		break;
	}
	return (0);
}

/*
 * Post removal sysevents for any unopenable auxiliary (spare/l2cache) vdevs.
 */
static void
spa_aux_check_removed(spa_aux_vdev_t *sav)
{
	int i;

	for (i = 0; i < sav->sav_count; i++)
		spa_check_removed(sav->sav_vdevs[i]);
}

/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of configuration information.
1157fa9e4066Sahrens */ 1158fa9e4066Sahrens static int 1159ea8dc4b6Seschrock spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1160fa9e4066Sahrens { 1161fa9e4066Sahrens int error = 0; 116288ecc943SGeorge Wilson nvlist_t *nvconfig, *nvroot = NULL; 1163fa9e4066Sahrens vdev_t *rvd; 1164fa9e4066Sahrens uberblock_t *ub = &spa->spa_uberblock; 11650373e76bSbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1166fa9e4066Sahrens uint64_t pool_guid; 116799653d4eSeschrock uint64_t version; 11683d7072f8Seschrock uint64_t autoreplace = 0; 11698ad4d6ddSJeff Bonwick int orig_mode = spa->spa_mode; 1170b87f3af3Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1171fa9e4066Sahrens 11728ad4d6ddSJeff Bonwick /* 11738ad4d6ddSJeff Bonwick * If this is an untrusted config, access the pool in read-only mode. 11748ad4d6ddSJeff Bonwick * This prevents things like resilvering recently removed devices. 11758ad4d6ddSJeff Bonwick */ 11768ad4d6ddSJeff Bonwick if (!mosconfig) 11778ad4d6ddSJeff Bonwick spa->spa_mode = FREAD; 11788ad4d6ddSJeff Bonwick 1179e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1180e14bb325SJeff Bonwick 1181ea8dc4b6Seschrock spa->spa_load_state = state; 11820373e76bSbonwick 1183fa9e4066Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1184a9926bf0Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1185ea8dc4b6Seschrock error = EINVAL; 1186ea8dc4b6Seschrock goto out; 1187ea8dc4b6Seschrock } 1188fa9e4066Sahrens 118999653d4eSeschrock /* 119099653d4eSeschrock * Versioning wasn't explicitly added to the label until later, so if 119199653d4eSeschrock * it's not present treat it as the initial version. 
119299653d4eSeschrock */ 119399653d4eSeschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1194e7437265Sahrens version = SPA_VERSION_INITIAL; 119599653d4eSeschrock 1196a9926bf0Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1197a9926bf0Sbonwick &spa->spa_config_txg); 1198a9926bf0Sbonwick 11990373e76bSbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1200ea8dc4b6Seschrock spa_guid_exists(pool_guid, 0)) { 1201ea8dc4b6Seschrock error = EEXIST; 1202ea8dc4b6Seschrock goto out; 1203ea8dc4b6Seschrock } 1204fa9e4066Sahrens 1205b5989ec7Seschrock spa->spa_load_guid = pool_guid; 1206b5989ec7Seschrock 120754d692b7SGeorge Wilson /* 120854d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 120954d692b7SGeorge Wilson */ 121025f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 121125f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 121254d692b7SGeorge Wilson 1213fa9e4066Sahrens /* 121499653d4eSeschrock * Parse the configuration into a vdev tree. We explicitly set the 121599653d4eSeschrock * value that will be returned by spa_version() since parsing the 121699653d4eSeschrock * configuration requires knowing the version number. 1217fa9e4066Sahrens */ 1218e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 121999653d4eSeschrock spa->spa_ubsync.ub_version = version; 122099653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 1221e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1222fa9e4066Sahrens 122399653d4eSeschrock if (error != 0) 1224ea8dc4b6Seschrock goto out; 1225fa9e4066Sahrens 12260e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1227fa9e4066Sahrens ASSERT(spa_guid(spa) == pool_guid); 1228fa9e4066Sahrens 1229fa9e4066Sahrens /* 1230fa9e4066Sahrens * Try to open all vdevs, loading each label in the process. 
1231fa9e4066Sahrens */ 1232e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12330bf246f5Smc error = vdev_open(rvd); 1234e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 12350bf246f5Smc if (error != 0) 1236ea8dc4b6Seschrock goto out; 1237fa9e4066Sahrens 1238560e6e96Seschrock /* 123977e3a39cSMark J Musante * We need to validate the vdev labels against the configuration that 124077e3a39cSMark J Musante * we have in hand, which is dependent on the setting of mosconfig. If 124177e3a39cSMark J Musante * mosconfig is true then we're validating the vdev labels based on 124277e3a39cSMark J Musante * that config. Otherwise, we're validating against the cached config 124377e3a39cSMark J Musante * (zpool.cache) that was read when we loaded the zfs module, and then 124477e3a39cSMark J Musante * later we will recursively call spa_load() and validate against 124577e3a39cSMark J Musante * the vdev config. 1246560e6e96Seschrock */ 124777e3a39cSMark J Musante spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 124877e3a39cSMark J Musante error = vdev_validate(rvd); 124977e3a39cSMark J Musante spa_config_exit(spa, SCL_ALL, FTAG); 125077e3a39cSMark J Musante if (error != 0) 125177e3a39cSMark J Musante goto out; 1252560e6e96Seschrock 1253560e6e96Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1254560e6e96Seschrock error = ENXIO; 1255560e6e96Seschrock goto out; 1256560e6e96Seschrock } 1257560e6e96Seschrock 1258fa9e4066Sahrens /* 1259fa9e4066Sahrens * Find the best uberblock. 1260fa9e4066Sahrens */ 1261e14bb325SJeff Bonwick vdev_uberblock_load(NULL, rvd, ub); 1262fa9e4066Sahrens 1263fa9e4066Sahrens /* 1264fa9e4066Sahrens * If we weren't able to find a single valid uberblock, return failure. 
1265fa9e4066Sahrens */ 1266fa9e4066Sahrens if (ub->ub_txg == 0) { 1267eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1268eaca9bbdSeschrock VDEV_AUX_CORRUPT_DATA); 1269ea8dc4b6Seschrock error = ENXIO; 1270ea8dc4b6Seschrock goto out; 1271ea8dc4b6Seschrock } 1272ea8dc4b6Seschrock 1273ea8dc4b6Seschrock /* 1274ea8dc4b6Seschrock * If the pool is newer than the code, we can't open it. 1275ea8dc4b6Seschrock */ 1276e7437265Sahrens if (ub->ub_version > SPA_VERSION) { 1277eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1278eaca9bbdSeschrock VDEV_AUX_VERSION_NEWER); 1279ea8dc4b6Seschrock error = ENOTSUP; 1280ea8dc4b6Seschrock goto out; 1281fa9e4066Sahrens } 1282fa9e4066Sahrens 1283fa9e4066Sahrens /* 1284fa9e4066Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1285fa9e4066Sahrens * incomplete configuration. 1286fa9e4066Sahrens */ 1287ecc2d604Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 1288ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1289ea8dc4b6Seschrock VDEV_AUX_BAD_GUID_SUM); 1290ea8dc4b6Seschrock error = ENXIO; 1291ea8dc4b6Seschrock goto out; 1292fa9e4066Sahrens } 1293fa9e4066Sahrens 1294fa9e4066Sahrens /* 1295fa9e4066Sahrens * Initialize internal SPA structures. 
1296fa9e4066Sahrens */ 1297fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1298fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 1299fa9e4066Sahrens spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 1300ea8dc4b6Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 1301ea8dc4b6Seschrock if (error) { 1302ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1303ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1304ea8dc4b6Seschrock goto out; 1305ea8dc4b6Seschrock } 1306fa9e4066Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1307fa9e4066Sahrens 1308ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1309fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 1310ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 1311ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1312ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1313ea8dc4b6Seschrock error = EIO; 1314ea8dc4b6Seschrock goto out; 1315ea8dc4b6Seschrock } 1316fa9e4066Sahrens 131788ecc943SGeorge Wilson if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 131888ecc943SGeorge Wilson vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 131988ecc943SGeorge Wilson VDEV_AUX_CORRUPT_DATA); 132088ecc943SGeorge Wilson error = EIO; 132188ecc943SGeorge Wilson goto out; 132288ecc943SGeorge Wilson } 132388ecc943SGeorge Wilson 1324fa9e4066Sahrens if (!mosconfig) { 132595173954Sek uint64_t hostid; 1326fa9e4066Sahrens 132788ecc943SGeorge Wilson if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 132877650510SLin Ling ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 132995173954Sek char *hostname; 133095173954Sek unsigned long myhostid = 0; 133195173954Sek 133288ecc943SGeorge Wilson VERIFY(nvlist_lookup_string(nvconfig, 133395173954Sek ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 133495173954Sek 13355679c89fSjv #ifdef _KERNEL 13365679c89fSjv myhostid = zone_get_hostid(NULL); 13375679c89fSjv #else /* _KERNEL */ 13385679c89fSjv /* 
13395679c89fSjv * We're emulating the system's hostid in userland, so 13405679c89fSjv * we can't use zone_get_hostid(). 13415679c89fSjv */ 134295173954Sek (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 13435679c89fSjv #endif /* _KERNEL */ 134417194a52Slling if (hostid != 0 && myhostid != 0 && 13455679c89fSjv hostid != myhostid) { 134695173954Sek cmn_err(CE_WARN, "pool '%s' could not be " 134795173954Sek "loaded as it was last accessed by " 134877650510SLin Ling "another system (host: %s hostid: 0x%lx). " 134995173954Sek "See: http://www.sun.com/msg/ZFS-8000-EY", 1350e14bb325SJeff Bonwick spa_name(spa), hostname, 135195173954Sek (unsigned long)hostid); 135295173954Sek error = EBADF; 135395173954Sek goto out; 135495173954Sek } 135595173954Sek } 135695173954Sek 135788ecc943SGeorge Wilson spa_config_set(spa, nvconfig); 1358fa9e4066Sahrens spa_unload(spa); 1359fa9e4066Sahrens spa_deactivate(spa); 13608ad4d6ddSJeff Bonwick spa_activate(spa, orig_mode); 1361fa9e4066Sahrens 136288ecc943SGeorge Wilson return (spa_load(spa, nvconfig, state, B_TRUE)); 1363fa9e4066Sahrens } 1364fa9e4066Sahrens 1365ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1366fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 1367ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 1368ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1369ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1370ea8dc4b6Seschrock error = EIO; 1371ea8dc4b6Seschrock goto out; 1372ea8dc4b6Seschrock } 1373fa9e4066Sahrens 137499653d4eSeschrock /* 137599653d4eSeschrock * Load the bit that tells us to use the new accounting function 137699653d4eSeschrock * (raid-z deflation). If we have an older pool, this will not 137799653d4eSeschrock * be present. 
137899653d4eSeschrock */ 137999653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, 138099653d4eSeschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 138199653d4eSeschrock sizeof (uint64_t), 1, &spa->spa_deflate); 138299653d4eSeschrock if (error != 0 && error != ENOENT) { 138399653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 138499653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 138599653d4eSeschrock error = EIO; 138699653d4eSeschrock goto out; 138799653d4eSeschrock } 138899653d4eSeschrock 1389fa9e4066Sahrens /* 1390ea8dc4b6Seschrock * Load the persistent error log. If we have an older pool, this will 1391ea8dc4b6Seschrock * not be present. 1392fa9e4066Sahrens */ 1393ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1394ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 1395ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 1396d80c45e0Sbonwick if (error != 0 && error != ENOENT) { 1397ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1398ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1399ea8dc4b6Seschrock error = EIO; 1400ea8dc4b6Seschrock goto out; 1401ea8dc4b6Seschrock } 1402ea8dc4b6Seschrock 1403ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1404ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 1405ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 1406ea8dc4b6Seschrock if (error != 0 && error != ENOENT) { 1407ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1408ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1409ea8dc4b6Seschrock error = EIO; 1410ea8dc4b6Seschrock goto out; 1411ea8dc4b6Seschrock } 1412ea8dc4b6Seschrock 141306eeb2adSek /* 141406eeb2adSek * Load the history object. If we have an older pool, this 141506eeb2adSek * will not be present. 
141606eeb2adSek */ 141706eeb2adSek error = zap_lookup(spa->spa_meta_objset, 141806eeb2adSek DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 141906eeb2adSek sizeof (uint64_t), 1, &spa->spa_history); 142006eeb2adSek if (error != 0 && error != ENOENT) { 142106eeb2adSek vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 142206eeb2adSek VDEV_AUX_CORRUPT_DATA); 142306eeb2adSek error = EIO; 142406eeb2adSek goto out; 142506eeb2adSek } 142606eeb2adSek 142799653d4eSeschrock /* 142899653d4eSeschrock * Load any hot spares for this pool. 142999653d4eSeschrock */ 143099653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1431fa94a07fSbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 143299653d4eSeschrock if (error != 0 && error != ENOENT) { 143399653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 143499653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 143599653d4eSeschrock error = EIO; 143699653d4eSeschrock goto out; 143799653d4eSeschrock } 143899653d4eSeschrock if (error == 0) { 1439e7437265Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 1440fa94a07fSbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 1441fa94a07fSbrendan &spa->spa_spares.sav_config) != 0) { 144299653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 144399653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 144499653d4eSeschrock error = EIO; 144599653d4eSeschrock goto out; 144699653d4eSeschrock } 144799653d4eSeschrock 1448e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 144999653d4eSeschrock spa_load_spares(spa); 1450e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 145199653d4eSeschrock } 145299653d4eSeschrock 1453fa94a07fSbrendan /* 1454fa94a07fSbrendan * Load any level 2 ARC devices for this pool. 
1455fa94a07fSbrendan */ 1456fa94a07fSbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1457fa94a07fSbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 1458fa94a07fSbrendan &spa->spa_l2cache.sav_object); 1459fa94a07fSbrendan if (error != 0 && error != ENOENT) { 1460fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1461fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1462fa94a07fSbrendan error = EIO; 1463fa94a07fSbrendan goto out; 1464fa94a07fSbrendan } 1465fa94a07fSbrendan if (error == 0) { 1466fa94a07fSbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 1467fa94a07fSbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 1468fa94a07fSbrendan &spa->spa_l2cache.sav_config) != 0) { 1469fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, 1470fa94a07fSbrendan VDEV_STATE_CANT_OPEN, 1471fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1472fa94a07fSbrendan error = EIO; 1473fa94a07fSbrendan goto out; 1474fa94a07fSbrendan } 1475fa94a07fSbrendan 1476e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1477fa94a07fSbrendan spa_load_l2cache(spa); 1478e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1479fa94a07fSbrendan } 1480fa94a07fSbrendan 148188ecc943SGeorge Wilson VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, 148288ecc943SGeorge Wilson &nvroot) == 0); 148388ecc943SGeorge Wilson spa_load_log_state(spa, nvroot); 148488ecc943SGeorge Wilson nvlist_free(nvconfig); 1485e6ca193dSGeorge Wilson 1486b87f3af3Sperrin if (spa_check_logs(spa)) { 1487b87f3af3Sperrin vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1488b87f3af3Sperrin VDEV_AUX_BAD_LOG); 1489b87f3af3Sperrin error = ENXIO; 1490b87f3af3Sperrin ereport = FM_EREPORT_ZFS_LOG_REPLAY; 1491b87f3af3Sperrin goto out; 1492b87f3af3Sperrin } 1493b87f3af3Sperrin 1494b87f3af3Sperrin 1495990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 1496ecd6cf80Smarks 1497b1b8ab34Slling error = zap_lookup(spa->spa_meta_objset, 
DMU_POOL_DIRECTORY_OBJECT, 1498b1b8ab34Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 1499b1b8ab34Slling 1500b1b8ab34Slling if (error && error != ENOENT) { 1501b1b8ab34Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1502b1b8ab34Slling VDEV_AUX_CORRUPT_DATA); 1503b1b8ab34Slling error = EIO; 1504b1b8ab34Slling goto out; 1505b1b8ab34Slling } 1506b1b8ab34Slling 1507b1b8ab34Slling if (error == 0) { 1508b1b8ab34Slling (void) zap_lookup(spa->spa_meta_objset, 1509b1b8ab34Slling spa->spa_pool_props_object, 15103d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 1511b1b8ab34Slling sizeof (uint64_t), 1, &spa->spa_bootfs); 15123d7072f8Seschrock (void) zap_lookup(spa->spa_meta_objset, 15133d7072f8Seschrock spa->spa_pool_props_object, 15143d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 15153d7072f8Seschrock sizeof (uint64_t), 1, &autoreplace); 1516b693757aSEric Schrock spa->spa_autoreplace = (autoreplace != 0); 1517ecd6cf80Smarks (void) zap_lookup(spa->spa_meta_objset, 1518ecd6cf80Smarks spa->spa_pool_props_object, 1519ecd6cf80Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 1520ecd6cf80Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 15210a4e9518Sgw (void) zap_lookup(spa->spa_meta_objset, 15220a4e9518Sgw spa->spa_pool_props_object, 15230a4e9518Sgw zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 15240a4e9518Sgw sizeof (uint64_t), 1, &spa->spa_failmode); 1525573ca77eSGeorge Wilson (void) zap_lookup(spa->spa_meta_objset, 1526573ca77eSGeorge Wilson spa->spa_pool_props_object, 1527573ca77eSGeorge Wilson zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 1528573ca77eSGeorge Wilson sizeof (uint64_t), 1, &spa->spa_autoexpand); 1529b1b8ab34Slling } 1530b1b8ab34Slling 15313d7072f8Seschrock /* 15323d7072f8Seschrock * If the 'autoreplace' property is set, then post a resource notifying 15333d7072f8Seschrock * the ZFS DE that it should not issue any faults for unopenable 15343d7072f8Seschrock * devices. 
We also iterate over the vdevs, and post a sysevent for any 15353d7072f8Seschrock * unopenable vdevs so that the normal autoreplace handler can take 15363d7072f8Seschrock * over. 15373d7072f8Seschrock */ 1538b693757aSEric Schrock if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 15393d7072f8Seschrock spa_check_removed(spa->spa_root_vdev); 1540b693757aSEric Schrock /* 1541b693757aSEric Schrock * For the import case, this is done in spa_import(), because 1542b693757aSEric Schrock * at this point we're using the spare definitions from 1543b693757aSEric Schrock * the MOS config, not necessarily from the userland config. 1544b693757aSEric Schrock */ 1545b693757aSEric Schrock if (state != SPA_LOAD_IMPORT) { 1546b693757aSEric Schrock spa_aux_check_removed(&spa->spa_spares); 1547b693757aSEric Schrock spa_aux_check_removed(&spa->spa_l2cache); 1548b693757aSEric Schrock } 1549b693757aSEric Schrock } 15503d7072f8Seschrock 1551ea8dc4b6Seschrock /* 1552560e6e96Seschrock * Load the vdev state for all toplevel vdevs. 1553ea8dc4b6Seschrock */ 1554560e6e96Seschrock vdev_load(rvd); 15550373e76bSbonwick 1556fa9e4066Sahrens /* 1557fa9e4066Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1558fa9e4066Sahrens */ 1559e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1560fa9e4066Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 1561e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1562fa9e4066Sahrens 1563fa9e4066Sahrens /* 1564fa9e4066Sahrens * Check the state of the root vdev. If it can't be opened, it 1565fa9e4066Sahrens * indicates one or more toplevel vdevs are faulted. 
1566fa9e4066Sahrens */ 1567ea8dc4b6Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1568ea8dc4b6Seschrock error = ENXIO; 1569ea8dc4b6Seschrock goto out; 1570ea8dc4b6Seschrock } 1571fa9e4066Sahrens 15728ad4d6ddSJeff Bonwick if (spa_writeable(spa)) { 15735dabedeeSbonwick dmu_tx_t *tx; 15740373e76bSbonwick int need_update = B_FALSE; 15758ad4d6ddSJeff Bonwick 15768ad4d6ddSJeff Bonwick ASSERT(state != SPA_LOAD_TRYIMPORT); 15775dabedeeSbonwick 15780373e76bSbonwick /* 15790373e76bSbonwick * Claim log blocks that haven't been committed yet. 15800373e76bSbonwick * This must all happen in a single txg. 15810373e76bSbonwick */ 15825dabedeeSbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1583fa9e4066Sahrens spa_first_txg(spa)); 1584e14bb325SJeff Bonwick (void) dmu_objset_find(spa_name(spa), 15850b69c2f0Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1586fa9e4066Sahrens dmu_tx_commit(tx); 1587fa9e4066Sahrens 1588e6ca193dSGeorge Wilson spa->spa_log_state = SPA_LOG_GOOD; 1589fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 1590fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 1591fa9e4066Sahrens 1592fa9e4066Sahrens /* 1593fa9e4066Sahrens * Wait for all claims to sync. 1594fa9e4066Sahrens */ 1595fa9e4066Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 15960e34b6a7Sbonwick 15970e34b6a7Sbonwick /* 15980373e76bSbonwick * If the config cache is stale, or we have uninitialized 15990373e76bSbonwick * metaslabs (see spa_vdev_add()), then update the config. 1600bc758434SLin Ling * 1601bc758434SLin Ling * If spa_load_verbatim is true, trust the current 1602bc758434SLin Ling * in-core spa_config and update the disk labels. 
16030e34b6a7Sbonwick */ 16040373e76bSbonwick if (config_cache_txg != spa->spa_config_txg || 1605bc758434SLin Ling state == SPA_LOAD_IMPORT || spa->spa_load_verbatim) 16060373e76bSbonwick need_update = B_TRUE; 16070373e76bSbonwick 16088ad4d6ddSJeff Bonwick for (int c = 0; c < rvd->vdev_children; c++) 16090373e76bSbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 16100373e76bSbonwick need_update = B_TRUE; 16110e34b6a7Sbonwick 16120e34b6a7Sbonwick /* 16130373e76bSbonwick * Update the config cache asychronously in case we're the 16140373e76bSbonwick * root pool, in which case the config cache isn't writable yet. 16150e34b6a7Sbonwick */ 16160373e76bSbonwick if (need_update) 16170373e76bSbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 16188ad4d6ddSJeff Bonwick 16198ad4d6ddSJeff Bonwick /* 16208ad4d6ddSJeff Bonwick * Check all DTLs to see if anything needs resilvering. 16218ad4d6ddSJeff Bonwick */ 16228ad4d6ddSJeff Bonwick if (vdev_resilver_needed(rvd, NULL, NULL)) 16238ad4d6ddSJeff Bonwick spa_async_request(spa, SPA_ASYNC_RESILVER); 1624503ad85cSMatthew Ahrens 1625503ad85cSMatthew Ahrens /* 1626503ad85cSMatthew Ahrens * Delete any inconsistent datasets. 1627503ad85cSMatthew Ahrens */ 1628503ad85cSMatthew Ahrens (void) dmu_objset_find(spa_name(spa), 1629503ad85cSMatthew Ahrens dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 1630ca45db41SChris Kirby 1631ca45db41SChris Kirby /* 1632ca45db41SChris Kirby * Clean up any stale temporary dataset userrefs. 
		 */
		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
	}

	error = 0;
out:
	/*
	 * Record the number of references held on this spa_t at the end of
	 * load.  NOTE(review): presumably this is the baseline ("minimum")
	 * refcount used by later open/close accounting -- confirm against
	 * the spa_refcount consumers.
	 */
	spa->spa_minref = refcount_count(&spa->spa_refcount);
	/*
	 * Post an FMA ereport for real failures.  EBADF is excluded because
	 * it indicates the pool was exported or destroyed (see the EBADF
	 * handling in spa_open_common()), not on-disk corruption.
	 */
	if (error && error != EBADF)
		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
	spa->spa_load_state = SPA_LOAD_NONE;
	spa->spa_ena = 0;

	return (error);
}

/*
 * Pool Open/Import
 *
 * The import case is identical to an open except that the configuration is sent
 * down from userland, instead of grabbed from the configuration cache.  For the
 * case of an open, the pool configuration will exist in the
 * POOL_STATE_UNINITIALIZED state.
 *
 * The stats information (gen/count/ustats) is used to gather vdev statistics at
 * the same time open the pool, without having to keep around the spa_t in some
 * ambiguous state.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
{
	spa_t *spa;
	int error;
	int locked = B_FALSE;

	*spapp = NULL;

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Pool exists in the namespace but has not been loaded yet:
	 * activate it and load it from its cached configuration.
	 */
	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa, spa_mode_global);

		error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it indicates that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_TRUE, B_TRUE);
			spa_remove(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_root_vdev != NULL)
				*config = spa_config_generate(spa, NULL, -1ULL,
				    B_TRUE);
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = B_TRUE;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		} else {
			spa->spa_last_open_failed = B_FALSE;
		}
	}

	spa_open_ref(spa, tag);

	if (locked)
		mutex_exit(&spa_namespace_lock);

	*spapp = spa;

	if (config != NULL)
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	return (0);
}

/*
 * Open a pool by name, without returning its configuration.
 */
int
spa_open(const char *name, spa_t **spapp, void *tag)
{
	return (spa_open_common(name, spapp, tag, NULL));
}

/*
 * Lookup the given spa_t, incrementing the inject count in the process,
 * preventing it from being exported or destroyed.
 */
spa_t *
spa_inject_addref(char *name)
{
	spa_t *spa;

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(name)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (NULL);
	}
	spa->spa_inject_ref++;
	mutex_exit(&spa_namespace_lock);

	return (spa);
}

/*
 * Drop the inject reference taken by spa_inject_addref().
 */
void
spa_inject_delref(spa_t *spa)
{
	mutex_enter(&spa_namespace_lock);
	spa->spa_inject_ref--;
	mutex_exit(&spa_namespace_lock);
}

/*
 * Add spares device information to the nvlist.
 */
static void
spa_add_spares(spa_t *spa, nvlist_t *config)
{
	nvlist_t **spares;
	uint_t i, nspares;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_stat_t *vs;
	uint_t vsc;
	uint64_t pool;

	/* Caller must hold the config lock as reader. */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_spares.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
	if (nspares != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		/*
		 * Re-fetch the array so that 'spares' now points at the
		 * copies owned by nvroot; the status updates below modify
		 * those copies in place.
		 */
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

		/*
		 * Go through and find any spares which have since been
		 * repurposed as an active spare.  If this is the case, update
		 * their status appropriately.
		 */
		for (i = 0; i < nspares; i++) {
			VERIFY(nvlist_lookup_uint64(spares[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (spa_spare_exists(guid, &pool, NULL) &&
			    pool != 0ULL) {
				VERIFY(nvlist_lookup_uint64_array(
				    spares[i], ZPOOL_CONFIG_STATS,
				    (uint64_t **)&vs, &vsc) == 0);
				vs->vs_state = VDEV_STATE_CANT_OPEN;
				vs->vs_aux = VDEV_AUX_SPARED;
			}
		}
	}
}

/*
 * Add l2cache device information to the nvlist, including vdev stats.
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	/* Caller must hold the config lock as reader. */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		/*
		 * Re-fetch the array so that 'l2cache' points at the copies
		 * now owned by nvroot (see spa_add_spares() for the same
		 * idiom); the stats updates below modify those copies.
		 */
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE,
		    &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */

		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/*
			 * Find the in-core l2cache vdev with this GUID so we
			 * can pull fresh stats from it.
			 */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			ASSERT(vd != NULL);

			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
			vdev_get_stats(vd, vs);
		}
	}
}

/*
 * Retrieve the configuration (with error count, suspended state, spares and
 * l2cache information folded in) for the named pool into '*config', and copy
 * its alternate root, if any, into 'altroot' (a buffer of 'buflen' bytes).
 * The altroot is fetched even for pools that fail to open.  Returns the
 * spa_open_common() error, if any.
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, config);

	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	/* Drop the config lock and the open reference taken above. */
	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}

/*
 * Validate that the auxiliary device array is well formed.
 * We must have an
 * array of nvlists, each which describes a valid leaf vdev.  If this is an
 * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
 * specified, as long as they are well-formed.
 */
static int
spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
    spa_aux_vdev_t *sav, const char *config, uint64_t version,
    vdev_labeltype_t label)
{
	nvlist_t **dev;
	uint_t i, ndev;
	vdev_t *vd;
	int error;

	/* Caller must hold all config locks as writer. */
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * It's acceptable to have no devs specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
		return (0);

	if (ndev == 0)
		return (EINVAL);

	/*
	 * Make sure the pool is formatted with a version that supports this
	 * device type.
	 */
	if (spa_version(spa) < version)
		return (ENOTSUP);

	/*
	 * Set the pending device list so we correctly handle device in-use
	 * checking.
	 */
	sav->sav_pending = dev;
	sav->sav_npending = ndev;

	for (i = 0; i < ndev; i++) {
		if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
		    mode)) != 0)
			goto out;

		/* Auxiliary devices must be leaf vdevs (whole disks/files). */
		if (!vd->vdev_ops->vdev_op_leaf) {
			vdev_free(vd);
			error = EINVAL;
			goto out;
		}

		/*
		 * The L2ARC currently only supports disk devices in
		 * kernel context.  For user-level testing, we allow it.
		 */
#ifdef _KERNEL
		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
			error = ENOTBLK;
			goto out;
		}
#endif
		/*
		 * NOTE(review): an aux device appears to act as its own
		 * top-level vdev for open/labeling purposes -- confirm
		 * against vdev_label_init().
		 */
		vd->vdev_top = vd;

		if ((error = vdev_open(vd)) == 0 &&
		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
			    vd->vdev_guid) == 0);
		}

		vdev_free(vd);

		/*
		 * For spare/l2cache modes (i.e. import -- see the comment at
		 * the top of this function), a device that fails to open or
		 * label is tolerated, so clear the error and keep going.
		 */
		if (error &&
		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
			goto out;
		else
			error = 0;
	}

out:
	/* Clear the pending list set up above, on all paths. */
	sav->sav_pending = NULL;
	sav->sav_npending = 0;
	return (error);
}

/*
 * Validate both auxiliary device arrays (spares and l2cache) in 'nvroot'.
 */
static int
spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
{
	int error;

	/* Caller must hold all config locks as writer. */
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
	    VDEV_LABEL_SPARE)) != 0) {
		return (error);
	}

	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
	    VDEV_LABEL_L2CACHE));
}

/*
 * Replace the 'config' (e.g. spares/l2cache) nvlist array in sav->sav_config
 * with the concatenation of the existing array and 'devs'.  If there is no
 * existing config, create a new one containing only 'devs'.
 */
static void
spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
    const char *config)
{
	int i;

	if (sav->sav_config != NULL) {
		nvlist_t **olddevs;
		uint_t oldndevs;
		nvlist_t **newdevs;

		/*
		 * Generate new dev list by concatenating with the
		 * current dev list.
		 */
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
		    &olddevs, &oldndevs) == 0);

		newdevs = kmem_alloc(sizeof (void *) *
		    (ndevs + oldndevs), KM_SLEEP);
		for (i = 0; i < oldndevs; i++)
			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
			    KM_SLEEP) == 0);
		for (i = 0; i < ndevs; i++)
			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
			    KM_SLEEP) == 0);

		/* Swap the old array for the concatenated one. */
		VERIFY(nvlist_remove(sav->sav_config, config,
		    DATA_TYPE_NVLIST_ARRAY) == 0);

		/*
		 * The add takes its own copies, so the duplicates built
		 * above can be freed immediately.
		 */
		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
		    config, newdevs, ndevs + oldndevs) == 0);
		for (i = 0; i < oldndevs + ndevs; i++)
			nvlist_free(newdevs[i]);
		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
	} else {
		/*
		 * Generate a new dev list.
		 */
		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
		    devs, ndevs) == 0);
	}
}

/*
 * Stop and drop level 2 ARC devices
 */
void
spa_l2cache_drop(spa_t *spa)
{
	vdev_t *vd;
	int i;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	for (i = 0; i < sav->sav_count; i++) {
		uint64_t pool;

		vd = sav->sav_vdevs[i];
		ASSERT(vd != NULL);

		/*
		 * Only detach from the ARC if the device is actually an
		 * active l2arc vdev belonging to some pool.
		 */
		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
		    pool != 0ULL && l2arc_vdev_present(vd))
			l2arc_remove_vdev(vd);
		if (vd->vdev_isl2cache)
			spa_l2cache_remove(vd);
		vdev_clear_stats(vd);
		(void) vdev_close(vd);
	}
}

/*
 * Pool Creation
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    const char *history_str, nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version;
2131fa9e4066Sahrens 2132fa9e4066Sahrens /* 2133fa9e4066Sahrens * If this pool already exists, return failure. 2134fa9e4066Sahrens */ 2135fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 2136fa9e4066Sahrens if (spa_lookup(pool) != NULL) { 2137fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2138fa9e4066Sahrens return (EEXIST); 2139fa9e4066Sahrens } 2140fa9e4066Sahrens 2141fa9e4066Sahrens /* 2142fa9e4066Sahrens * Allocate a new spa_t structure. 2143fa9e4066Sahrens */ 2144990b4856Slling (void) nvlist_lookup_string(props, 2145990b4856Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 21460373e76bSbonwick spa = spa_add(pool, altroot); 21478ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 2148fa9e4066Sahrens 2149fa9e4066Sahrens spa->spa_uberblock.ub_txg = txg - 1; 2150990b4856Slling 2151990b4856Slling if (props && (error = spa_prop_validate(spa, props))) { 2152990b4856Slling spa_deactivate(spa); 2153990b4856Slling spa_remove(spa); 2154c5904d13Seschrock mutex_exit(&spa_namespace_lock); 2155990b4856Slling return (error); 2156990b4856Slling } 2157990b4856Slling 2158990b4856Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 2159990b4856Slling &version) != 0) 2160990b4856Slling version = SPA_VERSION; 2161990b4856Slling ASSERT(version <= SPA_VERSION); 2162990b4856Slling spa->spa_uberblock.ub_version = version; 2163fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 2164fa9e4066Sahrens 216554d692b7SGeorge Wilson /* 216654d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 216754d692b7SGeorge Wilson */ 216825f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 216925f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 217054d692b7SGeorge Wilson 21710373e76bSbonwick /* 21720373e76bSbonwick * Create the root vdev. 
21730373e76bSbonwick */ 2174e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21750373e76bSbonwick 217699653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 21770373e76bSbonwick 217899653d4eSeschrock ASSERT(error != 0 || rvd != NULL); 217999653d4eSeschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 21800373e76bSbonwick 2181b7b97454Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 21820373e76bSbonwick error = EINVAL; 218399653d4eSeschrock 218499653d4eSeschrock if (error == 0 && 218599653d4eSeschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 2186fa94a07fSbrendan (error = spa_validate_aux(spa, nvroot, txg, 218799653d4eSeschrock VDEV_ALLOC_ADD)) == 0) { 2188573ca77eSGeorge Wilson for (int c = 0; c < rvd->vdev_children; c++) { 2189573ca77eSGeorge Wilson vdev_metaslab_set_size(rvd->vdev_child[c]); 2190573ca77eSGeorge Wilson vdev_expand(rvd->vdev_child[c], txg); 2191573ca77eSGeorge Wilson } 21920373e76bSbonwick } 21930373e76bSbonwick 2194e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2195fa9e4066Sahrens 219699653d4eSeschrock if (error != 0) { 2197fa9e4066Sahrens spa_unload(spa); 2198fa9e4066Sahrens spa_deactivate(spa); 2199fa9e4066Sahrens spa_remove(spa); 2200fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2201fa9e4066Sahrens return (error); 2202fa9e4066Sahrens } 2203fa9e4066Sahrens 220499653d4eSeschrock /* 220599653d4eSeschrock * Get the list of spares, if specified. 
220699653d4eSeschrock */ 220799653d4eSeschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 220899653d4eSeschrock &spares, &nspares) == 0) { 2209fa94a07fSbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 221099653d4eSeschrock KM_SLEEP) == 0); 2211fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 221299653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 2213e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 221499653d4eSeschrock spa_load_spares(spa); 2215e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2216fa94a07fSbrendan spa->spa_spares.sav_sync = B_TRUE; 2217fa94a07fSbrendan } 2218fa94a07fSbrendan 2219fa94a07fSbrendan /* 2220fa94a07fSbrendan * Get the list of level 2 cache devices, if specified. 2221fa94a07fSbrendan */ 2222fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 2223fa94a07fSbrendan &l2cache, &nl2cache) == 0) { 2224fa94a07fSbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 2225fa94a07fSbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 2226fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 2227fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2228e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2229fa94a07fSbrendan spa_load_l2cache(spa); 2230e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2231fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 223299653d4eSeschrock } 223399653d4eSeschrock 22340a48a24eStimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2235fa9e4066Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2236fa9e4066Sahrens 2237fa9e4066Sahrens tx = dmu_tx_create_assigned(dp, txg); 2238fa9e4066Sahrens 2239fa9e4066Sahrens /* 2240fa9e4066Sahrens * Create the pool config object. 
2241fa9e4066Sahrens */ 2242fa9e4066Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 2243f7991ba4STim Haley DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2244fa9e4066Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2245fa9e4066Sahrens 2246ea8dc4b6Seschrock if (zap_add(spa->spa_meta_objset, 2247fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 2248ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 2249ea8dc4b6Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 2250ea8dc4b6Seschrock } 2251fa9e4066Sahrens 2252990b4856Slling /* Newly created pools with the right version are always deflated. */ 2253990b4856Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 2254990b4856Slling spa->spa_deflate = TRUE; 2255990b4856Slling if (zap_add(spa->spa_meta_objset, 2256990b4856Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 2257990b4856Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 2258990b4856Slling cmn_err(CE_PANIC, "failed to add deflate"); 2259990b4856Slling } 226099653d4eSeschrock } 226199653d4eSeschrock 2262fa9e4066Sahrens /* 2263fa9e4066Sahrens * Create the deferred-free bplist object. Turn off compression 2264fa9e4066Sahrens * because sync-to-convergence takes longer if the blocksize 2265fa9e4066Sahrens * keeps changing. 
2266fa9e4066Sahrens */ 2267fa9e4066Sahrens spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, 2268fa9e4066Sahrens 1 << 14, tx); 2269fa9e4066Sahrens dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 2270fa9e4066Sahrens ZIO_COMPRESS_OFF, tx); 2271fa9e4066Sahrens 2272ea8dc4b6Seschrock if (zap_add(spa->spa_meta_objset, 2273fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 2274ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { 2275ea8dc4b6Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 2276ea8dc4b6Seschrock } 2277fa9e4066Sahrens 227806eeb2adSek /* 227906eeb2adSek * Create the pool's history object. 228006eeb2adSek */ 2281990b4856Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 2282990b4856Slling spa_history_create_obj(spa, tx); 2283990b4856Slling 2284990b4856Slling /* 2285990b4856Slling * Set pool properties. 2286990b4856Slling */ 2287990b4856Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 2288990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 22890a4e9518Sgw spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 2290573ca77eSGeorge Wilson spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 2291379c004dSEric Schrock if (props != NULL) { 2292379c004dSEric Schrock spa_configfile_set(spa, props, B_FALSE); 2293990b4856Slling spa_sync_props(spa, props, CRED(), tx); 2294379c004dSEric Schrock } 229506eeb2adSek 2296fa9e4066Sahrens dmu_tx_commit(tx); 2297fa9e4066Sahrens 2298fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 2299fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 2300fa9e4066Sahrens 2301fa9e4066Sahrens /* 2302fa9e4066Sahrens * We explicitly wait for the first transaction to complete so that our 2303fa9e4066Sahrens * bean counters are appropriately updated. 
 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
	spa_history_log_version(spa, LOG_POOL_CREATE);

	/* Everything opened after this point contributes extra references. */
	spa->spa_minref = refcount_count(&spa->spa_refcount);

	mutex_exit(&spa_namespace_lock);

	return (0);
}

#ifdef _KERNEL
/*
 * Get the root pool information from the root disk, then import the root pool
 * during the system boot up time.
 */
extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);

/*
 * Read the vdev label from the boot device (devpath/devid) and convert it
 * into a pool configuration whose vdev tree has a single root vdev wrapping
 * the boot device's top-level vdev.  On success *guid is set to the boot
 * vdev's guid and the caller owns (and must nvlist_free()) the returned
 * config; returns NULL if the label cannot be read.
 */
static nvlist_t *
spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
{
	nvlist_t *config;
	nvlist_t *nvtop, *nvroot;
	uint64_t pgid;

	if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0)
		return (NULL);

	/*
	 * Add this top-level vdev to the child array.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pgid) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0);

	/*
	 * Put this pool's top-level vdevs into a root vdev.
	 */
	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &nvtop, 1) == 0);

	/*
	 * Replace the existing vdev_tree with the new root vdev in
	 * this pool's configuration (remove the old, add the new).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);
	return (config);
}

/*
 * Walk the vdev tree and see if we can find a device with "better"
 * configuration. A configuration is "better" if the label on that
 * device has a more recent txg.
 *
 * Recursive pre-walk of all children, then leaf check: *avd/*txg track
 * the best (most recent) leaf seen so far and are updated in place.
 */
static void
spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
{
	for (int c = 0; c < vd->vdev_children; c++)
		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);

	if (vd->vdev_ops->vdev_op_leaf) {
		nvlist_t *label;
		uint64_t label_txg;

		/* Unreadable labels are simply skipped, not treated as errors. */
		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
		    &label) != 0)
			return;

		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
		    &label_txg) == 0);

		/*
		 * Do we have a better boot device?
		 */
		if (label_txg > *txg) {
			*txg = label_txg;
			*avd = vd;
		}
		nvlist_free(label);
	}
}

/*
 * Import a root pool.
 *
 * For x86. devpath_list will consist of devid and/or physpath name of
 * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
 * The GRUB "findroot" command will return the vdev we should boot.
 *
 * For Sparc, devpath_list consists the physpath name of the booting device
 * no matter the rootpool is a single device pool or a mirrored pool.
 * e.g.
 *	"/pci@1f,0/ide@d/disk@0,0:a"
 *
 * Returns 0 on success; EIO if no label can be read, ENOENT if the boot
 * vdev guid is not in the parsed tree, EINVAL if a stale or spared device
 * was booted.  Drops spa_namespace_lock before returning on all paths.
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	config = spa_generate_rootconf(devpath, devid, &guid);
#if defined(_OBP) && defined(_KERNEL)
	if (config == NULL) {
		if (strstr(devpath, "/iscsi/ssd") != NULL) {
			/*
			 * iscsi boot.
			 * NOTE(review): get_iscsi_bootpath_phy() presumably
			 * rewrites devpath in place for the retry — confirm
			 * against sys/bootprops.h.
			 */
			get_iscsi_bootpath_phy(devpath);
			config = spa_generate_rootconf(devpath, devid, &guid);
		}
	}
#endif
	if (config == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, NULL);
	spa->spa_is_root = B_TRUE;
	spa->spa_load_verbatim = B_TRUE;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->vdev_child[1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/*
	 * The parsed vdev tree is torn down on both success and failure.
	 * NOTE(review): on success the pool is presumably re-opened later
	 * from the config duplicated into spa->spa_config — confirm.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}

#endif

/*
 * Take a pool and insert it into the namespace as if it had been loaded at
 * boot.
 */
int
spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;

	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* props is optional; altroot stays NULL if not specified. */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);

	/*
	 * The supplied config is trusted as-is: no vdev validation and no
	 * spa_load() are performed here.
	 */
	spa->spa_load_verbatim = B_TRUE;

	VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}

/*
 * Import a non-root pool into the system.
 *
 * Returns 0 on success; EEXIST if the name is taken; otherwise the error
 * from spa_load()/spa_prop_set().  On failure the partially constructed
 * spa_t is fully torn down before returning.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/*
	 * NOTE(review): the vdev tree is looked up (and VERIFY'd) even when
	 * spa_load() failed; this relies on every import config carrying a
	 * ZPOOL_CONFIG_VDEV_TREE entry.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}


/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"

/*
 * Probe a candidate pool described by 'tryconfig' without actually importing
 * it: the pool is loaded read-only under TRYIMPORT_NAME, its current config
 * is generated and annotated (pool name/state, timestamp, bootfs, spares,
 * l2cache), then the temporary spa_t is torn down again.  Returns the newly
 * allocated config (caller frees) or NULL if tryconfig lacks the basic
 * name/state fields or could not be parsed at all.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Swap the "$import" prefix of the dataset
				 * name for the real pool name.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* Tear the temporary spa_t back down regardless of the outcome. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple.  We make sure there
 * is no more pending I/O and any references to the pool are gone.  Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 */
/*
 * Common worker for spa_destroy()/spa_export()/spa_reset().
 *
 * new_state selects the operation (POOL_STATE_DESTROYED, _EXPORTED, or
 * _UNINITIALIZED for a reset).  If oldconfig is non-NULL it receives a dup
 * of the pool's final config.  'force' allows export despite an active
 * shared spare; 'hardforce' additionally skips the final label sync and
 * config-cache update.  Returns 0, EROFS, ENOENT, EBUSY, or EXDEV.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	/* A reset (POOL_STATE_UNINITIALIZED) keeps the spa_t in the namespace. */
	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
 */
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add a device to a storage pool.
 */
/*
 * 'nvroot' describes the new top-level vdevs, spares, and/or l2cache
 * devices to add.  Returns 0 or an errno; on success the config cache has
 * been synced and the new metaslabs initialized.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	/* Missing spare/l2cache arrays simply mean "none requested". */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* Reject a request that adds nothing at all. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 *
 * Returns 0 on success (with a resilver kicked off), or an errno.
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* Exactly one new leaf may be attached at a time. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* strlen + "/old" + NUL = strlen + 5 */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	newvd->vdev_crtxg = oldvd->vdev_crtxg;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/* Snapshot the paths before spa_vdev_exit() may free the vdevs. */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(), "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}

/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
 */
/*
 * 'pguid', when nonzero, must match the current parent's guid; this guards
 * against racing topology changes (see the M(A,R(B,C)) discussion below).
 */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;
	size_t len;

	txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If the parent/child relationship is not as expected, don't do it.
	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
	 * vdev that's replacing B with C.  The user's intent in replacing
	 * is to go from M(A,B) to M(A,C).  If the user decides to cancel
	 * the replace by detaching C, the expected behavior is to end up
	 * M(A,B).  But suppose that right after deciding to detach C,
	 * the replacement of B completes.  We would have M(A,C), and then
	 * ask to detach C, which would leave us with just A -- not what
	 * the user wanted.  To prevent this, we make sure that the
	 * parent/child relationship hasn't changed -- in this example,
	 * that C's parent is still the replacing vdev R.
	 */
	if (pvd->vdev_guid != pguid && pguid != 0)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * If replace_done is specified, only remove this device if it's
	 * the first child of a replacing vdev.  For the 'spare' vdev, either
	 * disk can be removed.
	 */
	if (replace_done) {
		if (pvd->vdev_ops == &vdev_replacing_ops) {
			if (vd->vdev_id != 0)
				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		} else if (pvd->vdev_ops != &vdev_spare_ops) {
			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		}
	}

	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
	    spa_version(spa) >= SPA_VERSION_SPARES);

	/*
	 * Only mirror, replacing, and spare vdevs support detach.
	 */
	if (pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_mirror_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	/*
	 * If this device has the only valid copy of some data,
	 * we cannot safely detach it.
	 */
	if (vdev_dtl_required(vd))
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	ASSERT(pvd->vdev_children >= 2);

	/*
	 * If we are detaching the second disk from a replacing vdev, then
	 * check to see if we changed the original vdev's path to have "/old"
	 * at the end in spa_vdev_attach().  If so, undo that change now.
	 */
	if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
	    pvd->vdev_child[0]->vdev_path != NULL &&
	    pvd->vdev_child[1]->vdev_path != NULL) {
		ASSERT(pvd->vdev_child[1] == vd);
		cvd = pvd->vdev_child[0];
		len = strlen(vd->vdev_path);
		if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
		    strcmp(cvd->vdev_path + len, "/old") == 0) {
			spa_strfree(cvd->vdev_path);
			cvd->vdev_path = spa_strdup(vd->vdev_path);
		}
	}

	/*
	 * If we are detaching the original disk from a spare, then it implies
	 * that the spare should become a real disk, and be removed from the
	 * active spare list for the pool.
	 */
	if (pvd->vdev_ops == &vdev_spare_ops &&
	    vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
		unspare = B_TRUE;

	/*
	 * Erase the disk labels so the disk can be used for other things.
	 * This must be done after all other error cases are handled,
	 * but before we disembowel vd (so we can still do I/O to it).
	 * But if we can't do it, don't treat the error as fatal --
	 * it may be that the unwritability of the disk is the reason
	 * it's being detached!
	 */
	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/*
	 * Remove vd from its parent and compact the parent's children.
	 */
	vdev_remove_child(pvd, vd);
	vdev_compact_children(pvd);

	/*
	 * Remember one of the remaining children so we can get tvd below.
	 */
	cvd = pvd->vdev_child[0];

	/*
	 * If we need to remove the remaining child from the list of hot spares,
	 * do it now, marking the vdev as no longer a spare in the process.
	 * We must do this before vdev_remove_parent(), because that can
	 * change the GUID if it creates a new toplevel GUID.  For a similar
	 * reason, we must remove the spare now, in the same txg as the detach;
	 * otherwise someone could attach a new sibling, change the GUID, and
	 * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
	 */
	if (unspare) {
		ASSERT(cvd->vdev_isspare);
		spa_spare_remove(cvd);
		unspare_guid = cvd->vdev_guid;
		(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
	}

	/*
	 * If the parent mirror/replacing vdev only has one child,
	 * the parent is no longer needed.  Remove it from the tree.
	 */
	if (pvd->vdev_children == 1)
		vdev_remove_parent(cvd);

	/*
	 * We don't set tvd until now because the parent we just removed
	 * may have been the previous top-level vdev.
	 */
	tvd = cvd->vdev_top;
	ASSERT(tvd->vdev_parent == rvd);

	/*
	 * Reevaluate the parent vdev state.
	 */
	vdev_propagate_state(cvd);

	/*
	 * If the 'autoexpand' property is set on the pool then automatically
	 * try to expand the size of the pool. For example if the device we
	 * just detached was smaller than the others, it may be possible to
	 * add metaslabs (i.e. grow the pool).  We need to reopen the vdev
	 * first so that we can obtain the updated sizes of the leaf vdevs.
	 */
	if (spa->spa_autoexpand) {
		vdev_reopen(tvd);
		vdev_expand(tvd, txg);
	}

	vdev_config_dirty(tvd);

	/*
	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
	 * But first make sure we're not on any *other* txg's DTL list, to
	 * prevent vd from being accessed after it's freed.
	 */
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	error = spa_vdev_exit(spa, vd, txg, 0);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa_t *myspa = spa;
		spa = NULL;
		mutex_enter(&spa_namespace_lock);
		while ((spa = spa_next(spa)) != NULL) {
			if (spa->spa_state != POOL_STATE_ACTIVE)
				continue;
			if (spa == myspa)
				continue;
			/* Hold a ref so the spa survives dropping the lock. */
			spa_open_ref(spa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(spa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (error);
}

/*
 * Find the nvlist in 'nvpp' whose ZPOOL_CONFIG_GUID equals 'target_guid',
 * or NULL if absent.
 */
static nvlist_t *
spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
{
	for (int i = 0; i < count; i++) {
		uint64_t guid;

		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		if (guid == target_guid)
			return (nvpp[i]);
	}

	return (NULL);
}

/*
 * Replace the 'name' nvlist array in 'config' with a copy of 'dev' that
 * omits 'dev_to_remove'.
 */
static void
spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
    nvlist_t *dev_to_remove)
{
	nvlist_t **newdev = NULL;

	if (count > 1)
		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);

	for (int i = 0, j = 0; i < count; i++) {
		if (dev[i] == dev_to_remove)
			continue;
		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
	}

	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);

	for (int i = 0; i < count - 1; i++)
		nvlist_free(newdev[i]);

	if (count > 1)
		kmem_free(newdev, (count - 1) * sizeof (void *));
}

/*
 * Removing a device from the vdev namespace requires several steps
 * and can take a significant amount of time.  As a result we use
 * the spa_vdev_config_[enter/exit] functions which allow us to
 * grab and release the spa_config_lock while still holding the namespace
 * lock.  During each step the configuration is synced out.
 */

/*
 * Initial phase of device removal - stop future allocations from this device.
350688ecc943SGeorge Wilson */ 350788ecc943SGeorge Wilson void 350888ecc943SGeorge Wilson spa_vdev_remove_start(spa_t *spa, vdev_t *vd) 350988ecc943SGeorge Wilson { 351088ecc943SGeorge Wilson metaslab_group_t *mg = vd->vdev_mg; 351188ecc943SGeorge Wilson 351288ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 351388ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 351488ecc943SGeorge Wilson 351588ecc943SGeorge Wilson /* 351688ecc943SGeorge Wilson * Remove our vdev from the allocatable vdevs 351788ecc943SGeorge Wilson */ 351888ecc943SGeorge Wilson if (mg) 351988ecc943SGeorge Wilson metaslab_class_remove(mg->mg_class, mg); 352088ecc943SGeorge Wilson } 352188ecc943SGeorge Wilson 352288ecc943SGeorge Wilson /* 352388ecc943SGeorge Wilson * Evacuate the device. 352488ecc943SGeorge Wilson */ 352588ecc943SGeorge Wilson int 352688ecc943SGeorge Wilson spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 352788ecc943SGeorge Wilson { 352888ecc943SGeorge Wilson uint64_t txg; 352988ecc943SGeorge Wilson int error; 353088ecc943SGeorge Wilson 353188ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 353288ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 353388ecc943SGeorge Wilson 353488ecc943SGeorge Wilson /* 353588ecc943SGeorge Wilson * Evacuate the device. We don't hold the config lock as writer 353688ecc943SGeorge Wilson * since we need to do I/O but we do keep the 353788ecc943SGeorge Wilson * spa_namespace_lock held. Once this completes the device 353888ecc943SGeorge Wilson * should no longer have any blocks allocated on it. 353988ecc943SGeorge Wilson */ 354088ecc943SGeorge Wilson if (vd->vdev_islog) { 354188ecc943SGeorge Wilson /* 354288ecc943SGeorge Wilson * Evacuate the device. 
354388ecc943SGeorge Wilson */ 354488ecc943SGeorge Wilson if (error = dmu_objset_find(spa_name(spa), 354588ecc943SGeorge Wilson zil_vdev_offline, NULL, DS_FIND_CHILDREN)) { 354688ecc943SGeorge Wilson uint64_t txg; 354788ecc943SGeorge Wilson 354888ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 354988ecc943SGeorge Wilson metaslab_class_add(spa->spa_log_class, 355088ecc943SGeorge Wilson vd->vdev_mg); 355188ecc943SGeorge Wilson return (spa_vdev_exit(spa, NULL, txg, error)); 355288ecc943SGeorge Wilson } 355388ecc943SGeorge Wilson txg_wait_synced(spa_get_dsl(spa), 0); 355488ecc943SGeorge Wilson } 355588ecc943SGeorge Wilson 355688ecc943SGeorge Wilson /* 355788ecc943SGeorge Wilson * Remove any remaining MOS metadata associated with the device. 355888ecc943SGeorge Wilson */ 355988ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 356088ecc943SGeorge Wilson vd->vdev_removing = B_TRUE; 356188ecc943SGeorge Wilson vdev_dirty(vd, 0, NULL, txg); 356288ecc943SGeorge Wilson vdev_config_dirty(vd); 356388ecc943SGeorge Wilson spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 356488ecc943SGeorge Wilson 356588ecc943SGeorge Wilson return (0); 356688ecc943SGeorge Wilson } 356788ecc943SGeorge Wilson 356888ecc943SGeorge Wilson /* 356988ecc943SGeorge Wilson * Complete the removal by cleaning up the namespace. 
 */
void
spa_vdev_remove_done(spa_t *spa, vdev_t *vd)
{
	vdev_t *rvd = spa->spa_root_vdev;
	metaslab_group_t *mg = vd->vdev_mg;
	uint64_t id = vd->vdev_id;
	boolean_t last_vdev = (id == (rvd->vdev_children - 1));

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
	vdev_free(vd);

	/*
	 * It's possible that another thread is trying to do a spa_vdev_add()
	 * at the same time we're trying to remove it.  As a result the
	 * added vdev may not have initialized its metaslabs yet.
	 */
	if (mg != NULL)
		metaslab_group_destroy(mg);

	/*
	 * If this was not the last top-level vdev, plug the hole with a
	 * placeholder so the remaining vdev ids stay stable.
	 */
	if (last_vdev) {
		vdev_compact_children(rvd);
	} else {
		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
		vdev_add_child(rvd, vd);
	}
	vdev_config_dirty(rvd);

	/*
	 * Reassess the health of our root vdev.
	 */
	vdev_reopen(rvd);
}

/*
 * Remove a device from the pool.
Currently, this supports removing only hot 360988ecc943SGeorge Wilson * spares, slogs, and level 2 ARC devices. 3610fa94a07fSbrendan */ 3611fa94a07fSbrendan int 3612fa94a07fSbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 3613fa94a07fSbrendan { 3614fa94a07fSbrendan vdev_t *vd; 3615e14bb325SJeff Bonwick nvlist_t **spares, **l2cache, *nv; 36168ad4d6ddSJeff Bonwick uint64_t txg = 0; 361788ecc943SGeorge Wilson uint_t nspares, nl2cache; 3618fa94a07fSbrendan int error = 0; 36198ad4d6ddSJeff Bonwick boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 3620fa94a07fSbrendan 36218ad4d6ddSJeff Bonwick if (!locked) 36228ad4d6ddSJeff Bonwick txg = spa_vdev_enter(spa); 3623fa94a07fSbrendan 3624c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3625fa94a07fSbrendan 3626fa94a07fSbrendan if (spa->spa_spares.sav_vdevs != NULL && 3627fa94a07fSbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 3628e14bb325SJeff Bonwick ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 3629e14bb325SJeff Bonwick (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 3630e14bb325SJeff Bonwick /* 3631e14bb325SJeff Bonwick * Only remove the hot spare if it's not currently in use 3632e14bb325SJeff Bonwick * in this pool. 
3633e14bb325SJeff Bonwick */ 3634e14bb325SJeff Bonwick if (vd == NULL || unspare) { 3635e14bb325SJeff Bonwick spa_vdev_remove_aux(spa->spa_spares.sav_config, 3636e14bb325SJeff Bonwick ZPOOL_CONFIG_SPARES, spares, nspares, nv); 3637e14bb325SJeff Bonwick spa_load_spares(spa); 3638e14bb325SJeff Bonwick spa->spa_spares.sav_sync = B_TRUE; 3639e14bb325SJeff Bonwick } else { 3640e14bb325SJeff Bonwick error = EBUSY; 3641e14bb325SJeff Bonwick } 3642e14bb325SJeff Bonwick } else if (spa->spa_l2cache.sav_vdevs != NULL && 3643fa94a07fSbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 3644e14bb325SJeff Bonwick ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 3645e14bb325SJeff Bonwick (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 3646e14bb325SJeff Bonwick /* 3647e14bb325SJeff Bonwick * Cache devices can always be removed. 3648e14bb325SJeff Bonwick */ 3649e14bb325SJeff Bonwick spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 3650e14bb325SJeff Bonwick ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 3651fa94a07fSbrendan spa_load_l2cache(spa); 3652fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 365388ecc943SGeorge Wilson } else if (vd != NULL && vd->vdev_islog) { 365488ecc943SGeorge Wilson ASSERT(!locked); 365588ecc943SGeorge Wilson 365688ecc943SGeorge Wilson /* 365788ecc943SGeorge Wilson * XXX - Once we have bp-rewrite this should 365888ecc943SGeorge Wilson * become the common case. 365988ecc943SGeorge Wilson */ 366088ecc943SGeorge Wilson 366188ecc943SGeorge Wilson /* 366288ecc943SGeorge Wilson * 1. Stop allocations 366388ecc943SGeorge Wilson * 2. Evacuate the device (i.e. kill off stubby and 366488ecc943SGeorge Wilson * metadata) and wait for it to complete (i.e. sync). 366588ecc943SGeorge Wilson * 3. Cleanup the vdev namespace. 
366688ecc943SGeorge Wilson */ 366788ecc943SGeorge Wilson spa_vdev_remove_start(spa, vd); 366888ecc943SGeorge Wilson 366988ecc943SGeorge Wilson spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 367088ecc943SGeorge Wilson if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0) 367188ecc943SGeorge Wilson return (error); 367288ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 367388ecc943SGeorge Wilson 367488ecc943SGeorge Wilson spa_vdev_remove_done(spa, vd); 367588ecc943SGeorge Wilson 3676e14bb325SJeff Bonwick } else if (vd != NULL) { 3677e14bb325SJeff Bonwick /* 3678e14bb325SJeff Bonwick * Normal vdevs cannot be removed (yet). 3679e14bb325SJeff Bonwick */ 3680e14bb325SJeff Bonwick error = ENOTSUP; 3681e14bb325SJeff Bonwick } else { 3682e14bb325SJeff Bonwick /* 3683e14bb325SJeff Bonwick * There is no vdev of any kind with the specified guid. 3684e14bb325SJeff Bonwick */ 3685e14bb325SJeff Bonwick error = ENOENT; 3686fa94a07fSbrendan } 368799653d4eSeschrock 36888ad4d6ddSJeff Bonwick if (!locked) 36898ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, error)); 36908ad4d6ddSJeff Bonwick 36918ad4d6ddSJeff Bonwick return (error); 3692fa9e4066Sahrens } 3693fa9e4066Sahrens 3694fa9e4066Sahrens /* 36953d7072f8Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's 36963d7072f8Seschrock * current spared, so we can detach it. 3697fa9e4066Sahrens */ 3698ea8dc4b6Seschrock static vdev_t * 36993d7072f8Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd) 3700fa9e4066Sahrens { 3701ea8dc4b6Seschrock vdev_t *newvd, *oldvd; 3702fa9e4066Sahrens 3703573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) { 37043d7072f8Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 3705ea8dc4b6Seschrock if (oldvd != NULL) 3706ea8dc4b6Seschrock return (oldvd); 3707ea8dc4b6Seschrock } 3708fa9e4066Sahrens 37093d7072f8Seschrock /* 37103d7072f8Seschrock * Check for a completed replacement. 
37113d7072f8Seschrock */ 3712fa9e4066Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 3713ea8dc4b6Seschrock oldvd = vd->vdev_child[0]; 3714ea8dc4b6Seschrock newvd = vd->vdev_child[1]; 3715ea8dc4b6Seschrock 37168ad4d6ddSJeff Bonwick if (vdev_dtl_empty(newvd, DTL_MISSING) && 37178ad4d6ddSJeff Bonwick !vdev_dtl_required(oldvd)) 3718ea8dc4b6Seschrock return (oldvd); 3719fa9e4066Sahrens } 3720ea8dc4b6Seschrock 37213d7072f8Seschrock /* 37223d7072f8Seschrock * Check for a completed resilver with the 'unspare' flag set. 37233d7072f8Seschrock */ 37243d7072f8Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 37253d7072f8Seschrock newvd = vd->vdev_child[0]; 37263d7072f8Seschrock oldvd = vd->vdev_child[1]; 37273d7072f8Seschrock 37283d7072f8Seschrock if (newvd->vdev_unspare && 37298ad4d6ddSJeff Bonwick vdev_dtl_empty(newvd, DTL_MISSING) && 37308ad4d6ddSJeff Bonwick !vdev_dtl_required(oldvd)) { 37313d7072f8Seschrock newvd->vdev_unspare = 0; 37323d7072f8Seschrock return (oldvd); 37333d7072f8Seschrock } 37343d7072f8Seschrock } 37353d7072f8Seschrock 3736ea8dc4b6Seschrock return (NULL); 3737fa9e4066Sahrens } 3738fa9e4066Sahrens 3739ea8dc4b6Seschrock static void 37403d7072f8Seschrock spa_vdev_resilver_done(spa_t *spa) 3741fa9e4066Sahrens { 37428ad4d6ddSJeff Bonwick vdev_t *vd, *pvd, *ppvd; 37438ad4d6ddSJeff Bonwick uint64_t guid, sguid, pguid, ppguid; 3744ea8dc4b6Seschrock 37458ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3746ea8dc4b6Seschrock 37473d7072f8Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 37488ad4d6ddSJeff Bonwick pvd = vd->vdev_parent; 37498ad4d6ddSJeff Bonwick ppvd = pvd->vdev_parent; 3750ea8dc4b6Seschrock guid = vd->vdev_guid; 37518ad4d6ddSJeff Bonwick pguid = pvd->vdev_guid; 37528ad4d6ddSJeff Bonwick ppguid = ppvd->vdev_guid; 37538ad4d6ddSJeff Bonwick sguid = 0; 375499653d4eSeschrock /* 375599653d4eSeschrock * If we have just finished replacing a hot 
spared device, then 375699653d4eSeschrock * we need to detach the parent's first child (the original hot 375799653d4eSeschrock * spare) as well. 375899653d4eSeschrock */ 37598ad4d6ddSJeff Bonwick if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) { 376099653d4eSeschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 37618ad4d6ddSJeff Bonwick ASSERT(ppvd->vdev_children == 2); 37628ad4d6ddSJeff Bonwick sguid = ppvd->vdev_child[1]->vdev_guid; 376399653d4eSeschrock } 37648ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 37658ad4d6ddSJeff Bonwick if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 3766ea8dc4b6Seschrock return; 37678ad4d6ddSJeff Bonwick if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 376899653d4eSeschrock return; 37698ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3770fa9e4066Sahrens } 3771fa9e4066Sahrens 37728ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 3773fa9e4066Sahrens } 3774fa9e4066Sahrens 3775c67d9675Seschrock /* 37766809eb4eSEric Schrock * Update the stored path or FRU for this vdev. Dirty the vdev configuration, 37776809eb4eSEric Schrock * relying on spa_vdev_enter/exit() to synchronize the labels and cache. 
3778c67d9675Seschrock */ 3779c67d9675Seschrock int 37806809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 37816809eb4eSEric Schrock boolean_t ispath) 3782c67d9675Seschrock { 3783c5904d13Seschrock vdev_t *vd; 3784c67d9675Seschrock uint64_t txg; 3785c67d9675Seschrock 3786c67d9675Seschrock txg = spa_vdev_enter(spa); 3787c67d9675Seschrock 37886809eb4eSEric Schrock if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 3789fa94a07fSbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 3790c67d9675Seschrock 37910e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 37920e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 37930e34b6a7Sbonwick 37946809eb4eSEric Schrock if (ispath) { 37956809eb4eSEric Schrock spa_strfree(vd->vdev_path); 37966809eb4eSEric Schrock vd->vdev_path = spa_strdup(value); 37976809eb4eSEric Schrock } else { 37986809eb4eSEric Schrock if (vd->vdev_fru != NULL) 37996809eb4eSEric Schrock spa_strfree(vd->vdev_fru); 38006809eb4eSEric Schrock vd->vdev_fru = spa_strdup(value); 38016809eb4eSEric Schrock } 3802c67d9675Seschrock 3803c67d9675Seschrock vdev_config_dirty(vd->vdev_top); 3804c67d9675Seschrock 3805c67d9675Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 3806c67d9675Seschrock } 3807c67d9675Seschrock 38086809eb4eSEric Schrock int 38096809eb4eSEric Schrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 38106809eb4eSEric Schrock { 38116809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 38126809eb4eSEric Schrock } 38136809eb4eSEric Schrock 38146809eb4eSEric Schrock int 38156809eb4eSEric Schrock spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 38166809eb4eSEric Schrock { 38176809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 38186809eb4eSEric Schrock } 38196809eb4eSEric Schrock 3820fa9e4066Sahrens /* 3821fa9e4066Sahrens * ========================================================================== 3822fa9e4066Sahrens 
* SPA Scrubbing 3823fa9e4066Sahrens * ========================================================================== 3824fa9e4066Sahrens */ 3825fa9e4066Sahrens 3826ea8dc4b6Seschrock int 3827088f3894Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type) 3828fa9e4066Sahrens { 3829e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 3830bb8b5132Sek 3831fa9e4066Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 3832fa9e4066Sahrens return (ENOTSUP); 3833fa9e4066Sahrens 3834fa9e4066Sahrens /* 3835088f3894Sahrens * If a resilver was requested, but there is no DTL on a 3836088f3894Sahrens * writeable leaf device, we have nothing to do. 3837fa9e4066Sahrens */ 3838088f3894Sahrens if (type == POOL_SCRUB_RESILVER && 3839088f3894Sahrens !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 3840088f3894Sahrens spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 3841ea8dc4b6Seschrock return (0); 3842ea8dc4b6Seschrock } 3843fa9e4066Sahrens 3844088f3894Sahrens if (type == POOL_SCRUB_EVERYTHING && 3845088f3894Sahrens spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE && 3846088f3894Sahrens spa->spa_dsl_pool->dp_scrub_isresilver) 3847088f3894Sahrens return (EBUSY); 3848fa9e4066Sahrens 3849088f3894Sahrens if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) { 3850088f3894Sahrens return (dsl_pool_scrub_clean(spa->spa_dsl_pool)); 3851088f3894Sahrens } else if (type == POOL_SCRUB_NONE) { 3852088f3894Sahrens return (dsl_pool_scrub_cancel(spa->spa_dsl_pool)); 3853ea8dc4b6Seschrock } else { 3854088f3894Sahrens return (EINVAL); 3855fa9e4066Sahrens } 3856fa9e4066Sahrens } 3857fa9e4066Sahrens 3858ea8dc4b6Seschrock /* 3859ea8dc4b6Seschrock * ========================================================================== 3860ea8dc4b6Seschrock * SPA async task processing 3861ea8dc4b6Seschrock * ========================================================================== 3862ea8dc4b6Seschrock */ 3863ea8dc4b6Seschrock 3864ea8dc4b6Seschrock static void 38653d7072f8Seschrock 
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = 0;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);

		/*
		 * We want to clear the stats, but we don't want to do a full
		 * vdev_clear() as that will cause us to throw away
		 * degraded/faulted state as well as attempt to reopen the
		 * device, all of which is a waste.
		 */
		vd->vdev_stat.vs_read_errors = 0;
		vd->vdev_stat.vs_write_errors = 0;
		vd->vdev_stat.vs_checksum_errors = 0;

		vdev_state_dirty(vd->vdev_top);
	}

	/* Recurse over the entire subtree. */
	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}

/* Recursively reopen any vdev that has a probe pending. */
static void
spa_async_probe(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_probe_wanted) {
		vd->vdev_probe_wanted = 0;
		vdev_reopen(vd);	/* vdev_open() does the actual probe */
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_probe(spa, vd->vdev_child[c]);
}

static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
sysevent_id_t eid; 3904573ca77eSGeorge Wilson nvlist_t *attr; 3905573ca77eSGeorge Wilson char *physpath; 3906573ca77eSGeorge Wilson 3907573ca77eSGeorge Wilson if (!spa->spa_autoexpand) 3908573ca77eSGeorge Wilson return; 3909573ca77eSGeorge Wilson 3910573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) { 3911573ca77eSGeorge Wilson vdev_t *cvd = vd->vdev_child[c]; 3912573ca77eSGeorge Wilson spa_async_autoexpand(spa, cvd); 3913573ca77eSGeorge Wilson } 3914573ca77eSGeorge Wilson 3915573ca77eSGeorge Wilson if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) 3916573ca77eSGeorge Wilson return; 3917573ca77eSGeorge Wilson 3918573ca77eSGeorge Wilson physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 3919573ca77eSGeorge Wilson (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); 3920573ca77eSGeorge Wilson 3921573ca77eSGeorge Wilson VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 3922573ca77eSGeorge Wilson VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 3923573ca77eSGeorge Wilson 3924573ca77eSGeorge Wilson (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 3925573ca77eSGeorge Wilson ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 3926573ca77eSGeorge Wilson 3927573ca77eSGeorge Wilson nvlist_free(attr); 3928573ca77eSGeorge Wilson kmem_free(physpath, MAXPATHLEN); 3929573ca77eSGeorge Wilson } 3930573ca77eSGeorge Wilson 3931ea8dc4b6Seschrock static void 3932ea8dc4b6Seschrock spa_async_thread(spa_t *spa) 3933ea8dc4b6Seschrock { 3934e14bb325SJeff Bonwick int tasks; 3935ea8dc4b6Seschrock 3936ea8dc4b6Seschrock ASSERT(spa->spa_sync_on); 3937ea8dc4b6Seschrock 3938ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 3939ea8dc4b6Seschrock tasks = spa->spa_async_tasks; 3940ea8dc4b6Seschrock spa->spa_async_tasks = 0; 3941ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 3942ea8dc4b6Seschrock 39430373e76bSbonwick /* 39440373e76bSbonwick * See if the config needs to be updated. 
39450373e76bSbonwick */ 39460373e76bSbonwick if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 3947573ca77eSGeorge Wilson uint64_t oldsz, space_update; 3948573ca77eSGeorge Wilson 39490373e76bSbonwick mutex_enter(&spa_namespace_lock); 3950573ca77eSGeorge Wilson oldsz = spa_get_space(spa); 39510373e76bSbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 3952573ca77eSGeorge Wilson space_update = spa_get_space(spa) - oldsz; 39530373e76bSbonwick mutex_exit(&spa_namespace_lock); 3954573ca77eSGeorge Wilson 3955573ca77eSGeorge Wilson /* 3956573ca77eSGeorge Wilson * If the pool grew as a result of the config update, 3957573ca77eSGeorge Wilson * then log an internal history event. 3958573ca77eSGeorge Wilson */ 3959573ca77eSGeorge Wilson if (space_update) { 3960c8e1f6d2SMark J Musante spa_history_internal_log(LOG_POOL_VDEV_ONLINE, 3961c8e1f6d2SMark J Musante spa, NULL, CRED(), 3962c8e1f6d2SMark J Musante "pool '%s' size: %llu(+%llu)", 3963c8e1f6d2SMark J Musante spa_name(spa), spa_get_space(spa), 3964c8e1f6d2SMark J Musante space_update); 3965573ca77eSGeorge Wilson } 39660373e76bSbonwick } 39670373e76bSbonwick 3968ea8dc4b6Seschrock /* 39693d7072f8Seschrock * See if any devices need to be marked REMOVED. 
3970ea8dc4b6Seschrock */ 3971e14bb325SJeff Bonwick if (tasks & SPA_ASYNC_REMOVE) { 39728f18d1faSGeorge Wilson spa_vdev_state_enter(spa, SCL_NONE); 39733d7072f8Seschrock spa_async_remove(spa, spa->spa_root_vdev); 3974e14bb325SJeff Bonwick for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 397549cf58c0SBrendan Gregg - Sun Microsystems spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 3976e14bb325SJeff Bonwick for (int i = 0; i < spa->spa_spares.sav_count; i++) 397749cf58c0SBrendan Gregg - Sun Microsystems spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 3978e14bb325SJeff Bonwick (void) spa_vdev_state_exit(spa, NULL, 0); 3979e14bb325SJeff Bonwick } 3980e14bb325SJeff Bonwick 3981573ca77eSGeorge Wilson if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { 3982573ca77eSGeorge Wilson spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3983573ca77eSGeorge Wilson spa_async_autoexpand(spa, spa->spa_root_vdev); 3984573ca77eSGeorge Wilson spa_config_exit(spa, SCL_CONFIG, FTAG); 3985573ca77eSGeorge Wilson } 3986573ca77eSGeorge Wilson 3987e14bb325SJeff Bonwick /* 3988e14bb325SJeff Bonwick * See if any devices need to be probed. 3989e14bb325SJeff Bonwick */ 3990e14bb325SJeff Bonwick if (tasks & SPA_ASYNC_PROBE) { 39918f18d1faSGeorge Wilson spa_vdev_state_enter(spa, SCL_NONE); 3992e14bb325SJeff Bonwick spa_async_probe(spa, spa->spa_root_vdev); 3993e14bb325SJeff Bonwick (void) spa_vdev_state_exit(spa, NULL, 0); 39943d7072f8Seschrock } 3995ea8dc4b6Seschrock 3996ea8dc4b6Seschrock /* 3997ea8dc4b6Seschrock * If any devices are done replacing, detach them. 3998ea8dc4b6Seschrock */ 39993d7072f8Seschrock if (tasks & SPA_ASYNC_RESILVER_DONE) 40003d7072f8Seschrock spa_vdev_resilver_done(spa); 4001fa9e4066Sahrens 4002ea8dc4b6Seschrock /* 4003ea8dc4b6Seschrock * Kick off a resilver. 
4004ea8dc4b6Seschrock */ 4005088f3894Sahrens if (tasks & SPA_ASYNC_RESILVER) 4006088f3894Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0); 4007ea8dc4b6Seschrock 4008ea8dc4b6Seschrock /* 4009ea8dc4b6Seschrock * Let the world know that we're done. 4010ea8dc4b6Seschrock */ 4011ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4012ea8dc4b6Seschrock spa->spa_async_thread = NULL; 4013ea8dc4b6Seschrock cv_broadcast(&spa->spa_async_cv); 4014ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4015ea8dc4b6Seschrock thread_exit(); 4016ea8dc4b6Seschrock } 4017ea8dc4b6Seschrock 4018ea8dc4b6Seschrock void 4019ea8dc4b6Seschrock spa_async_suspend(spa_t *spa) 4020ea8dc4b6Seschrock { 4021ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4022ea8dc4b6Seschrock spa->spa_async_suspended++; 4023ea8dc4b6Seschrock while (spa->spa_async_thread != NULL) 4024ea8dc4b6Seschrock cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 4025ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4026ea8dc4b6Seschrock } 4027ea8dc4b6Seschrock 4028ea8dc4b6Seschrock void 4029ea8dc4b6Seschrock spa_async_resume(spa_t *spa) 4030ea8dc4b6Seschrock { 4031ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4032ea8dc4b6Seschrock ASSERT(spa->spa_async_suspended != 0); 4033ea8dc4b6Seschrock spa->spa_async_suspended--; 4034ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4035ea8dc4b6Seschrock } 4036ea8dc4b6Seschrock 4037ea8dc4b6Seschrock static void 4038ea8dc4b6Seschrock spa_async_dispatch(spa_t *spa) 4039ea8dc4b6Seschrock { 4040ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4041ea8dc4b6Seschrock if (spa->spa_async_tasks && !spa->spa_async_suspended && 40420373e76bSbonwick spa->spa_async_thread == NULL && 40430373e76bSbonwick rootdir != NULL && !vn_is_readonly(rootdir)) 4044ea8dc4b6Seschrock spa->spa_async_thread = thread_create(NULL, 0, 4045ea8dc4b6Seschrock spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 4046ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4047ea8dc4b6Seschrock } 
4048ea8dc4b6Seschrock 4049ea8dc4b6Seschrock void 4050ea8dc4b6Seschrock spa_async_request(spa_t *spa, int task) 4051ea8dc4b6Seschrock { 4052ea8dc4b6Seschrock mutex_enter(&spa->spa_async_lock); 4053ea8dc4b6Seschrock spa->spa_async_tasks |= task; 4054ea8dc4b6Seschrock mutex_exit(&spa->spa_async_lock); 4055fa9e4066Sahrens } 4056fa9e4066Sahrens 4057fa9e4066Sahrens /* 4058fa9e4066Sahrens * ========================================================================== 4059fa9e4066Sahrens * SPA syncing routines 4060fa9e4066Sahrens * ========================================================================== 4061fa9e4066Sahrens */ 4062fa9e4066Sahrens 4063fa9e4066Sahrens static void 4064fa9e4066Sahrens spa_sync_deferred_frees(spa_t *spa, uint64_t txg) 4065fa9e4066Sahrens { 4066fa9e4066Sahrens bplist_t *bpl = &spa->spa_sync_bplist; 4067fa9e4066Sahrens dmu_tx_t *tx; 4068fa9e4066Sahrens blkptr_t blk; 4069fa9e4066Sahrens uint64_t itor = 0; 4070fa9e4066Sahrens zio_t *zio; 4071fa9e4066Sahrens int error; 4072fa9e4066Sahrens uint8_t c = 1; 4073fa9e4066Sahrens 4074e14bb325SJeff Bonwick zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 4075fa9e4066Sahrens 4076e14bb325SJeff Bonwick while (bplist_iterate(bpl, &itor, &blk) == 0) { 4077e14bb325SJeff Bonwick ASSERT(blk.blk_birth < txg); 4078e14bb325SJeff Bonwick zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL, 4079e14bb325SJeff Bonwick ZIO_FLAG_MUSTSUCCEED)); 4080e14bb325SJeff Bonwick } 4081fa9e4066Sahrens 4082fa9e4066Sahrens error = zio_wait(zio); 4083fa9e4066Sahrens ASSERT3U(error, ==, 0); 4084fa9e4066Sahrens 4085fa9e4066Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 4086fa9e4066Sahrens bplist_vacate(bpl, tx); 4087fa9e4066Sahrens 4088fa9e4066Sahrens /* 4089fa9e4066Sahrens * Pre-dirty the first block so we sync to convergence faster. 4090fa9e4066Sahrens * (Usually only the first block is needed.) 
4091fa9e4066Sahrens */ 4092fa9e4066Sahrens dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 4093fa9e4066Sahrens dmu_tx_commit(tx); 4094fa9e4066Sahrens } 4095fa9e4066Sahrens 4096fa9e4066Sahrens static void 409799653d4eSeschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 4098fa9e4066Sahrens { 4099fa9e4066Sahrens char *packed = NULL; 4100f7991ba4STim Haley size_t bufsize; 4101fa9e4066Sahrens size_t nvsize = 0; 4102fa9e4066Sahrens dmu_buf_t *db; 4103fa9e4066Sahrens 410499653d4eSeschrock VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 4105fa9e4066Sahrens 4106f7991ba4STim Haley /* 4107f7991ba4STim Haley * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 4108f7991ba4STim Haley * information. This avoids the dbuf_will_dirty() path and 4109f7991ba4STim Haley * saves us a pre-read to get data we don't actually care about. 4110f7991ba4STim Haley */ 4111f7991ba4STim Haley bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 4112f7991ba4STim Haley packed = kmem_alloc(bufsize, KM_SLEEP); 4113fa9e4066Sahrens 411499653d4eSeschrock VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 4115ea8dc4b6Seschrock KM_SLEEP) == 0); 4116f7991ba4STim Haley bzero(packed + nvsize, bufsize - nvsize); 4117fa9e4066Sahrens 4118f7991ba4STim Haley dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 4119fa9e4066Sahrens 4120f7991ba4STim Haley kmem_free(packed, bufsize); 4121fa9e4066Sahrens 412299653d4eSeschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 4123fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 4124fa9e4066Sahrens *(uint64_t *)db->db_data = nvsize; 4125ea8dc4b6Seschrock dmu_buf_rele(db, FTAG); 4126fa9e4066Sahrens } 4127fa9e4066Sahrens 412899653d4eSeschrock static void 4129fa94a07fSbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 4130fa94a07fSbrendan const char *config, const char *entry) 413199653d4eSeschrock { 413299653d4eSeschrock nvlist_t *nvroot; 4133fa94a07fSbrendan 
nvlist_t **list; 413499653d4eSeschrock int i; 413599653d4eSeschrock 4136fa94a07fSbrendan if (!sav->sav_sync) 413799653d4eSeschrock return; 413899653d4eSeschrock 413999653d4eSeschrock /* 4140fa94a07fSbrendan * Update the MOS nvlist describing the list of available devices. 4141fa94a07fSbrendan * spa_validate_aux() will have already made sure this nvlist is 41423d7072f8Seschrock * valid and the vdevs are labeled appropriately. 414399653d4eSeschrock */ 4144fa94a07fSbrendan if (sav->sav_object == 0) { 4145fa94a07fSbrendan sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 4146fa94a07fSbrendan DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 4147fa94a07fSbrendan sizeof (uint64_t), tx); 414899653d4eSeschrock VERIFY(zap_update(spa->spa_meta_objset, 4149fa94a07fSbrendan DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 4150fa94a07fSbrendan &sav->sav_object, tx) == 0); 415199653d4eSeschrock } 415299653d4eSeschrock 415399653d4eSeschrock VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 4154fa94a07fSbrendan if (sav->sav_count == 0) { 4155fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 415699653d4eSeschrock } else { 4157fa94a07fSbrendan list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 4158fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4159fa94a07fSbrendan list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 4160fa94a07fSbrendan B_FALSE, B_FALSE, B_TRUE); 4161fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 4162fa94a07fSbrendan sav->sav_count) == 0); 4163fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 4164fa94a07fSbrendan nvlist_free(list[i]); 4165fa94a07fSbrendan kmem_free(list, sav->sav_count * sizeof (void *)); 416699653d4eSeschrock } 416799653d4eSeschrock 4168fa94a07fSbrendan spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 416906eeb2adSek nvlist_free(nvroot); 417099653d4eSeschrock 4171fa94a07fSbrendan sav->sav_sync = B_FALSE; 417299653d4eSeschrock } 
417399653d4eSeschrock 417499653d4eSeschrock static void 417599653d4eSeschrock spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 417699653d4eSeschrock { 417799653d4eSeschrock nvlist_t *config; 417899653d4eSeschrock 4179e14bb325SJeff Bonwick if (list_is_empty(&spa->spa_config_dirty_list)) 418099653d4eSeschrock return; 418199653d4eSeschrock 4182e14bb325SJeff Bonwick spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4183e14bb325SJeff Bonwick 4184e14bb325SJeff Bonwick config = spa_config_generate(spa, spa->spa_root_vdev, 4185e14bb325SJeff Bonwick dmu_tx_get_txg(tx), B_FALSE); 4186e14bb325SJeff Bonwick 4187e14bb325SJeff Bonwick spa_config_exit(spa, SCL_STATE, FTAG); 418899653d4eSeschrock 418999653d4eSeschrock if (spa->spa_config_syncing) 419099653d4eSeschrock nvlist_free(spa->spa_config_syncing); 419199653d4eSeschrock spa->spa_config_syncing = config; 419299653d4eSeschrock 419399653d4eSeschrock spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 419499653d4eSeschrock } 419599653d4eSeschrock 4196990b4856Slling /* 4197990b4856Slling * Set zpool properties. 
4198990b4856Slling */ 4199b1b8ab34Slling static void 4200ecd6cf80Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 4201b1b8ab34Slling { 4202b1b8ab34Slling spa_t *spa = arg1; 4203b1b8ab34Slling objset_t *mos = spa->spa_meta_objset; 4204990b4856Slling nvlist_t *nvp = arg2; 4205990b4856Slling nvpair_t *elem; 42063d7072f8Seschrock uint64_t intval; 4207c5904d13Seschrock char *strval; 4208990b4856Slling zpool_prop_t prop; 4209990b4856Slling const char *propname; 4210990b4856Slling zprop_type_t proptype; 4211b1b8ab34Slling 4212e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 4213e14bb325SJeff Bonwick 4214990b4856Slling elem = NULL; 4215990b4856Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 4216990b4856Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 4217990b4856Slling case ZPOOL_PROP_VERSION: 4218990b4856Slling /* 4219990b4856Slling * Only set version for non-zpool-creation cases 4220990b4856Slling * (set/import). spa_create() needs special care 4221990b4856Slling * for version setting. 4222990b4856Slling */ 4223990b4856Slling if (tx->tx_txg != TXG_INITIAL) { 4224990b4856Slling VERIFY(nvpair_value_uint64(elem, 4225990b4856Slling &intval) == 0); 4226990b4856Slling ASSERT(intval <= SPA_VERSION); 4227990b4856Slling ASSERT(intval >= spa_version(spa)); 4228990b4856Slling spa->spa_uberblock.ub_version = intval; 4229990b4856Slling vdev_config_dirty(spa->spa_root_vdev); 4230990b4856Slling } 4231ecd6cf80Smarks break; 4232990b4856Slling 4233990b4856Slling case ZPOOL_PROP_ALTROOT: 4234990b4856Slling /* 4235990b4856Slling * 'altroot' is a non-persistent property. It should 4236990b4856Slling * have been set temporarily at creation or import time. 4237990b4856Slling */ 4238990b4856Slling ASSERT(spa->spa_root != NULL); 4239b1b8ab34Slling break; 42403d7072f8Seschrock 42412f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 4242990b4856Slling /* 4243379c004dSEric Schrock * 'cachefile' is also a non-persisitent property. 
			 */
			break;
		default:
			/*
			 * Set pool property values in the poolprops mos object.
			 */
			if (spa->spa_pool_props_object == 0) {
				objset_t *mos = spa->spa_meta_objset;

				/*
				 * Lazily create the pool-props ZAP object on
				 * the first property set, and record its
				 * object number in the pool directory.
				 */
				VERIFY((spa->spa_pool_props_object =
				    zap_create(mos, DMU_OT_POOL_PROPS,
				    DMU_OT_NONE, 0, tx)) > 0);

				VERIFY(zap_update(mos,
				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
				    8, 1, &spa->spa_pool_props_object, tx)
				    == 0);
			}

			/* normalize the property name */
			propname = zpool_prop_to_name(prop);
			proptype = zpool_prop_get_type(prop);

			if (nvpair_type(elem) == DATA_TYPE_STRING) {
				ASSERT(proptype == PROP_TYPE_STRING);
				VERIFY(nvpair_value_string(elem, &strval) == 0);
				/*
				 * Strings are stored as 1-byte integers; the
				 * length includes the terminating NUL.
				 */
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    1, strlen(strval) + 1, strval, tx) == 0);

			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
				VERIFY(nvpair_value_uint64(elem, &intval) == 0);

				if (proptype == PROP_TYPE_INDEX) {
					const char *unused;
					/* verify the value is a legal index */
					VERIFY(zpool_prop_index_to_string(
					    prop, intval, &unused) == 0);
				}
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    8, 1, &intval, tx) == 0);
			} else {
				ASSERT(0); /* not allowed */
			}

			/*
			 * Mirror properties that are cached in the spa_t into
			 * their in-core fields.
			 */
			switch (prop) {
			case ZPOOL_PROP_DELEGATION:
				spa->spa_delegation = intval;
				break;
			case ZPOOL_PROP_BOOTFS:
				spa->spa_bootfs = intval;
				break;
			case ZPOOL_PROP_FAILUREMODE:
				spa->spa_failmode = intval;
				break;
			case ZPOOL_PROP_AUTOEXPAND:
				spa->spa_autoexpand = intval;
				/* kick off async expansion of leaf vdevs */
				spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
				break;
			default:
				break;
			}
		}

		/*
		 * log internal history if this is not a zpool create
		 *
		 * NOTE(review): intval is logged for every property, but for
		 * string-valued properties it still holds a stale value from
		 * an earlier iteration (or is uninitialized on the first) —
		 * confirm this is intentional.
		 */
		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
		    tx->tx_txg != TXG_INITIAL) {
			spa_history_internal_log(LOG_POOL_PROPSET,
			    spa, tx, cr, "%s %lld %s",
			    nvpair_name(elem), intval, spa_name(spa));
		}
	}

	mutex_exit(&spa->spa_props_lock);
}

/*
 * Sync the specified transaction group. New blocks may be dirtied as
 * part of the process, so we iterate until it converges.
 *
 * Called from the txg sync thread; holds SCL_CONFIG as reader for the
 * duration so that configuration changes are locked out.
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bplist_t *bpl = &spa->spa_sync_bplist;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int dirty_vdevs;
	int error;

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj));

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		/* any child with a non-default deflate ratio disqualifies */
		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	/*
	 * If anything has changed in this txg, push the deferred frees
	 * from the previous txg. If not, leave them alone so that we
	 * don't generate work on an otherwise idle system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg))
		spa_sync_deferred_frees(spa, txg);

	/*
	 * Iterate to convergence.  Each pass may dirty new vdevs; we stop
	 * once a pass completes with no dirty vdevs remaining.
	 */
	do {
		spa->spa_sync_pass++;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		dirty_vdevs = 0;
		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) {
			vdev_sync(vd, txg);
			dirty_vdevs++;
		}

		bplist_sync(bpl, tx);
	} while (dirty_vdevs);

	bplist_close(bpl);

	dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass);

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);

			/*
			 * Pick up to SPA_DVAS_PER_BP eligible top-level
			 * vdevs, starting from a random child to spread
			 * label-write load across the pool.  Log devices
			 * and vdevs with no metaslab array are skipped.
			 */
			for (int c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			/*
			 * On failure retry once with the boolean flag set —
			 * presumably a "try harder" mode; see
			 * vdev_config_sync() for its exact semantics.
			 */
			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(svd, svdcount, txg,
				    B_TRUE);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(rvd->vdev_child,
				    rvd->vdev_children, txg, B_TRUE);
		}

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/*
		 * Label writes failed even on retry: suspend the pool and
		 * wait for an administrative resume before trying again.
		 */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	/* the just-written uberblock is now the last-synced uberblock */
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Clean up the ZIL records for the synced txg.
	 */
	dsl_pool_zil_clean(dp);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
	ASSERT(bpl->bpl_queue == NULL);

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}

/*
 * Sync all pools. We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		/* skip inactive and suspended pools */
		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
			continue;
		/*
		 * The open ref keeps the spa alive while the namespace
		 * lock is dropped for the (potentially long) sync wait.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state. All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks. The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Look up a vdev by guid.  Searches the root vdev tree first; if 'aux' is
 * set, the L2ARC and spare aux-device lists are searched as well.
 * Returns NULL if no vdev with the given guid exists in this pool.
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
	vdev_t *vd;
	int i;

	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
		return (vd);

	if (aux) {
		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
			vd = spa->spa_l2cache.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}

		for (i = 0; i < spa->spa_spares.sav_count; i++) {
			vd = spa->spa_spares.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}
	}

	return (NULL);
}

/*
 * Upgrade the on-disk version of the pool to 'version', dirty the config so
 * the new version is written out, and wait for the change to sync.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Return B_TRUE if the pool has a spare with the given guid, either as an
 * active spare vdev or as a pending spare still being added.
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	/* also check spares whose add is still pending */
	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check if a pool has an active shared spare device.
 * Note: reference count of an active spare is 2, as a spare and as a replace
 */
static boolean_t
spa_has_active_shared_spare(spa_t *spa)
{
	int i, refcnt;
	uint64_t pool;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++) {
		/*
		 * refcnt > 2 means the spare is in use beyond its own
		 * pool's spare + replace references, i.e. shared and active.
		 */
		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
		    refcnt > 2)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Post a sysevent corresponding to the given event. The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h. The payload will be
 * filled in from the spa and (optionally) the vdev. This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t *ev;
	sysevent_attr_list_t *attr = NULL;
	sysevent_value_t value;
	sysevent_id_t eid;

	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	/* attach the pool name */
	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	/* attach the pool guid */
	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	/* optionally attach the vdev guid and path */
	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	/* after a successful attach, the event owns the attribute list */
	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	attr = NULL;

	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}