1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 2199653d4eSeschrock 22fa9e4066Sahrens /* 23379c004dSEric Schrock * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24fa9e4066Sahrens * Use is subject to license terms. 25fa9e4066Sahrens */ 26fa9e4066Sahrens 27fa9e4066Sahrens /* 28fa9e4066Sahrens * This file contains all the routines used when modifying on-disk SPA state. 29fa9e4066Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30fa9e4066Sahrens * pool. 
31fa9e4066Sahrens */ 32fa9e4066Sahrens 33fa9e4066Sahrens #include <sys/zfs_context.h> 34ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h> 35fa9e4066Sahrens #include <sys/spa_impl.h> 36fa9e4066Sahrens #include <sys/zio.h> 37fa9e4066Sahrens #include <sys/zio_checksum.h> 38fa9e4066Sahrens #include <sys/dmu.h> 39fa9e4066Sahrens #include <sys/dmu_tx.h> 40fa9e4066Sahrens #include <sys/zap.h> 41fa9e4066Sahrens #include <sys/zil.h> 42*b24ab676SJeff Bonwick #include <sys/ddt.h> 43fa9e4066Sahrens #include <sys/vdev_impl.h> 44fa9e4066Sahrens #include <sys/metaslab.h> 4588ecc943SGeorge Wilson #include <sys/metaslab_impl.h> 46fa9e4066Sahrens #include <sys/uberblock_impl.h> 47fa9e4066Sahrens #include <sys/txg.h> 48fa9e4066Sahrens #include <sys/avl.h> 49fa9e4066Sahrens #include <sys/dmu_traverse.h> 50b1b8ab34Slling #include <sys/dmu_objset.h> 51fa9e4066Sahrens #include <sys/unique.h> 52fa9e4066Sahrens #include <sys/dsl_pool.h> 53b1b8ab34Slling #include <sys/dsl_dataset.h> 54fa9e4066Sahrens #include <sys/dsl_dir.h> 55fa9e4066Sahrens #include <sys/dsl_prop.h> 56b1b8ab34Slling #include <sys/dsl_synctask.h> 57fa9e4066Sahrens #include <sys/fs/zfs.h> 58fa94a07fSbrendan #include <sys/arc.h> 59fa9e4066Sahrens #include <sys/callb.h> 6095173954Sek #include <sys/systeminfo.h> 6195173954Sek #include <sys/sunddi.h> 62e7cbe64fSgw #include <sys/spa_boot.h> 63573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h> 64fa9e4066Sahrens 655679c89fSjv #ifdef _KERNEL 665679c89fSjv #include <sys/zone.h> 67dedec472SJack Meng #include <sys/bootprops.h> 685679c89fSjv #endif /* _KERNEL */ 695679c89fSjv 70990b4856Slling #include "zfs_prop.h" 71b7b97454Sperrin #include "zfs_comutil.h" 72990b4856Slling 732e0c549eSJonathan Adams enum zti_modes { 742e0c549eSJonathan Adams zti_mode_fixed, /* value is # of threads (min 1) */ 752e0c549eSJonathan Adams zti_mode_online_percent, /* value is % of online CPUs */ 762e0c549eSJonathan Adams zti_mode_tune, /* fill from zio_taskq_tune_* */ 772e0c549eSJonathan Adams zti_nmodes 
78e14bb325SJeff Bonwick }; 79416e0cd8Sek 802e0c549eSJonathan Adams #define ZTI_THREAD_FIX(n) { zti_mode_fixed, (n) } 812e0c549eSJonathan Adams #define ZTI_THREAD_PCT(n) { zti_mode_online_percent, (n) } 822e0c549eSJonathan Adams #define ZTI_THREAD_TUNE { zti_mode_tune, 0 } 832e0c549eSJonathan Adams 842e0c549eSJonathan Adams #define ZTI_THREAD_ONE ZTI_THREAD_FIX(1) 852e0c549eSJonathan Adams 862e0c549eSJonathan Adams typedef struct zio_taskq_info { 872e0c549eSJonathan Adams const char *zti_name; 882e0c549eSJonathan Adams struct { 892e0c549eSJonathan Adams enum zti_modes zti_mode; 902e0c549eSJonathan Adams uint_t zti_value; 912e0c549eSJonathan Adams } zti_nthreads[ZIO_TASKQ_TYPES]; 922e0c549eSJonathan Adams } zio_taskq_info_t; 932e0c549eSJonathan Adams 942e0c549eSJonathan Adams static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 952e0c549eSJonathan Adams "issue", "intr" 962e0c549eSJonathan Adams }; 972e0c549eSJonathan Adams 982e0c549eSJonathan Adams const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = { 992e0c549eSJonathan Adams /* ISSUE INTR */ 1002e0c549eSJonathan Adams { "spa_zio_null", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1012e0c549eSJonathan Adams { "spa_zio_read", { ZTI_THREAD_FIX(8), ZTI_THREAD_TUNE } }, 1022e0c549eSJonathan Adams { "spa_zio_write", { ZTI_THREAD_TUNE, ZTI_THREAD_FIX(8) } }, 1032e0c549eSJonathan Adams { "spa_zio_free", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1042e0c549eSJonathan Adams { "spa_zio_claim", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1052e0c549eSJonathan Adams { "spa_zio_ioctl", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } }, 1062e0c549eSJonathan Adams }; 1072e0c549eSJonathan Adams 1082e0c549eSJonathan Adams enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent; 1092e0c549eSJonathan Adams uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */ 1102e0c549eSJonathan Adams 111990b4856Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 11289a89ebfSlling static boolean_t 
spa_has_active_shared_spare(spa_t *spa); 113990b4856Slling 114990b4856Slling /* 115990b4856Slling * ========================================================================== 116990b4856Slling * SPA properties routines 117990b4856Slling * ========================================================================== 118990b4856Slling */ 119990b4856Slling 120990b4856Slling /* 121990b4856Slling * Add a (source=src, propname=propval) list to an nvlist. 122990b4856Slling */ 1239d82f4f6Slling static void 124990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 125990b4856Slling uint64_t intval, zprop_source_t src) 126990b4856Slling { 127990b4856Slling const char *propname = zpool_prop_to_name(prop); 128990b4856Slling nvlist_t *propval; 129990b4856Slling 1309d82f4f6Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1319d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 132990b4856Slling 1339d82f4f6Slling if (strval != NULL) 1349d82f4f6Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1359d82f4f6Slling else 1369d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 137990b4856Slling 1389d82f4f6Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 139990b4856Slling nvlist_free(propval); 140990b4856Slling } 141990b4856Slling 142990b4856Slling /* 143990b4856Slling * Get property values from the spa configuration. 
144990b4856Slling */ 1459d82f4f6Slling static void 146990b4856Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 147990b4856Slling { 148379c004dSEric Schrock uint64_t size; 149379c004dSEric Schrock uint64_t used; 150990b4856Slling uint64_t cap, version; 151990b4856Slling zprop_source_t src = ZPROP_SRC_NONE; 152c5904d13Seschrock spa_config_dirent_t *dp; 153990b4856Slling 154e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 155e14bb325SJeff Bonwick 156379c004dSEric Schrock if (spa->spa_root_vdev != NULL) { 157*b24ab676SJeff Bonwick used = metaslab_class_get_alloc(spa_normal_class(spa)); 158*b24ab676SJeff Bonwick size = metaslab_class_get_space(spa_normal_class(spa)); 159379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 160379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 161379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 162379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, 163379c004dSEric Schrock size - used, src); 164379c004dSEric Schrock 165379c004dSEric Schrock cap = (size == 0) ? 
0 : (used * 100 / size); 166379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 167379c004dSEric Schrock 168*b24ab676SJeff Bonwick spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, 169*b24ab676SJeff Bonwick ddt_get_pool_dedup_ratio(spa), src); 170*b24ab676SJeff Bonwick 171379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 172379c004dSEric Schrock spa->spa_root_vdev->vdev_state, src); 173379c004dSEric Schrock 174379c004dSEric Schrock version = spa_version(spa); 175379c004dSEric Schrock if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 176379c004dSEric Schrock src = ZPROP_SRC_DEFAULT; 177379c004dSEric Schrock else 178379c004dSEric Schrock src = ZPROP_SRC_LOCAL; 179379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 180379c004dSEric Schrock } 181990b4856Slling 1829d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 183990b4856Slling 1849d82f4f6Slling if (spa->spa_root != NULL) 1859d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 1869d82f4f6Slling 0, ZPROP_SRC_LOCAL); 187990b4856Slling 188c5904d13Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 189c5904d13Seschrock if (dp->scd_path == NULL) { 1909d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 191c5904d13Seschrock "none", 0, ZPROP_SRC_LOCAL); 192c5904d13Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 1939d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 194c5904d13Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 1952f8aaab3Seschrock } 1962f8aaab3Seschrock } 197990b4856Slling } 198990b4856Slling 199990b4856Slling /* 200990b4856Slling * Get zpool property values. 
201990b4856Slling */ 202990b4856Slling int 203990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 204990b4856Slling { 205*b24ab676SJeff Bonwick objset_t *mos = spa->spa_meta_objset; 206990b4856Slling zap_cursor_t zc; 207990b4856Slling zap_attribute_t za; 208990b4856Slling int err; 209990b4856Slling 2109d82f4f6Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 211990b4856Slling 212e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 213e14bb325SJeff Bonwick 214990b4856Slling /* 215990b4856Slling * Get properties from the spa config. 216990b4856Slling */ 2179d82f4f6Slling spa_prop_get_config(spa, nvp); 218990b4856Slling 219990b4856Slling /* If no pool property object, no more prop to get. */ 220990b4856Slling if (spa->spa_pool_props_object == 0) { 221990b4856Slling mutex_exit(&spa->spa_props_lock); 222990b4856Slling return (0); 223990b4856Slling } 224990b4856Slling 225990b4856Slling /* 226990b4856Slling * Get properties from the MOS pool property object. 227990b4856Slling */ 228990b4856Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 229990b4856Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 230990b4856Slling zap_cursor_advance(&zc)) { 231990b4856Slling uint64_t intval = 0; 232990b4856Slling char *strval = NULL; 233990b4856Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 234990b4856Slling zpool_prop_t prop; 235990b4856Slling 236990b4856Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 237990b4856Slling continue; 238990b4856Slling 239990b4856Slling switch (za.za_integer_length) { 240990b4856Slling case 8: 241990b4856Slling /* integer property */ 242990b4856Slling if (za.za_first_integer != 243990b4856Slling zpool_prop_default_numeric(prop)) 244990b4856Slling src = ZPROP_SRC_LOCAL; 245990b4856Slling 246990b4856Slling if (prop == ZPOOL_PROP_BOOTFS) { 247990b4856Slling dsl_pool_t *dp; 248990b4856Slling dsl_dataset_t *ds = NULL; 249990b4856Slling 250990b4856Slling dp = spa_get_dsl(spa); 251990b4856Slling 
rw_enter(&dp->dp_config_rwlock, RW_READER); 252745cd3c5Smaybee if (err = dsl_dataset_hold_obj(dp, 253745cd3c5Smaybee za.za_first_integer, FTAG, &ds)) { 254990b4856Slling rw_exit(&dp->dp_config_rwlock); 255990b4856Slling break; 256990b4856Slling } 257990b4856Slling 258990b4856Slling strval = kmem_alloc( 259990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 260990b4856Slling KM_SLEEP); 261990b4856Slling dsl_dataset_name(ds, strval); 262745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 263990b4856Slling rw_exit(&dp->dp_config_rwlock); 264990b4856Slling } else { 265990b4856Slling strval = NULL; 266990b4856Slling intval = za.za_first_integer; 267990b4856Slling } 268990b4856Slling 2699d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 270990b4856Slling 271990b4856Slling if (strval != NULL) 272990b4856Slling kmem_free(strval, 273990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 274990b4856Slling 275990b4856Slling break; 276990b4856Slling 277990b4856Slling case 1: 278990b4856Slling /* string property */ 279990b4856Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 280990b4856Slling err = zap_lookup(mos, spa->spa_pool_props_object, 281990b4856Slling za.za_name, 1, za.za_num_integers, strval); 282990b4856Slling if (err) { 283990b4856Slling kmem_free(strval, za.za_num_integers); 284990b4856Slling break; 285990b4856Slling } 2869d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 287990b4856Slling kmem_free(strval, za.za_num_integers); 288990b4856Slling break; 289990b4856Slling 290990b4856Slling default: 291990b4856Slling break; 292990b4856Slling } 293990b4856Slling } 294990b4856Slling zap_cursor_fini(&zc); 295990b4856Slling mutex_exit(&spa->spa_props_lock); 296990b4856Slling out: 297990b4856Slling if (err && err != ENOENT) { 298990b4856Slling nvlist_free(*nvp); 2999d82f4f6Slling *nvp = NULL; 300990b4856Slling return (err); 301990b4856Slling } 302990b4856Slling 303990b4856Slling return (0); 304990b4856Slling } 305990b4856Slling 306990b4856Slling 
/* 307990b4856Slling * Validate the given pool properties nvlist and modify the list 308990b4856Slling * for the property values to be set. 309990b4856Slling */ 310990b4856Slling static int 311990b4856Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 312990b4856Slling { 313990b4856Slling nvpair_t *elem; 314990b4856Slling int error = 0, reset_bootfs = 0; 315990b4856Slling uint64_t objnum; 316990b4856Slling 317990b4856Slling elem = NULL; 318990b4856Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 319990b4856Slling zpool_prop_t prop; 320990b4856Slling char *propname, *strval; 321990b4856Slling uint64_t intval; 322990b4856Slling objset_t *os; 3232f8aaab3Seschrock char *slash; 324990b4856Slling 325990b4856Slling propname = nvpair_name(elem); 326990b4856Slling 327990b4856Slling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 328990b4856Slling return (EINVAL); 329990b4856Slling 330990b4856Slling switch (prop) { 331990b4856Slling case ZPOOL_PROP_VERSION: 332990b4856Slling error = nvpair_value_uint64(elem, &intval); 333990b4856Slling if (!error && 334990b4856Slling (intval < spa_version(spa) || intval > SPA_VERSION)) 335990b4856Slling error = EINVAL; 336990b4856Slling break; 337990b4856Slling 338990b4856Slling case ZPOOL_PROP_DELEGATION: 339990b4856Slling case ZPOOL_PROP_AUTOREPLACE: 340d5b5bb25SRich Morris case ZPOOL_PROP_LISTSNAPS: 341573ca77eSGeorge Wilson case ZPOOL_PROP_AUTOEXPAND: 342990b4856Slling error = nvpair_value_uint64(elem, &intval); 343990b4856Slling if (!error && intval > 1) 344990b4856Slling error = EINVAL; 345990b4856Slling break; 346990b4856Slling 347990b4856Slling case ZPOOL_PROP_BOOTFS: 34825f89ee2SJeff Bonwick /* 34925f89ee2SJeff Bonwick * If the pool version is less than SPA_VERSION_BOOTFS, 35025f89ee2SJeff Bonwick * or the pool is still being created (version == 0), 35125f89ee2SJeff Bonwick * the bootfs property cannot be set. 
35225f89ee2SJeff Bonwick */ 353990b4856Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 354990b4856Slling error = ENOTSUP; 355990b4856Slling break; 356990b4856Slling } 357990b4856Slling 358990b4856Slling /* 35915e6edf1Sgw * Make sure the vdev config is bootable 360990b4856Slling */ 36115e6edf1Sgw if (!vdev_is_bootable(spa->spa_root_vdev)) { 362990b4856Slling error = ENOTSUP; 363990b4856Slling break; 364990b4856Slling } 365990b4856Slling 366990b4856Slling reset_bootfs = 1; 367990b4856Slling 368990b4856Slling error = nvpair_value_string(elem, &strval); 369990b4856Slling 370990b4856Slling if (!error) { 37115e6edf1Sgw uint64_t compress; 37215e6edf1Sgw 373990b4856Slling if (strval == NULL || strval[0] == '\0') { 374990b4856Slling objnum = zpool_prop_default_numeric( 375990b4856Slling ZPOOL_PROP_BOOTFS); 376990b4856Slling break; 377990b4856Slling } 378990b4856Slling 379503ad85cSMatthew Ahrens if (error = dmu_objset_hold(strval, FTAG, &os)) 380990b4856Slling break; 38115e6edf1Sgw 382503ad85cSMatthew Ahrens /* Must be ZPL and not gzip compressed. 
*/ 383503ad85cSMatthew Ahrens 384503ad85cSMatthew Ahrens if (dmu_objset_type(os) != DMU_OST_ZFS) { 385503ad85cSMatthew Ahrens error = ENOTSUP; 386503ad85cSMatthew Ahrens } else if ((error = dsl_prop_get_integer(strval, 38715e6edf1Sgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 38815e6edf1Sgw &compress, NULL)) == 0 && 38915e6edf1Sgw !BOOTFS_COMPRESS_VALID(compress)) { 39015e6edf1Sgw error = ENOTSUP; 39115e6edf1Sgw } else { 39215e6edf1Sgw objnum = dmu_objset_id(os); 39315e6edf1Sgw } 394503ad85cSMatthew Ahrens dmu_objset_rele(os, FTAG); 395990b4856Slling } 396990b4856Slling break; 397e14bb325SJeff Bonwick 3980a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 3990a4e9518Sgw error = nvpair_value_uint64(elem, &intval); 4000a4e9518Sgw if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 4010a4e9518Sgw intval > ZIO_FAILURE_MODE_PANIC)) 4020a4e9518Sgw error = EINVAL; 4030a4e9518Sgw 4040a4e9518Sgw /* 4050a4e9518Sgw * This is a special case which only occurs when 4060a4e9518Sgw * the pool has completely failed. This allows 4070a4e9518Sgw * the user to change the in-core failmode property 4080a4e9518Sgw * without syncing it out to disk (I/Os might 4090a4e9518Sgw * currently be blocked). We do this by returning 4100a4e9518Sgw * EIO to the caller (spa_prop_set) to trick it 4110a4e9518Sgw * into thinking we encountered a property validation 4120a4e9518Sgw * error. 
4130a4e9518Sgw */ 414e14bb325SJeff Bonwick if (!error && spa_suspended(spa)) { 4150a4e9518Sgw spa->spa_failmode = intval; 4160a4e9518Sgw error = EIO; 4170a4e9518Sgw } 4180a4e9518Sgw break; 4192f8aaab3Seschrock 4202f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 4212f8aaab3Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 4222f8aaab3Seschrock break; 4232f8aaab3Seschrock 4242f8aaab3Seschrock if (strval[0] == '\0') 4252f8aaab3Seschrock break; 4262f8aaab3Seschrock 4272f8aaab3Seschrock if (strcmp(strval, "none") == 0) 4282f8aaab3Seschrock break; 4292f8aaab3Seschrock 4302f8aaab3Seschrock if (strval[0] != '/') { 4312f8aaab3Seschrock error = EINVAL; 4322f8aaab3Seschrock break; 4332f8aaab3Seschrock } 4342f8aaab3Seschrock 4352f8aaab3Seschrock slash = strrchr(strval, '/'); 4362f8aaab3Seschrock ASSERT(slash != NULL); 4372f8aaab3Seschrock 4382f8aaab3Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 4392f8aaab3Seschrock strcmp(slash, "/..") == 0) 4402f8aaab3Seschrock error = EINVAL; 4412f8aaab3Seschrock break; 442*b24ab676SJeff Bonwick 443*b24ab676SJeff Bonwick case ZPOOL_PROP_DEDUPDITTO: 444*b24ab676SJeff Bonwick if (spa_version(spa) < SPA_VERSION_DEDUP) 445*b24ab676SJeff Bonwick error = ENOTSUP; 446*b24ab676SJeff Bonwick else 447*b24ab676SJeff Bonwick error = nvpair_value_uint64(elem, &intval); 448*b24ab676SJeff Bonwick if (error == 0 && 449*b24ab676SJeff Bonwick intval != 0 && intval < ZIO_DEDUPDITTO_MIN) 450*b24ab676SJeff Bonwick error = EINVAL; 451*b24ab676SJeff Bonwick break; 452990b4856Slling } 453990b4856Slling 454990b4856Slling if (error) 455990b4856Slling break; 456990b4856Slling } 457990b4856Slling 458990b4856Slling if (!error && reset_bootfs) { 459990b4856Slling error = nvlist_remove(props, 460990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 461990b4856Slling 462990b4856Slling if (!error) { 463990b4856Slling error = nvlist_add_uint64(props, 464990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 
465990b4856Slling } 466990b4856Slling } 467990b4856Slling 468990b4856Slling return (error); 469990b4856Slling } 470990b4856Slling 471379c004dSEric Schrock void 472379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 473379c004dSEric Schrock { 474379c004dSEric Schrock char *cachefile; 475379c004dSEric Schrock spa_config_dirent_t *dp; 476379c004dSEric Schrock 477379c004dSEric Schrock if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 478379c004dSEric Schrock &cachefile) != 0) 479379c004dSEric Schrock return; 480379c004dSEric Schrock 481379c004dSEric Schrock dp = kmem_alloc(sizeof (spa_config_dirent_t), 482379c004dSEric Schrock KM_SLEEP); 483379c004dSEric Schrock 484379c004dSEric Schrock if (cachefile[0] == '\0') 485379c004dSEric Schrock dp->scd_path = spa_strdup(spa_config_path); 486379c004dSEric Schrock else if (strcmp(cachefile, "none") == 0) 487379c004dSEric Schrock dp->scd_path = NULL; 488379c004dSEric Schrock else 489379c004dSEric Schrock dp->scd_path = spa_strdup(cachefile); 490379c004dSEric Schrock 491379c004dSEric Schrock list_insert_head(&spa->spa_config_list, dp); 492379c004dSEric Schrock if (need_sync) 493379c004dSEric Schrock spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 494379c004dSEric Schrock } 495379c004dSEric Schrock 496990b4856Slling int 497990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 498990b4856Slling { 499990b4856Slling int error; 500379c004dSEric Schrock nvpair_t *elem; 501379c004dSEric Schrock boolean_t need_sync = B_FALSE; 502379c004dSEric Schrock zpool_prop_t prop; 503990b4856Slling 504990b4856Slling if ((error = spa_prop_validate(spa, nvp)) != 0) 505990b4856Slling return (error); 506990b4856Slling 507379c004dSEric Schrock elem = NULL; 508379c004dSEric Schrock while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 509379c004dSEric Schrock if ((prop = zpool_name_to_prop( 510379c004dSEric Schrock nvpair_name(elem))) == ZPROP_INVAL) 511379c004dSEric Schrock return (EINVAL); 
512379c004dSEric Schrock 513379c004dSEric Schrock if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 514379c004dSEric Schrock continue; 515379c004dSEric Schrock 516379c004dSEric Schrock need_sync = B_TRUE; 517379c004dSEric Schrock break; 518379c004dSEric Schrock } 519379c004dSEric Schrock 520379c004dSEric Schrock if (need_sync) 521379c004dSEric Schrock return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 522379c004dSEric Schrock spa, nvp, 3)); 523379c004dSEric Schrock else 524379c004dSEric Schrock return (0); 525990b4856Slling } 526990b4856Slling 527990b4856Slling /* 528990b4856Slling * If the bootfs property value is dsobj, clear it. 529990b4856Slling */ 530990b4856Slling void 531990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 532990b4856Slling { 533990b4856Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 534990b4856Slling VERIFY(zap_remove(spa->spa_meta_objset, 535990b4856Slling spa->spa_pool_props_object, 536990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 537990b4856Slling spa->spa_bootfs = 0; 538990b4856Slling } 539990b4856Slling } 540990b4856Slling 541fa9e4066Sahrens /* 542fa9e4066Sahrens * ========================================================================== 543fa9e4066Sahrens * SPA state manipulation (open/create/destroy/import/export) 544fa9e4066Sahrens * ========================================================================== 545fa9e4066Sahrens */ 546fa9e4066Sahrens 547ea8dc4b6Seschrock static int 548ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b) 549ea8dc4b6Seschrock { 550ea8dc4b6Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 551ea8dc4b6Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 552ea8dc4b6Seschrock int ret; 553ea8dc4b6Seschrock 554ea8dc4b6Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 555ea8dc4b6Seschrock sizeof (zbookmark_t)); 556ea8dc4b6Seschrock 557ea8dc4b6Seschrock if (ret < 0) 
558ea8dc4b6Seschrock return (-1); 559ea8dc4b6Seschrock else if (ret > 0) 560ea8dc4b6Seschrock return (1); 561ea8dc4b6Seschrock else 562ea8dc4b6Seschrock return (0); 563ea8dc4b6Seschrock } 564ea8dc4b6Seschrock 565ea8dc4b6Seschrock /* 566ea8dc4b6Seschrock * Utility function which retrieves copies of the current logs and 567ea8dc4b6Seschrock * re-initializes them in the process. 568ea8dc4b6Seschrock */ 569ea8dc4b6Seschrock void 570ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 571ea8dc4b6Seschrock { 572ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 573ea8dc4b6Seschrock 574ea8dc4b6Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 575ea8dc4b6Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 576ea8dc4b6Seschrock 577ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 578ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 579ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 580ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 581ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 582ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 583ea8dc4b6Seschrock } 584ea8dc4b6Seschrock 585fa9e4066Sahrens /* 586fa9e4066Sahrens * Activate an uninitialized pool. 
587fa9e4066Sahrens */ 588fa9e4066Sahrens static void 5898ad4d6ddSJeff Bonwick spa_activate(spa_t *spa, int mode) 590fa9e4066Sahrens { 591fa9e4066Sahrens ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 592fa9e4066Sahrens 593fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 5948ad4d6ddSJeff Bonwick spa->spa_mode = mode; 595fa9e4066Sahrens 59688ecc943SGeorge Wilson spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 59788ecc943SGeorge Wilson spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 598fa9e4066Sahrens 599e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 6002e0c549eSJonathan Adams const zio_taskq_info_t *ztip = &zio_taskqs[t]; 601e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 6022e0c549eSJonathan Adams enum zti_modes mode = ztip->zti_nthreads[q].zti_mode; 6032e0c549eSJonathan Adams uint_t value = ztip->zti_nthreads[q].zti_value; 6042e0c549eSJonathan Adams char name[32]; 6052e0c549eSJonathan Adams 6062e0c549eSJonathan Adams (void) snprintf(name, sizeof (name), 6072e0c549eSJonathan Adams "%s_%s", ztip->zti_name, zio_taskq_types[q]); 6082e0c549eSJonathan Adams 6092e0c549eSJonathan Adams if (mode == zti_mode_tune) { 6102e0c549eSJonathan Adams mode = zio_taskq_tune_mode; 6112e0c549eSJonathan Adams value = zio_taskq_tune_value; 6122e0c549eSJonathan Adams if (mode == zti_mode_tune) 6132e0c549eSJonathan Adams mode = zti_mode_online_percent; 6142e0c549eSJonathan Adams } 6152e0c549eSJonathan Adams 6162e0c549eSJonathan Adams switch (mode) { 6172e0c549eSJonathan Adams case zti_mode_fixed: 6182e0c549eSJonathan Adams ASSERT3U(value, >=, 1); 6192e0c549eSJonathan Adams value = MAX(value, 1); 6202e0c549eSJonathan Adams 6212e0c549eSJonathan Adams spa->spa_zio_taskq[t][q] = taskq_create(name, 6222e0c549eSJonathan Adams value, maxclsyspri, 50, INT_MAX, 6232e0c549eSJonathan Adams TASKQ_PREPOPULATE); 6242e0c549eSJonathan Adams break; 6252e0c549eSJonathan Adams 6262e0c549eSJonathan Adams case 
zti_mode_online_percent: 6272e0c549eSJonathan Adams spa->spa_zio_taskq[t][q] = taskq_create(name, 6282e0c549eSJonathan Adams value, maxclsyspri, 50, INT_MAX, 6292e0c549eSJonathan Adams TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 6302e0c549eSJonathan Adams break; 6312e0c549eSJonathan Adams 6322e0c549eSJonathan Adams case zti_mode_tune: 6332e0c549eSJonathan Adams default: 6342e0c549eSJonathan Adams panic("unrecognized mode for " 6352e0c549eSJonathan Adams "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) " 6362e0c549eSJonathan Adams "in spa_activate()", 6372e0c549eSJonathan Adams t, q, mode, value); 6382e0c549eSJonathan Adams break; 6392e0c549eSJonathan Adams } 640e14bb325SJeff Bonwick } 641fa9e4066Sahrens } 642fa9e4066Sahrens 643e14bb325SJeff Bonwick list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 644e14bb325SJeff Bonwick offsetof(vdev_t, vdev_config_dirty_node)); 645e14bb325SJeff Bonwick list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 646e14bb325SJeff Bonwick offsetof(vdev_t, vdev_state_dirty_node)); 647fa9e4066Sahrens 648fa9e4066Sahrens txg_list_create(&spa->spa_vdev_txg_list, 649fa9e4066Sahrens offsetof(struct vdev, vdev_txg_node)); 650ea8dc4b6Seschrock 651ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 652ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 653ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 654ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 655ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 656ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 657fa9e4066Sahrens } 658fa9e4066Sahrens 659fa9e4066Sahrens /* 660fa9e4066Sahrens * Opposite of spa_activate(). 
661fa9e4066Sahrens */ 662fa9e4066Sahrens static void 663fa9e4066Sahrens spa_deactivate(spa_t *spa) 664fa9e4066Sahrens { 665fa9e4066Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 666fa9e4066Sahrens ASSERT(spa->spa_dsl_pool == NULL); 667fa9e4066Sahrens ASSERT(spa->spa_root_vdev == NULL); 66825f89ee2SJeff Bonwick ASSERT(spa->spa_async_zio_root == NULL); 669fa9e4066Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 670fa9e4066Sahrens 671fa9e4066Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 672fa9e4066Sahrens 673e14bb325SJeff Bonwick list_destroy(&spa->spa_config_dirty_list); 674e14bb325SJeff Bonwick list_destroy(&spa->spa_state_dirty_list); 675fa9e4066Sahrens 676e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 677e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 678e14bb325SJeff Bonwick taskq_destroy(spa->spa_zio_taskq[t][q]); 679e14bb325SJeff Bonwick spa->spa_zio_taskq[t][q] = NULL; 680e14bb325SJeff Bonwick } 681fa9e4066Sahrens } 682fa9e4066Sahrens 683fa9e4066Sahrens metaslab_class_destroy(spa->spa_normal_class); 684fa9e4066Sahrens spa->spa_normal_class = NULL; 685fa9e4066Sahrens 6868654d025Sperrin metaslab_class_destroy(spa->spa_log_class); 6878654d025Sperrin spa->spa_log_class = NULL; 6888654d025Sperrin 689ea8dc4b6Seschrock /* 690ea8dc4b6Seschrock * If this was part of an import or the open otherwise failed, we may 691ea8dc4b6Seschrock * still have errors left in the queues. Empty them just in case. 692ea8dc4b6Seschrock */ 693ea8dc4b6Seschrock spa_errlog_drain(spa); 694ea8dc4b6Seschrock 695ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_scrub); 696ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_last); 697ea8dc4b6Seschrock 698fa9e4066Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 699fa9e4066Sahrens } 700fa9e4066Sahrens 701fa9e4066Sahrens /* 702fa9e4066Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. 
This 703fa9e4066Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 704fa9e4066Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 705fa9e4066Sahrens * All vdev validation is done by the vdev_alloc() routine. 706fa9e4066Sahrens */ 70799653d4eSeschrock static int 70899653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 70999653d4eSeschrock uint_t id, int atype) 710fa9e4066Sahrens { 711fa9e4066Sahrens nvlist_t **child; 712573ca77eSGeorge Wilson uint_t children; 71399653d4eSeschrock int error; 714fa9e4066Sahrens 71599653d4eSeschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 71699653d4eSeschrock return (error); 717fa9e4066Sahrens 71899653d4eSeschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 71999653d4eSeschrock return (0); 720fa9e4066Sahrens 721e14bb325SJeff Bonwick error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 722e14bb325SJeff Bonwick &child, &children); 723e14bb325SJeff Bonwick 724e14bb325SJeff Bonwick if (error == ENOENT) 725e14bb325SJeff Bonwick return (0); 726e14bb325SJeff Bonwick 727e14bb325SJeff Bonwick if (error) { 72899653d4eSeschrock vdev_free(*vdp); 72999653d4eSeschrock *vdp = NULL; 73099653d4eSeschrock return (EINVAL); 731fa9e4066Sahrens } 732fa9e4066Sahrens 733573ca77eSGeorge Wilson for (int c = 0; c < children; c++) { 73499653d4eSeschrock vdev_t *vd; 73599653d4eSeschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 73699653d4eSeschrock atype)) != 0) { 73799653d4eSeschrock vdev_free(*vdp); 73899653d4eSeschrock *vdp = NULL; 73999653d4eSeschrock return (error); 740fa9e4066Sahrens } 741fa9e4066Sahrens } 742fa9e4066Sahrens 74399653d4eSeschrock ASSERT(*vdp != NULL); 74499653d4eSeschrock 74599653d4eSeschrock return (0); 746fa9e4066Sahrens } 747fa9e4066Sahrens 748fa9e4066Sahrens /* 749fa9e4066Sahrens * Opposite of spa_load(). 
750fa9e4066Sahrens */ 751fa9e4066Sahrens static void 752fa9e4066Sahrens spa_unload(spa_t *spa) 753fa9e4066Sahrens { 75499653d4eSeschrock int i; 75599653d4eSeschrock 756e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 757e14bb325SJeff Bonwick 758ea8dc4b6Seschrock /* 759ea8dc4b6Seschrock * Stop async tasks. 760ea8dc4b6Seschrock */ 761ea8dc4b6Seschrock spa_async_suspend(spa); 762ea8dc4b6Seschrock 763fa9e4066Sahrens /* 764fa9e4066Sahrens * Stop syncing. 765fa9e4066Sahrens */ 766fa9e4066Sahrens if (spa->spa_sync_on) { 767fa9e4066Sahrens txg_sync_stop(spa->spa_dsl_pool); 768fa9e4066Sahrens spa->spa_sync_on = B_FALSE; 769fa9e4066Sahrens } 770fa9e4066Sahrens 771fa9e4066Sahrens /* 772e14bb325SJeff Bonwick * Wait for any outstanding async I/O to complete. 773fa9e4066Sahrens */ 77454d692b7SGeorge Wilson if (spa->spa_async_zio_root != NULL) { 77554d692b7SGeorge Wilson (void) zio_wait(spa->spa_async_zio_root); 77654d692b7SGeorge Wilson spa->spa_async_zio_root = NULL; 77754d692b7SGeorge Wilson } 778fa9e4066Sahrens 779fa9e4066Sahrens /* 780fa9e4066Sahrens * Close the dsl pool. 781fa9e4066Sahrens */ 782fa9e4066Sahrens if (spa->spa_dsl_pool) { 783fa9e4066Sahrens dsl_pool_close(spa->spa_dsl_pool); 784fa9e4066Sahrens spa->spa_dsl_pool = NULL; 785fa9e4066Sahrens } 786fa9e4066Sahrens 787*b24ab676SJeff Bonwick ddt_unload(spa); 788*b24ab676SJeff Bonwick 7898ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 7908ad4d6ddSJeff Bonwick 7918ad4d6ddSJeff Bonwick /* 7928ad4d6ddSJeff Bonwick * Drop and purge level 2 cache 7938ad4d6ddSJeff Bonwick */ 7948ad4d6ddSJeff Bonwick spa_l2cache_drop(spa); 7958ad4d6ddSJeff Bonwick 796fa9e4066Sahrens /* 797fa9e4066Sahrens * Close all vdevs. 
798fa9e4066Sahrens */ 7990e34b6a7Sbonwick if (spa->spa_root_vdev) 800fa9e4066Sahrens vdev_free(spa->spa_root_vdev); 8010e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == NULL); 802ea8dc4b6Seschrock 803fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 804fa94a07fSbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 805fa94a07fSbrendan if (spa->spa_spares.sav_vdevs) { 806fa94a07fSbrendan kmem_free(spa->spa_spares.sav_vdevs, 807fa94a07fSbrendan spa->spa_spares.sav_count * sizeof (void *)); 808fa94a07fSbrendan spa->spa_spares.sav_vdevs = NULL; 80999653d4eSeschrock } 810fa94a07fSbrendan if (spa->spa_spares.sav_config) { 811fa94a07fSbrendan nvlist_free(spa->spa_spares.sav_config); 812fa94a07fSbrendan spa->spa_spares.sav_config = NULL; 813fa94a07fSbrendan } 8142ce8af81SEric Schrock spa->spa_spares.sav_count = 0; 815fa94a07fSbrendan 816fa94a07fSbrendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 817fa94a07fSbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 818fa94a07fSbrendan if (spa->spa_l2cache.sav_vdevs) { 819fa94a07fSbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 820fa94a07fSbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 821fa94a07fSbrendan spa->spa_l2cache.sav_vdevs = NULL; 822fa94a07fSbrendan } 823fa94a07fSbrendan if (spa->spa_l2cache.sav_config) { 824fa94a07fSbrendan nvlist_free(spa->spa_l2cache.sav_config); 825fa94a07fSbrendan spa->spa_l2cache.sav_config = NULL; 82699653d4eSeschrock } 8272ce8af81SEric Schrock spa->spa_l2cache.sav_count = 0; 82899653d4eSeschrock 829ea8dc4b6Seschrock spa->spa_async_suspended = 0; 8308ad4d6ddSJeff Bonwick 8318ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 832fa9e4066Sahrens } 833fa9e4066Sahrens 83499653d4eSeschrock /* 83599653d4eSeschrock * Load (or re-load) the current list of vdevs describing the active spares for 83699653d4eSeschrock * this pool. When this is called, we have some form of basic information in 837fa94a07fSbrendan * 'spa_spares.sav_config'. 
We parse this into vdevs, try to open them, and 838fa94a07fSbrendan * then re-generate a more complete list including status information. 83999653d4eSeschrock */ 84099653d4eSeschrock static void 84199653d4eSeschrock spa_load_spares(spa_t *spa) 84299653d4eSeschrock { 84399653d4eSeschrock nvlist_t **spares; 84499653d4eSeschrock uint_t nspares; 84599653d4eSeschrock int i; 84639c23413Seschrock vdev_t *vd, *tvd; 84799653d4eSeschrock 848e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 849e14bb325SJeff Bonwick 85099653d4eSeschrock /* 85199653d4eSeschrock * First, close and free any existing spare vdevs. 85299653d4eSeschrock */ 853fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 854fa94a07fSbrendan vd = spa->spa_spares.sav_vdevs[i]; 85539c23413Seschrock 85639c23413Seschrock /* Undo the call to spa_activate() below */ 857c5904d13Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 858c5904d13Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 85939c23413Seschrock spa_spare_remove(tvd); 86039c23413Seschrock vdev_close(vd); 86139c23413Seschrock vdev_free(vd); 86299653d4eSeschrock } 86339c23413Seschrock 864fa94a07fSbrendan if (spa->spa_spares.sav_vdevs) 865fa94a07fSbrendan kmem_free(spa->spa_spares.sav_vdevs, 866fa94a07fSbrendan spa->spa_spares.sav_count * sizeof (void *)); 86799653d4eSeschrock 868fa94a07fSbrendan if (spa->spa_spares.sav_config == NULL) 86999653d4eSeschrock nspares = 0; 87099653d4eSeschrock else 871fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 87299653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 87399653d4eSeschrock 874fa94a07fSbrendan spa->spa_spares.sav_count = (int)nspares; 875fa94a07fSbrendan spa->spa_spares.sav_vdevs = NULL; 87699653d4eSeschrock 87799653d4eSeschrock if (nspares == 0) 87899653d4eSeschrock return; 87999653d4eSeschrock 88099653d4eSeschrock /* 88199653d4eSeschrock * Construct the array of vdevs, opening them to get status in the 
88239c23413Seschrock * process. For each spare, there is potentially two different vdev_t 88339c23413Seschrock * structures associated with it: one in the list of spares (used only 88439c23413Seschrock * for basic validation purposes) and one in the active vdev 88539c23413Seschrock * configuration (if it's spared in). During this phase we open and 88639c23413Seschrock * validate each vdev on the spare list. If the vdev also exists in the 88739c23413Seschrock * active configuration, then we also mark this vdev as an active spare. 88899653d4eSeschrock */ 889fa94a07fSbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 890fa94a07fSbrendan KM_SLEEP); 891fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 89299653d4eSeschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 89399653d4eSeschrock VDEV_ALLOC_SPARE) == 0); 89499653d4eSeschrock ASSERT(vd != NULL); 89599653d4eSeschrock 896fa94a07fSbrendan spa->spa_spares.sav_vdevs[i] = vd; 89799653d4eSeschrock 898c5904d13Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 899c5904d13Seschrock B_FALSE)) != NULL) { 90039c23413Seschrock if (!tvd->vdev_isspare) 90139c23413Seschrock spa_spare_add(tvd); 90239c23413Seschrock 90339c23413Seschrock /* 90439c23413Seschrock * We only mark the spare active if we were successfully 90539c23413Seschrock * able to load the vdev. Otherwise, importing a pool 90639c23413Seschrock * with a bad active spare would result in strange 90739c23413Seschrock * behavior, because multiple pool would think the spare 90839c23413Seschrock * is actively in use. 90939c23413Seschrock * 91039c23413Seschrock * There is a vulnerability here to an equally bizarre 91139c23413Seschrock * circumstance, where a dead active spare is later 91239c23413Seschrock * brought back to life (onlined or otherwise). Given 91339c23413Seschrock * the rarity of this scenario, and the extra complexity 91439c23413Seschrock * it adds, we ignore the possibility. 
91539c23413Seschrock */ 91639c23413Seschrock if (!vdev_is_dead(tvd)) 91739c23413Seschrock spa_spare_activate(tvd); 91839c23413Seschrock } 91939c23413Seschrock 920e14bb325SJeff Bonwick vd->vdev_top = vd; 9216809eb4eSEric Schrock vd->vdev_aux = &spa->spa_spares; 922e14bb325SJeff Bonwick 92399653d4eSeschrock if (vdev_open(vd) != 0) 92499653d4eSeschrock continue; 92599653d4eSeschrock 926fa94a07fSbrendan if (vdev_validate_aux(vd) == 0) 927fa94a07fSbrendan spa_spare_add(vd); 92899653d4eSeschrock } 92999653d4eSeschrock 93099653d4eSeschrock /* 93199653d4eSeschrock * Recompute the stashed list of spares, with status information 93299653d4eSeschrock * this time. 93399653d4eSeschrock */ 934fa94a07fSbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 93599653d4eSeschrock DATA_TYPE_NVLIST_ARRAY) == 0); 93699653d4eSeschrock 937fa94a07fSbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 938fa94a07fSbrendan KM_SLEEP); 939fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 940fa94a07fSbrendan spares[i] = vdev_config_generate(spa, 941fa94a07fSbrendan spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 942fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 943fa94a07fSbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 944fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 94599653d4eSeschrock nvlist_free(spares[i]); 946fa94a07fSbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 947fa94a07fSbrendan } 948fa94a07fSbrendan 949fa94a07fSbrendan /* 950fa94a07fSbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 951fa94a07fSbrendan * this pool. When this is called, we have some form of basic information in 952fa94a07fSbrendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 953fa94a07fSbrendan * then re-generate a more complete list including status information. 
954fa94a07fSbrendan * Devices which are already active have their details maintained, and are 955fa94a07fSbrendan * not re-opened. 956fa94a07fSbrendan */ 957fa94a07fSbrendan static void 958fa94a07fSbrendan spa_load_l2cache(spa_t *spa) 959fa94a07fSbrendan { 960fa94a07fSbrendan nvlist_t **l2cache; 961fa94a07fSbrendan uint_t nl2cache; 962fa94a07fSbrendan int i, j, oldnvdevs; 963573ca77eSGeorge Wilson uint64_t guid; 964fa94a07fSbrendan vdev_t *vd, **oldvdevs, **newvdevs; 965fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 966fa94a07fSbrendan 967e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 968e14bb325SJeff Bonwick 969fa94a07fSbrendan if (sav->sav_config != NULL) { 970fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 971fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 972fa94a07fSbrendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 973fa94a07fSbrendan } else { 974fa94a07fSbrendan nl2cache = 0; 975fa94a07fSbrendan } 976fa94a07fSbrendan 977fa94a07fSbrendan oldvdevs = sav->sav_vdevs; 978fa94a07fSbrendan oldnvdevs = sav->sav_count; 979fa94a07fSbrendan sav->sav_vdevs = NULL; 980fa94a07fSbrendan sav->sav_count = 0; 981fa94a07fSbrendan 982fa94a07fSbrendan /* 983fa94a07fSbrendan * Process new nvlist of vdevs. 984fa94a07fSbrendan */ 985fa94a07fSbrendan for (i = 0; i < nl2cache; i++) { 986fa94a07fSbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 987fa94a07fSbrendan &guid) == 0); 988fa94a07fSbrendan 989fa94a07fSbrendan newvdevs[i] = NULL; 990fa94a07fSbrendan for (j = 0; j < oldnvdevs; j++) { 991fa94a07fSbrendan vd = oldvdevs[j]; 992fa94a07fSbrendan if (vd != NULL && guid == vd->vdev_guid) { 993fa94a07fSbrendan /* 994fa94a07fSbrendan * Retain previous vdev for add/remove ops. 
995fa94a07fSbrendan */ 996fa94a07fSbrendan newvdevs[i] = vd; 997fa94a07fSbrendan oldvdevs[j] = NULL; 998fa94a07fSbrendan break; 999fa94a07fSbrendan } 1000fa94a07fSbrendan } 1001fa94a07fSbrendan 1002fa94a07fSbrendan if (newvdevs[i] == NULL) { 1003fa94a07fSbrendan /* 1004fa94a07fSbrendan * Create new vdev 1005fa94a07fSbrendan */ 1006fa94a07fSbrendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 1007fa94a07fSbrendan VDEV_ALLOC_L2CACHE) == 0); 1008fa94a07fSbrendan ASSERT(vd != NULL); 1009fa94a07fSbrendan newvdevs[i] = vd; 1010fa94a07fSbrendan 1011fa94a07fSbrendan /* 1012fa94a07fSbrendan * Commit this vdev as an l2cache device, 1013fa94a07fSbrendan * even if it fails to open. 1014fa94a07fSbrendan */ 1015fa94a07fSbrendan spa_l2cache_add(vd); 1016fa94a07fSbrendan 1017c5904d13Seschrock vd->vdev_top = vd; 1018c5904d13Seschrock vd->vdev_aux = sav; 1019c5904d13Seschrock 1020c5904d13Seschrock spa_l2cache_activate(vd); 1021c5904d13Seschrock 1022fa94a07fSbrendan if (vdev_open(vd) != 0) 1023fa94a07fSbrendan continue; 1024fa94a07fSbrendan 1025fa94a07fSbrendan (void) vdev_validate_aux(vd); 1026fa94a07fSbrendan 1027573ca77eSGeorge Wilson if (!vdev_is_dead(vd)) 1028573ca77eSGeorge Wilson l2arc_add_vdev(spa, vd); 1029fa94a07fSbrendan } 1030fa94a07fSbrendan } 1031fa94a07fSbrendan 1032fa94a07fSbrendan /* 1033fa94a07fSbrendan * Purge vdevs that were dropped 1034fa94a07fSbrendan */ 1035fa94a07fSbrendan for (i = 0; i < oldnvdevs; i++) { 1036fa94a07fSbrendan uint64_t pool; 1037fa94a07fSbrendan 1038fa94a07fSbrendan vd = oldvdevs[i]; 1039fa94a07fSbrendan if (vd != NULL) { 10408ad4d6ddSJeff Bonwick if (spa_l2cache_exists(vd->vdev_guid, &pool) && 10418ad4d6ddSJeff Bonwick pool != 0ULL && l2arc_vdev_present(vd)) 1042fa94a07fSbrendan l2arc_remove_vdev(vd); 1043fa94a07fSbrendan (void) vdev_close(vd); 1044fa94a07fSbrendan spa_l2cache_remove(vd); 1045fa94a07fSbrendan } 1046fa94a07fSbrendan } 1047fa94a07fSbrendan 1048fa94a07fSbrendan if (oldvdevs) 1049fa94a07fSbrendan kmem_free(oldvdevs, 
oldnvdevs * sizeof (void *)); 1050fa94a07fSbrendan 1051fa94a07fSbrendan if (sav->sav_config == NULL) 1052fa94a07fSbrendan goto out; 1053fa94a07fSbrendan 1054fa94a07fSbrendan sav->sav_vdevs = newvdevs; 1055fa94a07fSbrendan sav->sav_count = (int)nl2cache; 1056fa94a07fSbrendan 1057fa94a07fSbrendan /* 1058fa94a07fSbrendan * Recompute the stashed list of l2cache devices, with status 1059fa94a07fSbrendan * information this time. 1060fa94a07fSbrendan */ 1061fa94a07fSbrendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 1062fa94a07fSbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 1063fa94a07fSbrendan 1064fa94a07fSbrendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 1065fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 1066fa94a07fSbrendan l2cache[i] = vdev_config_generate(spa, 1067fa94a07fSbrendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 1068fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 1069fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 1070fa94a07fSbrendan out: 1071fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) 1072fa94a07fSbrendan nvlist_free(l2cache[i]); 1073fa94a07fSbrendan if (sav->sav_count) 1074fa94a07fSbrendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 107599653d4eSeschrock } 107699653d4eSeschrock 107799653d4eSeschrock static int 107899653d4eSeschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 107999653d4eSeschrock { 108099653d4eSeschrock dmu_buf_t *db; 108199653d4eSeschrock char *packed = NULL; 108299653d4eSeschrock size_t nvsize = 0; 108399653d4eSeschrock int error; 108499653d4eSeschrock *value = NULL; 108599653d4eSeschrock 108699653d4eSeschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 108799653d4eSeschrock nvsize = *(uint64_t *)db->db_data; 108899653d4eSeschrock dmu_buf_rele(db, FTAG); 108999653d4eSeschrock 109099653d4eSeschrock packed = kmem_alloc(nvsize, KM_SLEEP); 10917bfdf011SNeil Perrin error = dmu_read(spa->spa_meta_objset, 
obj, 0, nvsize, packed, 10927bfdf011SNeil Perrin DMU_READ_PREFETCH); 109399653d4eSeschrock if (error == 0) 109499653d4eSeschrock error = nvlist_unpack(packed, nvsize, value, 0); 109599653d4eSeschrock kmem_free(packed, nvsize); 109699653d4eSeschrock 109799653d4eSeschrock return (error); 109899653d4eSeschrock } 109999653d4eSeschrock 11003d7072f8Seschrock /* 11013d7072f8Seschrock * Checks to see if the given vdev could not be opened, in which case we post a 11023d7072f8Seschrock * sysevent to notify the autoreplace code that the device has been removed. 11033d7072f8Seschrock */ 11043d7072f8Seschrock static void 11053d7072f8Seschrock spa_check_removed(vdev_t *vd) 11063d7072f8Seschrock { 1107573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) 11083d7072f8Seschrock spa_check_removed(vd->vdev_child[c]); 11093d7072f8Seschrock 11103d7072f8Seschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 11113d7072f8Seschrock zfs_post_autoreplace(vd->vdev_spa, vd); 11123d7072f8Seschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 11133d7072f8Seschrock } 11143d7072f8Seschrock } 11153d7072f8Seschrock 1116e6ca193dSGeorge Wilson /* 1117e6ca193dSGeorge Wilson * Load the slog device state from the config object since it's possible 1118e6ca193dSGeorge Wilson * that the label does not contain the most up-to-date information. 1119e6ca193dSGeorge Wilson */ 1120e6ca193dSGeorge Wilson void 112188ecc943SGeorge Wilson spa_load_log_state(spa_t *spa, nvlist_t *nv) 1122e6ca193dSGeorge Wilson { 112388ecc943SGeorge Wilson vdev_t *ovd, *rvd = spa->spa_root_vdev; 1124e6ca193dSGeorge Wilson 112588ecc943SGeorge Wilson /* 112688ecc943SGeorge Wilson * Load the original root vdev tree from the passed config. 
112788ecc943SGeorge Wilson */ 112888ecc943SGeorge Wilson spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 112988ecc943SGeorge Wilson VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); 1130e6ca193dSGeorge Wilson 113188ecc943SGeorge Wilson for (int c = 0; c < rvd->vdev_children; c++) { 113288ecc943SGeorge Wilson vdev_t *cvd = rvd->vdev_child[c]; 113388ecc943SGeorge Wilson if (cvd->vdev_islog) 113488ecc943SGeorge Wilson vdev_load_log_state(cvd, ovd->vdev_child[c]); 1135e6ca193dSGeorge Wilson } 113688ecc943SGeorge Wilson vdev_free(ovd); 113788ecc943SGeorge Wilson spa_config_exit(spa, SCL_ALL, FTAG); 1138e6ca193dSGeorge Wilson } 1139e6ca193dSGeorge Wilson 1140b87f3af3Sperrin /* 1141b87f3af3Sperrin * Check for missing log devices 1142b87f3af3Sperrin */ 1143b87f3af3Sperrin int 1144b87f3af3Sperrin spa_check_logs(spa_t *spa) 1145b87f3af3Sperrin { 1146b87f3af3Sperrin switch (spa->spa_log_state) { 1147b87f3af3Sperrin case SPA_LOG_MISSING: 1148b87f3af3Sperrin /* need to recheck in case slog has been restored */ 1149b87f3af3Sperrin case SPA_LOG_UNKNOWN: 1150b87f3af3Sperrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 1151b87f3af3Sperrin DS_FIND_CHILDREN)) { 1152b87f3af3Sperrin spa->spa_log_state = SPA_LOG_MISSING; 1153b87f3af3Sperrin return (1); 1154b87f3af3Sperrin } 1155b87f3af3Sperrin break; 1156b87f3af3Sperrin } 1157b87f3af3Sperrin return (0); 1158b87f3af3Sperrin } 1159b87f3af3Sperrin 1160b693757aSEric Schrock static void 1161b693757aSEric Schrock spa_aux_check_removed(spa_aux_vdev_t *sav) 1162b693757aSEric Schrock { 1163*b24ab676SJeff Bonwick for (int i = 0; i < sav->sav_count; i++) 1164b693757aSEric Schrock spa_check_removed(sav->sav_vdevs[i]); 1165b693757aSEric Schrock } 1166b693757aSEric Schrock 1167*b24ab676SJeff Bonwick void 1168*b24ab676SJeff Bonwick spa_claim_notify(zio_t *zio) 1169*b24ab676SJeff Bonwick { 1170*b24ab676SJeff Bonwick spa_t *spa = zio->io_spa; 1171*b24ab676SJeff Bonwick 1172*b24ab676SJeff Bonwick if (zio->io_error) 
1173*b24ab676SJeff Bonwick return; 1174*b24ab676SJeff Bonwick 1175*b24ab676SJeff Bonwick mutex_enter(&spa->spa_props_lock); /* any mutex will do */ 1176*b24ab676SJeff Bonwick if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) 1177*b24ab676SJeff Bonwick spa->spa_claim_max_txg = zio->io_bp->blk_birth; 1178*b24ab676SJeff Bonwick mutex_exit(&spa->spa_props_lock); 1179*b24ab676SJeff Bonwick } 1180*b24ab676SJeff Bonwick 1181468c413aSTim Haley typedef struct spa_load_error { 1182468c413aSTim Haley uint64_t sle_metadata_count; 1183468c413aSTim Haley uint64_t sle_data_count; 1184468c413aSTim Haley } spa_load_error_t; 1185468c413aSTim Haley 1186468c413aSTim Haley static void 1187468c413aSTim Haley spa_load_verify_done(zio_t *zio) 1188468c413aSTim Haley { 1189468c413aSTim Haley blkptr_t *bp = zio->io_bp; 1190468c413aSTim Haley spa_load_error_t *sle = zio->io_private; 1191468c413aSTim Haley dmu_object_type_t type = BP_GET_TYPE(bp); 1192468c413aSTim Haley int error = zio->io_error; 1193468c413aSTim Haley 1194468c413aSTim Haley if (error) { 1195468c413aSTim Haley if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) && 1196468c413aSTim Haley type != DMU_OT_INTENT_LOG) 1197468c413aSTim Haley atomic_add_64(&sle->sle_metadata_count, 1); 1198468c413aSTim Haley else 1199468c413aSTim Haley atomic_add_64(&sle->sle_data_count, 1); 1200468c413aSTim Haley } 1201468c413aSTim Haley zio_data_buf_free(zio->io_data, zio->io_size); 1202468c413aSTim Haley } 1203468c413aSTim Haley 1204468c413aSTim Haley /*ARGSUSED*/ 1205468c413aSTim Haley static int 1206*b24ab676SJeff Bonwick spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 1207*b24ab676SJeff Bonwick const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 1208468c413aSTim Haley { 1209468c413aSTim Haley if (bp != NULL) { 1210468c413aSTim Haley zio_t *rio = arg; 1211468c413aSTim Haley size_t size = BP_GET_PSIZE(bp); 1212468c413aSTim Haley void *data = zio_data_buf_alloc(size); 1213468c413aSTim Haley 1214468c413aSTim Haley 
zio_nowait(zio_read(rio, spa, bp, data, size, 1215468c413aSTim Haley spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, 1216468c413aSTim Haley ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | 1217468c413aSTim Haley ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); 1218468c413aSTim Haley } 1219468c413aSTim Haley return (0); 1220468c413aSTim Haley } 1221468c413aSTim Haley 1222468c413aSTim Haley static int 1223468c413aSTim Haley spa_load_verify(spa_t *spa) 1224468c413aSTim Haley { 1225468c413aSTim Haley zio_t *rio; 1226468c413aSTim Haley spa_load_error_t sle = { 0 }; 1227468c413aSTim Haley zpool_rewind_policy_t policy; 1228468c413aSTim Haley boolean_t verify_ok = B_FALSE; 1229468c413aSTim Haley int error; 1230468c413aSTim Haley 1231468c413aSTim Haley rio = zio_root(spa, NULL, &sle, 1232468c413aSTim Haley ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1233468c413aSTim Haley 1234468c413aSTim Haley error = traverse_pool(spa, spa_load_verify_cb, rio, 1235468c413aSTim Haley spa->spa_verify_min_txg); 1236468c413aSTim Haley 1237468c413aSTim Haley (void) zio_wait(rio); 1238468c413aSTim Haley 1239468c413aSTim Haley zpool_get_rewind_policy(spa->spa_config, &policy); 1240468c413aSTim Haley 1241468c413aSTim Haley spa->spa_load_meta_errors = sle.sle_metadata_count; 1242468c413aSTim Haley spa->spa_load_data_errors = sle.sle_data_count; 1243468c413aSTim Haley 1244468c413aSTim Haley if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta && 1245468c413aSTim Haley sle.sle_data_count <= policy.zrp_maxdata) { 1246468c413aSTim Haley verify_ok = B_TRUE; 1247468c413aSTim Haley spa->spa_load_txg = spa->spa_uberblock.ub_txg; 1248468c413aSTim Haley spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; 1249468c413aSTim Haley } 1250468c413aSTim Haley 1251468c413aSTim Haley if (error) { 1252468c413aSTim Haley if (error != ENXIO && error != EIO) 1253468c413aSTim Haley error = EIO; 1254468c413aSTim Haley return (error); 1255468c413aSTim Haley } 1256468c413aSTim Haley 1257468c413aSTim Haley return (verify_ok ? 
0 : EIO); 1258468c413aSTim Haley } 1259468c413aSTim Haley 1260fa9e4066Sahrens /* 1261fa9e4066Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 1262ea8dc4b6Seschrock * source of configuration information. 1263fa9e4066Sahrens */ 1264fa9e4066Sahrens static int 1265468c413aSTim Haley spa_load(spa_t *spa, spa_load_state_t state, int mosconfig) 1266fa9e4066Sahrens { 1267fa9e4066Sahrens int error = 0; 126888ecc943SGeorge Wilson nvlist_t *nvconfig, *nvroot = NULL; 1269fa9e4066Sahrens vdev_t *rvd; 1270fa9e4066Sahrens uberblock_t *ub = &spa->spa_uberblock; 12710373e76bSbonwick uint64_t config_cache_txg = spa->spa_config_txg; 1272fa9e4066Sahrens uint64_t pool_guid; 127399653d4eSeschrock uint64_t version; 12743d7072f8Seschrock uint64_t autoreplace = 0; 12758ad4d6ddSJeff Bonwick int orig_mode = spa->spa_mode; 1276b87f3af3Sperrin char *ereport = FM_EREPORT_ZFS_POOL; 1277468c413aSTim Haley nvlist_t *config = spa->spa_config; 1278fa9e4066Sahrens 12798ad4d6ddSJeff Bonwick /* 12808ad4d6ddSJeff Bonwick * If this is an untrusted config, access the pool in read-only mode. 12818ad4d6ddSJeff Bonwick * This prevents things like resilvering recently removed devices. 12828ad4d6ddSJeff Bonwick */ 12838ad4d6ddSJeff Bonwick if (!mosconfig) 12848ad4d6ddSJeff Bonwick spa->spa_mode = FREAD; 12858ad4d6ddSJeff Bonwick 1286e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1287e14bb325SJeff Bonwick 1288ea8dc4b6Seschrock spa->spa_load_state = state; 12890373e76bSbonwick 1290fa9e4066Sahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1291a9926bf0Sbonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1292ea8dc4b6Seschrock error = EINVAL; 1293ea8dc4b6Seschrock goto out; 1294ea8dc4b6Seschrock } 1295fa9e4066Sahrens 129699653d4eSeschrock /* 129799653d4eSeschrock * Versioning wasn't explicitly added to the label until later, so if 129899653d4eSeschrock * it's not present treat it as the initial version. 
129999653d4eSeschrock */ 130099653d4eSeschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1301e7437265Sahrens version = SPA_VERSION_INITIAL; 130299653d4eSeschrock 1303a9926bf0Sbonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1304a9926bf0Sbonwick &spa->spa_config_txg); 1305a9926bf0Sbonwick 13060373e76bSbonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1307ea8dc4b6Seschrock spa_guid_exists(pool_guid, 0)) { 1308ea8dc4b6Seschrock error = EEXIST; 1309ea8dc4b6Seschrock goto out; 1310ea8dc4b6Seschrock } 1311fa9e4066Sahrens 1312b5989ec7Seschrock spa->spa_load_guid = pool_guid; 1313b5989ec7Seschrock 131454d692b7SGeorge Wilson /* 131554d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 131654d692b7SGeorge Wilson */ 131725f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 131825f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 131954d692b7SGeorge Wilson 1320fa9e4066Sahrens /* 132199653d4eSeschrock * Parse the configuration into a vdev tree. We explicitly set the 132299653d4eSeschrock * value that will be returned by spa_version() since parsing the 132399653d4eSeschrock * configuration requires knowing the version number. 1324fa9e4066Sahrens */ 1325e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 132699653d4eSeschrock spa->spa_ubsync.ub_version = version; 132799653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 1328e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1329fa9e4066Sahrens 133099653d4eSeschrock if (error != 0) 1331ea8dc4b6Seschrock goto out; 1332fa9e4066Sahrens 13330e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == rvd); 1334fa9e4066Sahrens ASSERT(spa_guid(spa) == pool_guid); 1335fa9e4066Sahrens 1336fa9e4066Sahrens /* 1337fa9e4066Sahrens * Try to open all vdevs, loading each label in the process. 
1338fa9e4066Sahrens */ 1339e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 13400bf246f5Smc error = vdev_open(rvd); 1341e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 13420bf246f5Smc if (error != 0) 1343ea8dc4b6Seschrock goto out; 1344fa9e4066Sahrens 1345560e6e96Seschrock /* 134677e3a39cSMark J Musante * We need to validate the vdev labels against the configuration that 134777e3a39cSMark J Musante * we have in hand, which is dependent on the setting of mosconfig. If 134877e3a39cSMark J Musante * mosconfig is true then we're validating the vdev labels based on 134977e3a39cSMark J Musante * that config. Otherwise, we're validating against the cached config 135077e3a39cSMark J Musante * (zpool.cache) that was read when we loaded the zfs module, and then 135177e3a39cSMark J Musante * later we will recursively call spa_load() and validate against 135277e3a39cSMark J Musante * the vdev config. 1353560e6e96Seschrock */ 135477e3a39cSMark J Musante spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 135577e3a39cSMark J Musante error = vdev_validate(rvd); 135677e3a39cSMark J Musante spa_config_exit(spa, SCL_ALL, FTAG); 135777e3a39cSMark J Musante if (error != 0) 135877e3a39cSMark J Musante goto out; 1359560e6e96Seschrock 1360560e6e96Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1361560e6e96Seschrock error = ENXIO; 1362560e6e96Seschrock goto out; 1363560e6e96Seschrock } 1364560e6e96Seschrock 1365fa9e4066Sahrens /* 1366fa9e4066Sahrens * Find the best uberblock. 1367fa9e4066Sahrens */ 1368e14bb325SJeff Bonwick vdev_uberblock_load(NULL, rvd, ub); 1369fa9e4066Sahrens 1370fa9e4066Sahrens /* 1371fa9e4066Sahrens * If we weren't able to find a single valid uberblock, return failure. 
1372fa9e4066Sahrens */ 1373fa9e4066Sahrens if (ub->ub_txg == 0) { 1374eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1375eaca9bbdSeschrock VDEV_AUX_CORRUPT_DATA); 1376ea8dc4b6Seschrock error = ENXIO; 1377ea8dc4b6Seschrock goto out; 1378ea8dc4b6Seschrock } 1379ea8dc4b6Seschrock 1380ea8dc4b6Seschrock /* 1381ea8dc4b6Seschrock * If the pool is newer than the code, we can't open it. 1382ea8dc4b6Seschrock */ 1383e7437265Sahrens if (ub->ub_version > SPA_VERSION) { 1384eaca9bbdSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1385eaca9bbdSeschrock VDEV_AUX_VERSION_NEWER); 1386ea8dc4b6Seschrock error = ENOTSUP; 1387ea8dc4b6Seschrock goto out; 1388fa9e4066Sahrens } 1389fa9e4066Sahrens 1390fa9e4066Sahrens /* 1391fa9e4066Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 1392fa9e4066Sahrens * incomplete configuration. 1393fa9e4066Sahrens */ 1394ecc2d604Sbonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 1395ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1396ea8dc4b6Seschrock VDEV_AUX_BAD_GUID_SUM); 1397ea8dc4b6Seschrock error = ENXIO; 1398ea8dc4b6Seschrock goto out; 1399fa9e4066Sahrens } 1400fa9e4066Sahrens 1401fa9e4066Sahrens /* 1402fa9e4066Sahrens * Initialize internal SPA structures. 1403fa9e4066Sahrens */ 1404fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 1405fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 1406468c413aSTim Haley spa->spa_verify_min_txg = spa->spa_extreme_rewind ? 1407468c413aSTim Haley TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE; 1408468c413aSTim Haley spa->spa_first_txg = spa->spa_last_ubsync_txg ? 
1409468c413aSTim Haley spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; 1410*b24ab676SJeff Bonwick spa->spa_claim_max_txg = spa->spa_first_txg; 1411*b24ab676SJeff Bonwick 1412ea8dc4b6Seschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 1413ea8dc4b6Seschrock if (error) { 1414ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1415ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1416468c413aSTim Haley error = EIO; 1417ea8dc4b6Seschrock goto out; 1418ea8dc4b6Seschrock } 1419fa9e4066Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1420fa9e4066Sahrens 1421ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1422fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 1423ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 1424ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1425ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1426ea8dc4b6Seschrock error = EIO; 1427ea8dc4b6Seschrock goto out; 1428ea8dc4b6Seschrock } 1429fa9e4066Sahrens 143088ecc943SGeorge Wilson if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 143188ecc943SGeorge Wilson vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 143288ecc943SGeorge Wilson VDEV_AUX_CORRUPT_DATA); 143388ecc943SGeorge Wilson error = EIO; 143488ecc943SGeorge Wilson goto out; 143588ecc943SGeorge Wilson } 143688ecc943SGeorge Wilson 1437fa9e4066Sahrens if (!mosconfig) { 143895173954Sek uint64_t hostid; 1439fa9e4066Sahrens 144088ecc943SGeorge Wilson if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 144177650510SLin Ling ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 144295173954Sek char *hostname; 144395173954Sek unsigned long myhostid = 0; 144495173954Sek 144588ecc943SGeorge Wilson VERIFY(nvlist_lookup_string(nvconfig, 144695173954Sek ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 144795173954Sek 14485679c89fSjv #ifdef _KERNEL 14495679c89fSjv myhostid = zone_get_hostid(NULL); 14505679c89fSjv #else /* _KERNEL */ 14515679c89fSjv /* 
14525679c89fSjv * We're emulating the system's hostid in userland, so 14535679c89fSjv * we can't use zone_get_hostid(). 14545679c89fSjv */ 145595173954Sek (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 14565679c89fSjv #endif /* _KERNEL */ 145717194a52Slling if (hostid != 0 && myhostid != 0 && 14585679c89fSjv hostid != myhostid) { 145995173954Sek cmn_err(CE_WARN, "pool '%s' could not be " 146095173954Sek "loaded as it was last accessed by " 146177650510SLin Ling "another system (host: %s hostid: 0x%lx). " 146295173954Sek "See: http://www.sun.com/msg/ZFS-8000-EY", 1463e14bb325SJeff Bonwick spa_name(spa), hostname, 146495173954Sek (unsigned long)hostid); 146595173954Sek error = EBADF; 146695173954Sek goto out; 146795173954Sek } 146895173954Sek } 146995173954Sek 147088ecc943SGeorge Wilson spa_config_set(spa, nvconfig); 1471fa9e4066Sahrens spa_unload(spa); 1472fa9e4066Sahrens spa_deactivate(spa); 14738ad4d6ddSJeff Bonwick spa_activate(spa, orig_mode); 1474fa9e4066Sahrens 1475468c413aSTim Haley return (spa_load(spa, state, B_TRUE)); 1476fa9e4066Sahrens } 1477fa9e4066Sahrens 1478ea8dc4b6Seschrock if (zap_lookup(spa->spa_meta_objset, 1479fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 1480*b24ab676SJeff Bonwick sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) { 1481ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1482ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1483ea8dc4b6Seschrock error = EIO; 1484ea8dc4b6Seschrock goto out; 1485ea8dc4b6Seschrock } 1486fa9e4066Sahrens 148799653d4eSeschrock /* 148899653d4eSeschrock * Load the bit that tells us to use the new accounting function 148999653d4eSeschrock * (raid-z deflation). If we have an older pool, this will not 149099653d4eSeschrock * be present. 
149199653d4eSeschrock */ 149299653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, 149399653d4eSeschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 149499653d4eSeschrock sizeof (uint64_t), 1, &spa->spa_deflate); 149599653d4eSeschrock if (error != 0 && error != ENOENT) { 149699653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 149799653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 149899653d4eSeschrock error = EIO; 149999653d4eSeschrock goto out; 150099653d4eSeschrock } 150199653d4eSeschrock 1502fa9e4066Sahrens /* 1503ea8dc4b6Seschrock * Load the persistent error log. If we have an older pool, this will 1504ea8dc4b6Seschrock * not be present. 1505fa9e4066Sahrens */ 1506ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1507ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 1508ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 1509d80c45e0Sbonwick if (error != 0 && error != ENOENT) { 1510ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1511ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1512ea8dc4b6Seschrock error = EIO; 1513ea8dc4b6Seschrock goto out; 1514ea8dc4b6Seschrock } 1515ea8dc4b6Seschrock 1516ea8dc4b6Seschrock error = zap_lookup(spa->spa_meta_objset, 1517ea8dc4b6Seschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 1518ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 1519ea8dc4b6Seschrock if (error != 0 && error != ENOENT) { 1520ea8dc4b6Seschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1521ea8dc4b6Seschrock VDEV_AUX_CORRUPT_DATA); 1522ea8dc4b6Seschrock error = EIO; 1523ea8dc4b6Seschrock goto out; 1524ea8dc4b6Seschrock } 1525ea8dc4b6Seschrock 152606eeb2adSek /* 152706eeb2adSek * Load the history object. If we have an older pool, this 152806eeb2adSek * will not be present. 
152906eeb2adSek */ 153006eeb2adSek error = zap_lookup(spa->spa_meta_objset, 153106eeb2adSek DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 153206eeb2adSek sizeof (uint64_t), 1, &spa->spa_history); 153306eeb2adSek if (error != 0 && error != ENOENT) { 153406eeb2adSek vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 153506eeb2adSek VDEV_AUX_CORRUPT_DATA); 153606eeb2adSek error = EIO; 153706eeb2adSek goto out; 153806eeb2adSek } 153906eeb2adSek 154099653d4eSeschrock /* 154199653d4eSeschrock * Load any hot spares for this pool. 154299653d4eSeschrock */ 154399653d4eSeschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1544fa94a07fSbrendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 154599653d4eSeschrock if (error != 0 && error != ENOENT) { 154699653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 154799653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 154899653d4eSeschrock error = EIO; 154999653d4eSeschrock goto out; 155099653d4eSeschrock } 155199653d4eSeschrock if (error == 0) { 1552e7437265Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 1553fa94a07fSbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 1554fa94a07fSbrendan &spa->spa_spares.sav_config) != 0) { 155599653d4eSeschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 155699653d4eSeschrock VDEV_AUX_CORRUPT_DATA); 155799653d4eSeschrock error = EIO; 155899653d4eSeschrock goto out; 155999653d4eSeschrock } 156099653d4eSeschrock 1561e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 156299653d4eSeschrock spa_load_spares(spa); 1563e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 156499653d4eSeschrock } 156599653d4eSeschrock 1566fa94a07fSbrendan /* 1567fa94a07fSbrendan * Load any level 2 ARC devices for this pool. 
1568fa94a07fSbrendan */ 1569fa94a07fSbrendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1570fa94a07fSbrendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 1571fa94a07fSbrendan &spa->spa_l2cache.sav_object); 1572fa94a07fSbrendan if (error != 0 && error != ENOENT) { 1573fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1574fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1575fa94a07fSbrendan error = EIO; 1576fa94a07fSbrendan goto out; 1577fa94a07fSbrendan } 1578fa94a07fSbrendan if (error == 0) { 1579fa94a07fSbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 1580fa94a07fSbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 1581fa94a07fSbrendan &spa->spa_l2cache.sav_config) != 0) { 1582fa94a07fSbrendan vdev_set_state(rvd, B_TRUE, 1583fa94a07fSbrendan VDEV_STATE_CANT_OPEN, 1584fa94a07fSbrendan VDEV_AUX_CORRUPT_DATA); 1585fa94a07fSbrendan error = EIO; 1586fa94a07fSbrendan goto out; 1587fa94a07fSbrendan } 1588fa94a07fSbrendan 1589e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1590fa94a07fSbrendan spa_load_l2cache(spa); 1591e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1592fa94a07fSbrendan } 1593fa94a07fSbrendan 1594990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 1595ecd6cf80Smarks 1596b1b8ab34Slling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1597b1b8ab34Slling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 1598b1b8ab34Slling 1599b1b8ab34Slling if (error && error != ENOENT) { 1600b1b8ab34Slling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1601b1b8ab34Slling VDEV_AUX_CORRUPT_DATA); 1602b1b8ab34Slling error = EIO; 1603b1b8ab34Slling goto out; 1604b1b8ab34Slling } 1605b1b8ab34Slling 1606b1b8ab34Slling if (error == 0) { 1607b1b8ab34Slling (void) zap_lookup(spa->spa_meta_objset, 1608b1b8ab34Slling spa->spa_pool_props_object, 16093d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 1610b1b8ab34Slling sizeof 
(uint64_t), 1, &spa->spa_bootfs); 16113d7072f8Seschrock (void) zap_lookup(spa->spa_meta_objset, 16123d7072f8Seschrock spa->spa_pool_props_object, 16133d7072f8Seschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 16143d7072f8Seschrock sizeof (uint64_t), 1, &autoreplace); 1615b693757aSEric Schrock spa->spa_autoreplace = (autoreplace != 0); 1616ecd6cf80Smarks (void) zap_lookup(spa->spa_meta_objset, 1617ecd6cf80Smarks spa->spa_pool_props_object, 1618ecd6cf80Smarks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 1619ecd6cf80Smarks sizeof (uint64_t), 1, &spa->spa_delegation); 16200a4e9518Sgw (void) zap_lookup(spa->spa_meta_objset, 16210a4e9518Sgw spa->spa_pool_props_object, 16220a4e9518Sgw zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 16230a4e9518Sgw sizeof (uint64_t), 1, &spa->spa_failmode); 1624573ca77eSGeorge Wilson (void) zap_lookup(spa->spa_meta_objset, 1625573ca77eSGeorge Wilson spa->spa_pool_props_object, 1626573ca77eSGeorge Wilson zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 1627573ca77eSGeorge Wilson sizeof (uint64_t), 1, &spa->spa_autoexpand); 1628*b24ab676SJeff Bonwick (void) zap_lookup(spa->spa_meta_objset, 1629*b24ab676SJeff Bonwick spa->spa_pool_props_object, 1630*b24ab676SJeff Bonwick zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO), 1631*b24ab676SJeff Bonwick sizeof (uint64_t), 1, &spa->spa_dedup_ditto); 1632b1b8ab34Slling } 1633b1b8ab34Slling 16343d7072f8Seschrock /* 16353d7072f8Seschrock * If the 'autoreplace' property is set, then post a resource notifying 16363d7072f8Seschrock * the ZFS DE that it should not issue any faults for unopenable 16373d7072f8Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 16383d7072f8Seschrock * unopenable vdevs so that the normal autoreplace handler can take 16393d7072f8Seschrock * over. 
16403d7072f8Seschrock */ 1641b693757aSEric Schrock if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 16423d7072f8Seschrock spa_check_removed(spa->spa_root_vdev); 1643b693757aSEric Schrock /* 1644b693757aSEric Schrock * For the import case, this is done in spa_import(), because 1645b693757aSEric Schrock * at this point we're using the spare definitions from 1646b693757aSEric Schrock * the MOS config, not necessarily from the userland config. 1647b693757aSEric Schrock */ 1648b693757aSEric Schrock if (state != SPA_LOAD_IMPORT) { 1649b693757aSEric Schrock spa_aux_check_removed(&spa->spa_spares); 1650b693757aSEric Schrock spa_aux_check_removed(&spa->spa_l2cache); 1651b693757aSEric Schrock } 1652b693757aSEric Schrock } 16533d7072f8Seschrock 1654ea8dc4b6Seschrock /* 1655560e6e96Seschrock * Load the vdev state for all toplevel vdevs. 1656ea8dc4b6Seschrock */ 1657560e6e96Seschrock vdev_load(rvd); 16580373e76bSbonwick 1659fa9e4066Sahrens /* 1660fa9e4066Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1661fa9e4066Sahrens */ 1662e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1663fa9e4066Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 1664e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1665fa9e4066Sahrens 1666fa9e4066Sahrens /* 1667fa9e4066Sahrens * Check the state of the root vdev. If it can't be opened, it 1668fa9e4066Sahrens * indicates one or more toplevel vdevs are faulted. 1669fa9e4066Sahrens */ 1670ea8dc4b6Seschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1671ea8dc4b6Seschrock error = ENXIO; 1672ea8dc4b6Seschrock goto out; 1673ea8dc4b6Seschrock } 1674fa9e4066Sahrens 1675*b24ab676SJeff Bonwick /* 1676*b24ab676SJeff Bonwick * Load the DDTs (dedup tables). 
1677*b24ab676SJeff Bonwick */ 1678*b24ab676SJeff Bonwick error = ddt_load(spa); 1679*b24ab676SJeff Bonwick if (error != 0) { 1680*b24ab676SJeff Bonwick vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1681*b24ab676SJeff Bonwick VDEV_AUX_CORRUPT_DATA); 1682*b24ab676SJeff Bonwick error = EIO; 1683*b24ab676SJeff Bonwick goto out; 1684*b24ab676SJeff Bonwick } 1685*b24ab676SJeff Bonwick 1686468c413aSTim Haley if (state != SPA_LOAD_TRYIMPORT) { 1687468c413aSTim Haley error = spa_load_verify(spa); 1688468c413aSTim Haley if (error) { 1689468c413aSTim Haley vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1690468c413aSTim Haley VDEV_AUX_CORRUPT_DATA); 1691468c413aSTim Haley goto out; 1692468c413aSTim Haley } 1693468c413aSTim Haley } 1694468c413aSTim Haley 1695*b24ab676SJeff Bonwick /* 1696*b24ab676SJeff Bonwick * Load the intent log state and check log integrity. 1697*b24ab676SJeff Bonwick */ 1698*b24ab676SJeff Bonwick VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, 1699*b24ab676SJeff Bonwick &nvroot) == 0); 1700*b24ab676SJeff Bonwick spa_load_log_state(spa, nvroot); 1701*b24ab676SJeff Bonwick nvlist_free(nvconfig); 1702*b24ab676SJeff Bonwick 1703*b24ab676SJeff Bonwick if (spa_check_logs(spa)) { 1704*b24ab676SJeff Bonwick vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1705*b24ab676SJeff Bonwick VDEV_AUX_BAD_LOG); 1706*b24ab676SJeff Bonwick error = ENXIO; 1707*b24ab676SJeff Bonwick ereport = FM_EREPORT_ZFS_LOG_REPLAY; 1708*b24ab676SJeff Bonwick goto out; 1709*b24ab676SJeff Bonwick } 1710*b24ab676SJeff Bonwick 1711468c413aSTim Haley if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || 1712468c413aSTim Haley spa->spa_load_max_txg == UINT64_MAX)) { 17135dabedeeSbonwick dmu_tx_t *tx; 17140373e76bSbonwick int need_update = B_FALSE; 17158ad4d6ddSJeff Bonwick 17168ad4d6ddSJeff Bonwick ASSERT(state != SPA_LOAD_TRYIMPORT); 17175dabedeeSbonwick 17180373e76bSbonwick /* 17190373e76bSbonwick * Claim log blocks that haven't been committed yet. 
17200373e76bSbonwick * This must all happen in a single txg. 1721*b24ab676SJeff Bonwick * Note: spa_claim_max_txg is updated by spa_claim_notify(), 1722*b24ab676SJeff Bonwick * invoked from zil_claim_log_block()'s i/o done callback. 1723468c413aSTim Haley * Price of rollback is that we abandon the log. 17240373e76bSbonwick */ 1725*b24ab676SJeff Bonwick spa->spa_claiming = B_TRUE; 1726*b24ab676SJeff Bonwick 17275dabedeeSbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1728fa9e4066Sahrens spa_first_txg(spa)); 1729e14bb325SJeff Bonwick (void) dmu_objset_find(spa_name(spa), 17300b69c2f0Sahrens zil_claim, tx, DS_FIND_CHILDREN); 1731fa9e4066Sahrens dmu_tx_commit(tx); 1732fa9e4066Sahrens 1733*b24ab676SJeff Bonwick spa->spa_claiming = B_FALSE; 1734*b24ab676SJeff Bonwick 1735e6ca193dSGeorge Wilson spa->spa_log_state = SPA_LOG_GOOD; 1736fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 1737fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 1738fa9e4066Sahrens 1739fa9e4066Sahrens /* 1740*b24ab676SJeff Bonwick * Wait for all claims to sync. We sync up to the highest 1741*b24ab676SJeff Bonwick * claimed log block birth time so that claimed log blocks 1742*b24ab676SJeff Bonwick * don't appear to be from the future. spa_claim_max_txg 1743*b24ab676SJeff Bonwick * will have been set for us by either zil_check_log_chain() 1744*b24ab676SJeff Bonwick * (invoked from spa_check_logs()) or zil_claim() above. 1745fa9e4066Sahrens */ 1746*b24ab676SJeff Bonwick txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); 17470e34b6a7Sbonwick 17480e34b6a7Sbonwick /* 17490373e76bSbonwick * If the config cache is stale, or we have uninitialized 17500373e76bSbonwick * metaslabs (see spa_vdev_add()), then update the config. 1751bc758434SLin Ling * 1752bc758434SLin Ling * If spa_load_verbatim is true, trust the current 1753bc758434SLin Ling * in-core spa_config and update the disk labels. 
17540e34b6a7Sbonwick */ 17550373e76bSbonwick if (config_cache_txg != spa->spa_config_txg || 1756468c413aSTim Haley state == SPA_LOAD_IMPORT || spa->spa_load_verbatim || 1757468c413aSTim Haley state == SPA_LOAD_RECOVER) 17580373e76bSbonwick need_update = B_TRUE; 17590373e76bSbonwick 17608ad4d6ddSJeff Bonwick for (int c = 0; c < rvd->vdev_children; c++) 17610373e76bSbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 17620373e76bSbonwick need_update = B_TRUE; 17630e34b6a7Sbonwick 17640e34b6a7Sbonwick /* 17650373e76bSbonwick * Update the config cache asychronously in case we're the 17660373e76bSbonwick * root pool, in which case the config cache isn't writable yet. 17670e34b6a7Sbonwick */ 17680373e76bSbonwick if (need_update) 17690373e76bSbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 17708ad4d6ddSJeff Bonwick 17718ad4d6ddSJeff Bonwick /* 17728ad4d6ddSJeff Bonwick * Check all DTLs to see if anything needs resilvering. 17738ad4d6ddSJeff Bonwick */ 17748ad4d6ddSJeff Bonwick if (vdev_resilver_needed(rvd, NULL, NULL)) 17758ad4d6ddSJeff Bonwick spa_async_request(spa, SPA_ASYNC_RESILVER); 1776503ad85cSMatthew Ahrens 1777503ad85cSMatthew Ahrens /* 1778503ad85cSMatthew Ahrens * Delete any inconsistent datasets. 1779503ad85cSMatthew Ahrens */ 1780503ad85cSMatthew Ahrens (void) dmu_objset_find(spa_name(spa), 1781503ad85cSMatthew Ahrens dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 1782ca45db41SChris Kirby 1783ca45db41SChris Kirby /* 1784ca45db41SChris Kirby * Clean up any stale temporary dataset userrefs. 
1785ca45db41SChris Kirby */ 1786ca45db41SChris Kirby dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 1787fa9e4066Sahrens } 1788fa9e4066Sahrens 1789ea8dc4b6Seschrock error = 0; 1790ea8dc4b6Seschrock out: 1791468c413aSTim Haley 1792088f3894Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 179399653d4eSeschrock if (error && error != EBADF) 1794b87f3af3Sperrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 1795ea8dc4b6Seschrock spa->spa_load_state = SPA_LOAD_NONE; 1796ea8dc4b6Seschrock spa->spa_ena = 0; 1797ea8dc4b6Seschrock 1798ea8dc4b6Seschrock return (error); 1799fa9e4066Sahrens } 1800fa9e4066Sahrens 1801468c413aSTim Haley static int 1802468c413aSTim Haley spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) 1803468c413aSTim Haley { 1804468c413aSTim Haley spa_unload(spa); 1805468c413aSTim Haley spa_deactivate(spa); 1806468c413aSTim Haley 1807468c413aSTim Haley spa->spa_load_max_txg--; 1808468c413aSTim Haley 1809468c413aSTim Haley spa_activate(spa, spa_mode_global); 1810468c413aSTim Haley spa_async_suspend(spa); 1811468c413aSTim Haley 1812468c413aSTim Haley return (spa_load(spa, state, mosconfig)); 1813468c413aSTim Haley } 1814468c413aSTim Haley 1815468c413aSTim Haley static int 1816468c413aSTim Haley spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, 1817468c413aSTim Haley uint64_t max_request, boolean_t extreme) 1818468c413aSTim Haley { 1819468c413aSTim Haley nvlist_t *config = NULL; 1820468c413aSTim Haley int load_error, rewind_error; 1821468c413aSTim Haley uint64_t safe_rollback_txg; 1822468c413aSTim Haley uint64_t min_txg; 1823468c413aSTim Haley 1824468c413aSTim Haley if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) 1825468c413aSTim Haley spa->spa_load_max_txg = spa->spa_load_txg; 1826468c413aSTim Haley else 1827468c413aSTim Haley spa->spa_load_max_txg = max_request; 1828468c413aSTim Haley 1829468c413aSTim Haley load_error = rewind_error = spa_load(spa, state, mosconfig); 1830468c413aSTim Haley if (load_error == 0) 
1831468c413aSTim Haley return (0); 1832468c413aSTim Haley 1833468c413aSTim Haley if (spa->spa_root_vdev != NULL) 1834468c413aSTim Haley config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1835468c413aSTim Haley 1836468c413aSTim Haley spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; 1837468c413aSTim Haley spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; 1838468c413aSTim Haley 1839468c413aSTim Haley /* specific txg requested */ 1840468c413aSTim Haley if (spa->spa_load_max_txg != UINT64_MAX && !extreme) { 1841468c413aSTim Haley nvlist_free(config); 1842468c413aSTim Haley return (load_error); 1843468c413aSTim Haley } 1844468c413aSTim Haley 1845468c413aSTim Haley /* Price of rolling back is discarding txgs, including log */ 1846468c413aSTim Haley if (state == SPA_LOAD_RECOVER) 1847468c413aSTim Haley spa->spa_log_state = SPA_LOG_CLEAR; 1848468c413aSTim Haley 1849468c413aSTim Haley spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; 1850468c413aSTim Haley safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE; 1851468c413aSTim Haley 1852468c413aSTim Haley min_txg = extreme ? TXG_INITIAL : safe_rollback_txg; 1853468c413aSTim Haley while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) { 1854468c413aSTim Haley if (spa->spa_load_max_txg < safe_rollback_txg) 1855468c413aSTim Haley spa->spa_extreme_rewind = B_TRUE; 1856468c413aSTim Haley rewind_error = spa_load_retry(spa, state, mosconfig); 1857468c413aSTim Haley } 1858468c413aSTim Haley 1859468c413aSTim Haley if (config) 1860468c413aSTim Haley spa_rewind_data_to_nvlist(spa, config); 1861468c413aSTim Haley 1862468c413aSTim Haley spa->spa_extreme_rewind = B_FALSE; 1863468c413aSTim Haley spa->spa_load_max_txg = UINT64_MAX; 1864468c413aSTim Haley 1865468c413aSTim Haley if (config && (rewind_error || state != SPA_LOAD_RECOVER)) 1866468c413aSTim Haley spa_config_set(spa, config); 1867468c413aSTim Haley 1868468c413aSTim Haley return (state == SPA_LOAD_RECOVER ? 
rewind_error : load_error); 1869468c413aSTim Haley } 1870468c413aSTim Haley 1871fa9e4066Sahrens /* 1872fa9e4066Sahrens * Pool Open/Import 1873fa9e4066Sahrens * 1874fa9e4066Sahrens * The import case is identical to an open except that the configuration is sent 1875fa9e4066Sahrens * down from userland, instead of grabbed from the configuration cache. For the 1876fa9e4066Sahrens * case of an open, the pool configuration will exist in the 18773d7072f8Seschrock * POOL_STATE_UNINITIALIZED state. 1878fa9e4066Sahrens * 1879fa9e4066Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 1880fa9e4066Sahrens * the same time open the pool, without having to keep around the spa_t in some 1881fa9e4066Sahrens * ambiguous state. 1882fa9e4066Sahrens */ 1883fa9e4066Sahrens static int 1884468c413aSTim Haley spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, 1885468c413aSTim Haley nvlist_t **config) 1886fa9e4066Sahrens { 1887fa9e4066Sahrens spa_t *spa; 1888468c413aSTim Haley boolean_t norewind; 1889468c413aSTim Haley boolean_t extreme; 1890468c413aSTim Haley zpool_rewind_policy_t policy; 1891468c413aSTim Haley spa_load_state_t state = SPA_LOAD_OPEN; 1892fa9e4066Sahrens int error; 1893fa9e4066Sahrens int locked = B_FALSE; 1894fa9e4066Sahrens 1895fa9e4066Sahrens *spapp = NULL; 1896fa9e4066Sahrens 1897468c413aSTim Haley zpool_get_rewind_policy(nvpolicy, &policy); 1898468c413aSTim Haley if (policy.zrp_request & ZPOOL_DO_REWIND) 1899468c413aSTim Haley state = SPA_LOAD_RECOVER; 1900468c413aSTim Haley norewind = (policy.zrp_request == ZPOOL_NO_REWIND); 1901468c413aSTim Haley extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0); 1902468c413aSTim Haley 1903fa9e4066Sahrens /* 1904fa9e4066Sahrens * As disgusting as this is, we need to support recursive calls to this 1905fa9e4066Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 1906fa9e4066Sahrens * up calling spa_open() again. 
The real fix is to figure out how to 1907fa9e4066Sahrens * avoid dsl_dir_open() calling this in the first place. 1908fa9e4066Sahrens */ 1909fa9e4066Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 1910fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 1911fa9e4066Sahrens locked = B_TRUE; 1912fa9e4066Sahrens } 1913fa9e4066Sahrens 1914fa9e4066Sahrens if ((spa = spa_lookup(pool)) == NULL) { 1915fa9e4066Sahrens if (locked) 1916fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1917fa9e4066Sahrens return (ENOENT); 1918fa9e4066Sahrens } 1919468c413aSTim Haley 1920fa9e4066Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1921fa9e4066Sahrens 19228ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 1923fa9e4066Sahrens 1924468c413aSTim Haley if (spa->spa_last_open_failed && norewind) { 1925468c413aSTim Haley if (config != NULL && spa->spa_config) 1926468c413aSTim Haley VERIFY(nvlist_dup(spa->spa_config, 1927468c413aSTim Haley config, KM_SLEEP) == 0); 1928468c413aSTim Haley spa_deactivate(spa); 1929468c413aSTim Haley if (locked) 1930468c413aSTim Haley mutex_exit(&spa_namespace_lock); 1931468c413aSTim Haley return (spa->spa_last_open_failed); 1932468c413aSTim Haley } 1933468c413aSTim Haley 1934468c413aSTim Haley if (state != SPA_LOAD_RECOVER) 1935468c413aSTim Haley spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; 1936468c413aSTim Haley 1937468c413aSTim Haley error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, 1938468c413aSTim Haley extreme); 1939fa9e4066Sahrens 1940fa9e4066Sahrens if (error == EBADF) { 1941fa9e4066Sahrens /* 1942560e6e96Seschrock * If vdev_validate() returns failure (indicated by 1943560e6e96Seschrock * EBADF), it indicates that one of the vdevs indicates 1944560e6e96Seschrock * that the pool has been exported or destroyed. If 1945560e6e96Seschrock * this is the case, the config cache is out of sync and 1946560e6e96Seschrock * we should remove the pool from the namespace. 
1947fa9e4066Sahrens */ 1948fa9e4066Sahrens spa_unload(spa); 1949fa9e4066Sahrens spa_deactivate(spa); 1950c5904d13Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 1951fa9e4066Sahrens spa_remove(spa); 1952fa9e4066Sahrens if (locked) 1953fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1954fa9e4066Sahrens return (ENOENT); 1955ea8dc4b6Seschrock } 1956ea8dc4b6Seschrock 1957ea8dc4b6Seschrock if (error) { 1958fa9e4066Sahrens /* 1959fa9e4066Sahrens * We can't open the pool, but we still have useful 1960fa9e4066Sahrens * information: the state of each vdev after the 1961fa9e4066Sahrens * attempted vdev_open(). Return this to the user. 1962fa9e4066Sahrens */ 1963468c413aSTim Haley if (config != NULL && spa->spa_config) 1964468c413aSTim Haley VERIFY(nvlist_dup(spa->spa_config, config, 1965468c413aSTim Haley KM_SLEEP) == 0); 1966fa9e4066Sahrens spa_unload(spa); 1967fa9e4066Sahrens spa_deactivate(spa); 1968468c413aSTim Haley spa->spa_last_open_failed = error; 1969fa9e4066Sahrens if (locked) 1970fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1971fa9e4066Sahrens *spapp = NULL; 1972fa9e4066Sahrens return (error); 1973fa9e4066Sahrens } 1974468c413aSTim Haley 1975fa9e4066Sahrens } 1976fa9e4066Sahrens 1977fa9e4066Sahrens spa_open_ref(spa, tag); 19783d7072f8Seschrock 1979468c413aSTim Haley spa->spa_last_open_failed = 0; 1980468c413aSTim Haley 1981468c413aSTim Haley if (config != NULL) 1982468c413aSTim Haley *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1983468c413aSTim Haley 1984468c413aSTim Haley spa->spa_last_ubsync_txg = 0; 1985468c413aSTim Haley spa->spa_load_txg = 0; 1986468c413aSTim Haley 1987fa9e4066Sahrens if (locked) 1988fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 1989fa9e4066Sahrens 1990fa9e4066Sahrens *spapp = spa; 1991fa9e4066Sahrens 1992fa9e4066Sahrens return (0); 1993fa9e4066Sahrens } 1994fa9e4066Sahrens 1995468c413aSTim Haley int 1996468c413aSTim Haley spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, 1997468c413aSTim Haley 
nvlist_t **config) 1998468c413aSTim Haley { 1999468c413aSTim Haley return (spa_open_common(name, spapp, tag, policy, config)); 2000468c413aSTim Haley } 2001468c413aSTim Haley 2002fa9e4066Sahrens int 2003fa9e4066Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 2004fa9e4066Sahrens { 2005468c413aSTim Haley return (spa_open_common(name, spapp, tag, NULL, NULL)); 2006fa9e4066Sahrens } 2007fa9e4066Sahrens 2008ea8dc4b6Seschrock /* 2009ea8dc4b6Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 2010ea8dc4b6Seschrock * preventing it from being exported or destroyed. 2011ea8dc4b6Seschrock */ 2012ea8dc4b6Seschrock spa_t * 2013ea8dc4b6Seschrock spa_inject_addref(char *name) 2014ea8dc4b6Seschrock { 2015ea8dc4b6Seschrock spa_t *spa; 2016ea8dc4b6Seschrock 2017ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 2018ea8dc4b6Seschrock if ((spa = spa_lookup(name)) == NULL) { 2019ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2020ea8dc4b6Seschrock return (NULL); 2021ea8dc4b6Seschrock } 2022ea8dc4b6Seschrock spa->spa_inject_ref++; 2023ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2024ea8dc4b6Seschrock 2025ea8dc4b6Seschrock return (spa); 2026ea8dc4b6Seschrock } 2027ea8dc4b6Seschrock 2028ea8dc4b6Seschrock void 2029ea8dc4b6Seschrock spa_inject_delref(spa_t *spa) 2030ea8dc4b6Seschrock { 2031ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 2032ea8dc4b6Seschrock spa->spa_inject_ref--; 2033ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2034ea8dc4b6Seschrock } 2035ea8dc4b6Seschrock 2036fa94a07fSbrendan /* 2037fa94a07fSbrendan * Add spares device information to the nvlist. 
2038fa94a07fSbrendan */ 203999653d4eSeschrock static void 204099653d4eSeschrock spa_add_spares(spa_t *spa, nvlist_t *config) 204199653d4eSeschrock { 204299653d4eSeschrock nvlist_t **spares; 204399653d4eSeschrock uint_t i, nspares; 204499653d4eSeschrock nvlist_t *nvroot; 204599653d4eSeschrock uint64_t guid; 204699653d4eSeschrock vdev_stat_t *vs; 204799653d4eSeschrock uint_t vsc; 204839c23413Seschrock uint64_t pool; 204999653d4eSeschrock 20506809eb4eSEric Schrock ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 20516809eb4eSEric Schrock 2052fa94a07fSbrendan if (spa->spa_spares.sav_count == 0) 205399653d4eSeschrock return; 205499653d4eSeschrock 205599653d4eSeschrock VERIFY(nvlist_lookup_nvlist(config, 205699653d4eSeschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 2057fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 205899653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 205999653d4eSeschrock if (nspares != 0) { 206099653d4eSeschrock VERIFY(nvlist_add_nvlist_array(nvroot, 206199653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 206299653d4eSeschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 206399653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 206499653d4eSeschrock 206599653d4eSeschrock /* 206699653d4eSeschrock * Go through and find any spares which have since been 206799653d4eSeschrock * repurposed as an active spare. If this is the case, update 206899653d4eSeschrock * their status appropriately. 
206999653d4eSeschrock */ 207099653d4eSeschrock for (i = 0; i < nspares; i++) { 207199653d4eSeschrock VERIFY(nvlist_lookup_uint64(spares[i], 207299653d4eSeschrock ZPOOL_CONFIG_GUID, &guid) == 0); 207389a89ebfSlling if (spa_spare_exists(guid, &pool, NULL) && 207489a89ebfSlling pool != 0ULL) { 207599653d4eSeschrock VERIFY(nvlist_lookup_uint64_array( 207699653d4eSeschrock spares[i], ZPOOL_CONFIG_STATS, 207799653d4eSeschrock (uint64_t **)&vs, &vsc) == 0); 207899653d4eSeschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 207999653d4eSeschrock vs->vs_aux = VDEV_AUX_SPARED; 208099653d4eSeschrock } 208199653d4eSeschrock } 208299653d4eSeschrock } 208399653d4eSeschrock } 208499653d4eSeschrock 2085fa94a07fSbrendan /* 2086fa94a07fSbrendan * Add l2cache device information to the nvlist, including vdev stats. 2087fa94a07fSbrendan */ 2088fa94a07fSbrendan static void 2089fa94a07fSbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 2090fa94a07fSbrendan { 2091fa94a07fSbrendan nvlist_t **l2cache; 2092fa94a07fSbrendan uint_t i, j, nl2cache; 2093fa94a07fSbrendan nvlist_t *nvroot; 2094fa94a07fSbrendan uint64_t guid; 2095fa94a07fSbrendan vdev_t *vd; 2096fa94a07fSbrendan vdev_stat_t *vs; 2097fa94a07fSbrendan uint_t vsc; 2098fa94a07fSbrendan 20996809eb4eSEric Schrock ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 21006809eb4eSEric Schrock 2101fa94a07fSbrendan if (spa->spa_l2cache.sav_count == 0) 2102fa94a07fSbrendan return; 2103fa94a07fSbrendan 2104fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist(config, 2105fa94a07fSbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 2106fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 2107fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 2108fa94a07fSbrendan if (nl2cache != 0) { 2109fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 2110fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2111fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 2112fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, 
&l2cache, &nl2cache) == 0); 2113fa94a07fSbrendan 2114fa94a07fSbrendan /* 2115fa94a07fSbrendan * Update level 2 cache device stats. 2116fa94a07fSbrendan */ 2117fa94a07fSbrendan 2118fa94a07fSbrendan for (i = 0; i < nl2cache; i++) { 2119fa94a07fSbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 2120fa94a07fSbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 2121fa94a07fSbrendan 2122fa94a07fSbrendan vd = NULL; 2123fa94a07fSbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 2124fa94a07fSbrendan if (guid == 2125fa94a07fSbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 2126fa94a07fSbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 2127fa94a07fSbrendan break; 2128fa94a07fSbrendan } 2129fa94a07fSbrendan } 2130fa94a07fSbrendan ASSERT(vd != NULL); 2131fa94a07fSbrendan 2132fa94a07fSbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 2133fa94a07fSbrendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 2134fa94a07fSbrendan vdev_get_stats(vd, vs); 2135fa94a07fSbrendan } 2136fa94a07fSbrendan } 2137fa94a07fSbrendan } 2138fa94a07fSbrendan 2139fa9e4066Sahrens int 2140ea8dc4b6Seschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 2141fa9e4066Sahrens { 2142fa9e4066Sahrens int error; 2143fa9e4066Sahrens spa_t *spa; 2144fa9e4066Sahrens 2145fa9e4066Sahrens *config = NULL; 2146468c413aSTim Haley error = spa_open_common(name, &spa, FTAG, NULL, config); 2147fa9e4066Sahrens 21486809eb4eSEric Schrock if (spa != NULL) { 21496809eb4eSEric Schrock /* 21506809eb4eSEric Schrock * This still leaves a window of inconsistency where the spares 21516809eb4eSEric Schrock * or l2cache devices could change and the config would be 21526809eb4eSEric Schrock * self-inconsistent. 
21536809eb4eSEric Schrock */ 21546809eb4eSEric Schrock spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 2155ea8dc4b6Seschrock 21566809eb4eSEric Schrock if (*config != NULL) { 2157e14bb325SJeff Bonwick VERIFY(nvlist_add_uint64(*config, 21586809eb4eSEric Schrock ZPOOL_CONFIG_ERRCOUNT, 21596809eb4eSEric Schrock spa_get_errlog_size(spa)) == 0); 2160e14bb325SJeff Bonwick 21616809eb4eSEric Schrock if (spa_suspended(spa)) 21626809eb4eSEric Schrock VERIFY(nvlist_add_uint64(*config, 21636809eb4eSEric Schrock ZPOOL_CONFIG_SUSPENDED, 21646809eb4eSEric Schrock spa->spa_failmode) == 0); 21656809eb4eSEric Schrock 21666809eb4eSEric Schrock spa_add_spares(spa, *config); 21676809eb4eSEric Schrock spa_add_l2cache(spa, *config); 21686809eb4eSEric Schrock } 216999653d4eSeschrock } 217099653d4eSeschrock 2171ea8dc4b6Seschrock /* 2172ea8dc4b6Seschrock * We want to get the alternate root even for faulted pools, so we cheat 2173ea8dc4b6Seschrock * and call spa_lookup() directly. 2174ea8dc4b6Seschrock */ 2175ea8dc4b6Seschrock if (altroot) { 2176ea8dc4b6Seschrock if (spa == NULL) { 2177ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 2178ea8dc4b6Seschrock spa = spa_lookup(name); 2179ea8dc4b6Seschrock if (spa) 2180ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 2181ea8dc4b6Seschrock else 2182ea8dc4b6Seschrock altroot[0] = '\0'; 2183ea8dc4b6Seschrock spa = NULL; 2184ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 2185ea8dc4b6Seschrock } else { 2186ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 2187ea8dc4b6Seschrock } 2188ea8dc4b6Seschrock } 2189ea8dc4b6Seschrock 21906809eb4eSEric Schrock if (spa != NULL) { 21916809eb4eSEric Schrock spa_config_exit(spa, SCL_CONFIG, FTAG); 2192fa9e4066Sahrens spa_close(spa, FTAG); 21936809eb4eSEric Schrock } 2194fa9e4066Sahrens 2195fa9e4066Sahrens return (error); 2196fa9e4066Sahrens } 2197fa9e4066Sahrens 219899653d4eSeschrock /* 2199fa94a07fSbrendan * Validate that the auxiliary device array is well formed. 
We must have an 2200fa94a07fSbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 2201fa94a07fSbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 2202fa94a07fSbrendan * specified, as long as they are well-formed. 220399653d4eSeschrock */ 220499653d4eSeschrock static int 2205fa94a07fSbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 2206fa94a07fSbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 2207fa94a07fSbrendan vdev_labeltype_t label) 220899653d4eSeschrock { 2209fa94a07fSbrendan nvlist_t **dev; 2210fa94a07fSbrendan uint_t i, ndev; 221199653d4eSeschrock vdev_t *vd; 221299653d4eSeschrock int error; 221399653d4eSeschrock 2214e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 2215e14bb325SJeff Bonwick 221699653d4eSeschrock /* 2217fa94a07fSbrendan * It's acceptable to have no devs specified. 221899653d4eSeschrock */ 2219fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 222099653d4eSeschrock return (0); 222199653d4eSeschrock 2222fa94a07fSbrendan if (ndev == 0) 222399653d4eSeschrock return (EINVAL); 222499653d4eSeschrock 222599653d4eSeschrock /* 2226fa94a07fSbrendan * Make sure the pool is formatted with a version that supports this 2227fa94a07fSbrendan * device type. 222899653d4eSeschrock */ 2229fa94a07fSbrendan if (spa_version(spa) < version) 223099653d4eSeschrock return (ENOTSUP); 223199653d4eSeschrock 223239c23413Seschrock /* 2233fa94a07fSbrendan * Set the pending device list so we correctly handle device in-use 223439c23413Seschrock * checking. 
223539c23413Seschrock */ 2236fa94a07fSbrendan sav->sav_pending = dev; 2237fa94a07fSbrendan sav->sav_npending = ndev; 223839c23413Seschrock 2239fa94a07fSbrendan for (i = 0; i < ndev; i++) { 2240fa94a07fSbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 224199653d4eSeschrock mode)) != 0) 224239c23413Seschrock goto out; 224399653d4eSeschrock 224499653d4eSeschrock if (!vd->vdev_ops->vdev_op_leaf) { 224599653d4eSeschrock vdev_free(vd); 224639c23413Seschrock error = EINVAL; 224739c23413Seschrock goto out; 224899653d4eSeschrock } 224999653d4eSeschrock 2250fa94a07fSbrendan /* 2251e14bb325SJeff Bonwick * The L2ARC currently only supports disk devices in 2252e14bb325SJeff Bonwick * kernel context. For user-level testing, we allow it. 2253fa94a07fSbrendan */ 2254e14bb325SJeff Bonwick #ifdef _KERNEL 2255fa94a07fSbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 2256fa94a07fSbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 2257fa94a07fSbrendan error = ENOTBLK; 2258fa94a07fSbrendan goto out; 2259fa94a07fSbrendan } 2260e14bb325SJeff Bonwick #endif 226199653d4eSeschrock vd->vdev_top = vd; 226299653d4eSeschrock 226339c23413Seschrock if ((error = vdev_open(vd)) == 0 && 2264fa94a07fSbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 2265fa94a07fSbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 226639c23413Seschrock vd->vdev_guid) == 0); 226739c23413Seschrock } 226899653d4eSeschrock 226999653d4eSeschrock vdev_free(vd); 227039c23413Seschrock 2271fa94a07fSbrendan if (error && 2272fa94a07fSbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 227339c23413Seschrock goto out; 227439c23413Seschrock else 227539c23413Seschrock error = 0; 227699653d4eSeschrock } 227799653d4eSeschrock 227839c23413Seschrock out: 2279fa94a07fSbrendan sav->sav_pending = NULL; 2280fa94a07fSbrendan sav->sav_npending = 0; 228139c23413Seschrock return (error); 228299653d4eSeschrock } 228399653d4eSeschrock 2284fa94a07fSbrendan static int 
2285fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 2286fa94a07fSbrendan { 2287fa94a07fSbrendan int error; 2288fa94a07fSbrendan 2289e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 2290e14bb325SJeff Bonwick 2291fa94a07fSbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 2292fa94a07fSbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 2293fa94a07fSbrendan VDEV_LABEL_SPARE)) != 0) { 2294fa94a07fSbrendan return (error); 2295fa94a07fSbrendan } 2296fa94a07fSbrendan 2297fa94a07fSbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 2298fa94a07fSbrendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 2299fa94a07fSbrendan VDEV_LABEL_L2CACHE)); 2300fa94a07fSbrendan } 2301fa94a07fSbrendan 2302fa94a07fSbrendan static void 2303fa94a07fSbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 2304fa94a07fSbrendan const char *config) 2305fa94a07fSbrendan { 2306fa94a07fSbrendan int i; 2307fa94a07fSbrendan 2308fa94a07fSbrendan if (sav->sav_config != NULL) { 2309fa94a07fSbrendan nvlist_t **olddevs; 2310fa94a07fSbrendan uint_t oldndevs; 2311fa94a07fSbrendan nvlist_t **newdevs; 2312fa94a07fSbrendan 2313fa94a07fSbrendan /* 2314fa94a07fSbrendan * Generate new dev list by concatentating with the 2315fa94a07fSbrendan * current dev list. 
2316fa94a07fSbrendan */ 2317fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 2318fa94a07fSbrendan &olddevs, &oldndevs) == 0); 2319fa94a07fSbrendan 2320fa94a07fSbrendan newdevs = kmem_alloc(sizeof (void *) * 2321fa94a07fSbrendan (ndevs + oldndevs), KM_SLEEP); 2322fa94a07fSbrendan for (i = 0; i < oldndevs; i++) 2323fa94a07fSbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 2324fa94a07fSbrendan KM_SLEEP) == 0); 2325fa94a07fSbrendan for (i = 0; i < ndevs; i++) 2326fa94a07fSbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 2327fa94a07fSbrendan KM_SLEEP) == 0); 2328fa94a07fSbrendan 2329fa94a07fSbrendan VERIFY(nvlist_remove(sav->sav_config, config, 2330fa94a07fSbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 2331fa94a07fSbrendan 2332fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 2333fa94a07fSbrendan config, newdevs, ndevs + oldndevs) == 0); 2334fa94a07fSbrendan for (i = 0; i < oldndevs + ndevs; i++) 2335fa94a07fSbrendan nvlist_free(newdevs[i]); 2336fa94a07fSbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 2337fa94a07fSbrendan } else { 2338fa94a07fSbrendan /* 2339fa94a07fSbrendan * Generate a new dev list. 
2340fa94a07fSbrendan */ 2341fa94a07fSbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 2342fa94a07fSbrendan KM_SLEEP) == 0); 2343fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 2344fa94a07fSbrendan devs, ndevs) == 0); 2345fa94a07fSbrendan } 2346fa94a07fSbrendan } 2347fa94a07fSbrendan 2348fa94a07fSbrendan /* 2349fa94a07fSbrendan * Stop and drop level 2 ARC devices 2350fa94a07fSbrendan */ 2351fa94a07fSbrendan void 2352fa94a07fSbrendan spa_l2cache_drop(spa_t *spa) 2353fa94a07fSbrendan { 2354fa94a07fSbrendan vdev_t *vd; 2355fa94a07fSbrendan int i; 2356fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 2357fa94a07fSbrendan 2358fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) { 2359fa94a07fSbrendan uint64_t pool; 2360fa94a07fSbrendan 2361fa94a07fSbrendan vd = sav->sav_vdevs[i]; 2362fa94a07fSbrendan ASSERT(vd != NULL); 2363fa94a07fSbrendan 23648ad4d6ddSJeff Bonwick if (spa_l2cache_exists(vd->vdev_guid, &pool) && 23658ad4d6ddSJeff Bonwick pool != 0ULL && l2arc_vdev_present(vd)) 2366fa94a07fSbrendan l2arc_remove_vdev(vd); 2367fa94a07fSbrendan if (vd->vdev_isl2cache) 2368fa94a07fSbrendan spa_l2cache_remove(vd); 2369fa94a07fSbrendan vdev_clear_stats(vd); 2370fa94a07fSbrendan (void) vdev_close(vd); 2371fa94a07fSbrendan } 2372fa94a07fSbrendan } 2373fa94a07fSbrendan 2374fa9e4066Sahrens /* 2375fa9e4066Sahrens * Pool Creation 2376fa9e4066Sahrens */ 2377fa9e4066Sahrens int 2378990b4856Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 23790a48a24eStimh const char *history_str, nvlist_t *zplprops) 2380fa9e4066Sahrens { 2381fa9e4066Sahrens spa_t *spa; 2382990b4856Slling char *altroot = NULL; 23830373e76bSbonwick vdev_t *rvd; 2384fa9e4066Sahrens dsl_pool_t *dp; 2385fa9e4066Sahrens dmu_tx_t *tx; 2386573ca77eSGeorge Wilson int error = 0; 2387fa9e4066Sahrens uint64_t txg = TXG_INITIAL; 2388fa94a07fSbrendan nvlist_t **spares, **l2cache; 2389fa94a07fSbrendan uint_t nspares, nl2cache; 2390990b4856Slling uint64_t version; 
2391fa9e4066Sahrens 2392fa9e4066Sahrens /* 2393fa9e4066Sahrens * If this pool already exists, return failure. 2394fa9e4066Sahrens */ 2395fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 2396fa9e4066Sahrens if (spa_lookup(pool) != NULL) { 2397fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2398fa9e4066Sahrens return (EEXIST); 2399fa9e4066Sahrens } 2400fa9e4066Sahrens 2401fa9e4066Sahrens /* 2402fa9e4066Sahrens * Allocate a new spa_t structure. 2403fa9e4066Sahrens */ 2404990b4856Slling (void) nvlist_lookup_string(props, 2405990b4856Slling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 2406468c413aSTim Haley spa = spa_add(pool, NULL, altroot); 24078ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 2408fa9e4066Sahrens 2409990b4856Slling if (props && (error = spa_prop_validate(spa, props))) { 2410990b4856Slling spa_deactivate(spa); 2411990b4856Slling spa_remove(spa); 2412c5904d13Seschrock mutex_exit(&spa_namespace_lock); 2413990b4856Slling return (error); 2414990b4856Slling } 2415990b4856Slling 2416990b4856Slling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 2417990b4856Slling &version) != 0) 2418990b4856Slling version = SPA_VERSION; 2419990b4856Slling ASSERT(version <= SPA_VERSION); 2420*b24ab676SJeff Bonwick 2421*b24ab676SJeff Bonwick spa->spa_first_txg = txg; 2422*b24ab676SJeff Bonwick spa->spa_uberblock.ub_txg = txg - 1; 2423990b4856Slling spa->spa_uberblock.ub_version = version; 2424fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 2425fa9e4066Sahrens 242654d692b7SGeorge Wilson /* 242754d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 242854d692b7SGeorge Wilson */ 242925f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 243025f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 243154d692b7SGeorge Wilson 24320373e76bSbonwick /* 24330373e76bSbonwick * Create the root vdev. 
24340373e76bSbonwick */ 2435e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 24360373e76bSbonwick 243799653d4eSeschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 24380373e76bSbonwick 243999653d4eSeschrock ASSERT(error != 0 || rvd != NULL); 244099653d4eSeschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 24410373e76bSbonwick 2442b7b97454Sperrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 24430373e76bSbonwick error = EINVAL; 244499653d4eSeschrock 244599653d4eSeschrock if (error == 0 && 244699653d4eSeschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 2447fa94a07fSbrendan (error = spa_validate_aux(spa, nvroot, txg, 244899653d4eSeschrock VDEV_ALLOC_ADD)) == 0) { 2449573ca77eSGeorge Wilson for (int c = 0; c < rvd->vdev_children; c++) { 2450573ca77eSGeorge Wilson vdev_metaslab_set_size(rvd->vdev_child[c]); 2451573ca77eSGeorge Wilson vdev_expand(rvd->vdev_child[c], txg); 2452573ca77eSGeorge Wilson } 24530373e76bSbonwick } 24540373e76bSbonwick 2455e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2456fa9e4066Sahrens 245799653d4eSeschrock if (error != 0) { 2458fa9e4066Sahrens spa_unload(spa); 2459fa9e4066Sahrens spa_deactivate(spa); 2460fa9e4066Sahrens spa_remove(spa); 2461fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2462fa9e4066Sahrens return (error); 2463fa9e4066Sahrens } 2464fa9e4066Sahrens 246599653d4eSeschrock /* 246699653d4eSeschrock * Get the list of spares, if specified. 
246799653d4eSeschrock */ 246899653d4eSeschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 246999653d4eSeschrock &spares, &nspares) == 0) { 2470fa94a07fSbrendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 247199653d4eSeschrock KM_SLEEP) == 0); 2472fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 247399653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 2474e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 247599653d4eSeschrock spa_load_spares(spa); 2476e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2477fa94a07fSbrendan spa->spa_spares.sav_sync = B_TRUE; 2478fa94a07fSbrendan } 2479fa94a07fSbrendan 2480fa94a07fSbrendan /* 2481fa94a07fSbrendan * Get the list of level 2 cache devices, if specified. 2482fa94a07fSbrendan */ 2483fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 2484fa94a07fSbrendan &l2cache, &nl2cache) == 0) { 2485fa94a07fSbrendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 2486fa94a07fSbrendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 2487fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 2488fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2489e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2490fa94a07fSbrendan spa_load_l2cache(spa); 2491e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2492fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 249399653d4eSeschrock } 249499653d4eSeschrock 24950a48a24eStimh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2496fa9e4066Sahrens spa->spa_meta_objset = dp->dp_meta_objset; 2497fa9e4066Sahrens 2498fa9e4066Sahrens tx = dmu_tx_create_assigned(dp, txg); 2499fa9e4066Sahrens 2500fa9e4066Sahrens /* 2501fa9e4066Sahrens * Create the pool config object. 
2502fa9e4066Sahrens */ 2503fa9e4066Sahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 2504f7991ba4STim Haley DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2505fa9e4066Sahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2506fa9e4066Sahrens 2507ea8dc4b6Seschrock if (zap_add(spa->spa_meta_objset, 2508fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 2509ea8dc4b6Seschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 2510ea8dc4b6Seschrock cmn_err(CE_PANIC, "failed to add pool config"); 2511ea8dc4b6Seschrock } 2512fa9e4066Sahrens 2513990b4856Slling /* Newly created pools with the right version are always deflated. */ 2514990b4856Slling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 2515990b4856Slling spa->spa_deflate = TRUE; 2516990b4856Slling if (zap_add(spa->spa_meta_objset, 2517990b4856Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 2518990b4856Slling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 2519990b4856Slling cmn_err(CE_PANIC, "failed to add deflate"); 2520990b4856Slling } 252199653d4eSeschrock } 252299653d4eSeschrock 2523fa9e4066Sahrens /* 2524fa9e4066Sahrens * Create the deferred-free bplist object. Turn off compression 2525fa9e4066Sahrens * because sync-to-convergence takes longer if the blocksize 2526fa9e4066Sahrens * keeps changing. 
2527fa9e4066Sahrens */ 2528*b24ab676SJeff Bonwick spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset, 2529fa9e4066Sahrens 1 << 14, tx); 2530*b24ab676SJeff Bonwick dmu_object_set_compress(spa->spa_meta_objset, 2531*b24ab676SJeff Bonwick spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx); 2532fa9e4066Sahrens 2533ea8dc4b6Seschrock if (zap_add(spa->spa_meta_objset, 2534fa9e4066Sahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 2535*b24ab676SJeff Bonwick sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) { 2536ea8dc4b6Seschrock cmn_err(CE_PANIC, "failed to add bplist"); 2537ea8dc4b6Seschrock } 2538fa9e4066Sahrens 253906eeb2adSek /* 254006eeb2adSek * Create the pool's history object. 254106eeb2adSek */ 2542990b4856Slling if (version >= SPA_VERSION_ZPOOL_HISTORY) 2543990b4856Slling spa_history_create_obj(spa, tx); 2544990b4856Slling 2545990b4856Slling /* 2546990b4856Slling * Set pool properties. 2547990b4856Slling */ 2548990b4856Slling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 2549990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 25500a4e9518Sgw spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 2551573ca77eSGeorge Wilson spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 2552*b24ab676SJeff Bonwick 2553379c004dSEric Schrock if (props != NULL) { 2554379c004dSEric Schrock spa_configfile_set(spa, props, B_FALSE); 2555990b4856Slling spa_sync_props(spa, props, CRED(), tx); 2556379c004dSEric Schrock } 255706eeb2adSek 2558*b24ab676SJeff Bonwick /* 2559*b24ab676SJeff Bonwick * Create DDTs (dedup tables). 
2560*b24ab676SJeff Bonwick */ 2561*b24ab676SJeff Bonwick ddt_create(spa); 2562*b24ab676SJeff Bonwick 2563fa9e4066Sahrens dmu_tx_commit(tx); 2564fa9e4066Sahrens 2565fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 2566fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 2567fa9e4066Sahrens 2568fa9e4066Sahrens /* 2569fa9e4066Sahrens * We explicitly wait for the first transaction to complete so that our 2570fa9e4066Sahrens * bean counters are appropriately updated. 2571fa9e4066Sahrens */ 2572fa9e4066Sahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2573fa9e4066Sahrens 2574c5904d13Seschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2575fa9e4066Sahrens 2576990b4856Slling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 2577228975ccSek (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 2578c8e1f6d2SMark J Musante spa_history_log_version(spa, LOG_POOL_CREATE); 2579228975ccSek 2580088f3894Sahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 2581088f3894Sahrens 2582daaa36a7SGeorge Wilson mutex_exit(&spa_namespace_lock); 2583daaa36a7SGeorge Wilson 2584fa9e4066Sahrens return (0); 2585fa9e4066Sahrens } 2586fa9e4066Sahrens 2587e7cbe64fSgw #ifdef _KERNEL 2588e7cbe64fSgw /* 258921ecdf64SLin Ling * Get the root pool information from the root disk, then import the root pool 259021ecdf64SLin Ling * during the system boot up time. 2591e7cbe64fSgw */ 259221ecdf64SLin Ling extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 259321ecdf64SLin Ling 259421ecdf64SLin Ling static nvlist_t * 259521ecdf64SLin Ling spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 2596e7cbe64fSgw { 259721ecdf64SLin Ling nvlist_t *config; 2598e7cbe64fSgw nvlist_t *nvtop, *nvroot; 2599e7cbe64fSgw uint64_t pgid; 2600e7cbe64fSgw 260121ecdf64SLin Ling if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 260221ecdf64SLin Ling return (NULL); 260321ecdf64SLin Ling 2604e7cbe64fSgw /* 2605e7cbe64fSgw * Add this top-level vdev to the child array. 
2606e7cbe64fSgw */ 260721ecdf64SLin Ling VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 260821ecdf64SLin Ling &nvtop) == 0); 260921ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 261021ecdf64SLin Ling &pgid) == 0); 261121ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 2612e7cbe64fSgw 2613e7cbe64fSgw /* 2614e7cbe64fSgw * Put this pool's top-level vdevs into a root vdev. 2615e7cbe64fSgw */ 2616e7cbe64fSgw VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 261721ecdf64SLin Ling VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 261821ecdf64SLin Ling VDEV_TYPE_ROOT) == 0); 2619e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 2620e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 2621e7cbe64fSgw VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 2622e7cbe64fSgw &nvtop, 1) == 0); 2623e7cbe64fSgw 2624e7cbe64fSgw /* 2625e7cbe64fSgw * Replace the existing vdev_tree with the new root vdev in 2626e7cbe64fSgw * this pool's configuration (remove the old, add the new). 2627e7cbe64fSgw */ 2628e7cbe64fSgw VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 2629e7cbe64fSgw nvlist_free(nvroot); 263021ecdf64SLin Ling return (config); 2631e7cbe64fSgw } 2632e7cbe64fSgw 2633e7cbe64fSgw /* 263421ecdf64SLin Ling * Walk the vdev tree and see if we can find a device with "better" 263521ecdf64SLin Ling * configuration. A configuration is "better" if the label on that 263621ecdf64SLin Ling * device has a more recent txg. 
2637051aabe6Staylor */ 263821ecdf64SLin Ling static void 263921ecdf64SLin Ling spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 2640051aabe6Staylor { 2641573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) 264221ecdf64SLin Ling spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 2643051aabe6Staylor 264421ecdf64SLin Ling if (vd->vdev_ops->vdev_op_leaf) { 264521ecdf64SLin Ling nvlist_t *label; 264621ecdf64SLin Ling uint64_t label_txg; 2647051aabe6Staylor 264821ecdf64SLin Ling if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 264921ecdf64SLin Ling &label) != 0) 265021ecdf64SLin Ling return; 2651051aabe6Staylor 265221ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 265321ecdf64SLin Ling &label_txg) == 0); 2654051aabe6Staylor 265521ecdf64SLin Ling /* 265621ecdf64SLin Ling * Do we have a better boot device? 265721ecdf64SLin Ling */ 265821ecdf64SLin Ling if (label_txg > *txg) { 265921ecdf64SLin Ling *txg = label_txg; 266021ecdf64SLin Ling *avd = vd; 2661051aabe6Staylor } 266221ecdf64SLin Ling nvlist_free(label); 2663051aabe6Staylor } 2664051aabe6Staylor } 2665051aabe6Staylor 2666e7cbe64fSgw /* 2667e7cbe64fSgw * Import a root pool. 2668e7cbe64fSgw * 2669051aabe6Staylor * For x86. devpath_list will consist of devid and/or physpath name of 2670051aabe6Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 2671051aabe6Staylor * The GRUB "findroot" command will return the vdev we should boot. 2672e7cbe64fSgw * 2673e7cbe64fSgw * For Sparc, devpath_list consists the physpath name of the booting device 2674e7cbe64fSgw * no matter the rootpool is a single device pool or a mirrored pool. 2675e7cbe64fSgw * e.g. 
2676e7cbe64fSgw * "/pci@1f,0/ide@d/disk@0,0:a" 2677e7cbe64fSgw */ 2678e7cbe64fSgw int 2679051aabe6Staylor spa_import_rootpool(char *devpath, char *devid) 2680e7cbe64fSgw { 268121ecdf64SLin Ling spa_t *spa; 268221ecdf64SLin Ling vdev_t *rvd, *bvd, *avd = NULL; 268321ecdf64SLin Ling nvlist_t *config, *nvtop; 268421ecdf64SLin Ling uint64_t guid, txg; 2685e7cbe64fSgw char *pname; 2686e7cbe64fSgw int error; 2687e7cbe64fSgw 2688e7cbe64fSgw /* 268921ecdf64SLin Ling * Read the label from the boot device and generate a configuration. 2690e7cbe64fSgw */ 2691dedec472SJack Meng config = spa_generate_rootconf(devpath, devid, &guid); 2692dedec472SJack Meng #if defined(_OBP) && defined(_KERNEL) 2693dedec472SJack Meng if (config == NULL) { 2694dedec472SJack Meng if (strstr(devpath, "/iscsi/ssd") != NULL) { 2695dedec472SJack Meng /* iscsi boot */ 2696dedec472SJack Meng get_iscsi_bootpath_phy(devpath); 2697dedec472SJack Meng config = spa_generate_rootconf(devpath, devid, &guid); 2698dedec472SJack Meng } 2699dedec472SJack Meng } 2700dedec472SJack Meng #endif 2701dedec472SJack Meng if (config == NULL) { 270221ecdf64SLin Ling cmn_err(CE_NOTE, "Can not read the pool label from '%s'", 270321ecdf64SLin Ling devpath); 270421ecdf64SLin Ling return (EIO); 270521ecdf64SLin Ling } 2706e7cbe64fSgw 270721ecdf64SLin Ling VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 270821ecdf64SLin Ling &pname) == 0); 270921ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 2710e7cbe64fSgw 27116809eb4eSEric Schrock mutex_enter(&spa_namespace_lock); 27126809eb4eSEric Schrock if ((spa = spa_lookup(pname)) != NULL) { 27136809eb4eSEric Schrock /* 27146809eb4eSEric Schrock * Remove the existing root pool from the namespace so that we 27156809eb4eSEric Schrock * can replace it with the correct config we just read in. 
27166809eb4eSEric Schrock */ 27176809eb4eSEric Schrock spa_remove(spa); 27186809eb4eSEric Schrock } 27196809eb4eSEric Schrock 2720468c413aSTim Haley spa = spa_add(pname, config, NULL); 27216809eb4eSEric Schrock spa->spa_is_root = B_TRUE; 2722bc758434SLin Ling spa->spa_load_verbatim = B_TRUE; 2723e7cbe64fSgw 272421ecdf64SLin Ling /* 272521ecdf64SLin Ling * Build up a vdev tree based on the boot device's label config. 272621ecdf64SLin Ling */ 272721ecdf64SLin Ling VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 272821ecdf64SLin Ling &nvtop) == 0); 272921ecdf64SLin Ling spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 273021ecdf64SLin Ling error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, 273121ecdf64SLin Ling VDEV_ALLOC_ROOTPOOL); 273221ecdf64SLin Ling spa_config_exit(spa, SCL_ALL, FTAG); 273321ecdf64SLin Ling if (error) { 273421ecdf64SLin Ling mutex_exit(&spa_namespace_lock); 273521ecdf64SLin Ling nvlist_free(config); 273621ecdf64SLin Ling cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", 273721ecdf64SLin Ling pname); 273821ecdf64SLin Ling return (error); 273921ecdf64SLin Ling } 274021ecdf64SLin Ling 274121ecdf64SLin Ling /* 274221ecdf64SLin Ling * Get the boot vdev. 274321ecdf64SLin Ling */ 274421ecdf64SLin Ling if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 274521ecdf64SLin Ling cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", 274621ecdf64SLin Ling (u_longlong_t)guid); 274721ecdf64SLin Ling error = ENOENT; 274821ecdf64SLin Ling goto out; 274921ecdf64SLin Ling } 2750e7cbe64fSgw 275121ecdf64SLin Ling /* 275221ecdf64SLin Ling * Determine if there is a better boot device. 275321ecdf64SLin Ling */ 275421ecdf64SLin Ling avd = bvd; 275521ecdf64SLin Ling spa_alt_rootvdev(rvd, &avd, &txg); 275621ecdf64SLin Ling if (avd != bvd) { 275721ecdf64SLin Ling cmn_err(CE_NOTE, "The boot device is 'degraded'. 
Please " 275821ecdf64SLin Ling "try booting from '%s'", avd->vdev_path); 275921ecdf64SLin Ling error = EINVAL; 276021ecdf64SLin Ling goto out; 276121ecdf64SLin Ling } 2762e7cbe64fSgw 276321ecdf64SLin Ling /* 276421ecdf64SLin Ling * If the boot device is part of a spare vdev then ensure that 276521ecdf64SLin Ling * we're booting off the active spare. 276621ecdf64SLin Ling */ 276721ecdf64SLin Ling if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && 276821ecdf64SLin Ling !bvd->vdev_isspare) { 276921ecdf64SLin Ling cmn_err(CE_NOTE, "The boot device is currently spared. Please " 277021ecdf64SLin Ling "try booting from '%s'", 277121ecdf64SLin Ling bvd->vdev_parent->vdev_child[1]->vdev_path); 277221ecdf64SLin Ling error = EINVAL; 277321ecdf64SLin Ling goto out; 277421ecdf64SLin Ling } 277521ecdf64SLin Ling 277621ecdf64SLin Ling VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); 277721ecdf64SLin Ling error = 0; 2778c8e1f6d2SMark J Musante spa_history_log_version(spa, LOG_POOL_IMPORT); 277921ecdf64SLin Ling out: 278021ecdf64SLin Ling spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 278121ecdf64SLin Ling vdev_free(rvd); 278221ecdf64SLin Ling spa_config_exit(spa, SCL_ALL, FTAG); 278321ecdf64SLin Ling mutex_exit(&spa_namespace_lock); 278421ecdf64SLin Ling 278521ecdf64SLin Ling nvlist_free(config); 2786e7cbe64fSgw return (error); 2787e7cbe64fSgw } 278821ecdf64SLin Ling 2789e7cbe64fSgw #endif 2790e7cbe64fSgw 2791e7cbe64fSgw /* 27926809eb4eSEric Schrock * Take a pool and insert it into the namespace as if it had been loaded at 27936809eb4eSEric Schrock * boot. 
2794e7cbe64fSgw */ 2795e7cbe64fSgw int 27966809eb4eSEric Schrock spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props) 2797e7cbe64fSgw { 27986809eb4eSEric Schrock spa_t *spa; 2799468c413aSTim Haley zpool_rewind_policy_t policy; 28006809eb4eSEric Schrock char *altroot = NULL; 28016809eb4eSEric Schrock 28026809eb4eSEric Schrock mutex_enter(&spa_namespace_lock); 28036809eb4eSEric Schrock if (spa_lookup(pool) != NULL) { 28046809eb4eSEric Schrock mutex_exit(&spa_namespace_lock); 28056809eb4eSEric Schrock return (EEXIST); 28066809eb4eSEric Schrock } 28076809eb4eSEric Schrock 28086809eb4eSEric Schrock (void) nvlist_lookup_string(props, 28096809eb4eSEric Schrock zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 2810468c413aSTim Haley spa = spa_add(pool, config, altroot); 28116809eb4eSEric Schrock 2812468c413aSTim Haley zpool_get_rewind_policy(config, &policy); 2813468c413aSTim Haley spa->spa_load_max_txg = policy.zrp_txg; 28144f0f5e5bSVictor Latushkin 2815468c413aSTim Haley spa->spa_load_verbatim = B_TRUE; 28166809eb4eSEric Schrock 28176809eb4eSEric Schrock if (props != NULL) 28186809eb4eSEric Schrock spa_configfile_set(spa, props, B_FALSE); 28196809eb4eSEric Schrock 28206809eb4eSEric Schrock spa_config_sync(spa, B_FALSE, B_TRUE); 28216809eb4eSEric Schrock 28226809eb4eSEric Schrock mutex_exit(&spa_namespace_lock); 2823c8e1f6d2SMark J Musante spa_history_log_version(spa, LOG_POOL_IMPORT); 28246809eb4eSEric Schrock 28256809eb4eSEric Schrock return (0); 2826e7cbe64fSgw } 2827e7cbe64fSgw 28286809eb4eSEric Schrock /* 28296809eb4eSEric Schrock * Import a non-root pool into the system. 
 */
/*
 * Returns 0 on success; EEXIST if the name is already in use; otherwise the
 * error from spa_load_best() or spa_prop_set(), in which case the
 * partially-created spa is unloaded, deactivated, and removed from the
 * namespace before returning.  Acquires and drops spa_namespace_lock.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* A rewind request upgrades the load state to a recovery load. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0));

	/*
	 * Propagate anything learned about failing or best txgs
	 * back to caller
	 */
	spa_rewind_data_to_nvlist(spa, config);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/*
	 * Validate the aux devices in the user-supplied config, but only
	 * if the load itself succeeded (error is checked again below).
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/* Any failure up to this point tears the pool back down. */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}

/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"

/*
 * Probe an importable pool: load it read-only under the reserved name
 * TRYIMPORT_NAME, generate a config nvlist describing what was found
 * (including pool state, label timestamp, bootfs, spares, and l2cache
 * devices), then immediately unload and remove the temporary spa_t.
 *
 * Returns the newly generated config, or NULL if 'tryconfig' lacks a pool
 * name/state or the config was not even parsable.  The returned nvlist is
 * allocated by spa_config_generate() — presumably freed by the caller;
 * confirm against callers.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Swap the leading "$import" component for
				 * the real pool name.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}

/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple.  We make sure there
 * is no more pending I/O and any references to the pool are gone.  Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 */
/*
 * Common worker for spa_destroy(), spa_export(), and spa_reset().
 * 'new_state' selects the behavior: POOL_STATE_DESTROYED,
 * POOL_STATE_EXPORTED, or POOL_STATE_UNINITIALIZED (reset).
 *
 * Returns 0 on success; EROFS if the module is read-only; ENOENT if the
 * pool doesn't exist; EBUSY if active references remain; EXDEV if a
 * non-forced export would strand an active shared spare.
 * Acquires and drops spa_namespace_lock (dropping it temporarily while
 * suspending async tasks).
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the final config back to the caller, if requested. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	/* For everything but reset, drop the pool from the namespace. */
	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Destroy a storage pool.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * Export a storage pool.
 */
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}

/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.  Used by fault injection and testing to
 * return a pool to its freshly-loaded state.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}

/*
 * ==========================================================================
 * Device manipulation
 * ==========================================================================
 */

/*
 * Add a device to a storage pool.
 */
/*
 * 'nvroot' describes the new top-level vdevs (and may also carry spare and
 * l2cache lists).  Returns 0, or an error via spa_vdev_exit(): EINVAL for
 * an empty/unparsable nvroot, or whatever spa_config_parse(), vdev_create(),
 * or spa_validate_aux() report.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* The request must add at least one vdev, spare, or cache device. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}

/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 *
 * Returns 0 and kicks off a resilver on success; otherwise returns via
 * spa_vdev_exit(): ENODEV if 'guid' is unknown, ENOTSUP for disallowed
 * topologies, EINVAL for a malformed nvroot, EOVERFLOW if the new device
 * is too small, or EDOM if its alignment (ashift) is too large.
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* The caller must supply exactly one new leaf device. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* "+ 5" covers the appended "/old" plus the terminating NUL. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	newvd->vdev_crtxg = oldvd->vdev_crtxg;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/* Copy the paths out now; the vdevs may be gone after spa_vdev_exit(). */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(), "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}

/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
3527fa9e4066Sahrens */ 3528fa9e4066Sahrens int 35298ad4d6ddSJeff Bonwick spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 3530fa9e4066Sahrens { 3531fa9e4066Sahrens uint64_t txg; 35328ad4d6ddSJeff Bonwick int error; 3533fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 3534fa9e4066Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 353599653d4eSeschrock boolean_t unspare = B_FALSE; 353699653d4eSeschrock uint64_t unspare_guid; 3537bf82a41bSeschrock size_t len; 3538fa9e4066Sahrens 3539fa9e4066Sahrens txg = spa_vdev_enter(spa); 3540fa9e4066Sahrens 3541c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3542fa9e4066Sahrens 3543fa9e4066Sahrens if (vd == NULL) 3544fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3545fa9e4066Sahrens 35460e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 35470e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 35480e34b6a7Sbonwick 3549fa9e4066Sahrens pvd = vd->vdev_parent; 3550fa9e4066Sahrens 35518ad4d6ddSJeff Bonwick /* 35528ad4d6ddSJeff Bonwick * If the parent/child relationship is not as expected, don't do it. 35538ad4d6ddSJeff Bonwick * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 35548ad4d6ddSJeff Bonwick * vdev that's replacing B with C. The user's intent in replacing 35558ad4d6ddSJeff Bonwick * is to go from M(A,B) to M(A,C). If the user decides to cancel 35568ad4d6ddSJeff Bonwick * the replace by detaching C, the expected behavior is to end up 35578ad4d6ddSJeff Bonwick * M(A,B). But suppose that right after deciding to detach C, 35588ad4d6ddSJeff Bonwick * the replacement of B completes. We would have M(A,C), and then 35598ad4d6ddSJeff Bonwick * ask to detach C, which would leave us with just A -- not what 35608ad4d6ddSJeff Bonwick * the user wanted. To prevent this, we make sure that the 35618ad4d6ddSJeff Bonwick * parent/child relationship hasn't changed -- in this example, 35628ad4d6ddSJeff Bonwick * that C's parent is still the replacing vdev R. 
35638ad4d6ddSJeff Bonwick */ 35648ad4d6ddSJeff Bonwick if (pvd->vdev_guid != pguid && pguid != 0) 35658ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 35668ad4d6ddSJeff Bonwick 3567fa9e4066Sahrens /* 3568fa9e4066Sahrens * If replace_done is specified, only remove this device if it's 356999653d4eSeschrock * the first child of a replacing vdev. For the 'spare' vdev, either 357099653d4eSeschrock * disk can be removed. 357199653d4eSeschrock */ 357299653d4eSeschrock if (replace_done) { 357399653d4eSeschrock if (pvd->vdev_ops == &vdev_replacing_ops) { 357499653d4eSeschrock if (vd->vdev_id != 0) 357599653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 357699653d4eSeschrock } else if (pvd->vdev_ops != &vdev_spare_ops) { 357799653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 357899653d4eSeschrock } 357999653d4eSeschrock } 358099653d4eSeschrock 358199653d4eSeschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 3582e7437265Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 3583fa9e4066Sahrens 3584fa9e4066Sahrens /* 358599653d4eSeschrock * Only mirror, replacing, and spare vdevs support detach. 3586fa9e4066Sahrens */ 3587fa9e4066Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 358899653d4eSeschrock pvd->vdev_ops != &vdev_mirror_ops && 358999653d4eSeschrock pvd->vdev_ops != &vdev_spare_ops) 3590fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3591fa9e4066Sahrens 3592fa9e4066Sahrens /* 35938ad4d6ddSJeff Bonwick * If this device has the only valid copy of some data, 35948ad4d6ddSJeff Bonwick * we cannot safely detach it. 
3595fa9e4066Sahrens */ 35968ad4d6ddSJeff Bonwick if (vdev_dtl_required(vd)) 3597fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3598fa9e4066Sahrens 35998ad4d6ddSJeff Bonwick ASSERT(pvd->vdev_children >= 2); 3600fa9e4066Sahrens 3601bf82a41bSeschrock /* 3602bf82a41bSeschrock * If we are detaching the second disk from a replacing vdev, then 3603bf82a41bSeschrock * check to see if we changed the original vdev's path to have "/old" 3604bf82a41bSeschrock * at the end in spa_vdev_attach(). If so, undo that change now. 3605bf82a41bSeschrock */ 3606bf82a41bSeschrock if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 3607bf82a41bSeschrock pvd->vdev_child[0]->vdev_path != NULL && 3608bf82a41bSeschrock pvd->vdev_child[1]->vdev_path != NULL) { 3609bf82a41bSeschrock ASSERT(pvd->vdev_child[1] == vd); 3610bf82a41bSeschrock cvd = pvd->vdev_child[0]; 3611bf82a41bSeschrock len = strlen(vd->vdev_path); 3612bf82a41bSeschrock if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 3613bf82a41bSeschrock strcmp(cvd->vdev_path + len, "/old") == 0) { 3614bf82a41bSeschrock spa_strfree(cvd->vdev_path); 3615bf82a41bSeschrock cvd->vdev_path = spa_strdup(vd->vdev_path); 3616bf82a41bSeschrock } 3617bf82a41bSeschrock } 3618bf82a41bSeschrock 361999653d4eSeschrock /* 362099653d4eSeschrock * If we are detaching the original disk from a spare, then it implies 362199653d4eSeschrock * that the spare should become a real disk, and be removed from the 362299653d4eSeschrock * active spare list for the pool. 362399653d4eSeschrock */ 362499653d4eSeschrock if (pvd->vdev_ops == &vdev_spare_ops && 36258ad4d6ddSJeff Bonwick vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) 362699653d4eSeschrock unspare = B_TRUE; 362799653d4eSeschrock 3628fa9e4066Sahrens /* 3629fa9e4066Sahrens * Erase the disk labels so the disk can be used for other things. 
3630fa9e4066Sahrens * This must be done after all other error cases are handled, 3631fa9e4066Sahrens * but before we disembowel vd (so we can still do I/O to it). 3632fa9e4066Sahrens * But if we can't do it, don't treat the error as fatal -- 3633fa9e4066Sahrens * it may be that the unwritability of the disk is the reason 3634fa9e4066Sahrens * it's being detached! 3635fa9e4066Sahrens */ 363639c23413Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3637fa9e4066Sahrens 3638fa9e4066Sahrens /* 3639fa9e4066Sahrens * Remove vd from its parent and compact the parent's children. 3640fa9e4066Sahrens */ 3641fa9e4066Sahrens vdev_remove_child(pvd, vd); 3642fa9e4066Sahrens vdev_compact_children(pvd); 3643fa9e4066Sahrens 3644fa9e4066Sahrens /* 3645fa9e4066Sahrens * Remember one of the remaining children so we can get tvd below. 3646fa9e4066Sahrens */ 3647fa9e4066Sahrens cvd = pvd->vdev_child[0]; 3648fa9e4066Sahrens 364999653d4eSeschrock /* 365099653d4eSeschrock * If we need to remove the remaining child from the list of hot spares, 36518ad4d6ddSJeff Bonwick * do it now, marking the vdev as no longer a spare in the process. 36528ad4d6ddSJeff Bonwick * We must do this before vdev_remove_parent(), because that can 36538ad4d6ddSJeff Bonwick * change the GUID if it creates a new toplevel GUID. For a similar 36548ad4d6ddSJeff Bonwick * reason, we must remove the spare now, in the same txg as the detach; 36558ad4d6ddSJeff Bonwick * otherwise someone could attach a new sibling, change the GUID, and 36568ad4d6ddSJeff Bonwick * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
365799653d4eSeschrock */ 365899653d4eSeschrock if (unspare) { 365999653d4eSeschrock ASSERT(cvd->vdev_isspare); 366039c23413Seschrock spa_spare_remove(cvd); 366199653d4eSeschrock unspare_guid = cvd->vdev_guid; 36628ad4d6ddSJeff Bonwick (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 366399653d4eSeschrock } 366499653d4eSeschrock 3665fa9e4066Sahrens /* 3666fa9e4066Sahrens * If the parent mirror/replacing vdev only has one child, 3667fa9e4066Sahrens * the parent is no longer needed. Remove it from the tree. 3668fa9e4066Sahrens */ 3669fa9e4066Sahrens if (pvd->vdev_children == 1) 3670fa9e4066Sahrens vdev_remove_parent(cvd); 3671fa9e4066Sahrens 3672fa9e4066Sahrens /* 3673fa9e4066Sahrens * We don't set tvd until now because the parent we just removed 3674fa9e4066Sahrens * may have been the previous top-level vdev. 3675fa9e4066Sahrens */ 3676fa9e4066Sahrens tvd = cvd->vdev_top; 3677fa9e4066Sahrens ASSERT(tvd->vdev_parent == rvd); 3678fa9e4066Sahrens 3679fa9e4066Sahrens /* 368039c23413Seschrock * Reevaluate the parent vdev state. 3681fa9e4066Sahrens */ 36823d7072f8Seschrock vdev_propagate_state(cvd); 3683fa9e4066Sahrens 3684fa9e4066Sahrens /* 3685573ca77eSGeorge Wilson * If the 'autoexpand' property is set on the pool then automatically 3686573ca77eSGeorge Wilson * try to expand the size of the pool. For example if the device we 3687573ca77eSGeorge Wilson * just detached was smaller than the others, it may be possible to 3688573ca77eSGeorge Wilson * add metaslabs (i.e. grow the pool). We need to reopen the vdev 3689573ca77eSGeorge Wilson * first so that we can obtain the updated sizes of the leaf vdevs. 3690fa9e4066Sahrens */ 3691573ca77eSGeorge Wilson if (spa->spa_autoexpand) { 3692573ca77eSGeorge Wilson vdev_reopen(tvd); 3693573ca77eSGeorge Wilson vdev_expand(tvd, txg); 3694573ca77eSGeorge Wilson } 3695fa9e4066Sahrens 3696fa9e4066Sahrens vdev_config_dirty(tvd); 3697fa9e4066Sahrens 3698fa9e4066Sahrens /* 369939c23413Seschrock * Mark vd's DTL as dirty in this txg. 
vdev_dtl_sync() will see that 370039c23413Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 370139c23413Seschrock * But first make sure we're not on any *other* txg's DTL list, to 370239c23413Seschrock * prevent vd from being accessed after it's freed. 3703fa9e4066Sahrens */ 37048ad4d6ddSJeff Bonwick for (int t = 0; t < TXG_SIZE; t++) 3705fa9e4066Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 3706ecc2d604Sbonwick vd->vdev_detached = B_TRUE; 3707ecc2d604Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 3708fa9e4066Sahrens 37093d7072f8Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 37103d7072f8Seschrock 371199653d4eSeschrock error = spa_vdev_exit(spa, vd, txg, 0); 371299653d4eSeschrock 371399653d4eSeschrock /* 371439c23413Seschrock * If this was the removal of the original device in a hot spare vdev, 371539c23413Seschrock * then we want to go through and remove the device from the hot spare 371639c23413Seschrock * list of every other pool. 
371799653d4eSeschrock */ 371899653d4eSeschrock if (unspare) { 37198ad4d6ddSJeff Bonwick spa_t *myspa = spa; 372099653d4eSeschrock spa = NULL; 372199653d4eSeschrock mutex_enter(&spa_namespace_lock); 372299653d4eSeschrock while ((spa = spa_next(spa)) != NULL) { 372399653d4eSeschrock if (spa->spa_state != POOL_STATE_ACTIVE) 372499653d4eSeschrock continue; 37258ad4d6ddSJeff Bonwick if (spa == myspa) 37268ad4d6ddSJeff Bonwick continue; 37279af0a4dfSJeff Bonwick spa_open_ref(spa, FTAG); 37289af0a4dfSJeff Bonwick mutex_exit(&spa_namespace_lock); 372999653d4eSeschrock (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 37309af0a4dfSJeff Bonwick mutex_enter(&spa_namespace_lock); 37319af0a4dfSJeff Bonwick spa_close(spa, FTAG); 373299653d4eSeschrock } 373399653d4eSeschrock mutex_exit(&spa_namespace_lock); 373499653d4eSeschrock } 373599653d4eSeschrock 373699653d4eSeschrock return (error); 373799653d4eSeschrock } 373899653d4eSeschrock 3739e14bb325SJeff Bonwick static nvlist_t * 3740e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 374199653d4eSeschrock { 3742e14bb325SJeff Bonwick for (int i = 0; i < count; i++) { 3743e14bb325SJeff Bonwick uint64_t guid; 374499653d4eSeschrock 3745e14bb325SJeff Bonwick VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 3746e14bb325SJeff Bonwick &guid) == 0); 374799653d4eSeschrock 3748e14bb325SJeff Bonwick if (guid == target_guid) 3749e14bb325SJeff Bonwick return (nvpp[i]); 375099653d4eSeschrock } 375199653d4eSeschrock 3752e14bb325SJeff Bonwick return (NULL); 3753fa94a07fSbrendan } 3754fa94a07fSbrendan 3755e14bb325SJeff Bonwick static void 3756e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 3757e14bb325SJeff Bonwick nvlist_t *dev_to_remove) 3758fa94a07fSbrendan { 3759e14bb325SJeff Bonwick nvlist_t **newdev = NULL; 3760fa94a07fSbrendan 3761e14bb325SJeff Bonwick if (count > 1) 3762e14bb325SJeff Bonwick newdev = kmem_alloc((count - 1) * sizeof (void 
*), KM_SLEEP); 3763fa94a07fSbrendan 3764e14bb325SJeff Bonwick for (int i = 0, j = 0; i < count; i++) { 3765e14bb325SJeff Bonwick if (dev[i] == dev_to_remove) 3766e14bb325SJeff Bonwick continue; 3767e14bb325SJeff Bonwick VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 3768fa94a07fSbrendan } 3769fa94a07fSbrendan 3770e14bb325SJeff Bonwick VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 3771e14bb325SJeff Bonwick VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 3772fa94a07fSbrendan 3773e14bb325SJeff Bonwick for (int i = 0; i < count - 1; i++) 3774e14bb325SJeff Bonwick nvlist_free(newdev[i]); 3775fa94a07fSbrendan 3776e14bb325SJeff Bonwick if (count > 1) 3777e14bb325SJeff Bonwick kmem_free(newdev, (count - 1) * sizeof (void *)); 3778fa94a07fSbrendan } 3779fa94a07fSbrendan 378088ecc943SGeorge Wilson /* 378188ecc943SGeorge Wilson * Removing a device from the vdev namespace requires several steps 378288ecc943SGeorge Wilson * and can take a significant amount of time. As a result we use 378388ecc943SGeorge Wilson * the spa_vdev_config_[enter/exit] functions which allow us to 378488ecc943SGeorge Wilson * grab and release the spa_config_lock while still holding the namespace 378588ecc943SGeorge Wilson * lock. During each step the configuration is synced out. 378688ecc943SGeorge Wilson */ 378788ecc943SGeorge Wilson 378888ecc943SGeorge Wilson /* 378988ecc943SGeorge Wilson * Initial phase of device removal - stop future allocations from this device. 
379088ecc943SGeorge Wilson */ 379188ecc943SGeorge Wilson void 379288ecc943SGeorge Wilson spa_vdev_remove_start(spa_t *spa, vdev_t *vd) 379388ecc943SGeorge Wilson { 379488ecc943SGeorge Wilson metaslab_group_t *mg = vd->vdev_mg; 379588ecc943SGeorge Wilson 379688ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 379788ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3798*b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 379988ecc943SGeorge Wilson 380088ecc943SGeorge Wilson /* 380188ecc943SGeorge Wilson * Remove our vdev from the allocatable vdevs 380288ecc943SGeorge Wilson */ 380388ecc943SGeorge Wilson if (mg) 380488ecc943SGeorge Wilson metaslab_class_remove(mg->mg_class, mg); 380588ecc943SGeorge Wilson } 380688ecc943SGeorge Wilson 380788ecc943SGeorge Wilson /* 380888ecc943SGeorge Wilson * Evacuate the device. 380988ecc943SGeorge Wilson */ 381088ecc943SGeorge Wilson int 381188ecc943SGeorge Wilson spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 381288ecc943SGeorge Wilson { 381388ecc943SGeorge Wilson uint64_t txg; 381488ecc943SGeorge Wilson int error; 381588ecc943SGeorge Wilson 381688ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 381788ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 3818*b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 381988ecc943SGeorge Wilson 382088ecc943SGeorge Wilson /* 382188ecc943SGeorge Wilson * Evacuate the device. We don't hold the config lock as writer 382288ecc943SGeorge Wilson * since we need to do I/O but we do keep the 382388ecc943SGeorge Wilson * spa_namespace_lock held. Once this completes the device 382488ecc943SGeorge Wilson * should no longer have any blocks allocated on it. 382588ecc943SGeorge Wilson */ 382688ecc943SGeorge Wilson if (vd->vdev_islog) { 382788ecc943SGeorge Wilson /* 382888ecc943SGeorge Wilson * Evacuate the device. 
382988ecc943SGeorge Wilson */ 383088ecc943SGeorge Wilson if (error = dmu_objset_find(spa_name(spa), 383188ecc943SGeorge Wilson zil_vdev_offline, NULL, DS_FIND_CHILDREN)) { 383288ecc943SGeorge Wilson uint64_t txg; 383388ecc943SGeorge Wilson 383488ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 383588ecc943SGeorge Wilson metaslab_class_add(spa->spa_log_class, 383688ecc943SGeorge Wilson vd->vdev_mg); 383788ecc943SGeorge Wilson return (spa_vdev_exit(spa, NULL, txg, error)); 383888ecc943SGeorge Wilson } 383988ecc943SGeorge Wilson txg_wait_synced(spa_get_dsl(spa), 0); 384088ecc943SGeorge Wilson } 384188ecc943SGeorge Wilson 384288ecc943SGeorge Wilson /* 384388ecc943SGeorge Wilson * Remove any remaining MOS metadata associated with the device. 384488ecc943SGeorge Wilson */ 384588ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 384688ecc943SGeorge Wilson vd->vdev_removing = B_TRUE; 384788ecc943SGeorge Wilson vdev_dirty(vd, 0, NULL, txg); 384888ecc943SGeorge Wilson vdev_config_dirty(vd); 384988ecc943SGeorge Wilson spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 385088ecc943SGeorge Wilson 385188ecc943SGeorge Wilson return (0); 385288ecc943SGeorge Wilson } 385388ecc943SGeorge Wilson 385488ecc943SGeorge Wilson /* 385588ecc943SGeorge Wilson * Complete the removal by cleaning up the namespace. 
385688ecc943SGeorge Wilson */ 385788ecc943SGeorge Wilson void 385888ecc943SGeorge Wilson spa_vdev_remove_done(spa_t *spa, vdev_t *vd) 385988ecc943SGeorge Wilson { 386088ecc943SGeorge Wilson vdev_t *rvd = spa->spa_root_vdev; 386188ecc943SGeorge Wilson metaslab_group_t *mg = vd->vdev_mg; 386288ecc943SGeorge Wilson uint64_t id = vd->vdev_id; 386388ecc943SGeorge Wilson boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 386488ecc943SGeorge Wilson 386588ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 386688ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3867*b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 386888ecc943SGeorge Wilson 386988ecc943SGeorge Wilson (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3870*b24ab676SJeff Bonwick 3871*b24ab676SJeff Bonwick if (list_link_active(&vd->vdev_state_dirty_node)) 3872*b24ab676SJeff Bonwick vdev_state_clean(vd); 3873*b24ab676SJeff Bonwick if (list_link_active(&vd->vdev_config_dirty_node)) 3874*b24ab676SJeff Bonwick vdev_config_clean(vd); 3875*b24ab676SJeff Bonwick 387688ecc943SGeorge Wilson vdev_free(vd); 387788ecc943SGeorge Wilson 387888ecc943SGeorge Wilson /* 387988ecc943SGeorge Wilson * It's possible that another thread is trying todo a spa_vdev_add() 388088ecc943SGeorge Wilson * at the same time we're trying remove it. As a result the 388188ecc943SGeorge Wilson * added vdev may not have initialized its metaslabs yet. 
388288ecc943SGeorge Wilson */ 388388ecc943SGeorge Wilson if (mg != NULL) 388488ecc943SGeorge Wilson metaslab_group_destroy(mg); 388588ecc943SGeorge Wilson 388688ecc943SGeorge Wilson if (last_vdev) { 388788ecc943SGeorge Wilson vdev_compact_children(rvd); 388888ecc943SGeorge Wilson } else { 388988ecc943SGeorge Wilson vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 389088ecc943SGeorge Wilson vdev_add_child(rvd, vd); 389188ecc943SGeorge Wilson } 389288ecc943SGeorge Wilson vdev_config_dirty(rvd); 389388ecc943SGeorge Wilson 389488ecc943SGeorge Wilson /* 389588ecc943SGeorge Wilson * Reassess the health of our root vdev. 389688ecc943SGeorge Wilson */ 389788ecc943SGeorge Wilson vdev_reopen(rvd); 389888ecc943SGeorge Wilson } 389988ecc943SGeorge Wilson 3900fa94a07fSbrendan /* 3901fa94a07fSbrendan * Remove a device from the pool. Currently, this supports removing only hot 390288ecc943SGeorge Wilson * spares, slogs, and level 2 ARC devices. 3903fa94a07fSbrendan */ 3904fa94a07fSbrendan int 3905fa94a07fSbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 3906fa94a07fSbrendan { 3907fa94a07fSbrendan vdev_t *vd; 3908e14bb325SJeff Bonwick nvlist_t **spares, **l2cache, *nv; 39098ad4d6ddSJeff Bonwick uint64_t txg = 0; 391088ecc943SGeorge Wilson uint_t nspares, nl2cache; 3911fa94a07fSbrendan int error = 0; 39128ad4d6ddSJeff Bonwick boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 3913fa94a07fSbrendan 39148ad4d6ddSJeff Bonwick if (!locked) 39158ad4d6ddSJeff Bonwick txg = spa_vdev_enter(spa); 3916fa94a07fSbrendan 3917c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3918fa94a07fSbrendan 3919fa94a07fSbrendan if (spa->spa_spares.sav_vdevs != NULL && 3920fa94a07fSbrendan nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 3921e14bb325SJeff Bonwick ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 3922e14bb325SJeff Bonwick (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 3923e14bb325SJeff Bonwick /* 3924e14bb325SJeff Bonwick * Only 
remove the hot spare if it's not currently in use 3925e14bb325SJeff Bonwick * in this pool. 3926e14bb325SJeff Bonwick */ 3927e14bb325SJeff Bonwick if (vd == NULL || unspare) { 3928e14bb325SJeff Bonwick spa_vdev_remove_aux(spa->spa_spares.sav_config, 3929e14bb325SJeff Bonwick ZPOOL_CONFIG_SPARES, spares, nspares, nv); 3930e14bb325SJeff Bonwick spa_load_spares(spa); 3931e14bb325SJeff Bonwick spa->spa_spares.sav_sync = B_TRUE; 3932e14bb325SJeff Bonwick } else { 3933e14bb325SJeff Bonwick error = EBUSY; 3934e14bb325SJeff Bonwick } 3935e14bb325SJeff Bonwick } else if (spa->spa_l2cache.sav_vdevs != NULL && 3936fa94a07fSbrendan nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 3937e14bb325SJeff Bonwick ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 3938e14bb325SJeff Bonwick (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 3939e14bb325SJeff Bonwick /* 3940e14bb325SJeff Bonwick * Cache devices can always be removed. 3941e14bb325SJeff Bonwick */ 3942e14bb325SJeff Bonwick spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 3943e14bb325SJeff Bonwick ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 3944fa94a07fSbrendan spa_load_l2cache(spa); 3945fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 394688ecc943SGeorge Wilson } else if (vd != NULL && vd->vdev_islog) { 394788ecc943SGeorge Wilson ASSERT(!locked); 3948*b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 394988ecc943SGeorge Wilson 395088ecc943SGeorge Wilson /* 395188ecc943SGeorge Wilson * XXX - Once we have bp-rewrite this should 395288ecc943SGeorge Wilson * become the common case. 395388ecc943SGeorge Wilson */ 395488ecc943SGeorge Wilson 395588ecc943SGeorge Wilson /* 395688ecc943SGeorge Wilson * 1. Stop allocations 395788ecc943SGeorge Wilson * 2. Evacuate the device (i.e. kill off stubby and 395888ecc943SGeorge Wilson * metadata) and wait for it to complete (i.e. sync). 395988ecc943SGeorge Wilson * 3. Cleanup the vdev namespace. 
396088ecc943SGeorge Wilson */ 396188ecc943SGeorge Wilson spa_vdev_remove_start(spa, vd); 396288ecc943SGeorge Wilson 3963*b24ab676SJeff Bonwick /* 3964*b24ab676SJeff Bonwick * Wait for the youngest allocations and frees to sync, 3965*b24ab676SJeff Bonwick * and then wait for the deferral of those frees to finish. 3966*b24ab676SJeff Bonwick */ 3967*b24ab676SJeff Bonwick spa_vdev_config_exit(spa, NULL, 3968*b24ab676SJeff Bonwick txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); 3969*b24ab676SJeff Bonwick 397088ecc943SGeorge Wilson if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0) 397188ecc943SGeorge Wilson return (error); 397288ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 397388ecc943SGeorge Wilson 397488ecc943SGeorge Wilson spa_vdev_remove_done(spa, vd); 397588ecc943SGeorge Wilson 3976e14bb325SJeff Bonwick } else if (vd != NULL) { 3977e14bb325SJeff Bonwick /* 3978e14bb325SJeff Bonwick * Normal vdevs cannot be removed (yet). 3979e14bb325SJeff Bonwick */ 3980e14bb325SJeff Bonwick error = ENOTSUP; 3981e14bb325SJeff Bonwick } else { 3982e14bb325SJeff Bonwick /* 3983e14bb325SJeff Bonwick * There is no vdev of any kind with the specified guid. 3984e14bb325SJeff Bonwick */ 3985e14bb325SJeff Bonwick error = ENOENT; 3986fa94a07fSbrendan } 398799653d4eSeschrock 39888ad4d6ddSJeff Bonwick if (!locked) 39898ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, error)); 39908ad4d6ddSJeff Bonwick 39918ad4d6ddSJeff Bonwick return (error); 3992fa9e4066Sahrens } 3993fa9e4066Sahrens 3994fa9e4066Sahrens /* 39953d7072f8Seschrock * Find any device that's done replacing, or a vdev marked 'unspare' that's 39963d7072f8Seschrock * current spared, so we can detach it. 
3997fa9e4066Sahrens */ 3998ea8dc4b6Seschrock static vdev_t * 39993d7072f8Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd) 4000fa9e4066Sahrens { 4001ea8dc4b6Seschrock vdev_t *newvd, *oldvd; 4002fa9e4066Sahrens 4003573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) { 40043d7072f8Seschrock oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 4005ea8dc4b6Seschrock if (oldvd != NULL) 4006ea8dc4b6Seschrock return (oldvd); 4007ea8dc4b6Seschrock } 4008fa9e4066Sahrens 40093d7072f8Seschrock /* 40103d7072f8Seschrock * Check for a completed replacement. 40113d7072f8Seschrock */ 4012fa9e4066Sahrens if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 4013ea8dc4b6Seschrock oldvd = vd->vdev_child[0]; 4014ea8dc4b6Seschrock newvd = vd->vdev_child[1]; 4015ea8dc4b6Seschrock 40168ad4d6ddSJeff Bonwick if (vdev_dtl_empty(newvd, DTL_MISSING) && 40178ad4d6ddSJeff Bonwick !vdev_dtl_required(oldvd)) 4018ea8dc4b6Seschrock return (oldvd); 4019fa9e4066Sahrens } 4020ea8dc4b6Seschrock 40213d7072f8Seschrock /* 40223d7072f8Seschrock * Check for a completed resilver with the 'unspare' flag set. 
40233d7072f8Seschrock */ 40243d7072f8Seschrock if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 40253d7072f8Seschrock newvd = vd->vdev_child[0]; 40263d7072f8Seschrock oldvd = vd->vdev_child[1]; 40273d7072f8Seschrock 40283d7072f8Seschrock if (newvd->vdev_unspare && 40298ad4d6ddSJeff Bonwick vdev_dtl_empty(newvd, DTL_MISSING) && 40308ad4d6ddSJeff Bonwick !vdev_dtl_required(oldvd)) { 40313d7072f8Seschrock newvd->vdev_unspare = 0; 40323d7072f8Seschrock return (oldvd); 40333d7072f8Seschrock } 40343d7072f8Seschrock } 40353d7072f8Seschrock 4036ea8dc4b6Seschrock return (NULL); 4037fa9e4066Sahrens } 4038fa9e4066Sahrens 4039ea8dc4b6Seschrock static void 40403d7072f8Seschrock spa_vdev_resilver_done(spa_t *spa) 4041fa9e4066Sahrens { 40428ad4d6ddSJeff Bonwick vdev_t *vd, *pvd, *ppvd; 40438ad4d6ddSJeff Bonwick uint64_t guid, sguid, pguid, ppguid; 4044ea8dc4b6Seschrock 40458ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4046ea8dc4b6Seschrock 40473d7072f8Seschrock while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 40488ad4d6ddSJeff Bonwick pvd = vd->vdev_parent; 40498ad4d6ddSJeff Bonwick ppvd = pvd->vdev_parent; 4050ea8dc4b6Seschrock guid = vd->vdev_guid; 40518ad4d6ddSJeff Bonwick pguid = pvd->vdev_guid; 40528ad4d6ddSJeff Bonwick ppguid = ppvd->vdev_guid; 40538ad4d6ddSJeff Bonwick sguid = 0; 405499653d4eSeschrock /* 405599653d4eSeschrock * If we have just finished replacing a hot spared device, then 405699653d4eSeschrock * we need to detach the parent's first child (the original hot 405799653d4eSeschrock * spare) as well. 
405899653d4eSeschrock */ 40598ad4d6ddSJeff Bonwick if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) { 406099653d4eSeschrock ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 40618ad4d6ddSJeff Bonwick ASSERT(ppvd->vdev_children == 2); 40628ad4d6ddSJeff Bonwick sguid = ppvd->vdev_child[1]->vdev_guid; 406399653d4eSeschrock } 40648ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 40658ad4d6ddSJeff Bonwick if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 4066ea8dc4b6Seschrock return; 40678ad4d6ddSJeff Bonwick if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 406899653d4eSeschrock return; 40698ad4d6ddSJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4070fa9e4066Sahrens } 4071fa9e4066Sahrens 40728ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 4073fa9e4066Sahrens } 4074fa9e4066Sahrens 4075c67d9675Seschrock /* 40766809eb4eSEric Schrock * Update the stored path or FRU for this vdev. Dirty the vdev configuration, 40776809eb4eSEric Schrock * relying on spa_vdev_enter/exit() to synchronize the labels and cache. 
4078c67d9675Seschrock */ 4079c67d9675Seschrock int 40806809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 40816809eb4eSEric Schrock boolean_t ispath) 4082c67d9675Seschrock { 4083c5904d13Seschrock vdev_t *vd; 4084c67d9675Seschrock uint64_t txg; 4085c67d9675Seschrock 4086c67d9675Seschrock txg = spa_vdev_enter(spa); 4087c67d9675Seschrock 40886809eb4eSEric Schrock if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 4089fa94a07fSbrendan return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 4090c67d9675Seschrock 40910e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 40920e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 40930e34b6a7Sbonwick 40946809eb4eSEric Schrock if (ispath) { 40956809eb4eSEric Schrock spa_strfree(vd->vdev_path); 40966809eb4eSEric Schrock vd->vdev_path = spa_strdup(value); 40976809eb4eSEric Schrock } else { 40986809eb4eSEric Schrock if (vd->vdev_fru != NULL) 40996809eb4eSEric Schrock spa_strfree(vd->vdev_fru); 41006809eb4eSEric Schrock vd->vdev_fru = spa_strdup(value); 41016809eb4eSEric Schrock } 4102c67d9675Seschrock 4103c67d9675Seschrock vdev_config_dirty(vd->vdev_top); 4104c67d9675Seschrock 4105c67d9675Seschrock return (spa_vdev_exit(spa, NULL, txg, 0)); 4106c67d9675Seschrock } 4107c67d9675Seschrock 41086809eb4eSEric Schrock int 41096809eb4eSEric Schrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 41106809eb4eSEric Schrock { 41116809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 41126809eb4eSEric Schrock } 41136809eb4eSEric Schrock 41146809eb4eSEric Schrock int 41156809eb4eSEric Schrock spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 41166809eb4eSEric Schrock { 41176809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 41186809eb4eSEric Schrock } 41196809eb4eSEric Schrock 4120fa9e4066Sahrens /* 4121fa9e4066Sahrens * ========================================================================== 4122fa9e4066Sahrens 
* SPA Scrubbing 4123fa9e4066Sahrens * ========================================================================== 4124fa9e4066Sahrens */ 4125fa9e4066Sahrens 4126ea8dc4b6Seschrock int 4127088f3894Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type) 4128fa9e4066Sahrens { 4129e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 4130bb8b5132Sek 4131fa9e4066Sahrens if ((uint_t)type >= POOL_SCRUB_TYPES) 4132fa9e4066Sahrens return (ENOTSUP); 4133fa9e4066Sahrens 4134fa9e4066Sahrens /* 4135088f3894Sahrens * If a resilver was requested, but there is no DTL on a 4136088f3894Sahrens * writeable leaf device, we have nothing to do. 4137fa9e4066Sahrens */ 4138088f3894Sahrens if (type == POOL_SCRUB_RESILVER && 4139088f3894Sahrens !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 4140088f3894Sahrens spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 4141ea8dc4b6Seschrock return (0); 4142ea8dc4b6Seschrock } 4143fa9e4066Sahrens 4144088f3894Sahrens if (type == POOL_SCRUB_EVERYTHING && 4145088f3894Sahrens spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE && 4146088f3894Sahrens spa->spa_dsl_pool->dp_scrub_isresilver) 4147088f3894Sahrens return (EBUSY); 4148fa9e4066Sahrens 4149088f3894Sahrens if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) { 4150088f3894Sahrens return (dsl_pool_scrub_clean(spa->spa_dsl_pool)); 4151088f3894Sahrens } else if (type == POOL_SCRUB_NONE) { 4152088f3894Sahrens return (dsl_pool_scrub_cancel(spa->spa_dsl_pool)); 4153ea8dc4b6Seschrock } else { 4154088f3894Sahrens return (EINVAL); 4155fa9e4066Sahrens } 4156fa9e4066Sahrens } 4157fa9e4066Sahrens 4158ea8dc4b6Seschrock /* 4159ea8dc4b6Seschrock * ========================================================================== 4160ea8dc4b6Seschrock * SPA async task processing 4161ea8dc4b6Seschrock * ========================================================================== 4162ea8dc4b6Seschrock */ 4163ea8dc4b6Seschrock 4164ea8dc4b6Seschrock static void 41653d7072f8Seschrock 
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	/*
	 * Recursively handle vdevs flagged for asynchronous removal:
	 * mark each one REMOVED and dirty its top-level vdev so the
	 * state change is recorded at the next sync.
	 */
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = 0;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);

		/*
		 * We want to clear the stats, but we don't want to do a full
		 * vdev_clear() as that will cause us to throw away
		 * degraded/faulted state as well as attempt to reopen the
		 * device, all of which is a waste.
		 */
		vd->vdev_stat.vs_read_errors = 0;
		vd->vdev_stat.vs_write_errors = 0;
		vd->vdev_stat.vs_checksum_errors = 0;

		vdev_state_dirty(vd->vdev_top);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}

/*
 * Recursively reopen any vdev flagged for an asynchronous probe.
 */
static void
spa_async_probe(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_probe_wanted) {
		vd->vdev_probe_wanted = 0;
		vdev_reopen(vd);	/* vdev_open() does the actual probe */
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_probe(spa, vd->vdev_child[c]);
}

/*
 * For every leaf vdev with a known physical path, post a DLE (device
 * link event) sysevent so userland can react to a LUN growing.
 * Children are processed before the vdev itself.  No-op unless the
 * pool's 'autoexpand' property is set.
 */
static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
	sysevent_id_t eid;
	nvlist_t *attr;
	char *physpath;

	if (!spa->spa_autoexpand)
		return;

	for (int c = 0; c < vd->vdev_children; c++) {
		vdev_t *cvd = vd->vdev_child[c];
		spa_async_autoexpand(spa, cvd);
	}

	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
		return;

	physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);

	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);

	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);

	nvlist_free(attr);
	kmem_free(physpath, MAXPATHLEN);
}

/*
 * Body of the per-pool async worker thread: atomically take a snapshot
 * of the pending task bitmask, process each requested task, then
 * announce completion and exit.  Dispatched by spa_async_dispatch().
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	/* Grab and clear the pending tasks under the async lock. */
	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		uint64_t old_space, new_space;

		mutex_enter(&spa_namespace_lock);
		old_space = metaslab_class_get_space(spa_normal_class(spa));
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		new_space = metaslab_class_get_space(spa_normal_class(spa));
		mutex_exit(&spa_namespace_lock);

		/*
		 * If the pool grew as a result of the config update,
		 * then log an internal history event.
		 */
		if (new_space != old_space) {
			spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
			    spa, NULL, CRED(),
			    "pool '%s' size: %llu(+%llu)",
			    spa_name(spa), new_space, new_space - old_space);
		}
	}

	/*
	 * See if any devices need to be marked REMOVED.
	 * Covers the root vdev tree as well as the l2cache and spare
	 * aux vdev lists.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/* Autoexpand is skipped while the pool's I/O is suspended. */
	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_async_autoexpand(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}

/*
 * Suspend async task processing and wait for any in-flight async
 * thread to finish.  Suspensions nest (spa_async_suspended is a count).
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Drop one level of async suspension.  Note this does not itself
 * re-dispatch pending tasks; the next spa_async_dispatch() will.
 */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Create the async worker thread if there are pending tasks, we are
 * not suspended, no worker is already running, and the root filesystem
 * is writable.
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL && !vn_is_readonly(rootdir))
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}
/*
 * Record a task in the pool's async bitmask; it will run the next time
 * spa_async_dispatch() fires.
 */
void
spa_async_request(spa_t *spa, int task)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * ==========================================================================
 * SPA syncing routines
 * ==========================================================================
 */

/*
 * Free every block on the deferred bplist (all of which must have been
 * born before this txg), then empty the list.
 */
static void
spa_sync_deferred_bplist(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx, uint64_t txg)
{
	blkptr_t blk;
	uint64_t itor = 0;
	uint8_t c = 1;

	while (bplist_iterate(bpl, &itor, &blk) == 0) {
		ASSERT(blk.blk_birth < txg);
		zio_free(spa, txg, &blk);
	}

	bplist_vacate(bpl, tx);

	/*
	 * Pre-dirty the first block so we sync to convergence faster.
	 * (Usually only the first block is needed.)
	 */
	dmu_write(bpl->bpl_mos, spa->spa_deferred_bplist_obj, 0, 1, &c, tx);
}

/*
 * bplist_sync() callback: issue an async free for one block pointer,
 * as a child of the root zio passed in 'arg'.
 */
static void
spa_sync_free(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	zio_t *zio = arg;

	zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
	    zio->io_flags));
}

/*
 * Pack 'nv' as XDR and write it to MOS object 'obj', recording the
 * packed size in the object's bonus buffer.
 */
static void
spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
	char *packed = NULL;
	size_t bufsize;
	size_t nvsize = 0;
	dmu_buf_t *db;

	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);

	/*
	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
	 * information.  This avoids the dbuf_will_dirty() path and
	 * saves us a pre-read to get data we don't actually care about.
	 */
	bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
	packed = kmem_alloc(bufsize, KM_SLEEP);

	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
	    KM_SLEEP) == 0);
	/* Zero the round-up tail so the on-disk block is deterministic. */
	bzero(packed + nvsize, bufsize - nvsize);

	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);

	kmem_free(packed, bufsize);

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = nvsize;
	dmu_buf_rele(db, FTAG);
}

/*
 * Sync one aux vdev list (spares or l2cache) to its packed-nvlist MOS
 * object, allocating the object and its pool-directory entry on first
 * use.  'config' is the nvlist array key; 'entry' is the directory key.
 */
static void
spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
    const char *config, const char *entry)
{
	nvlist_t *nvroot;
	nvlist_t **list;
	int i;

	if (!sav->sav_sync)
		return;

	/*
	 * Update the MOS nvlist describing the list of available devices.
	 * spa_validate_aux() will have already made sure this nvlist is
	 * valid and the vdevs are labeled appropriately.
	 */
	if (sav->sav_object == 0) {
		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_update(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
		    &sav->sav_object, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	if (sav->sav_count == 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
	} else {
		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
		for (i = 0; i < sav->sav_count; i++)
			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
			    B_FALSE, B_FALSE, B_TRUE);
		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
		    sav->sav_count) == 0);
		for (i = 0; i < sav->sav_count; i++)
			nvlist_free(list[i]);
		kmem_free(list, sav->sav_count * sizeof (void *));
	}

	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
	nvlist_free(nvroot);

	sav->sav_sync = B_FALSE;
}

/*
 * If any vdev configs are dirty, regenerate the pool config and write
 * it to the MOS config object.  The generated config is stashed in
 * spa_config_syncing until the txg commits (see spa_sync()).
 */
static void
spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
{
	nvlist_t *config;

	if (list_is_empty(&spa->spa_config_dirty_list))
		return;

	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

	config = spa_config_generate(spa, spa->spa_root_vdev,
	    dmu_tx_get_txg(tx), B_FALSE);

	spa_config_exit(spa, SCL_STATE, FTAG);

	if (spa->spa_config_syncing)
		nvlist_free(spa->spa_config_syncing);
	spa->spa_config_syncing = config;

	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
}

/*
 * Set zpool properties.
 *
 * Sync-task callback: arg1 is the spa, arg2 the nvlist of properties
 * to set.  Persistent properties are written to the poolprops MOS
 * object; a few (version, altroot, cachefile) get special handling.
 */
static void
spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	spa_t *spa = arg1;
	objset_t *mos = spa->spa_meta_objset;
	nvlist_t *nvp = arg2;
	nvpair_t *elem;
	uint64_t intval;
	char *strval;
	zpool_prop_t prop;
	const char *propname;
	zprop_type_t proptype;

	mutex_enter(&spa->spa_props_lock);

	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvp, elem))) {
		switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
		case ZPOOL_PROP_VERSION:
			/*
			 * Only set version for non-zpool-creation cases
			 * (set/import). spa_create() needs special care
			 * for version setting.
			 */
			if (tx->tx_txg != TXG_INITIAL) {
				VERIFY(nvpair_value_uint64(elem,
				    &intval) == 0);
				ASSERT(intval <= SPA_VERSION);
				ASSERT(intval >= spa_version(spa));
				spa->spa_uberblock.ub_version = intval;
				vdev_config_dirty(spa->spa_root_vdev);
			}
			break;

		case ZPOOL_PROP_ALTROOT:
			/*
			 * 'altroot' is a non-persistent property. It should
			 * have been set temporarily at creation or import time.
			 */
			ASSERT(spa->spa_root != NULL);
			break;

		case ZPOOL_PROP_CACHEFILE:
			/*
			 * 'cachefile' is also a non-persistent property.
			 */
			break;
		default:
			/*
			 * Set pool property values in the poolprops mos object.
			 */
			if (spa->spa_pool_props_object == 0) {
				VERIFY((spa->spa_pool_props_object =
				    zap_create(mos, DMU_OT_POOL_PROPS,
				    DMU_OT_NONE, 0, tx)) > 0);

				VERIFY(zap_update(mos,
				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
				    8, 1, &spa->spa_pool_props_object, tx)
				    == 0);
			}

			/* normalize the property name */
			propname = zpool_prop_to_name(prop);
			proptype = zpool_prop_get_type(prop);

			if (nvpair_type(elem) == DATA_TYPE_STRING) {
				ASSERT(proptype == PROP_TYPE_STRING);
				VERIFY(nvpair_value_string(elem, &strval) == 0);
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    1, strlen(strval) + 1, strval, tx) == 0);

			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
				VERIFY(nvpair_value_uint64(elem, &intval) == 0);

				if (proptype == PROP_TYPE_INDEX) {
					const char *unused;
					VERIFY(zpool_prop_index_to_string(
					    prop, intval, &unused) == 0);
				}
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    8, 1, &intval, tx) == 0);
			} else {
				ASSERT(0); /* not allowed */
			}

			/* Mirror selected properties into in-core state. */
			switch (prop) {
			case ZPOOL_PROP_DELEGATION:
				spa->spa_delegation = intval;
				break;
			case ZPOOL_PROP_BOOTFS:
				spa->spa_bootfs = intval;
				break;
			case ZPOOL_PROP_FAILUREMODE:
				spa->spa_failmode = intval;
				break;
			case ZPOOL_PROP_AUTOEXPAND:
				spa->spa_autoexpand = intval;
				spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
				break;
			case ZPOOL_PROP_DEDUPDITTO:
				spa->spa_dedup_ditto = intval;
				break;
			default:
				break;
			}
		}

		/*
		 * log internal history if this is not a zpool create
		 *
		 * NOTE(review): intval is only assigned above for
		 * uint64-valued properties; for string-valued properties
		 * the %lld below logs whatever intval last held — confirm
		 * this is intended.
		 */
		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
		    tx->tx_txg != TXG_INITIAL) {
			spa_history_internal_log(LOG_POOL_PROPSET,
			    spa, tx, cr, "%s %lld %s",
			    nvpair_name(elem), intval, spa_name(spa));
		}
	}

	mutex_exit(&spa->spa_props_lock);
}

/*
 * Sync the specified transaction group.  New blocks may be dirtied as
 * part of the process, so we iterate until it converges.
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bplist_t *defer_bpl = &spa->spa_deferred_bplist;
	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int error;

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	VERIFY(0 == bplist_open(defer_bpl, mos, spa->spa_deferred_bplist_obj));

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	/*
	 * If anything has changed in this txg, push the deferred frees
	 * from the previous txg.  If not, leave them alone so that we
	 * don't generate work on an otherwise idle system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg))
		spa_sync_deferred_bplist(spa, defer_bpl, tx, txg);

	/*
	 * Iterate to convergence.
	 */
	do {
		int pass = ++spa->spa_sync_pass;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		/*
		 * Early passes free this txg's blocks directly; later
		 * passes defer them to the next txg to bound the amount
		 * of new dirty work each pass can generate.
		 */
		if (pass <= SYNC_PASS_DEFERRED_FREE) {
			zio_t *zio = zio_root(spa, NULL, NULL, 0);
			bplist_sync(free_bpl, spa_sync_free, zio, tx);
			VERIFY(zio_wait(zio) == 0);
		} else {
			bplist_sync(free_bpl, bplist_enqueue_cb, defer_bpl, tx);
		}

		ddt_sync(spa, txg);

		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
			vdev_sync(vd, txg);

	} while (dmu_objset_is_dirty(mos, txg));

	ASSERT(free_bpl->bpl_queue == NULL);

	bplist_close(defer_bpl);

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);

			/*
			 * Pick up to SPA_DVAS_PER_BP healthy, non-log
			 * top-level vdevs, starting from a random child.
			 */
			for (int c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(svd, svdcount, txg,
				    B_TRUE);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(rvd->vdev_child,
				    rvd->vdev_children, txg, B_TRUE);
		}

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/* Label writes failed: suspend and wait before retrying. */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	spa->spa_ubsync = spa->spa_uberblock;

	dsl_pool_sync_done(dp, txg);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
	ASSERT(defer_bpl->bpl_queue == NULL);
	ASSERT(free_bpl->bpl_queue == NULL);

	spa->spa_sync_pass = 0;

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	spa_handle_ignored_writes(spa);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}

/*
 * Sync all pools.  We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		/* Skip inactive or suspended pools. */
		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
			continue;
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state.  All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks.  The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Find a vdev by guid, searching the root vdev tree and, if 'aux' is
 * set, the l2cache and spare aux lists as well.  Returns NULL if not
 * found.
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
	vdev_t *vd;
	int i;

	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
		return (vd);

	if (aux) {
		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
			vd = spa->spa_l2cache.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}

		for (i = 0; i < spa->spa_spares.sav_count; i++) {
			vd = spa->spa_spares.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}
	}

	return (NULL);
}

/*
 * Upgrade the on-disk version of the pool and wait for the change to
 * reach disk.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Does this pool's spare list (configured or pending) contain a device
 * with the given guid?
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check if a pool has an active shared spare device.
 * Note: reference count of an active spare is 2, as a spare and as a replace
 */
static boolean_t
spa_has_active_shared_spare(spa_t *spa)
{
	int i, refcnt;
	uint64_t pool;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++) {
		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
		    refcnt > 2)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Post a sysevent corresponding to the given event.  The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
 * filled in from the spa and (optionally) the vdev.  This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t *ev;
	sysevent_attr_list_t *attr = NULL;
	sysevent_value_t value;
	sysevent_id_t eid;

	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	/* Pool name and guid are always included in the payload. */
	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	/* The vdev is optional; add its guid (and path, if any) if given. */
	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	/*
	 * Once attached, the attribute list is owned by the event; clear
	 * 'attr' so the cleanup path below doesn't free it a second time.
	 */
	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	attr = NULL;

	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}