1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 2199653d4eSeschrock 22fa9e4066Sahrens /* 2398d1cbfeSGeorge Wilson * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 241b497ab8SAdam H. Leventhal * Copyright (c) 2011, 2014 by Delphix. All rights reserved. 25b8289d24SDaniil Lunev * Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved. 265aeb9474SGarrett D'Amore */ 27fa9e4066Sahrens 28fa9e4066Sahrens /* 293e30c24aSWill Andrews * SPA: Storage Pool Allocator 303e30c24aSWill Andrews * 31fa9e4066Sahrens * This file contains all the routines used when modifying on-disk SPA state. 32fa9e4066Sahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 33fa9e4066Sahrens * pool. 
34fa9e4066Sahrens */ 35fa9e4066Sahrens 36fa9e4066Sahrens #include <sys/zfs_context.h> 37ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h> 38fa9e4066Sahrens #include <sys/spa_impl.h> 39fa9e4066Sahrens #include <sys/zio.h> 40fa9e4066Sahrens #include <sys/zio_checksum.h> 41fa9e4066Sahrens #include <sys/dmu.h> 42fa9e4066Sahrens #include <sys/dmu_tx.h> 43fa9e4066Sahrens #include <sys/zap.h> 44fa9e4066Sahrens #include <sys/zil.h> 45b24ab676SJeff Bonwick #include <sys/ddt.h> 46fa9e4066Sahrens #include <sys/vdev_impl.h> 47fa9e4066Sahrens #include <sys/metaslab.h> 4888ecc943SGeorge Wilson #include <sys/metaslab_impl.h> 49fa9e4066Sahrens #include <sys/uberblock_impl.h> 50fa9e4066Sahrens #include <sys/txg.h> 51fa9e4066Sahrens #include <sys/avl.h> 52fa9e4066Sahrens #include <sys/dmu_traverse.h> 53b1b8ab34Slling #include <sys/dmu_objset.h> 54fa9e4066Sahrens #include <sys/unique.h> 55fa9e4066Sahrens #include <sys/dsl_pool.h> 56b1b8ab34Slling #include <sys/dsl_dataset.h> 57fa9e4066Sahrens #include <sys/dsl_dir.h> 58fa9e4066Sahrens #include <sys/dsl_prop.h> 59b1b8ab34Slling #include <sys/dsl_synctask.h> 60fa9e4066Sahrens #include <sys/fs/zfs.h> 61fa94a07fSbrendan #include <sys/arc.h> 62fa9e4066Sahrens #include <sys/callb.h> 6395173954Sek #include <sys/systeminfo.h> 64e7cbe64fSgw #include <sys/spa_boot.h> 65573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h> 663f9d6ad7SLin Ling #include <sys/dsl_scan.h> 67ad135b5dSChristopher Siden #include <sys/zfeature.h> 683b2aab18SMatthew Ahrens #include <sys/dsl_destroy.h> 69fa9e4066Sahrens 705679c89fSjv #ifdef _KERNEL 71dedec472SJack Meng #include <sys/bootprops.h> 7235a5a358SJonathan Adams #include <sys/callb.h> 7335a5a358SJonathan Adams #include <sys/cpupart.h> 7435a5a358SJonathan Adams #include <sys/pool.h> 7535a5a358SJonathan Adams #include <sys/sysdc.h> 7635a5a358SJonathan Adams #include <sys/zone.h> 775679c89fSjv #endif /* _KERNEL */ 785679c89fSjv 79990b4856Slling #include "zfs_prop.h" 80b7b97454Sperrin #include "zfs_comutil.h" 
81990b4856Slling 823cb69f73SWill Andrews /* 833cb69f73SWill Andrews * The interval, in seconds, at which failed configuration cache file writes 843cb69f73SWill Andrews * should be retried. 853cb69f73SWill Andrews */ 863cb69f73SWill Andrews static int zfs_ccw_retry_interval = 300; 873cb69f73SWill Andrews 8835a5a358SJonathan Adams typedef enum zti_modes { 89ec94d322SAdam Leventhal ZTI_MODE_FIXED, /* value is # of threads (min 1) */ 90ec94d322SAdam Leventhal ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ 91ec94d322SAdam Leventhal ZTI_MODE_NULL, /* don't create a taskq */ 92ec94d322SAdam Leventhal ZTI_NMODES 9335a5a358SJonathan Adams } zti_modes_t; 94416e0cd8Sek 95ec94d322SAdam Leventhal #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } 96ec94d322SAdam Leventhal #define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 } 97ec94d322SAdam Leventhal #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 } 982e0c549eSJonathan Adams 99ec94d322SAdam Leventhal #define ZTI_N(n) ZTI_P(n, 1) 100ec94d322SAdam Leventhal #define ZTI_ONE ZTI_N(1) 1012e0c549eSJonathan Adams 1022e0c549eSJonathan Adams typedef struct zio_taskq_info { 103ec94d322SAdam Leventhal zti_modes_t zti_mode; 10480eb36f2SGeorge Wilson uint_t zti_value; 105ec94d322SAdam Leventhal uint_t zti_count; 1062e0c549eSJonathan Adams } zio_taskq_info_t; 1072e0c549eSJonathan Adams 1082e0c549eSJonathan Adams static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 10935a5a358SJonathan Adams "issue", "issue_high", "intr", "intr_high" 1102e0c549eSJonathan Adams }; 1112e0c549eSJonathan Adams 11280eb36f2SGeorge Wilson /* 113ec94d322SAdam Leventhal * This table defines the taskq settings for each ZFS I/O type. When 114ec94d322SAdam Leventhal * initializing a pool, we use this table to create an appropriately sized 115ec94d322SAdam Leventhal * taskq. Some operations are low volume and therefore have a small, static 116ec94d322SAdam Leventhal * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE 117ec94d322SAdam Leventhal * macros. 
Other operations process a large amount of data; the ZTI_BATCH 118ec94d322SAdam Leventhal * macro causes us to create a taskq oriented for throughput. Some operations 119ec94d322SAdam Leventhal * are so high frequency and short-lived that the taskq itself can become a a 120ec94d322SAdam Leventhal * point of lock contention. The ZTI_P(#, #) macro indicates that we need an 121ec94d322SAdam Leventhal * additional degree of parallelism specified by the number of threads per- 122ec94d322SAdam Leventhal * taskq and the number of taskqs; when dispatching an event in this case, the 123ec94d322SAdam Leventhal * particular taskq is chosen at random. 124ec94d322SAdam Leventhal * 125ec94d322SAdam Leventhal * The different taskq priorities are to handle the different contexts (issue 126ec94d322SAdam Leventhal * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that 127ec94d322SAdam Leventhal * need to be handled with minimum delay. 12880eb36f2SGeorge Wilson */ 12980eb36f2SGeorge Wilson const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { 13080eb36f2SGeorge Wilson /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ 131ec94d322SAdam Leventhal { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ 1321b497ab8SAdam H. 
Leventhal { ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ 133ec94d322SAdam Leventhal { ZTI_BATCH, ZTI_N(5), ZTI_N(8), ZTI_N(5) }, /* WRITE */ 134ec94d322SAdam Leventhal { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ 135ec94d322SAdam Leventhal { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ 136ec94d322SAdam Leventhal { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ 1372e0c549eSJonathan Adams }; 1382e0c549eSJonathan Adams 1393b2aab18SMatthew Ahrens static void spa_sync_version(void *arg, dmu_tx_t *tx); 1403b2aab18SMatthew Ahrens static void spa_sync_props(void *arg, dmu_tx_t *tx); 14189a89ebfSlling static boolean_t spa_has_active_shared_spare(spa_t *spa); 1421195e687SMark J Musante static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, 1431195e687SMark J Musante spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, 1441195e687SMark J Musante char **ereport); 145cb04b873SMark J Musante static void spa_vdev_resilver_done(spa_t *spa); 146990b4856Slling 14769962b56SMatthew Ahrens uint_t zio_taskq_batch_pct = 75; /* 1 thread per cpu in pset */ 14835a5a358SJonathan Adams id_t zio_taskq_psrset_bind = PS_NONE; 14935a5a358SJonathan Adams boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ 15035a5a358SJonathan Adams uint_t zio_taskq_basedc = 80; /* base duty cycle */ 15135a5a358SJonathan Adams 15235a5a358SJonathan Adams boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ 15301f55e48SGeorge Wilson extern int zfs_sync_pass_deferred_free; 15435a5a358SJonathan Adams 15535a5a358SJonathan Adams /* 15635a5a358SJonathan Adams * This (illegal) pool name is used when temporarily importing a spa_t in order 15735a5a358SJonathan Adams * to get the vdev stats associated with the imported devices. 
15835a5a358SJonathan Adams */ 15935a5a358SJonathan Adams #define TRYIMPORT_NAME "$import" 16035a5a358SJonathan Adams 161990b4856Slling /* 162990b4856Slling * ========================================================================== 163990b4856Slling * SPA properties routines 164990b4856Slling * ========================================================================== 165990b4856Slling */ 166990b4856Slling 167990b4856Slling /* 168990b4856Slling * Add a (source=src, propname=propval) list to an nvlist. 169990b4856Slling */ 1709d82f4f6Slling static void 171990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 172990b4856Slling uint64_t intval, zprop_source_t src) 173990b4856Slling { 174990b4856Slling const char *propname = zpool_prop_to_name(prop); 175990b4856Slling nvlist_t *propval; 176990b4856Slling 1779d82f4f6Slling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1789d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 179990b4856Slling 1809d82f4f6Slling if (strval != NULL) 1819d82f4f6Slling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 1829d82f4f6Slling else 1839d82f4f6Slling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 184990b4856Slling 1859d82f4f6Slling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 186990b4856Slling nvlist_free(propval); 187990b4856Slling } 188990b4856Slling 189990b4856Slling /* 190990b4856Slling * Get property values from the spa configuration. 
191990b4856Slling */ 1929d82f4f6Slling static void 193990b4856Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 194990b4856Slling { 1954263d13fSGeorge Wilson vdev_t *rvd = spa->spa_root_vdev; 196ad135b5dSChristopher Siden dsl_pool_t *pool = spa->spa_dsl_pool; 197*2e4c9986SGeorge Wilson uint64_t size, alloc, cap, version; 198990b4856Slling zprop_source_t src = ZPROP_SRC_NONE; 199c5904d13Seschrock spa_config_dirent_t *dp; 200*2e4c9986SGeorge Wilson metaslab_class_t *mc = spa_normal_class(spa); 201990b4856Slling 202e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 203e14bb325SJeff Bonwick 2044263d13fSGeorge Wilson if (rvd != NULL) { 205485bbbf5SGeorge Wilson alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 206b24ab676SJeff Bonwick size = metaslab_class_get_space(spa_normal_class(spa)); 207379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 208379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 209485bbbf5SGeorge Wilson spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); 210485bbbf5SGeorge Wilson spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, 211485bbbf5SGeorge Wilson size - alloc, src); 2124263d13fSGeorge Wilson 213*2e4c9986SGeorge Wilson spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, 214*2e4c9986SGeorge Wilson metaslab_class_fragmentation(mc), src); 215*2e4c9986SGeorge Wilson spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, 216*2e4c9986SGeorge Wilson metaslab_class_expandable_space(mc), src); 217f9af39baSGeorge Wilson spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, 218f9af39baSGeorge Wilson (spa_mode(spa) == FREAD), src); 219379c004dSEric Schrock 220485bbbf5SGeorge Wilson cap = (size == 0) ? 
0 : (alloc * 100 / size); 221379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 222379c004dSEric Schrock 223b24ab676SJeff Bonwick spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, 224b24ab676SJeff Bonwick ddt_get_pool_dedup_ratio(spa), src); 225b24ab676SJeff Bonwick 226379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 2274263d13fSGeorge Wilson rvd->vdev_state, src); 228379c004dSEric Schrock 229379c004dSEric Schrock version = spa_version(spa); 230379c004dSEric Schrock if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 231379c004dSEric Schrock src = ZPROP_SRC_DEFAULT; 232379c004dSEric Schrock else 233379c004dSEric Schrock src = ZPROP_SRC_LOCAL; 234379c004dSEric Schrock spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 235379c004dSEric Schrock } 236990b4856Slling 237ad135b5dSChristopher Siden if (pool != NULL) { 238ad135b5dSChristopher Siden /* 239ad135b5dSChristopher Siden * The $FREE directory was introduced in SPA_VERSION_DEADLISTS, 240ad135b5dSChristopher Siden * when opening pools before this version freedir will be NULL. 
241ad135b5dSChristopher Siden */ 2427fd05ac4SMatthew Ahrens if (pool->dp_free_dir != NULL) { 243ad135b5dSChristopher Siden spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, 2447fd05ac4SMatthew Ahrens pool->dp_free_dir->dd_phys->dd_used_bytes, src); 245ad135b5dSChristopher Siden } else { 246ad135b5dSChristopher Siden spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, 247ad135b5dSChristopher Siden NULL, 0, src); 248ad135b5dSChristopher Siden } 2497fd05ac4SMatthew Ahrens 2507fd05ac4SMatthew Ahrens if (pool->dp_leak_dir != NULL) { 2517fd05ac4SMatthew Ahrens spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, 2527fd05ac4SMatthew Ahrens pool->dp_leak_dir->dd_phys->dd_used_bytes, src); 2537fd05ac4SMatthew Ahrens } else { 2547fd05ac4SMatthew Ahrens spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, 2557fd05ac4SMatthew Ahrens NULL, 0, src); 2567fd05ac4SMatthew Ahrens } 257ad135b5dSChristopher Siden } 258ad135b5dSChristopher Siden 2599d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 260990b4856Slling 2618704186eSDan McDonald if (spa->spa_comment != NULL) { 2628704186eSDan McDonald spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, 2638704186eSDan McDonald 0, ZPROP_SRC_LOCAL); 2648704186eSDan McDonald } 2658704186eSDan McDonald 2669d82f4f6Slling if (spa->spa_root != NULL) 2679d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 2689d82f4f6Slling 0, ZPROP_SRC_LOCAL); 269990b4856Slling 270c5904d13Seschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 271c5904d13Seschrock if (dp->scd_path == NULL) { 2729d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 273c5904d13Seschrock "none", 0, ZPROP_SRC_LOCAL); 274c5904d13Seschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 2759d82f4f6Slling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 276c5904d13Seschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 2772f8aaab3Seschrock } 2782f8aaab3Seschrock } 279990b4856Slling } 280990b4856Slling 281990b4856Slling /* 
282990b4856Slling * Get zpool property values. 283990b4856Slling */ 284990b4856Slling int 285990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp) 286990b4856Slling { 287b24ab676SJeff Bonwick objset_t *mos = spa->spa_meta_objset; 288990b4856Slling zap_cursor_t zc; 289990b4856Slling zap_attribute_t za; 290990b4856Slling int err; 291990b4856Slling 2929d82f4f6Slling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 293990b4856Slling 294e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 295e14bb325SJeff Bonwick 296990b4856Slling /* 297990b4856Slling * Get properties from the spa config. 298990b4856Slling */ 2999d82f4f6Slling spa_prop_get_config(spa, nvp); 300990b4856Slling 301990b4856Slling /* If no pool property object, no more prop to get. */ 302afee20e4SGeorge Wilson if (mos == NULL || spa->spa_pool_props_object == 0) { 303990b4856Slling mutex_exit(&spa->spa_props_lock); 304990b4856Slling return (0); 305990b4856Slling } 306990b4856Slling 307990b4856Slling /* 308990b4856Slling * Get properties from the MOS pool property object. 
309990b4856Slling */ 310990b4856Slling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 311990b4856Slling (err = zap_cursor_retrieve(&zc, &za)) == 0; 312990b4856Slling zap_cursor_advance(&zc)) { 313990b4856Slling uint64_t intval = 0; 314990b4856Slling char *strval = NULL; 315990b4856Slling zprop_source_t src = ZPROP_SRC_DEFAULT; 316990b4856Slling zpool_prop_t prop; 317990b4856Slling 318990b4856Slling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 319990b4856Slling continue; 320990b4856Slling 321990b4856Slling switch (za.za_integer_length) { 322990b4856Slling case 8: 323990b4856Slling /* integer property */ 324990b4856Slling if (za.za_first_integer != 325990b4856Slling zpool_prop_default_numeric(prop)) 326990b4856Slling src = ZPROP_SRC_LOCAL; 327990b4856Slling 328990b4856Slling if (prop == ZPOOL_PROP_BOOTFS) { 329990b4856Slling dsl_pool_t *dp; 330990b4856Slling dsl_dataset_t *ds = NULL; 331990b4856Slling 332990b4856Slling dp = spa_get_dsl(spa); 3333b2aab18SMatthew Ahrens dsl_pool_config_enter(dp, FTAG); 334745cd3c5Smaybee if (err = dsl_dataset_hold_obj(dp, 335745cd3c5Smaybee za.za_first_integer, FTAG, &ds)) { 3363b2aab18SMatthew Ahrens dsl_pool_config_exit(dp, FTAG); 337990b4856Slling break; 338990b4856Slling } 339990b4856Slling 340990b4856Slling strval = kmem_alloc( 341990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 342990b4856Slling KM_SLEEP); 343990b4856Slling dsl_dataset_name(ds, strval); 344745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 3453b2aab18SMatthew Ahrens dsl_pool_config_exit(dp, FTAG); 346990b4856Slling } else { 347990b4856Slling strval = NULL; 348990b4856Slling intval = za.za_first_integer; 349990b4856Slling } 350990b4856Slling 3519d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, intval, src); 352990b4856Slling 353990b4856Slling if (strval != NULL) 354990b4856Slling kmem_free(strval, 355990b4856Slling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 356990b4856Slling 357990b4856Slling break; 358990b4856Slling 359990b4856Slling 
case 1: 360990b4856Slling /* string property */ 361990b4856Slling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 362990b4856Slling err = zap_lookup(mos, spa->spa_pool_props_object, 363990b4856Slling za.za_name, 1, za.za_num_integers, strval); 364990b4856Slling if (err) { 365990b4856Slling kmem_free(strval, za.za_num_integers); 366990b4856Slling break; 367990b4856Slling } 3689d82f4f6Slling spa_prop_add_list(*nvp, prop, strval, 0, src); 369990b4856Slling kmem_free(strval, za.za_num_integers); 370990b4856Slling break; 371990b4856Slling 372990b4856Slling default: 373990b4856Slling break; 374990b4856Slling } 375990b4856Slling } 376990b4856Slling zap_cursor_fini(&zc); 377990b4856Slling mutex_exit(&spa->spa_props_lock); 378990b4856Slling out: 379990b4856Slling if (err && err != ENOENT) { 380990b4856Slling nvlist_free(*nvp); 3819d82f4f6Slling *nvp = NULL; 382990b4856Slling return (err); 383990b4856Slling } 384990b4856Slling 385990b4856Slling return (0); 386990b4856Slling } 387990b4856Slling 388990b4856Slling /* 389990b4856Slling * Validate the given pool properties nvlist and modify the list 390990b4856Slling * for the property values to be set. 
391990b4856Slling */ 392990b4856Slling static int 393990b4856Slling spa_prop_validate(spa_t *spa, nvlist_t *props) 394990b4856Slling { 395990b4856Slling nvpair_t *elem; 396990b4856Slling int error = 0, reset_bootfs = 0; 397d5285caeSGeorge Wilson uint64_t objnum = 0; 398ad135b5dSChristopher Siden boolean_t has_feature = B_FALSE; 399990b4856Slling 400990b4856Slling elem = NULL; 401990b4856Slling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 402990b4856Slling uint64_t intval; 403ad135b5dSChristopher Siden char *strval, *slash, *check, *fname; 404ad135b5dSChristopher Siden const char *propname = nvpair_name(elem); 405ad135b5dSChristopher Siden zpool_prop_t prop = zpool_name_to_prop(propname); 406ad135b5dSChristopher Siden 407ad135b5dSChristopher Siden switch (prop) { 408ad135b5dSChristopher Siden case ZPROP_INVAL: 409ad135b5dSChristopher Siden if (!zpool_prop_feature(propname)) { 410be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 411ad135b5dSChristopher Siden break; 412ad135b5dSChristopher Siden } 413990b4856Slling 414ad135b5dSChristopher Siden /* 415ad135b5dSChristopher Siden * Sanitize the input. 
416ad135b5dSChristopher Siden */ 417ad135b5dSChristopher Siden if (nvpair_type(elem) != DATA_TYPE_UINT64) { 418be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 419ad135b5dSChristopher Siden break; 420ad135b5dSChristopher Siden } 421990b4856Slling 422ad135b5dSChristopher Siden if (nvpair_value_uint64(elem, &intval) != 0) { 423be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 424ad135b5dSChristopher Siden break; 425ad135b5dSChristopher Siden } 426ad135b5dSChristopher Siden 427ad135b5dSChristopher Siden if (intval != 0) { 428be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 429ad135b5dSChristopher Siden break; 430ad135b5dSChristopher Siden } 431ad135b5dSChristopher Siden 432ad135b5dSChristopher Siden fname = strchr(propname, '@') + 1; 433ad135b5dSChristopher Siden if (zfeature_lookup_name(fname, NULL) != 0) { 434be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 435ad135b5dSChristopher Siden break; 436ad135b5dSChristopher Siden } 437ad135b5dSChristopher Siden 438ad135b5dSChristopher Siden has_feature = B_TRUE; 439ad135b5dSChristopher Siden break; 440990b4856Slling 441990b4856Slling case ZPOOL_PROP_VERSION: 442990b4856Slling error = nvpair_value_uint64(elem, &intval); 443990b4856Slling if (!error && 444ad135b5dSChristopher Siden (intval < spa_version(spa) || 445ad135b5dSChristopher Siden intval > SPA_VERSION_BEFORE_FEATURES || 446ad135b5dSChristopher Siden has_feature)) 447be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 448990b4856Slling break; 449990b4856Slling 450990b4856Slling case ZPOOL_PROP_DELEGATION: 451990b4856Slling case ZPOOL_PROP_AUTOREPLACE: 452d5b5bb25SRich Morris case ZPOOL_PROP_LISTSNAPS: 453573ca77eSGeorge Wilson case ZPOOL_PROP_AUTOEXPAND: 454990b4856Slling error = nvpair_value_uint64(elem, &intval); 455990b4856Slling if (!error && intval > 1) 456be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 457990b4856Slling break; 458990b4856Slling 459990b4856Slling case ZPOOL_PROP_BOOTFS: 46025f89ee2SJeff Bonwick /* 46125f89ee2SJeff Bonwick * If the pool 
version is less than SPA_VERSION_BOOTFS, 46225f89ee2SJeff Bonwick * or the pool is still being created (version == 0), 46325f89ee2SJeff Bonwick * the bootfs property cannot be set. 46425f89ee2SJeff Bonwick */ 465990b4856Slling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 466be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTSUP); 467990b4856Slling break; 468990b4856Slling } 469990b4856Slling 470990b4856Slling /* 47115e6edf1Sgw * Make sure the vdev config is bootable 472990b4856Slling */ 47315e6edf1Sgw if (!vdev_is_bootable(spa->spa_root_vdev)) { 474be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTSUP); 475990b4856Slling break; 476990b4856Slling } 477990b4856Slling 478990b4856Slling reset_bootfs = 1; 479990b4856Slling 480990b4856Slling error = nvpair_value_string(elem, &strval); 481990b4856Slling 482990b4856Slling if (!error) { 483ad135b5dSChristopher Siden objset_t *os; 48415e6edf1Sgw uint64_t compress; 48515e6edf1Sgw 486990b4856Slling if (strval == NULL || strval[0] == '\0') { 487990b4856Slling objnum = zpool_prop_default_numeric( 488990b4856Slling ZPOOL_PROP_BOOTFS); 489990b4856Slling break; 490990b4856Slling } 491990b4856Slling 492503ad85cSMatthew Ahrens if (error = dmu_objset_hold(strval, FTAG, &os)) 493990b4856Slling break; 49415e6edf1Sgw 495503ad85cSMatthew Ahrens /* Must be ZPL and not gzip compressed. 
*/ 496503ad85cSMatthew Ahrens 497503ad85cSMatthew Ahrens if (dmu_objset_type(os) != DMU_OST_ZFS) { 498be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTSUP); 4993b2aab18SMatthew Ahrens } else if ((error = 5003b2aab18SMatthew Ahrens dsl_prop_get_int_ds(dmu_objset_ds(os), 50115e6edf1Sgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 5023b2aab18SMatthew Ahrens &compress)) == 0 && 50315e6edf1Sgw !BOOTFS_COMPRESS_VALID(compress)) { 504be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTSUP); 50515e6edf1Sgw } else { 50615e6edf1Sgw objnum = dmu_objset_id(os); 50715e6edf1Sgw } 508503ad85cSMatthew Ahrens dmu_objset_rele(os, FTAG); 509990b4856Slling } 510990b4856Slling break; 511e14bb325SJeff Bonwick 5120a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 5130a4e9518Sgw error = nvpair_value_uint64(elem, &intval); 5140a4e9518Sgw if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 5150a4e9518Sgw intval > ZIO_FAILURE_MODE_PANIC)) 516be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 5170a4e9518Sgw 5180a4e9518Sgw /* 5190a4e9518Sgw * This is a special case which only occurs when 5200a4e9518Sgw * the pool has completely failed. This allows 5210a4e9518Sgw * the user to change the in-core failmode property 5220a4e9518Sgw * without syncing it out to disk (I/Os might 5230a4e9518Sgw * currently be blocked). We do this by returning 5240a4e9518Sgw * EIO to the caller (spa_prop_set) to trick it 5250a4e9518Sgw * into thinking we encountered a property validation 5260a4e9518Sgw * error. 
5270a4e9518Sgw */ 528e14bb325SJeff Bonwick if (!error && spa_suspended(spa)) { 5290a4e9518Sgw spa->spa_failmode = intval; 530be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 5310a4e9518Sgw } 5320a4e9518Sgw break; 5332f8aaab3Seschrock 5342f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 5352f8aaab3Seschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 5362f8aaab3Seschrock break; 5372f8aaab3Seschrock 5382f8aaab3Seschrock if (strval[0] == '\0') 5392f8aaab3Seschrock break; 5402f8aaab3Seschrock 5412f8aaab3Seschrock if (strcmp(strval, "none") == 0) 5422f8aaab3Seschrock break; 5432f8aaab3Seschrock 5442f8aaab3Seschrock if (strval[0] != '/') { 545be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 5462f8aaab3Seschrock break; 5472f8aaab3Seschrock } 5482f8aaab3Seschrock 5492f8aaab3Seschrock slash = strrchr(strval, '/'); 5502f8aaab3Seschrock ASSERT(slash != NULL); 5512f8aaab3Seschrock 5522f8aaab3Seschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 5532f8aaab3Seschrock strcmp(slash, "/..") == 0) 554be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 5552f8aaab3Seschrock break; 556b24ab676SJeff Bonwick 5578704186eSDan McDonald case ZPOOL_PROP_COMMENT: 5588704186eSDan McDonald if ((error = nvpair_value_string(elem, &strval)) != 0) 5598704186eSDan McDonald break; 5608704186eSDan McDonald for (check = strval; *check != '\0'; check++) { 5618704186eSDan McDonald /* 5628704186eSDan McDonald * The kernel doesn't have an easy isprint() 5638704186eSDan McDonald * check. For this kernel check, we merely 5648704186eSDan McDonald * check ASCII apart from DEL. Fix this if 5658704186eSDan McDonald * there is an easy-to-use kernel isprint(). 
5668704186eSDan McDonald */ 5678704186eSDan McDonald if (*check >= 0x7f) { 568be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 5698704186eSDan McDonald break; 5708704186eSDan McDonald } 5718704186eSDan McDonald check++; 5728704186eSDan McDonald } 5738704186eSDan McDonald if (strlen(strval) > ZPROP_MAX_COMMENT) 5748704186eSDan McDonald error = E2BIG; 5758704186eSDan McDonald break; 5768704186eSDan McDonald 577b24ab676SJeff Bonwick case ZPOOL_PROP_DEDUPDITTO: 578b24ab676SJeff Bonwick if (spa_version(spa) < SPA_VERSION_DEDUP) 579be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTSUP); 580b24ab676SJeff Bonwick else 581b24ab676SJeff Bonwick error = nvpair_value_uint64(elem, &intval); 582b24ab676SJeff Bonwick if (error == 0 && 583b24ab676SJeff Bonwick intval != 0 && intval < ZIO_DEDUPDITTO_MIN) 584be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 585b24ab676SJeff Bonwick break; 586990b4856Slling } 587990b4856Slling 588990b4856Slling if (error) 589990b4856Slling break; 590990b4856Slling } 591990b4856Slling 592990b4856Slling if (!error && reset_bootfs) { 593990b4856Slling error = nvlist_remove(props, 594990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 595990b4856Slling 596990b4856Slling if (!error) { 597990b4856Slling error = nvlist_add_uint64(props, 598990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 599990b4856Slling } 600990b4856Slling } 601990b4856Slling 602990b4856Slling return (error); 603990b4856Slling } 604990b4856Slling 605379c004dSEric Schrock void 606379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 607379c004dSEric Schrock { 608379c004dSEric Schrock char *cachefile; 609379c004dSEric Schrock spa_config_dirent_t *dp; 610379c004dSEric Schrock 611379c004dSEric Schrock if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 612379c004dSEric Schrock &cachefile) != 0) 613379c004dSEric Schrock return; 614379c004dSEric Schrock 615379c004dSEric Schrock dp = kmem_alloc(sizeof 
(spa_config_dirent_t), 616379c004dSEric Schrock KM_SLEEP); 617379c004dSEric Schrock 618379c004dSEric Schrock if (cachefile[0] == '\0') 619379c004dSEric Schrock dp->scd_path = spa_strdup(spa_config_path); 620379c004dSEric Schrock else if (strcmp(cachefile, "none") == 0) 621379c004dSEric Schrock dp->scd_path = NULL; 622379c004dSEric Schrock else 623379c004dSEric Schrock dp->scd_path = spa_strdup(cachefile); 624379c004dSEric Schrock 625379c004dSEric Schrock list_insert_head(&spa->spa_config_list, dp); 626379c004dSEric Schrock if (need_sync) 627379c004dSEric Schrock spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 628379c004dSEric Schrock } 629379c004dSEric Schrock 630990b4856Slling int 631990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp) 632990b4856Slling { 633990b4856Slling int error; 634ad135b5dSChristopher Siden nvpair_t *elem = NULL; 635379c004dSEric Schrock boolean_t need_sync = B_FALSE; 636990b4856Slling 637990b4856Slling if ((error = spa_prop_validate(spa, nvp)) != 0) 638990b4856Slling return (error); 639990b4856Slling 640379c004dSEric Schrock while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 641ad135b5dSChristopher Siden zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); 642379c004dSEric Schrock 643f9af39baSGeorge Wilson if (prop == ZPOOL_PROP_CACHEFILE || 644f9af39baSGeorge Wilson prop == ZPOOL_PROP_ALTROOT || 645f9af39baSGeorge Wilson prop == ZPOOL_PROP_READONLY) 646379c004dSEric Schrock continue; 647379c004dSEric Schrock 648ad135b5dSChristopher Siden if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) { 649ad135b5dSChristopher Siden uint64_t ver; 650ad135b5dSChristopher Siden 651ad135b5dSChristopher Siden if (prop == ZPOOL_PROP_VERSION) { 652ad135b5dSChristopher Siden VERIFY(nvpair_value_uint64(elem, &ver) == 0); 653ad135b5dSChristopher Siden } else { 654ad135b5dSChristopher Siden ASSERT(zpool_prop_feature(nvpair_name(elem))); 655ad135b5dSChristopher Siden ver = SPA_VERSION_FEATURES; 656ad135b5dSChristopher Siden need_sync = B_TRUE; 
657ad135b5dSChristopher Siden } 658ad135b5dSChristopher Siden 659ad135b5dSChristopher Siden /* Save time if the version is already set. */ 660ad135b5dSChristopher Siden if (ver == spa_version(spa)) 661ad135b5dSChristopher Siden continue; 662ad135b5dSChristopher Siden 663ad135b5dSChristopher Siden /* 664ad135b5dSChristopher Siden * In addition to the pool directory object, we might 665ad135b5dSChristopher Siden * create the pool properties object, the features for 666ad135b5dSChristopher Siden * read object, the features for write object, or the 667ad135b5dSChristopher Siden * feature descriptions object. 668ad135b5dSChristopher Siden */ 6693b2aab18SMatthew Ahrens error = dsl_sync_task(spa->spa_name, NULL, 6707d46dc6cSMatthew Ahrens spa_sync_version, &ver, 6717d46dc6cSMatthew Ahrens 6, ZFS_SPACE_CHECK_RESERVED); 672ad135b5dSChristopher Siden if (error) 673ad135b5dSChristopher Siden return (error); 674ad135b5dSChristopher Siden continue; 675ad135b5dSChristopher Siden } 676ad135b5dSChristopher Siden 677379c004dSEric Schrock need_sync = B_TRUE; 678379c004dSEric Schrock break; 679379c004dSEric Schrock } 680379c004dSEric Schrock 681ad135b5dSChristopher Siden if (need_sync) { 6823b2aab18SMatthew Ahrens return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, 6837d46dc6cSMatthew Ahrens nvp, 6, ZFS_SPACE_CHECK_RESERVED)); 684ad135b5dSChristopher Siden } 685ad135b5dSChristopher Siden 686ad135b5dSChristopher Siden return (0); 687990b4856Slling } 688990b4856Slling 689990b4856Slling /* 690990b4856Slling * If the bootfs property value is dsobj, clear it. 
691990b4856Slling */ 692990b4856Slling void 693990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 694990b4856Slling { 695990b4856Slling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 696990b4856Slling VERIFY(zap_remove(spa->spa_meta_objset, 697990b4856Slling spa->spa_pool_props_object, 698990b4856Slling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 699990b4856Slling spa->spa_bootfs = 0; 700990b4856Slling } 701990b4856Slling } 702990b4856Slling 703dfbb9432SGeorge Wilson /*ARGSUSED*/ 704dfbb9432SGeorge Wilson static int 7053b2aab18SMatthew Ahrens spa_change_guid_check(void *arg, dmu_tx_t *tx) 706dfbb9432SGeorge Wilson { 7073b2aab18SMatthew Ahrens uint64_t *newguid = arg; 7083b2aab18SMatthew Ahrens spa_t *spa = dmu_tx_pool(tx)->dp_spa; 709dfbb9432SGeorge Wilson vdev_t *rvd = spa->spa_root_vdev; 710dfbb9432SGeorge Wilson uint64_t vdev_state; 711dfbb9432SGeorge Wilson 712dfbb9432SGeorge Wilson spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 713dfbb9432SGeorge Wilson vdev_state = rvd->vdev_state; 714dfbb9432SGeorge Wilson spa_config_exit(spa, SCL_STATE, FTAG); 715dfbb9432SGeorge Wilson 716dfbb9432SGeorge Wilson if (vdev_state != VDEV_STATE_HEALTHY) 717be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 718dfbb9432SGeorge Wilson 719dfbb9432SGeorge Wilson ASSERT3U(spa_guid(spa), !=, *newguid); 720dfbb9432SGeorge Wilson 721dfbb9432SGeorge Wilson return (0); 722dfbb9432SGeorge Wilson } 723dfbb9432SGeorge Wilson 724dfbb9432SGeorge Wilson static void 7253b2aab18SMatthew Ahrens spa_change_guid_sync(void *arg, dmu_tx_t *tx) 726dfbb9432SGeorge Wilson { 7273b2aab18SMatthew Ahrens uint64_t *newguid = arg; 7283b2aab18SMatthew Ahrens spa_t *spa = dmu_tx_pool(tx)->dp_spa; 729dfbb9432SGeorge Wilson uint64_t oldguid; 730dfbb9432SGeorge Wilson vdev_t *rvd = spa->spa_root_vdev; 731dfbb9432SGeorge Wilson 732dfbb9432SGeorge Wilson oldguid = spa_guid(spa); 733dfbb9432SGeorge Wilson 734dfbb9432SGeorge Wilson spa_config_enter(spa, 
SCL_STATE, FTAG, RW_READER); 735dfbb9432SGeorge Wilson rvd->vdev_guid = *newguid; 736dfbb9432SGeorge Wilson rvd->vdev_guid_sum += (*newguid - oldguid); 737dfbb9432SGeorge Wilson vdev_config_dirty(rvd); 738dfbb9432SGeorge Wilson spa_config_exit(spa, SCL_STATE, FTAG); 739dfbb9432SGeorge Wilson 74020128a08SGeorge Wilson spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", 741dfbb9432SGeorge Wilson oldguid, *newguid); 742dfbb9432SGeorge Wilson } 743dfbb9432SGeorge Wilson 744e9103aaeSGarrett D'Amore /* 745e9103aaeSGarrett D'Amore * Change the GUID for the pool. This is done so that we can later 746e9103aaeSGarrett D'Amore * re-import a pool built from a clone of our own vdevs. We will modify 747e9103aaeSGarrett D'Amore * the root vdev's guid, our own pool guid, and then mark all of our 748e9103aaeSGarrett D'Amore * vdevs dirty. Note that we must make sure that all our vdevs are 749e9103aaeSGarrett D'Amore * online when we do this, or else any vdevs that weren't present 750e9103aaeSGarrett D'Amore * would be orphaned from our pool. We are also going to issue a 751e9103aaeSGarrett D'Amore * sysevent to update any watchers. 
752e9103aaeSGarrett D'Amore */ 753e9103aaeSGarrett D'Amore int 754e9103aaeSGarrett D'Amore spa_change_guid(spa_t *spa) 755e9103aaeSGarrett D'Amore { 756dfbb9432SGeorge Wilson int error; 757dfbb9432SGeorge Wilson uint64_t guid; 758e9103aaeSGarrett D'Amore 7592c1e2b44SGeorge Wilson mutex_enter(&spa->spa_vdev_top_lock); 760dfbb9432SGeorge Wilson mutex_enter(&spa_namespace_lock); 761dfbb9432SGeorge Wilson guid = spa_generate_guid(NULL); 762e9103aaeSGarrett D'Amore 7633b2aab18SMatthew Ahrens error = dsl_sync_task(spa->spa_name, spa_change_guid_check, 7647d46dc6cSMatthew Ahrens spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); 765e9103aaeSGarrett D'Amore 766dfbb9432SGeorge Wilson if (error == 0) { 767dfbb9432SGeorge Wilson spa_config_sync(spa, B_FALSE, B_TRUE); 768dfbb9432SGeorge Wilson spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID); 769dfbb9432SGeorge Wilson } 770e9103aaeSGarrett D'Amore 771dfbb9432SGeorge Wilson mutex_exit(&spa_namespace_lock); 7722c1e2b44SGeorge Wilson mutex_exit(&spa->spa_vdev_top_lock); 773e9103aaeSGarrett D'Amore 774dfbb9432SGeorge Wilson return (error); 775e9103aaeSGarrett D'Amore } 776e9103aaeSGarrett D'Amore 777fa9e4066Sahrens /* 778fa9e4066Sahrens * ========================================================================== 779fa9e4066Sahrens * SPA state manipulation (open/create/destroy/import/export) 780fa9e4066Sahrens * ========================================================================== 781fa9e4066Sahrens */ 782fa9e4066Sahrens 783ea8dc4b6Seschrock static int 784ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b) 785ea8dc4b6Seschrock { 786ea8dc4b6Seschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 787ea8dc4b6Seschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 788ea8dc4b6Seschrock int ret; 789ea8dc4b6Seschrock 790ea8dc4b6Seschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 7917802d7bfSMatthew Ahrens sizeof (zbookmark_phys_t)); 792ea8dc4b6Seschrock 793ea8dc4b6Seschrock if (ret < 0) 
794ea8dc4b6Seschrock return (-1); 795ea8dc4b6Seschrock else if (ret > 0) 796ea8dc4b6Seschrock return (1); 797ea8dc4b6Seschrock else 798ea8dc4b6Seschrock return (0); 799ea8dc4b6Seschrock } 800ea8dc4b6Seschrock 801ea8dc4b6Seschrock /* 802ea8dc4b6Seschrock * Utility function which retrieves copies of the current logs and 803ea8dc4b6Seschrock * re-initializes them in the process. 804ea8dc4b6Seschrock */ 805ea8dc4b6Seschrock void 806ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 807ea8dc4b6Seschrock { 808ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 809ea8dc4b6Seschrock 810ea8dc4b6Seschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 811ea8dc4b6Seschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 812ea8dc4b6Seschrock 813ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 814ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 815ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 816ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 817ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 818ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 819ea8dc4b6Seschrock } 820ea8dc4b6Seschrock 821ec94d322SAdam Leventhal static void 822ec94d322SAdam Leventhal spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) 823fa9e4066Sahrens { 824ec94d322SAdam Leventhal const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; 825ec94d322SAdam Leventhal enum zti_modes mode = ztip->zti_mode; 826ec94d322SAdam Leventhal uint_t value = ztip->zti_value; 827ec94d322SAdam Leventhal uint_t count = ztip->zti_count; 828ec94d322SAdam Leventhal spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; 829ec94d322SAdam Leventhal char name[32]; 8305aeb9474SGarrett D'Amore uint_t flags = 0; 83135a5a358SJonathan Adams boolean_t batch = B_FALSE; 832fa9e4066Sahrens 833ec94d322SAdam Leventhal if (mode == ZTI_MODE_NULL) { 834ec94d322SAdam Leventhal tqs->stqs_count = 0; 835ec94d322SAdam 
Leventhal tqs->stqs_taskq = NULL; 836ec94d322SAdam Leventhal return; 837ec94d322SAdam Leventhal } 838fa9e4066Sahrens 839ec94d322SAdam Leventhal ASSERT3U(count, >, 0); 840fa9e4066Sahrens 841ec94d322SAdam Leventhal tqs->stqs_count = count; 842ec94d322SAdam Leventhal tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); 84335a5a358SJonathan Adams 84469962b56SMatthew Ahrens switch (mode) { 84569962b56SMatthew Ahrens case ZTI_MODE_FIXED: 84669962b56SMatthew Ahrens ASSERT3U(value, >=, 1); 84769962b56SMatthew Ahrens value = MAX(value, 1); 84869962b56SMatthew Ahrens break; 849ec94d322SAdam Leventhal 85069962b56SMatthew Ahrens case ZTI_MODE_BATCH: 85169962b56SMatthew Ahrens batch = B_TRUE; 85269962b56SMatthew Ahrens flags |= TASKQ_THREADS_CPU_PCT; 85369962b56SMatthew Ahrens value = zio_taskq_batch_pct; 85469962b56SMatthew Ahrens break; 855ec94d322SAdam Leventhal 85669962b56SMatthew Ahrens default: 85769962b56SMatthew Ahrens panic("unrecognized mode for %s_%s taskq (%u:%u) in " 85869962b56SMatthew Ahrens "spa_activate()", 85969962b56SMatthew Ahrens zio_type_name[t], zio_taskq_types[q], mode, value); 86069962b56SMatthew Ahrens break; 86169962b56SMatthew Ahrens } 862ec94d322SAdam Leventhal 86369962b56SMatthew Ahrens for (uint_t i = 0; i < count; i++) { 86469962b56SMatthew Ahrens taskq_t *tq; 865ec94d322SAdam Leventhal 866ec94d322SAdam Leventhal if (count > 1) { 867ec94d322SAdam Leventhal (void) snprintf(name, sizeof (name), "%s_%s_%u", 868ec94d322SAdam Leventhal zio_type_name[t], zio_taskq_types[q], i); 869ec94d322SAdam Leventhal } else { 870ec94d322SAdam Leventhal (void) snprintf(name, sizeof (name), "%s_%s", 871ec94d322SAdam Leventhal zio_type_name[t], zio_taskq_types[q]); 872ec94d322SAdam Leventhal } 873ec94d322SAdam Leventhal 874ec94d322SAdam Leventhal if (zio_taskq_sysdc && spa->spa_proc != &p0) { 875ec94d322SAdam Leventhal if (batch) 876ec94d322SAdam Leventhal flags |= TASKQ_DC_BATCH; 877ec94d322SAdam Leventhal 878ec94d322SAdam Leventhal tq = 
taskq_create_sysdc(name, value, 50, INT_MAX, 879ec94d322SAdam Leventhal spa->spa_proc, zio_taskq_basedc, flags); 880ec94d322SAdam Leventhal } else { 88169962b56SMatthew Ahrens pri_t pri = maxclsyspri; 88269962b56SMatthew Ahrens /* 88369962b56SMatthew Ahrens * The write issue taskq can be extremely CPU 88469962b56SMatthew Ahrens * intensive. Run it at slightly lower priority 88569962b56SMatthew Ahrens * than the other taskqs. 88669962b56SMatthew Ahrens */ 88769962b56SMatthew Ahrens if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) 88869962b56SMatthew Ahrens pri--; 88969962b56SMatthew Ahrens 89069962b56SMatthew Ahrens tq = taskq_create_proc(name, value, pri, 50, 891ec94d322SAdam Leventhal INT_MAX, spa->spa_proc, flags); 892ec94d322SAdam Leventhal } 893ec94d322SAdam Leventhal 894ec94d322SAdam Leventhal tqs->stqs_taskq[i] = tq; 895ec94d322SAdam Leventhal } 896ec94d322SAdam Leventhal } 897ec94d322SAdam Leventhal 898ec94d322SAdam Leventhal static void 899ec94d322SAdam Leventhal spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) 900ec94d322SAdam Leventhal { 901ec94d322SAdam Leventhal spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; 902ec94d322SAdam Leventhal 903ec94d322SAdam Leventhal if (tqs->stqs_taskq == NULL) { 904ec94d322SAdam Leventhal ASSERT0(tqs->stqs_count); 905ec94d322SAdam Leventhal return; 906ec94d322SAdam Leventhal } 907ec94d322SAdam Leventhal 908ec94d322SAdam Leventhal for (uint_t i = 0; i < tqs->stqs_count; i++) { 909ec94d322SAdam Leventhal ASSERT3P(tqs->stqs_taskq[i], !=, NULL); 910ec94d322SAdam Leventhal taskq_destroy(tqs->stqs_taskq[i]); 91135a5a358SJonathan Adams } 91235a5a358SJonathan Adams 913ec94d322SAdam Leventhal kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); 914ec94d322SAdam Leventhal tqs->stqs_taskq = NULL; 915ec94d322SAdam Leventhal } 91635a5a358SJonathan Adams 917ec94d322SAdam Leventhal /* 918ec94d322SAdam Leventhal * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. 
919ec94d322SAdam Leventhal * Note that a type may have multiple discrete taskqs to avoid lock contention 920ec94d322SAdam Leventhal * on the taskq itself. In that case we choose which taskq at random by using 921ec94d322SAdam Leventhal * the low bits of gethrtime(). 922ec94d322SAdam Leventhal */ 923ec94d322SAdam Leventhal void 924ec94d322SAdam Leventhal spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, 925ec94d322SAdam Leventhal task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) 926ec94d322SAdam Leventhal { 927ec94d322SAdam Leventhal spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; 928ec94d322SAdam Leventhal taskq_t *tq; 929ec94d322SAdam Leventhal 930ec94d322SAdam Leventhal ASSERT3P(tqs->stqs_taskq, !=, NULL); 931ec94d322SAdam Leventhal ASSERT3U(tqs->stqs_count, !=, 0); 932ec94d322SAdam Leventhal 933ec94d322SAdam Leventhal if (tqs->stqs_count == 1) { 934ec94d322SAdam Leventhal tq = tqs->stqs_taskq[0]; 935ec94d322SAdam Leventhal } else { 936ec94d322SAdam Leventhal tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count]; 93735a5a358SJonathan Adams } 938ec94d322SAdam Leventhal 939ec94d322SAdam Leventhal taskq_dispatch_ent(tq, func, arg, flags, ent); 94035a5a358SJonathan Adams } 94135a5a358SJonathan Adams 94235a5a358SJonathan Adams static void 94335a5a358SJonathan Adams spa_create_zio_taskqs(spa_t *spa) 94435a5a358SJonathan Adams { 945e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 946e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 947ec94d322SAdam Leventhal spa_taskqs_init(spa, t, q); 94835a5a358SJonathan Adams } 94935a5a358SJonathan Adams } 95035a5a358SJonathan Adams } 95135a5a358SJonathan Adams 95235a5a358SJonathan Adams #ifdef _KERNEL 95335a5a358SJonathan Adams static void 95435a5a358SJonathan Adams spa_thread(void *arg) 95535a5a358SJonathan Adams { 95635a5a358SJonathan Adams callb_cpr_t cprinfo; 9572e0c549eSJonathan Adams 95835a5a358SJonathan Adams spa_t *spa = arg; 95935a5a358SJonathan Adams user_t *pu = 
PTOU(curproc); 9602e0c549eSJonathan Adams 96135a5a358SJonathan Adams CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, 96235a5a358SJonathan Adams spa->spa_name); 9632e0c549eSJonathan Adams 96435a5a358SJonathan Adams ASSERT(curproc != &p0); 96535a5a358SJonathan Adams (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), 96635a5a358SJonathan Adams "zpool-%s", spa->spa_name); 96735a5a358SJonathan Adams (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); 9682e0c549eSJonathan Adams 96935a5a358SJonathan Adams /* bind this thread to the requested psrset */ 97035a5a358SJonathan Adams if (zio_taskq_psrset_bind != PS_NONE) { 97135a5a358SJonathan Adams pool_lock(); 97235a5a358SJonathan Adams mutex_enter(&cpu_lock); 97335a5a358SJonathan Adams mutex_enter(&pidlock); 97435a5a358SJonathan Adams mutex_enter(&curproc->p_lock); 97580eb36f2SGeorge Wilson 97635a5a358SJonathan Adams if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, 97735a5a358SJonathan Adams 0, NULL, NULL) == 0) { 97835a5a358SJonathan Adams curthread->t_bind_pset = zio_taskq_psrset_bind; 97935a5a358SJonathan Adams } else { 98035a5a358SJonathan Adams cmn_err(CE_WARN, 98135a5a358SJonathan Adams "Couldn't bind process for zfs pool \"%s\" to " 98235a5a358SJonathan Adams "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); 98335a5a358SJonathan Adams } 98435a5a358SJonathan Adams 98535a5a358SJonathan Adams mutex_exit(&curproc->p_lock); 98635a5a358SJonathan Adams mutex_exit(&pidlock); 98735a5a358SJonathan Adams mutex_exit(&cpu_lock); 98835a5a358SJonathan Adams pool_unlock(); 98935a5a358SJonathan Adams } 99035a5a358SJonathan Adams 99135a5a358SJonathan Adams if (zio_taskq_sysdc) { 99235a5a358SJonathan Adams sysdc_thread_enter(curthread, 100, 0); 99335a5a358SJonathan Adams } 99435a5a358SJonathan Adams 99535a5a358SJonathan Adams spa->spa_proc = curproc; 99635a5a358SJonathan Adams spa->spa_did = curthread->t_did; 99735a5a358SJonathan Adams 99835a5a358SJonathan Adams spa_create_zio_taskqs(spa); 
99935a5a358SJonathan Adams 100035a5a358SJonathan Adams mutex_enter(&spa->spa_proc_lock); 100135a5a358SJonathan Adams ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); 100235a5a358SJonathan Adams 100335a5a358SJonathan Adams spa->spa_proc_state = SPA_PROC_ACTIVE; 100435a5a358SJonathan Adams cv_broadcast(&spa->spa_proc_cv); 100535a5a358SJonathan Adams 100635a5a358SJonathan Adams CALLB_CPR_SAFE_BEGIN(&cprinfo); 100735a5a358SJonathan Adams while (spa->spa_proc_state == SPA_PROC_ACTIVE) 100835a5a358SJonathan Adams cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); 100935a5a358SJonathan Adams CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); 101035a5a358SJonathan Adams 101135a5a358SJonathan Adams ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); 101235a5a358SJonathan Adams spa->spa_proc_state = SPA_PROC_GONE; 101335a5a358SJonathan Adams spa->spa_proc = &p0; 101435a5a358SJonathan Adams cv_broadcast(&spa->spa_proc_cv); 101535a5a358SJonathan Adams CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ 101635a5a358SJonathan Adams 101735a5a358SJonathan Adams mutex_enter(&curproc->p_lock); 101835a5a358SJonathan Adams lwp_exit(); 101935a5a358SJonathan Adams } 102035a5a358SJonathan Adams #endif 102135a5a358SJonathan Adams 102235a5a358SJonathan Adams /* 102335a5a358SJonathan Adams * Activate an uninitialized pool. 
102435a5a358SJonathan Adams */ 102535a5a358SJonathan Adams static void 102635a5a358SJonathan Adams spa_activate(spa_t *spa, int mode) 102735a5a358SJonathan Adams { 102835a5a358SJonathan Adams ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 102935a5a358SJonathan Adams 103035a5a358SJonathan Adams spa->spa_state = POOL_STATE_ACTIVE; 103135a5a358SJonathan Adams spa->spa_mode = mode; 103235a5a358SJonathan Adams 103335a5a358SJonathan Adams spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 103435a5a358SJonathan Adams spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 103535a5a358SJonathan Adams 103635a5a358SJonathan Adams /* Try to create a covering process */ 103735a5a358SJonathan Adams mutex_enter(&spa->spa_proc_lock); 103835a5a358SJonathan Adams ASSERT(spa->spa_proc_state == SPA_PROC_NONE); 103935a5a358SJonathan Adams ASSERT(spa->spa_proc == &p0); 104035a5a358SJonathan Adams spa->spa_did = 0; 104135a5a358SJonathan Adams 104235a5a358SJonathan Adams /* Only create a process if we're going to be around a while. 
*/ 104335a5a358SJonathan Adams if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { 104435a5a358SJonathan Adams if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, 104535a5a358SJonathan Adams NULL, 0) == 0) { 104635a5a358SJonathan Adams spa->spa_proc_state = SPA_PROC_CREATED; 104735a5a358SJonathan Adams while (spa->spa_proc_state == SPA_PROC_CREATED) { 104835a5a358SJonathan Adams cv_wait(&spa->spa_proc_cv, 104935a5a358SJonathan Adams &spa->spa_proc_lock); 10502e0c549eSJonathan Adams } 105135a5a358SJonathan Adams ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); 105235a5a358SJonathan Adams ASSERT(spa->spa_proc != &p0); 105335a5a358SJonathan Adams ASSERT(spa->spa_did != 0); 105435a5a358SJonathan Adams } else { 105535a5a358SJonathan Adams #ifdef _KERNEL 105635a5a358SJonathan Adams cmn_err(CE_WARN, 105735a5a358SJonathan Adams "Couldn't create process for zfs pool \"%s\"\n", 105835a5a358SJonathan Adams spa->spa_name); 105935a5a358SJonathan Adams #endif 1060e14bb325SJeff Bonwick } 1061fa9e4066Sahrens } 106235a5a358SJonathan Adams mutex_exit(&spa->spa_proc_lock); 106335a5a358SJonathan Adams 106435a5a358SJonathan Adams /* If we didn't create a process, we need to create our taskqs. 
*/ 106535a5a358SJonathan Adams if (spa->spa_proc == &p0) { 106635a5a358SJonathan Adams spa_create_zio_taskqs(spa); 106735a5a358SJonathan Adams } 1068fa9e4066Sahrens 1069e14bb325SJeff Bonwick list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 1070e14bb325SJeff Bonwick offsetof(vdev_t, vdev_config_dirty_node)); 1071e14bb325SJeff Bonwick list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 1072e14bb325SJeff Bonwick offsetof(vdev_t, vdev_state_dirty_node)); 1073fa9e4066Sahrens 1074fa9e4066Sahrens txg_list_create(&spa->spa_vdev_txg_list, 1075fa9e4066Sahrens offsetof(struct vdev, vdev_txg_node)); 1076ea8dc4b6Seschrock 1077ea8dc4b6Seschrock avl_create(&spa->spa_errlist_scrub, 1078ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 1079ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 1080ea8dc4b6Seschrock avl_create(&spa->spa_errlist_last, 1081ea8dc4b6Seschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 1082ea8dc4b6Seschrock offsetof(spa_error_entry_t, se_avl)); 1083fa9e4066Sahrens } 1084fa9e4066Sahrens 1085fa9e4066Sahrens /* 1086fa9e4066Sahrens * Opposite of spa_activate(). 
1087fa9e4066Sahrens */ 1088fa9e4066Sahrens static void 1089fa9e4066Sahrens spa_deactivate(spa_t *spa) 1090fa9e4066Sahrens { 1091fa9e4066Sahrens ASSERT(spa->spa_sync_on == B_FALSE); 1092fa9e4066Sahrens ASSERT(spa->spa_dsl_pool == NULL); 1093fa9e4066Sahrens ASSERT(spa->spa_root_vdev == NULL); 109425f89ee2SJeff Bonwick ASSERT(spa->spa_async_zio_root == NULL); 1095fa9e4066Sahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 1096fa9e4066Sahrens 1097fa9e4066Sahrens txg_list_destroy(&spa->spa_vdev_txg_list); 1098fa9e4066Sahrens 1099e14bb325SJeff Bonwick list_destroy(&spa->spa_config_dirty_list); 1100e14bb325SJeff Bonwick list_destroy(&spa->spa_state_dirty_list); 1101fa9e4066Sahrens 1102e14bb325SJeff Bonwick for (int t = 0; t < ZIO_TYPES; t++) { 1103e14bb325SJeff Bonwick for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 1104ec94d322SAdam Leventhal spa_taskqs_fini(spa, t, q); 1105e14bb325SJeff Bonwick } 1106fa9e4066Sahrens } 1107fa9e4066Sahrens 1108fa9e4066Sahrens metaslab_class_destroy(spa->spa_normal_class); 1109fa9e4066Sahrens spa->spa_normal_class = NULL; 1110fa9e4066Sahrens 11118654d025Sperrin metaslab_class_destroy(spa->spa_log_class); 11128654d025Sperrin spa->spa_log_class = NULL; 11138654d025Sperrin 1114ea8dc4b6Seschrock /* 1115ea8dc4b6Seschrock * If this was part of an import or the open otherwise failed, we may 1116ea8dc4b6Seschrock * still have errors left in the queues. Empty them just in case. 
1117ea8dc4b6Seschrock */ 1118ea8dc4b6Seschrock spa_errlog_drain(spa); 1119ea8dc4b6Seschrock 1120ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_scrub); 1121ea8dc4b6Seschrock avl_destroy(&spa->spa_errlist_last); 1122ea8dc4b6Seschrock 1123fa9e4066Sahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 112435a5a358SJonathan Adams 112535a5a358SJonathan Adams mutex_enter(&spa->spa_proc_lock); 112635a5a358SJonathan Adams if (spa->spa_proc_state != SPA_PROC_NONE) { 112735a5a358SJonathan Adams ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); 112835a5a358SJonathan Adams spa->spa_proc_state = SPA_PROC_DEACTIVATE; 112935a5a358SJonathan Adams cv_broadcast(&spa->spa_proc_cv); 113035a5a358SJonathan Adams while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { 113135a5a358SJonathan Adams ASSERT(spa->spa_proc != &p0); 113235a5a358SJonathan Adams cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); 113335a5a358SJonathan Adams } 113435a5a358SJonathan Adams ASSERT(spa->spa_proc_state == SPA_PROC_GONE); 113535a5a358SJonathan Adams spa->spa_proc_state = SPA_PROC_NONE; 113635a5a358SJonathan Adams } 113735a5a358SJonathan Adams ASSERT(spa->spa_proc == &p0); 113835a5a358SJonathan Adams mutex_exit(&spa->spa_proc_lock); 113935a5a358SJonathan Adams 114035a5a358SJonathan Adams /* 114135a5a358SJonathan Adams * We want to make sure spa_thread() has actually exited the ZFS 114235a5a358SJonathan Adams * module, so that the module can't be unloaded out from underneath 114335a5a358SJonathan Adams * it. 114435a5a358SJonathan Adams */ 114535a5a358SJonathan Adams if (spa->spa_did != 0) { 114635a5a358SJonathan Adams thread_join(spa->spa_did); 114735a5a358SJonathan Adams spa->spa_did = 0; 114835a5a358SJonathan Adams } 1149fa9e4066Sahrens } 1150fa9e4066Sahrens 1151fa9e4066Sahrens /* 1152fa9e4066Sahrens * Verify a pool configuration, and construct the vdev tree appropriately. 
This 1153fa9e4066Sahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 1154fa9e4066Sahrens * in the CLOSED state. This will prep the pool before open/creation/import. 1155fa9e4066Sahrens * All vdev validation is done by the vdev_alloc() routine. 1156fa9e4066Sahrens */ 115799653d4eSeschrock static int 115899653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 115999653d4eSeschrock uint_t id, int atype) 1160fa9e4066Sahrens { 1161fa9e4066Sahrens nvlist_t **child; 1162573ca77eSGeorge Wilson uint_t children; 116399653d4eSeschrock int error; 1164fa9e4066Sahrens 116599653d4eSeschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 116699653d4eSeschrock return (error); 1167fa9e4066Sahrens 116899653d4eSeschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 116999653d4eSeschrock return (0); 1170fa9e4066Sahrens 1171e14bb325SJeff Bonwick error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 1172e14bb325SJeff Bonwick &child, &children); 1173e14bb325SJeff Bonwick 1174e14bb325SJeff Bonwick if (error == ENOENT) 1175e14bb325SJeff Bonwick return (0); 1176e14bb325SJeff Bonwick 1177e14bb325SJeff Bonwick if (error) { 117899653d4eSeschrock vdev_free(*vdp); 117999653d4eSeschrock *vdp = NULL; 1180be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1181fa9e4066Sahrens } 1182fa9e4066Sahrens 1183573ca77eSGeorge Wilson for (int c = 0; c < children; c++) { 118499653d4eSeschrock vdev_t *vd; 118599653d4eSeschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 118699653d4eSeschrock atype)) != 0) { 118799653d4eSeschrock vdev_free(*vdp); 118899653d4eSeschrock *vdp = NULL; 118999653d4eSeschrock return (error); 1190fa9e4066Sahrens } 1191fa9e4066Sahrens } 1192fa9e4066Sahrens 119399653d4eSeschrock ASSERT(*vdp != NULL); 119499653d4eSeschrock 119599653d4eSeschrock return (0); 1196fa9e4066Sahrens } 1197fa9e4066Sahrens 1198fa9e4066Sahrens /* 1199fa9e4066Sahrens * Opposite of spa_load(). 
1200fa9e4066Sahrens */ 1201fa9e4066Sahrens static void 1202fa9e4066Sahrens spa_unload(spa_t *spa) 1203fa9e4066Sahrens { 120499653d4eSeschrock int i; 120599653d4eSeschrock 1206e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1207e14bb325SJeff Bonwick 1208ea8dc4b6Seschrock /* 1209ea8dc4b6Seschrock * Stop async tasks. 1210ea8dc4b6Seschrock */ 1211ea8dc4b6Seschrock spa_async_suspend(spa); 1212ea8dc4b6Seschrock 1213fa9e4066Sahrens /* 1214fa9e4066Sahrens * Stop syncing. 1215fa9e4066Sahrens */ 1216fa9e4066Sahrens if (spa->spa_sync_on) { 1217fa9e4066Sahrens txg_sync_stop(spa->spa_dsl_pool); 1218fa9e4066Sahrens spa->spa_sync_on = B_FALSE; 1219fa9e4066Sahrens } 1220fa9e4066Sahrens 1221fa9e4066Sahrens /* 1222e14bb325SJeff Bonwick * Wait for any outstanding async I/O to complete. 1223fa9e4066Sahrens */ 122454d692b7SGeorge Wilson if (spa->spa_async_zio_root != NULL) { 122554d692b7SGeorge Wilson (void) zio_wait(spa->spa_async_zio_root); 122654d692b7SGeorge Wilson spa->spa_async_zio_root = NULL; 122754d692b7SGeorge Wilson } 1228fa9e4066Sahrens 1229cde58dbcSMatthew Ahrens bpobj_close(&spa->spa_deferred_bpobj); 1230cde58dbcSMatthew Ahrens 12310713e232SGeorge Wilson spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 12320713e232SGeorge Wilson 12330713e232SGeorge Wilson /* 12340713e232SGeorge Wilson * Close all vdevs. 12350713e232SGeorge Wilson */ 12360713e232SGeorge Wilson if (spa->spa_root_vdev) 12370713e232SGeorge Wilson vdev_free(spa->spa_root_vdev); 12380713e232SGeorge Wilson ASSERT(spa->spa_root_vdev == NULL); 12390713e232SGeorge Wilson 1240fa9e4066Sahrens /* 1241fa9e4066Sahrens * Close the dsl pool. 
1242fa9e4066Sahrens */ 1243fa9e4066Sahrens if (spa->spa_dsl_pool) { 1244fa9e4066Sahrens dsl_pool_close(spa->spa_dsl_pool); 1245fa9e4066Sahrens spa->spa_dsl_pool = NULL; 1246afee20e4SGeorge Wilson spa->spa_meta_objset = NULL; 1247fa9e4066Sahrens } 1248fa9e4066Sahrens 1249b24ab676SJeff Bonwick ddt_unload(spa); 1250b24ab676SJeff Bonwick 12518ad4d6ddSJeff Bonwick 12528ad4d6ddSJeff Bonwick /* 12538ad4d6ddSJeff Bonwick * Drop and purge level 2 cache 12548ad4d6ddSJeff Bonwick */ 12558ad4d6ddSJeff Bonwick spa_l2cache_drop(spa); 12568ad4d6ddSJeff Bonwick 1257fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 1258fa94a07fSbrendan vdev_free(spa->spa_spares.sav_vdevs[i]); 1259fa94a07fSbrendan if (spa->spa_spares.sav_vdevs) { 1260fa94a07fSbrendan kmem_free(spa->spa_spares.sav_vdevs, 1261fa94a07fSbrendan spa->spa_spares.sav_count * sizeof (void *)); 1262fa94a07fSbrendan spa->spa_spares.sav_vdevs = NULL; 126399653d4eSeschrock } 1264fa94a07fSbrendan if (spa->spa_spares.sav_config) { 1265fa94a07fSbrendan nvlist_free(spa->spa_spares.sav_config); 1266fa94a07fSbrendan spa->spa_spares.sav_config = NULL; 1267fa94a07fSbrendan } 12682ce8af81SEric Schrock spa->spa_spares.sav_count = 0; 1269fa94a07fSbrendan 1270cd0837ccSGeorge Wilson for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 1271cd0837ccSGeorge Wilson vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); 1272fa94a07fSbrendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 1273cd0837ccSGeorge Wilson } 1274fa94a07fSbrendan if (spa->spa_l2cache.sav_vdevs) { 1275fa94a07fSbrendan kmem_free(spa->spa_l2cache.sav_vdevs, 1276fa94a07fSbrendan spa->spa_l2cache.sav_count * sizeof (void *)); 1277fa94a07fSbrendan spa->spa_l2cache.sav_vdevs = NULL; 1278fa94a07fSbrendan } 1279fa94a07fSbrendan if (spa->spa_l2cache.sav_config) { 1280fa94a07fSbrendan nvlist_free(spa->spa_l2cache.sav_config); 1281fa94a07fSbrendan spa->spa_l2cache.sav_config = NULL; 128299653d4eSeschrock } 12832ce8af81SEric Schrock spa->spa_l2cache.sav_count = 0; 
128499653d4eSeschrock 1285ea8dc4b6Seschrock spa->spa_async_suspended = 0; 12868ad4d6ddSJeff Bonwick 12878704186eSDan McDonald if (spa->spa_comment != NULL) { 12888704186eSDan McDonald spa_strfree(spa->spa_comment); 12898704186eSDan McDonald spa->spa_comment = NULL; 12908704186eSDan McDonald } 12918704186eSDan McDonald 12928ad4d6ddSJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 1293fa9e4066Sahrens } 1294fa9e4066Sahrens 129599653d4eSeschrock /* 129699653d4eSeschrock * Load (or re-load) the current list of vdevs describing the active spares for 129799653d4eSeschrock * this pool. When this is called, we have some form of basic information in 1298fa94a07fSbrendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 1299fa94a07fSbrendan * then re-generate a more complete list including status information. 130099653d4eSeschrock */ 130199653d4eSeschrock static void 130299653d4eSeschrock spa_load_spares(spa_t *spa) 130399653d4eSeschrock { 130499653d4eSeschrock nvlist_t **spares; 130599653d4eSeschrock uint_t nspares; 130699653d4eSeschrock int i; 130739c23413Seschrock vdev_t *vd, *tvd; 130899653d4eSeschrock 1309e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1310e14bb325SJeff Bonwick 131199653d4eSeschrock /* 131299653d4eSeschrock * First, close and free any existing spare vdevs. 
131399653d4eSeschrock */ 1314fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 1315fa94a07fSbrendan vd = spa->spa_spares.sav_vdevs[i]; 131639c23413Seschrock 131739c23413Seschrock /* Undo the call to spa_activate() below */ 1318c5904d13Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 1319c5904d13Seschrock B_FALSE)) != NULL && tvd->vdev_isspare) 132039c23413Seschrock spa_spare_remove(tvd); 132139c23413Seschrock vdev_close(vd); 132239c23413Seschrock vdev_free(vd); 132399653d4eSeschrock } 132439c23413Seschrock 1325fa94a07fSbrendan if (spa->spa_spares.sav_vdevs) 1326fa94a07fSbrendan kmem_free(spa->spa_spares.sav_vdevs, 1327fa94a07fSbrendan spa->spa_spares.sav_count * sizeof (void *)); 132899653d4eSeschrock 1329fa94a07fSbrendan if (spa->spa_spares.sav_config == NULL) 133099653d4eSeschrock nspares = 0; 133199653d4eSeschrock else 1332fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 133399653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 133499653d4eSeschrock 1335fa94a07fSbrendan spa->spa_spares.sav_count = (int)nspares; 1336fa94a07fSbrendan spa->spa_spares.sav_vdevs = NULL; 133799653d4eSeschrock 133899653d4eSeschrock if (nspares == 0) 133999653d4eSeschrock return; 134099653d4eSeschrock 134199653d4eSeschrock /* 134299653d4eSeschrock * Construct the array of vdevs, opening them to get status in the 134339c23413Seschrock * process. For each spare, there is potentially two different vdev_t 134439c23413Seschrock * structures associated with it: one in the list of spares (used only 134539c23413Seschrock * for basic validation purposes) and one in the active vdev 134639c23413Seschrock * configuration (if it's spared in). During this phase we open and 134739c23413Seschrock * validate each vdev on the spare list. If the vdev also exists in the 134839c23413Seschrock * active configuration, then we also mark this vdev as an active spare. 
134999653d4eSeschrock */ 1350fa94a07fSbrendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 1351fa94a07fSbrendan KM_SLEEP); 1352fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 135399653d4eSeschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 135499653d4eSeschrock VDEV_ALLOC_SPARE) == 0); 135599653d4eSeschrock ASSERT(vd != NULL); 135699653d4eSeschrock 1357fa94a07fSbrendan spa->spa_spares.sav_vdevs[i] = vd; 135899653d4eSeschrock 1359c5904d13Seschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 1360c5904d13Seschrock B_FALSE)) != NULL) { 136139c23413Seschrock if (!tvd->vdev_isspare) 136239c23413Seschrock spa_spare_add(tvd); 136339c23413Seschrock 136439c23413Seschrock /* 136539c23413Seschrock * We only mark the spare active if we were successfully 136639c23413Seschrock * able to load the vdev. Otherwise, importing a pool 136739c23413Seschrock * with a bad active spare would result in strange 136839c23413Seschrock * behavior, because multiple pool would think the spare 136939c23413Seschrock * is actively in use. 137039c23413Seschrock * 137139c23413Seschrock * There is a vulnerability here to an equally bizarre 137239c23413Seschrock * circumstance, where a dead active spare is later 137339c23413Seschrock * brought back to life (onlined or otherwise). Given 137439c23413Seschrock * the rarity of this scenario, and the extra complexity 137539c23413Seschrock * it adds, we ignore the possibility. 
137639c23413Seschrock */ 137739c23413Seschrock if (!vdev_is_dead(tvd)) 137839c23413Seschrock spa_spare_activate(tvd); 137939c23413Seschrock } 138039c23413Seschrock 1381e14bb325SJeff Bonwick vd->vdev_top = vd; 13826809eb4eSEric Schrock vd->vdev_aux = &spa->spa_spares; 1383e14bb325SJeff Bonwick 138499653d4eSeschrock if (vdev_open(vd) != 0) 138599653d4eSeschrock continue; 138699653d4eSeschrock 1387fa94a07fSbrendan if (vdev_validate_aux(vd) == 0) 1388fa94a07fSbrendan spa_spare_add(vd); 138999653d4eSeschrock } 139099653d4eSeschrock 139199653d4eSeschrock /* 139299653d4eSeschrock * Recompute the stashed list of spares, with status information 139399653d4eSeschrock * this time. 139499653d4eSeschrock */ 1395fa94a07fSbrendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 139699653d4eSeschrock DATA_TYPE_NVLIST_ARRAY) == 0); 139799653d4eSeschrock 1398fa94a07fSbrendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 1399fa94a07fSbrendan KM_SLEEP); 1400fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 1401fa94a07fSbrendan spares[i] = vdev_config_generate(spa, 14023f9d6ad7SLin Ling spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); 1403fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 1404fa94a07fSbrendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 1405fa94a07fSbrendan for (i = 0; i < spa->spa_spares.sav_count; i++) 140699653d4eSeschrock nvlist_free(spares[i]); 1407fa94a07fSbrendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 1408fa94a07fSbrendan } 1409fa94a07fSbrendan 1410fa94a07fSbrendan /* 1411fa94a07fSbrendan * Load (or re-load) the current list of vdevs describing the active l2cache for 1412fa94a07fSbrendan * this pool. When this is called, we have some form of basic information in 1413fa94a07fSbrendan * 'spa_l2cache.sav_config'. 
 * We parse this into vdevs, try to open them, and
 * then re-generate a more complete list including status information.
 * Devices which are already active have their details maintained, and are
 * not re-opened.
 */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	/* Caller must hold all config locks as writer. */
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		nl2cache = 0;
		newvdevs = NULL;
	}

	/*
	 * Detach the old array from the sav so that the retain/purge logic
	 * below can transfer still-present vdevs into newvdevs and free
	 * the rest.  sav_count is also zeroed here, which makes the 'out:'
	 * cleanup at the bottom a no-op on the early-exit path.
	 */
	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 * NULLing the old slot keeps the purge loop
				 * below from freeing a vdev we kept.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			/* Aux vdevs are their own top-level vdev. */
			vd->vdev_top = vd;
			vd->vdev_aux = sav;

			spa_l2cache_activate(vd);

			if (vdev_open(vd) != 0)
				continue;

			(void) vdev_validate_aux(vd);

			/* Only hand healthy devices to the L2ARC. */
			if (!vdev_is_dead(vd))
				l2arc_add_vdev(spa, vd);
		}
	}

	/*
	 * Purge vdevs that were dropped
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			ASSERT(vd->vdev_isl2cache);

			/*
			 * Detach from the L2ARC before freeing, but only if
			 * the device is still registered there and belongs
			 * to some pool.
			 */
			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL && l2arc_vdev_present(vd))
				l2arc_remove_vdev(vd);
			vdev_clear_stats(vd);
			vdev_free(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	/*
	 * On the goto path sav_count is 0 (set above), so neither the
	 * nvlist_free() loop nor the kmem_free() runs and the (then
	 * unassigned) l2cache pointer is never dereferenced.
	 */
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}

/*
 * Read a packed nvlist from MOS object 'obj' and unpack it into *value.
 * The object's bonus buffer holds the packed size as a uint64_t.  On
 * success *value is an allocated nvlist; the caller is responsible for
 * freeing it (presumably via nvlist_free() -- confirm against callers).
 * On failure *value is left NULL and the dmu_read()/nvlist_unpack()
 * error is returned.
 */
static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
	    DMU_READ_PREFETCH);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}

/*
 * Checks to see if the given vdev could
 * not be opened, in which case we post a
 * sysevent to notify the autoreplace code that the device has been removed.
 */
static void
spa_check_removed(vdev_t *vd)
{
	/* Depth-first walk: visit children before examining this vdev. */
	for (int c = 0; c < vd->vdev_children; c++)
		spa_check_removed(vd->vdev_child[c]);

	/* Only dead leaf devices (and not holes) warrant a sysevent. */
	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) &&
	    !vd->vdev_ishole) {
		zfs_post_autoreplace(vd->vdev_spa, vd);
		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
	}
}

/*
 * Validate the current config against the MOS config.
 *
 * Parses the MOS copy of the vdev tree (mrvd) and walks it alongside the
 * in-core root vdev tree, repairing "missing" top-level vdevs from the MOS
 * copy where possible.  Returns B_TRUE iff the resulting root vdev guid sum
 * matches the uberblock's guid sum.
 */
static boolean_t
spa_config_valid(spa_t *spa, nvlist_t *config)
{
	vdev_t *mrvd, *rvd = spa->spa_root_vdev;
	nvlist_t *nv;

	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);

	ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children);

	/*
	 * If we're doing a normal import, then build up any additional
	 * diagnostic information about missing devices in this config.
	 * We'll pass this up to the user for further processing.
	 */
	if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) {
		nvlist_t **child, *nv;	/* NOTE: shadows the outer 'nv' */
		uint64_t idx = 0;

		child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **),
		    KM_SLEEP);
		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);

		/*
		 * Collect configs for log devices that are missing in-core
		 * but present in the MOS copy.
		 */
		for (int c = 0; c < rvd->vdev_children; c++) {
			vdev_t *tvd = rvd->vdev_child[c];
			vdev_t *mtvd = mrvd->vdev_child[c];

			if (tvd->vdev_ops == &vdev_missing_ops &&
			    mtvd->vdev_ops != &vdev_missing_ops &&
			    mtvd->vdev_islog)
				child[idx++] = vdev_config_generate(spa, mtvd,
				    B_FALSE, 0);
		}

		if (idx) {
			VERIFY(nvlist_add_nvlist_array(nv,
			    ZPOOL_CONFIG_CHILDREN, child, idx) == 0);
			VERIFY(nvlist_add_nvlist(spa->spa_load_info,
			    ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0);

			for (int i = 0; i < idx; i++)
				nvlist_free(child[i]);
		}
		nvlist_free(nv);
		/*
		 * NOTE(review): freed with sizeof (char **) vs. the
		 * sizeof (nvlist_t **) used at allocation -- the sizes are
		 * identical (both pointers), so this is harmless.
		 */
		kmem_free(child, rvd->vdev_children * sizeof (char **));
	}

	/*
	 * Compare the root vdev tree with the information we have
	 * from the MOS config (mrvd). Check each top-level vdev
	 * with the corresponding MOS config top-level (mtvd).
	 */
	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		vdev_t *mtvd = mrvd->vdev_child[c];

		/*
		 * Resolve any "missing" vdevs in the current configuration.
		 * If we find that the MOS config has more accurate information
		 * about the top-level vdev then use that vdev instead.
		 */
		if (tvd->vdev_ops == &vdev_missing_ops &&
		    mtvd->vdev_ops != &vdev_missing_ops) {

			if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG))
				continue;

			/*
			 * Device specific actions.
			 */
			if (mtvd->vdev_islog) {
				spa_set_log_state(spa, SPA_LOG_CLEAR);
			} else {
				/*
				 * XXX - once we have 'readonly' pool
				 * support we should be able to handle
				 * missing data devices by transitioning
				 * the pool to readonly.
				 */
				continue;
			}

			/*
			 * Swap the missing vdev with the data we were
			 * able to obtain from the MOS config.
			 */
			vdev_remove_child(rvd, tvd);
			vdev_remove_child(mrvd, mtvd);

			vdev_add_child(rvd, mtvd);
			vdev_add_child(mrvd, tvd);

			/*
			 * vdev_load() may sleep, so drop the config locks
			 * around it and retake them before reopening.
			 */
			spa_config_exit(spa, SCL_ALL, FTAG);
			vdev_load(mtvd);
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

			vdev_reopen(rvd);
		} else if (mtvd->vdev_islog) {
			/*
			 * Load the slog device's state from the MOS config
			 * since it's possible that the label does not
			 * contain the most up-to-date information.
			 */
			vdev_load_log_state(tvd, mtvd);
			vdev_reopen(tvd);
		}
	}
	vdev_free(mrvd);
	spa_config_exit(spa, SCL_ALL, FTAG);

	/*
	 * Ensure we were able to validate the config.
	 */
	return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum);
}

/*
 * Check for missing log devices.  Returns B_TRUE if a log chain check
 * failed, in which case the log state is set to SPA_LOG_MISSING.
 */
static boolean_t
spa_check_logs(spa_t *spa)
{
	boolean_t rv = B_FALSE;

	switch (spa->spa_log_state) {
	case SPA_LOG_MISSING:
		/* need to recheck in case slog has been restored */
		/* FALLTHROUGH */
	case SPA_LOG_UNKNOWN:
		rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain,
		    NULL, DS_FIND_CHILDREN) != 0);
		if (rv)
			spa_set_log_state(spa, SPA_LOG_MISSING);
		break;
	}
	return (rv);
}

/*
 * Passivate the metaslab groups of every top-level log vdev so that no
 * further allocations land on them.  Returns B_TRUE if at least one log
 * device was passivated.
 */
static boolean_t
spa_passivate_log(spa_t *spa)
{
	vdev_t *rvd = spa->spa_root_vdev;
	boolean_t slog_found = B_FALSE;

	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));

	if (!spa_has_slogs(spa))
		return (B_FALSE);

	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = tvd->vdev_mg;

		if (tvd->vdev_islog) {
			metaslab_group_passivate(mg);
			slog_found = B_TRUE;
		}
	}

	return (slog_found);
}

/*
 * Re-activate the metaslab groups of every top-level log vdev, undoing
 * spa_passivate_log().
 */
static void
spa_activate_log(spa_t *spa)
{
	vdev_t *rvd = spa->spa_root_vdev;

	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));

	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = tvd->vdev_mg;

		if (tvd->vdev_islog)
			metaslab_group_activate(mg);
	}
}

/*
 * Offline the ZIL on every dataset in the pool, then sync out the current
 * txg.  Returns 0 on success or the dmu_objset_find() error.
 */
int
spa_offline_log(spa_t *spa)
{
	int error;

	error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
	    NULL, DS_FIND_CHILDREN);
	if (error == 0) {
		/*
		 * We successfully offlined the log device, sync out the
		 * current txg so that the "stubby" block can be removed
		 * by zil_sync().
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);
	}
	return (error);
}

/*
 * Run spa_check_removed() over every vdev in an aux array (e.g. the
 * spares or l2cache list).
 */
static void
spa_aux_check_removed(spa_aux_vdev_t *sav)
{
	for (int i = 0; i < sav->sav_count; i++)
		spa_check_removed(sav->sav_vdevs[i]);
}

/*
 * zio completion hook: record the highest block birth txg seen among
 * successfully claimed blocks in spa_claim_max_txg.  I/O errors are
 * ignored here.
 */
void
spa_claim_notify(zio_t *zio)
{
	spa_t *spa = zio->io_spa;

	if (zio->io_error)
		return;

	mutex_enter(&spa->spa_props_lock);	/* any mutex will do */
	if (spa->spa_claim_max_txg < zio->io_bp->blk_birth)
		spa->spa_claim_max_txg = zio->io_bp->blk_birth;
	mutex_exit(&spa->spa_props_lock);
}

/*
 * Error counters for spa_load_verify(); bumped atomically from the zio
 * completion path in spa_load_verify_done().
 */
typedef struct spa_load_error {
	uint64_t	sle_meta_count;
	uint64_t	sle_data_count;
} spa_load_error_t;

/*
 * Completion callback for the verification reads issued by
 * spa_load_verify_cb().  Classifies any I/O error as metadata vs. data,
 * frees the read buffer, and releases one scrub-inflight slot.
 */
static void
spa_load_verify_done(zio_t *zio)
{
	blkptr_t *bp = zio->io_bp;
	spa_load_error_t *sle = zio->io_private;
	dmu_object_type_t type = BP_GET_TYPE(bp);
	int error = zio->io_error;
	spa_t *spa = zio->io_spa;

	if (error) {
		/* Intent-log blocks at level 0 count as data, not metadata. */
		if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
		    type != DMU_OT_INTENT_LOG)
			atomic_add_64(&sle->sle_meta_count, 1);
		else
			atomic_add_64(&sle->sle_data_count, 1);
	}
	zio_data_buf_free(zio->io_data, zio->io_size);

	/* Release our inflight slot and wake any throttled issuer. */
	mutex_enter(&spa->spa_scrub_lock);
	spa->spa_scrub_inflight--;
	cv_broadcast(&spa->spa_scrub_io_cv);
	mutex_exit(&spa->spa_scrub_lock);
}

/*
 * Maximum number of concurrent scrub i/os to create while verifying
 * a pool while importing it.
 */
int spa_load_verify_maxinflight = 10000;
boolean_t spa_load_verify_metadata = B_TRUE;
boolean_t spa_load_verify_data = B_TRUE;

/*
 * Pool-traversal callback for spa_load_verify(): issue an asynchronous,
 * speculative scrub read for each block pointer, throttled to
 * spa_load_verify_maxinflight outstanding i/os.
 */
/*ARGSUSED*/
static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
		return (0);
	/*
	 * Note: normally this routine will not be called if
	 * spa_load_verify_metadata is not set. However, it may be useful
	 * to manually set the flag after the traversal has begun.
	 */
	if (!spa_load_verify_metadata)
		return (0);
	if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data)
		return (0);

	zio_t *rio = arg;
	size_t size = BP_GET_PSIZE(bp);
	void *data = zio_data_buf_alloc(size);

	/* Block until an inflight slot is available. */
	mutex_enter(&spa->spa_scrub_lock);
	while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight)
		cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
	spa->spa_scrub_inflight++;
	mutex_exit(&spa->spa_scrub_lock);

	zio_nowait(zio_read(rio, spa, bp, data, size,
	    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
	    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
	    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
	return (0);
}

/*
 * Verify pool data by traversing it and reading every block, then decide
 * (per the rewind policy's error thresholds) whether the load is
 * acceptable.  On success, records load-time/rewind information in
 * spa_load_info.  Returns 0 if verification passed or was skipped, EIO
 * (or the traversal error) otherwise.
 */
static int
spa_load_verify(spa_t *spa)
{
	zio_t *rio;
	spa_load_error_t sle = { 0 };
	zpool_rewind_policy_t policy;
	boolean_t verify_ok = B_FALSE;
	int error = 0;

	zpool_get_rewind_policy(spa->spa_config, &policy);

	if (policy.zrp_request & ZPOOL_NEVER_REWIND)
		return (0);

	rio = zio_root(spa, NULL, &sle,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);

	if (spa_load_verify_metadata) {
		error = traverse_pool(spa, spa->spa_verify_min_txg,
		    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
		    spa_load_verify_cb, rio);
	}

	/* Wait for all the speculative child reads to complete. */
	(void) zio_wait(rio);

	spa->spa_load_meta_errors = sle.sle_meta_count;
	spa->spa_load_data_errors = sle.sle_data_count;

	if (!error && sle.sle_meta_count <= policy.zrp_maxmeta &&
	    sle.sle_data_count <= policy.zrp_maxdata) {
		int64_t loss = 0;

		verify_ok = B_TRUE;
		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;

		loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts;
		VERIFY(nvlist_add_uint64(spa->spa_load_info,
		    ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0);
		VERIFY(nvlist_add_int64(spa->spa_load_info,
		    ZPOOL_CONFIG_REWIND_TIME, loss) == 0);
		VERIFY(nvlist_add_uint64(spa->spa_load_info,
		    ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0);
	} else {
		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	}

	if (error) {
		/* Normalize unexpected traversal errors to EIO. */
		if (error != ENXIO && error != EIO)
			error = SET_ERROR(EIO);
		return (error);
	}

	return (verify_ok ?
0 : EIO);
}

/*
 * Find a value in the pool props object.  A failed lookup leaves *val
 * untouched (the zap_lookup() return value is deliberately ignored).
 */
static void
spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val)
{
	(void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object,
	    zpool_prop_to_name(prop), sizeof (uint64_t), 1, val);
}

/*
 * Find a value in the pool directory object.
 */
static int
spa_dir_prop(spa_t *spa, const char *name, uint64_t *val)
{
	return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    name, sizeof (uint64_t), 1, val));
}

/*
 * Mark the vdev as CANT_OPEN with the given aux reason and pass the
 * error through to the caller.
 */
static int
spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err)
{
	vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux);
	return (err);
}

/*
 * Fix up config after a partly-completed split. This is done with the
 * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off
 * pool have that entry in their config, but only the splitting one contains
 * a list of all the guids of the vdevs that are being split off.
 *
 * This function determines what to do with that list: either rejoin
 * all the disks to the pool, or complete the splitting process. To attempt
 * the rejoin, each disk that is offlined is marked online again, and
 * we do a reopen() call. If the vdev label for every disk that was
 * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL)
 * then we call vdev_split() on each disk, and complete the split.
 *
 * Otherwise we leave the config alone, with all the vdevs in place in
 * the original pool.
 */
static void
spa_try_repair(spa_t *spa, nvlist_t *config)
{
	uint_t extracted;
	uint64_t *glist;
	uint_t i, gcount;
	nvlist_t *nvl;
	vdev_t **vd;
	boolean_t attempt_reopen;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0)
		return;

	/* check that the config is complete */
	if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
	    &glist, &gcount) != 0)
		return;

	vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP);

	/* attempt to online all the vdevs & validate */
	attempt_reopen = B_TRUE;
	for (i = 0; i < gcount; i++) {
		if (glist[i] == 0)	/* vdev is hole */
			continue;

		vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE);
		if (vd[i] == NULL) {
			/*
			 * Don't bother attempting to reopen the disks;
			 * just do the split.
			 */
			attempt_reopen = B_FALSE;
		} else {
			/* attempt to re-online it */
			vd[i]->vdev_offline = B_FALSE;
		}
	}

	if (attempt_reopen) {
		vdev_reopen(spa->spa_root_vdev);

		/* check each device to see what state it's in */
		for (extracted = 0, i = 0; i < gcount; i++) {
			if (vd[i] != NULL &&
			    vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL)
				break;
			++extracted;
		}
	}

	/*
	 * If every disk has been moved to the new pool, or if we never
	 * even attempted to look at them, then we split them off for
	 * good.
20281195e687SMark J Musante */ 20291195e687SMark J Musante if (!attempt_reopen || gcount == extracted) { 20301195e687SMark J Musante for (i = 0; i < gcount; i++) 20311195e687SMark J Musante if (vd[i] != NULL) 20321195e687SMark J Musante vdev_split(vd[i]); 20331195e687SMark J Musante vdev_reopen(spa->spa_root_vdev); 20341195e687SMark J Musante } 20351195e687SMark J Musante 20361195e687SMark J Musante kmem_free(vd, gcount * sizeof (vdev_t *)); 20371195e687SMark J Musante } 20381195e687SMark J Musante 20391195e687SMark J Musante static int 20401195e687SMark J Musante spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, 20411195e687SMark J Musante boolean_t mosconfig) 20421195e687SMark J Musante { 20431195e687SMark J Musante nvlist_t *config = spa->spa_config; 20441195e687SMark J Musante char *ereport = FM_EREPORT_ZFS_POOL; 20458704186eSDan McDonald char *comment; 20461195e687SMark J Musante int error; 20471195e687SMark J Musante uint64_t pool_guid; 20481195e687SMark J Musante nvlist_t *nvl; 20491195e687SMark J Musante 20501195e687SMark J Musante if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) 2051be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 20521195e687SMark J Musante 20538704186eSDan McDonald ASSERT(spa->spa_comment == NULL); 20548704186eSDan McDonald if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) 20558704186eSDan McDonald spa->spa_comment = spa_strdup(comment); 20568704186eSDan McDonald 20571195e687SMark J Musante /* 20581195e687SMark J Musante * Versioning wasn't explicitly added to the label until later, so if 20591195e687SMark J Musante * it's not present treat it as the initial version. 
20601195e687SMark J Musante */ 20611195e687SMark J Musante if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 20621195e687SMark J Musante &spa->spa_ubsync.ub_version) != 0) 20631195e687SMark J Musante spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; 20641195e687SMark J Musante 20651195e687SMark J Musante (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 20661195e687SMark J Musante &spa->spa_config_txg); 20671195e687SMark J Musante 20681195e687SMark J Musante if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 20691195e687SMark J Musante spa_guid_exists(pool_guid, 0)) { 2070be6fd75aSMatthew Ahrens error = SET_ERROR(EEXIST); 20711195e687SMark J Musante } else { 2072e9103aaeSGarrett D'Amore spa->spa_config_guid = pool_guid; 20731195e687SMark J Musante 20741195e687SMark J Musante if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, 20751195e687SMark J Musante &nvl) == 0) { 20761195e687SMark J Musante VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting, 20771195e687SMark J Musante KM_SLEEP) == 0); 20781195e687SMark J Musante } 20791195e687SMark J Musante 2080ad135b5dSChristopher Siden nvlist_free(spa->spa_load_info); 2081ad135b5dSChristopher Siden spa->spa_load_info = fnvlist_alloc(); 2082ad135b5dSChristopher Siden 208311027bc7STim Haley gethrestime(&spa->spa_loaded_ts); 20841195e687SMark J Musante error = spa_load_impl(spa, pool_guid, config, state, type, 20851195e687SMark J Musante mosconfig, &ereport); 20861195e687SMark J Musante } 20871195e687SMark J Musante 20881195e687SMark J Musante spa->spa_minref = refcount_count(&spa->spa_refcount); 208911027bc7STim Haley if (error) { 209011027bc7STim Haley if (error != EEXIST) { 209111027bc7STim Haley spa->spa_loaded_ts.tv_sec = 0; 209211027bc7STim Haley spa->spa_loaded_ts.tv_nsec = 0; 209311027bc7STim Haley } 209411027bc7STim Haley if (error != EBADF) { 209511027bc7STim Haley zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 209611027bc7STim Haley } 209711027bc7STim Haley } 20981195e687SMark J 
Musante spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; 20991195e687SMark J Musante spa->spa_ena = 0; 21001195e687SMark J Musante 21011195e687SMark J Musante return (error); 21021195e687SMark J Musante } 21031195e687SMark J Musante 2104fa9e4066Sahrens /* 2105fa9e4066Sahrens * Load an existing storage pool, using the pool's builtin spa_config as a 2106ea8dc4b6Seschrock * source of configuration information. 2107fa9e4066Sahrens */ 2108fa9e4066Sahrens static int 21091195e687SMark J Musante spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, 21101195e687SMark J Musante spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, 21111195e687SMark J Musante char **ereport) 2112fa9e4066Sahrens { 2113fa9e4066Sahrens int error = 0; 2114871a9500SMark J Musante nvlist_t *nvroot = NULL; 2115ad135b5dSChristopher Siden nvlist_t *label; 2116fa9e4066Sahrens vdev_t *rvd; 2117fa9e4066Sahrens uberblock_t *ub = &spa->spa_uberblock; 21184b964adaSGeorge Wilson uint64_t children, config_cache_txg = spa->spa_config_txg; 21198ad4d6ddSJeff Bonwick int orig_mode = spa->spa_mode; 21201195e687SMark J Musante int parse; 2121cde58dbcSMatthew Ahrens uint64_t obj; 2122ad135b5dSChristopher Siden boolean_t missing_feat_write = B_FALSE; 2123fa9e4066Sahrens 21248ad4d6ddSJeff Bonwick /* 21258ad4d6ddSJeff Bonwick * If this is an untrusted config, access the pool in read-only mode. 21268ad4d6ddSJeff Bonwick * This prevents things like resilvering recently removed devices. 
21278ad4d6ddSJeff Bonwick */ 21288ad4d6ddSJeff Bonwick if (!mosconfig) 21298ad4d6ddSJeff Bonwick spa->spa_mode = FREAD; 21308ad4d6ddSJeff Bonwick 2131e14bb325SJeff Bonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 2132e14bb325SJeff Bonwick 2133ea8dc4b6Seschrock spa->spa_load_state = state; 21340373e76bSbonwick 21351195e687SMark J Musante if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)) 2136be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2137fa9e4066Sahrens 21381195e687SMark J Musante parse = (type == SPA_IMPORT_EXISTING ? 21391195e687SMark J Musante VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); 2140b5989ec7Seschrock 214154d692b7SGeorge Wilson /* 214254d692b7SGeorge Wilson * Create "The Godfather" zio to hold all async IOs 214354d692b7SGeorge Wilson */ 214425f89ee2SJeff Bonwick spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 214525f89ee2SJeff Bonwick ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 214654d692b7SGeorge Wilson 2147fa9e4066Sahrens /* 214899653d4eSeschrock * Parse the configuration into a vdev tree. We explicitly set the 214999653d4eSeschrock * value that will be returned by spa_version() since parsing the 215099653d4eSeschrock * configuration requires knowing the version number. 2151fa9e4066Sahrens */ 2152e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21531195e687SMark J Musante error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse); 2154e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2155fa9e4066Sahrens 215699653d4eSeschrock if (error != 0) 21571195e687SMark J Musante return (error); 2158fa9e4066Sahrens 21590e34b6a7Sbonwick ASSERT(spa->spa_root_vdev == rvd); 21601195e687SMark J Musante 21611195e687SMark J Musante if (type != SPA_IMPORT_ASSEMBLE) { 21621195e687SMark J Musante ASSERT(spa_guid(spa) == pool_guid); 21631195e687SMark J Musante } 2164fa9e4066Sahrens 2165fa9e4066Sahrens /* 2166fa9e4066Sahrens * Try to open all vdevs, loading each label in the process. 
2167fa9e4066Sahrens */ 2168e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 21690bf246f5Smc error = vdev_open(rvd); 2170e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 21710bf246f5Smc if (error != 0) 21721195e687SMark J Musante return (error); 2173fa9e4066Sahrens 2174560e6e96Seschrock /* 217577e3a39cSMark J Musante * We need to validate the vdev labels against the configuration that 217677e3a39cSMark J Musante * we have in hand, which is dependent on the setting of mosconfig. If 217777e3a39cSMark J Musante * mosconfig is true then we're validating the vdev labels based on 21781195e687SMark J Musante * that config. Otherwise, we're validating against the cached config 217977e3a39cSMark J Musante * (zpool.cache) that was read when we loaded the zfs module, and then 218077e3a39cSMark J Musante * later we will recursively call spa_load() and validate against 218177e3a39cSMark J Musante * the vdev config. 21821195e687SMark J Musante * 21831195e687SMark J Musante * If we're assembling a new pool that's been split off from an 21841195e687SMark J Musante * existing pool, the labels haven't yet been updated so we skip 21851195e687SMark J Musante * validation for now. 2186560e6e96Seschrock */ 21871195e687SMark J Musante if (type != SPA_IMPORT_ASSEMBLE) { 21881195e687SMark J Musante spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2189d7f601efSGeorge Wilson error = vdev_validate(rvd, mosconfig); 21901195e687SMark J Musante spa_config_exit(spa, SCL_ALL, FTAG); 2191560e6e96Seschrock 21921195e687SMark J Musante if (error != 0) 21931195e687SMark J Musante return (error); 21941195e687SMark J Musante 21951195e687SMark J Musante if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) 2196be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 2197560e6e96Seschrock } 2198560e6e96Seschrock 2199fa9e4066Sahrens /* 2200fa9e4066Sahrens * Find the best uberblock. 
2201fa9e4066Sahrens */ 2202ad135b5dSChristopher Siden vdev_uberblock_load(rvd, ub, &label); 2203fa9e4066Sahrens 2204fa9e4066Sahrens /* 2205fa9e4066Sahrens * If we weren't able to find a single valid uberblock, return failure. 2206fa9e4066Sahrens */ 2207ad135b5dSChristopher Siden if (ub->ub_txg == 0) { 2208ad135b5dSChristopher Siden nvlist_free(label); 22091195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); 2210ad135b5dSChristopher Siden } 2211ea8dc4b6Seschrock 2212ea8dc4b6Seschrock /* 2213ad135b5dSChristopher Siden * If the pool has an unsupported version we can't open it. 2214ea8dc4b6Seschrock */ 2215ad135b5dSChristopher Siden if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { 2216ad135b5dSChristopher Siden nvlist_free(label); 22171195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); 2218ad135b5dSChristopher Siden } 2219ad135b5dSChristopher Siden 2220ad135b5dSChristopher Siden if (ub->ub_version >= SPA_VERSION_FEATURES) { 2221ad135b5dSChristopher Siden nvlist_t *features; 2222ad135b5dSChristopher Siden 2223ad135b5dSChristopher Siden /* 2224ad135b5dSChristopher Siden * If we weren't able to find what's necessary for reading the 2225ad135b5dSChristopher Siden * MOS in the label, return failure. 2226ad135b5dSChristopher Siden */ 2227ad135b5dSChristopher Siden if (label == NULL || nvlist_lookup_nvlist(label, 2228ad135b5dSChristopher Siden ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) { 2229ad135b5dSChristopher Siden nvlist_free(label); 2230ad135b5dSChristopher Siden return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, 2231ad135b5dSChristopher Siden ENXIO)); 2232ad135b5dSChristopher Siden } 2233ad135b5dSChristopher Siden 2234ad135b5dSChristopher Siden /* 2235ad135b5dSChristopher Siden * Update our in-core representation with the definitive values 2236ad135b5dSChristopher Siden * from the label. 
2237ad135b5dSChristopher Siden */ 2238ad135b5dSChristopher Siden nvlist_free(spa->spa_label_features); 2239ad135b5dSChristopher Siden VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); 2240ad135b5dSChristopher Siden } 2241ad135b5dSChristopher Siden 2242ad135b5dSChristopher Siden nvlist_free(label); 2243ad135b5dSChristopher Siden 2244ad135b5dSChristopher Siden /* 2245ad135b5dSChristopher Siden * Look through entries in the label nvlist's features_for_read. If 2246ad135b5dSChristopher Siden * there is a feature listed there which we don't understand then we 2247ad135b5dSChristopher Siden * cannot open a pool. 2248ad135b5dSChristopher Siden */ 2249ad135b5dSChristopher Siden if (ub->ub_version >= SPA_VERSION_FEATURES) { 2250ad135b5dSChristopher Siden nvlist_t *unsup_feat; 2251ad135b5dSChristopher Siden 2252ad135b5dSChristopher Siden VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == 2253ad135b5dSChristopher Siden 0); 2254ad135b5dSChristopher Siden 2255ad135b5dSChristopher Siden for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, 2256ad135b5dSChristopher Siden NULL); nvp != NULL; 2257ad135b5dSChristopher Siden nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { 2258ad135b5dSChristopher Siden if (!zfeature_is_supported(nvpair_name(nvp))) { 2259ad135b5dSChristopher Siden VERIFY(nvlist_add_string(unsup_feat, 2260ad135b5dSChristopher Siden nvpair_name(nvp), "") == 0); 2261ad135b5dSChristopher Siden } 2262ad135b5dSChristopher Siden } 2263ad135b5dSChristopher Siden 2264ad135b5dSChristopher Siden if (!nvlist_empty(unsup_feat)) { 2265ad135b5dSChristopher Siden VERIFY(nvlist_add_nvlist(spa->spa_load_info, 2266ad135b5dSChristopher Siden ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); 2267ad135b5dSChristopher Siden nvlist_free(unsup_feat); 2268ad135b5dSChristopher Siden return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, 2269ad135b5dSChristopher Siden ENOTSUP)); 2270ad135b5dSChristopher Siden } 2271ad135b5dSChristopher Siden 
2272ad135b5dSChristopher Siden nvlist_free(unsup_feat); 2273ad135b5dSChristopher Siden } 2274fa9e4066Sahrens 2275fa9e4066Sahrens /* 2276fa9e4066Sahrens * If the vdev guid sum doesn't match the uberblock, we have an 22774b964adaSGeorge Wilson * incomplete configuration. We first check to see if the pool 22784b964adaSGeorge Wilson * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN). 22794b964adaSGeorge Wilson * If it is, defer the vdev_guid_sum check till later so we 22804b964adaSGeorge Wilson * can handle missing vdevs. 2281fa9e4066Sahrens */ 22824b964adaSGeorge Wilson if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, 22834b964adaSGeorge Wilson &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE && 22841195e687SMark J Musante rvd->vdev_guid_sum != ub->ub_guid_sum) 22851195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); 22861195e687SMark J Musante 22871195e687SMark J Musante if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { 22881195e687SMark J Musante spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 22891195e687SMark J Musante spa_try_repair(spa, config); 22901195e687SMark J Musante spa_config_exit(spa, SCL_ALL, FTAG); 22911195e687SMark J Musante nvlist_free(spa->spa_config_splitting); 22921195e687SMark J Musante spa->spa_config_splitting = NULL; 2293fa9e4066Sahrens } 2294fa9e4066Sahrens 2295fa9e4066Sahrens /* 2296fa9e4066Sahrens * Initialize internal SPA structures. 2297fa9e4066Sahrens */ 2298fa9e4066Sahrens spa->spa_state = POOL_STATE_ACTIVE; 2299fa9e4066Sahrens spa->spa_ubsync = spa->spa_uberblock; 2300468c413aSTim Haley spa->spa_verify_min_txg = spa->spa_extreme_rewind ? 2301c8ee1847SVictor Latushkin TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; 2302468c413aSTim Haley spa->spa_first_txg = spa->spa_last_ubsync_txg ? 
2303468c413aSTim Haley spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; 2304b24ab676SJeff Bonwick spa->spa_claim_max_txg = spa->spa_first_txg; 23053f9d6ad7SLin Ling spa->spa_prev_software_version = ub->ub_software_version; 2306b24ab676SJeff Bonwick 2307ad135b5dSChristopher Siden error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 23081195e687SMark J Musante if (error) 23091195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2310fa9e4066Sahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 2311fa9e4066Sahrens 23121195e687SMark J Musante if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0) 23131195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2314fa9e4066Sahrens 2315ad135b5dSChristopher Siden if (spa_version(spa) >= SPA_VERSION_FEATURES) { 2316ad135b5dSChristopher Siden boolean_t missing_feat_read = B_FALSE; 231757221772SChristopher Siden nvlist_t *unsup_feat, *enabled_feat; 2318ad135b5dSChristopher Siden 2319ad135b5dSChristopher Siden if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, 2320ad135b5dSChristopher Siden &spa->spa_feat_for_read_obj) != 0) { 2321ad135b5dSChristopher Siden return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2322ad135b5dSChristopher Siden } 2323ad135b5dSChristopher Siden 2324ad135b5dSChristopher Siden if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, 2325ad135b5dSChristopher Siden &spa->spa_feat_for_write_obj) != 0) { 2326ad135b5dSChristopher Siden return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2327ad135b5dSChristopher Siden } 2328ad135b5dSChristopher Siden 2329ad135b5dSChristopher Siden if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, 2330ad135b5dSChristopher Siden &spa->spa_feat_desc_obj) != 0) { 2331ad135b5dSChristopher Siden return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2332ad135b5dSChristopher Siden } 2333ad135b5dSChristopher Siden 233457221772SChristopher Siden enabled_feat = fnvlist_alloc(); 
233557221772SChristopher Siden unsup_feat = fnvlist_alloc(); 2336ad135b5dSChristopher Siden 23372acef22dSMatthew Ahrens if (!spa_features_check(spa, B_FALSE, 233857221772SChristopher Siden unsup_feat, enabled_feat)) 2339ad135b5dSChristopher Siden missing_feat_read = B_TRUE; 2340ad135b5dSChristopher Siden 2341ad135b5dSChristopher Siden if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) { 23422acef22dSMatthew Ahrens if (!spa_features_check(spa, B_TRUE, 234357221772SChristopher Siden unsup_feat, enabled_feat)) { 2344ad135b5dSChristopher Siden missing_feat_write = B_TRUE; 234557221772SChristopher Siden } 2346ad135b5dSChristopher Siden } 2347ad135b5dSChristopher Siden 234857221772SChristopher Siden fnvlist_add_nvlist(spa->spa_load_info, 234957221772SChristopher Siden ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); 235057221772SChristopher Siden 2351ad135b5dSChristopher Siden if (!nvlist_empty(unsup_feat)) { 235257221772SChristopher Siden fnvlist_add_nvlist(spa->spa_load_info, 235357221772SChristopher Siden ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); 2354ad135b5dSChristopher Siden } 2355ad135b5dSChristopher Siden 235657221772SChristopher Siden fnvlist_free(enabled_feat); 235757221772SChristopher Siden fnvlist_free(unsup_feat); 2358ad135b5dSChristopher Siden 2359ad135b5dSChristopher Siden if (!missing_feat_read) { 2360ad135b5dSChristopher Siden fnvlist_add_boolean(spa->spa_load_info, 2361ad135b5dSChristopher Siden ZPOOL_CONFIG_CAN_RDONLY); 2362ad135b5dSChristopher Siden } 2363ad135b5dSChristopher Siden 2364ad135b5dSChristopher Siden /* 2365ad135b5dSChristopher Siden * If the state is SPA_LOAD_TRYIMPORT, our objective is 2366ad135b5dSChristopher Siden * twofold: to determine whether the pool is available for 2367ad135b5dSChristopher Siden * import in read-write mode and (if it is not) whether the 2368ad135b5dSChristopher Siden * pool is available for import in read-only mode. 
If the pool 2369ad135b5dSChristopher Siden * is available for import in read-write mode, it is displayed 2370ad135b5dSChristopher Siden * as available in userland; if it is not available for import 2371ad135b5dSChristopher Siden * in read-only mode, it is displayed as unavailable in 2372ad135b5dSChristopher Siden * userland. If the pool is available for import in read-only 2373ad135b5dSChristopher Siden * mode but not read-write mode, it is displayed as unavailable 2374ad135b5dSChristopher Siden * in userland with a special note that the pool is actually 2375ad135b5dSChristopher Siden * available for open in read-only mode. 2376ad135b5dSChristopher Siden * 2377ad135b5dSChristopher Siden * As a result, if the state is SPA_LOAD_TRYIMPORT and we are 2378ad135b5dSChristopher Siden * missing a feature for write, we must first determine whether 2379ad135b5dSChristopher Siden * the pool can be opened read-only before returning to 2380ad135b5dSChristopher Siden * userland in order to know whether to display the 2381ad135b5dSChristopher Siden * abovementioned note. 2382ad135b5dSChristopher Siden */ 2383ad135b5dSChristopher Siden if (missing_feat_read || (missing_feat_write && 2384ad135b5dSChristopher Siden spa_writeable(spa))) { 2385ad135b5dSChristopher Siden return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, 2386ad135b5dSChristopher Siden ENOTSUP)); 2387ad135b5dSChristopher Siden } 238843466aaeSMax Grossman 238943466aaeSMax Grossman /* 239043466aaeSMax Grossman * Load refcounts for ZFS features from disk into an in-memory 239143466aaeSMax Grossman * cache during SPA initialization. 
239243466aaeSMax Grossman */ 239343466aaeSMax Grossman for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { 239443466aaeSMax Grossman uint64_t refcount; 239543466aaeSMax Grossman 239643466aaeSMax Grossman error = feature_get_refcount_from_disk(spa, 239743466aaeSMax Grossman &spa_feature_table[i], &refcount); 239843466aaeSMax Grossman if (error == 0) { 239943466aaeSMax Grossman spa->spa_feat_refcount_cache[i] = refcount; 240043466aaeSMax Grossman } else if (error == ENOTSUP) { 240143466aaeSMax Grossman spa->spa_feat_refcount_cache[i] = 240243466aaeSMax Grossman SPA_FEATURE_DISABLED; 240343466aaeSMax Grossman } else { 240443466aaeSMax Grossman return (spa_vdev_err(rvd, 240543466aaeSMax Grossman VDEV_AUX_CORRUPT_DATA, EIO)); 240643466aaeSMax Grossman } 240743466aaeSMax Grossman } 240843466aaeSMax Grossman } 240943466aaeSMax Grossman 241043466aaeSMax Grossman if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { 241143466aaeSMax Grossman if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG, 24125d7b4d43SMatthew Ahrens &spa->spa_feat_enabled_txg_obj) != 0) 241343466aaeSMax Grossman return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2414ad135b5dSChristopher Siden } 2415ad135b5dSChristopher Siden 2416ad135b5dSChristopher Siden spa->spa_is_initializing = B_TRUE; 2417ad135b5dSChristopher Siden error = dsl_pool_open(spa->spa_dsl_pool); 2418ad135b5dSChristopher Siden spa->spa_is_initializing = B_FALSE; 2419ad135b5dSChristopher Siden if (error != 0) 2420ad135b5dSChristopher Siden return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2421ad135b5dSChristopher Siden 2422fa9e4066Sahrens if (!mosconfig) { 242395173954Sek uint64_t hostid; 2424871a9500SMark J Musante nvlist_t *policy = NULL, *nvconfig; 2425871a9500SMark J Musante 2426871a9500SMark J Musante if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) 2427871a9500SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2428fa9e4066Sahrens 242988ecc943SGeorge Wilson if (!spa_is_root(spa) && 
nvlist_lookup_uint64(nvconfig, 243077650510SLin Ling ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 243195173954Sek char *hostname; 243295173954Sek unsigned long myhostid = 0; 243395173954Sek 243488ecc943SGeorge Wilson VERIFY(nvlist_lookup_string(nvconfig, 243595173954Sek ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 243695173954Sek 24375679c89fSjv #ifdef _KERNEL 24385679c89fSjv myhostid = zone_get_hostid(NULL); 24395679c89fSjv #else /* _KERNEL */ 24405679c89fSjv /* 24415679c89fSjv * We're emulating the system's hostid in userland, so 24425679c89fSjv * we can't use zone_get_hostid(). 24435679c89fSjv */ 244495173954Sek (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 24455679c89fSjv #endif /* _KERNEL */ 244617194a52Slling if (hostid != 0 && myhostid != 0 && 24475679c89fSjv hostid != myhostid) { 2448871a9500SMark J Musante nvlist_free(nvconfig); 244995173954Sek cmn_err(CE_WARN, "pool '%s' could not be " 245095173954Sek "loaded as it was last accessed by " 245177650510SLin Ling "another system (host: %s hostid: 0x%lx). " 2452654b400cSJoshua M. 
Clulow "See: http://illumos.org/msg/ZFS-8000-EY", 2453e14bb325SJeff Bonwick spa_name(spa), hostname, 245495173954Sek (unsigned long)hostid); 2455be6fd75aSMatthew Ahrens return (SET_ERROR(EBADF)); 245695173954Sek } 245795173954Sek } 2458c8ee1847SVictor Latushkin if (nvlist_lookup_nvlist(spa->spa_config, 2459c8ee1847SVictor Latushkin ZPOOL_REWIND_POLICY, &policy) == 0) 2460c8ee1847SVictor Latushkin VERIFY(nvlist_add_nvlist(nvconfig, 2461c8ee1847SVictor Latushkin ZPOOL_REWIND_POLICY, policy) == 0); 246295173954Sek 246388ecc943SGeorge Wilson spa_config_set(spa, nvconfig); 2464fa9e4066Sahrens spa_unload(spa); 2465fa9e4066Sahrens spa_deactivate(spa); 24668ad4d6ddSJeff Bonwick spa_activate(spa, orig_mode); 2467fa9e4066Sahrens 24681195e687SMark J Musante return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE)); 2469fa9e4066Sahrens } 2470fa9e4066Sahrens 2471cde58dbcSMatthew Ahrens if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0) 2472cde58dbcSMatthew Ahrens return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2473cde58dbcSMatthew Ahrens error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); 2474cde58dbcSMatthew Ahrens if (error != 0) 24751195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2476fa9e4066Sahrens 247799653d4eSeschrock /* 247899653d4eSeschrock * Load the bit that tells us to use the new accounting function 247999653d4eSeschrock * (raid-z deflation). If we have an older pool, this will not 248099653d4eSeschrock * be present. 
248199653d4eSeschrock */ 24821195e687SMark J Musante error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate); 24831195e687SMark J Musante if (error != 0 && error != ENOENT) 24841195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 248599653d4eSeschrock 24863f9d6ad7SLin Ling error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, 24873f9d6ad7SLin Ling &spa->spa_creation_version); 24883f9d6ad7SLin Ling if (error != 0 && error != ENOENT) 24893f9d6ad7SLin Ling return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 24903f9d6ad7SLin Ling 2491fa9e4066Sahrens /* 2492ea8dc4b6Seschrock * Load the persistent error log. If we have an older pool, this will 2493ea8dc4b6Seschrock * not be present. 2494fa9e4066Sahrens */ 24951195e687SMark J Musante error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last); 24961195e687SMark J Musante if (error != 0 && error != ENOENT) 24971195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2498ea8dc4b6Seschrock 24991195e687SMark J Musante error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, 25001195e687SMark J Musante &spa->spa_errlog_scrub); 25011195e687SMark J Musante if (error != 0 && error != ENOENT) 25021195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2503ea8dc4b6Seschrock 250406eeb2adSek /* 250506eeb2adSek * Load the history object. If we have an older pool, this 250606eeb2adSek * will not be present. 250706eeb2adSek */ 25081195e687SMark J Musante error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history); 25091195e687SMark J Musante if (error != 0 && error != ENOENT) 25101195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 25111195e687SMark J Musante 25121195e687SMark J Musante /* 25131195e687SMark J Musante * If we're assembling the pool from the split-off vdevs of 25141195e687SMark J Musante * an existing pool, we don't want to attach the spares & cache 25151195e687SMark J Musante * devices. 
25161195e687SMark J Musante */ 251706eeb2adSek 251899653d4eSeschrock /* 251999653d4eSeschrock * Load any hot spares for this pool. 252099653d4eSeschrock */ 25211195e687SMark J Musante error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object); 25221195e687SMark J Musante if (error != 0 && error != ENOENT) 25231195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 25241195e687SMark J Musante if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { 2525e7437265Sahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 2526fa94a07fSbrendan if (load_nvlist(spa, spa->spa_spares.sav_object, 25271195e687SMark J Musante &spa->spa_spares.sav_config) != 0) 25281195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 252999653d4eSeschrock 2530e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 253199653d4eSeschrock spa_load_spares(spa); 2532e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 25331195e687SMark J Musante } else if (error == 0) { 25341195e687SMark J Musante spa->spa_spares.sav_sync = B_TRUE; 253599653d4eSeschrock } 253699653d4eSeschrock 2537fa94a07fSbrendan /* 2538fa94a07fSbrendan * Load any level 2 ARC devices for this pool. 
2539fa94a07fSbrendan */ 25401195e687SMark J Musante error = spa_dir_prop(spa, DMU_POOL_L2CACHE, 2541fa94a07fSbrendan &spa->spa_l2cache.sav_object); 25421195e687SMark J Musante if (error != 0 && error != ENOENT) 25431195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 25441195e687SMark J Musante if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { 2545fa94a07fSbrendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 2546fa94a07fSbrendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 25471195e687SMark J Musante &spa->spa_l2cache.sav_config) != 0) 25481195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2549fa94a07fSbrendan 2550e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2551fa94a07fSbrendan spa_load_l2cache(spa); 2552e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 25531195e687SMark J Musante } else if (error == 0) { 25541195e687SMark J Musante spa->spa_l2cache.sav_sync = B_TRUE; 2555fa94a07fSbrendan } 2556fa94a07fSbrendan 2557990b4856Slling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 2558ecd6cf80Smarks 25591195e687SMark J Musante error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object); 25601195e687SMark J Musante if (error && error != ENOENT) 25611195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2562b1b8ab34Slling 2563b1b8ab34Slling if (error == 0) { 25641195e687SMark J Musante uint64_t autoreplace; 25651195e687SMark J Musante 25661195e687SMark J Musante spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); 25671195e687SMark J Musante spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); 25681195e687SMark J Musante spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); 25691195e687SMark J Musante spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); 25701195e687SMark J Musante spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); 25711195e687SMark J Musante 
spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, 25721195e687SMark J Musante &spa->spa_dedup_ditto); 25731195e687SMark J Musante 2574b693757aSEric Schrock spa->spa_autoreplace = (autoreplace != 0); 2575b1b8ab34Slling } 2576b1b8ab34Slling 25773d7072f8Seschrock /* 25783d7072f8Seschrock * If the 'autoreplace' property is set, then post a resource notifying 25793d7072f8Seschrock * the ZFS DE that it should not issue any faults for unopenable 25803d7072f8Seschrock * devices. We also iterate over the vdevs, and post a sysevent for any 25813d7072f8Seschrock * unopenable vdevs so that the normal autoreplace handler can take 25823d7072f8Seschrock * over. 25833d7072f8Seschrock */ 2584b693757aSEric Schrock if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 25853d7072f8Seschrock spa_check_removed(spa->spa_root_vdev); 2586b693757aSEric Schrock /* 2587b693757aSEric Schrock * For the import case, this is done in spa_import(), because 2588b693757aSEric Schrock * at this point we're using the spare definitions from 2589b693757aSEric Schrock * the MOS config, not necessarily from the userland config. 2590b693757aSEric Schrock */ 2591b693757aSEric Schrock if (state != SPA_LOAD_IMPORT) { 2592b693757aSEric Schrock spa_aux_check_removed(&spa->spa_spares); 2593b693757aSEric Schrock spa_aux_check_removed(&spa->spa_l2cache); 2594b693757aSEric Schrock } 2595b693757aSEric Schrock } 25963d7072f8Seschrock 2597ea8dc4b6Seschrock /* 2598560e6e96Seschrock * Load the vdev state for all toplevel vdevs. 2599ea8dc4b6Seschrock */ 2600560e6e96Seschrock vdev_load(rvd); 26010373e76bSbonwick 2602fa9e4066Sahrens /* 2603fa9e4066Sahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 
2604fa9e4066Sahrens */ 2605e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2606fa9e4066Sahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 2607e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 2608fa9e4066Sahrens 2609b24ab676SJeff Bonwick /* 2610b24ab676SJeff Bonwick * Load the DDTs (dedup tables). 2611b24ab676SJeff Bonwick */ 2612b24ab676SJeff Bonwick error = ddt_load(spa); 26131195e687SMark J Musante if (error != 0) 26141195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2615b24ab676SJeff Bonwick 2616485bbbf5SGeorge Wilson spa_update_dspace(spa); 2617485bbbf5SGeorge Wilson 2618b24ab676SJeff Bonwick /* 26194b964adaSGeorge Wilson * Validate the config, using the MOS config to fill in any 26204b964adaSGeorge Wilson * information which might be missing. If we fail to validate 26214b964adaSGeorge Wilson * the config then declare the pool unfit for use. If we're 26224b964adaSGeorge Wilson * assembling a pool from a split, the log is not transferred 26234b964adaSGeorge Wilson * over. 2624b24ab676SJeff Bonwick */ 26251195e687SMark J Musante if (type != SPA_IMPORT_ASSEMBLE) { 2626871a9500SMark J Musante nvlist_t *nvconfig; 2627871a9500SMark J Musante 2628871a9500SMark J Musante if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) 2629871a9500SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2630871a9500SMark J Musante 26314b964adaSGeorge Wilson if (!spa_config_valid(spa, nvconfig)) { 26324b964adaSGeorge Wilson nvlist_free(nvconfig); 26334b964adaSGeorge Wilson return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, 26344b964adaSGeorge Wilson ENXIO)); 26354b964adaSGeorge Wilson } 26361195e687SMark J Musante nvlist_free(nvconfig); 26371195e687SMark J Musante 26384b964adaSGeorge Wilson /* 2639ad135b5dSChristopher Siden * Now that we've validated the config, check the state of the 26404b964adaSGeorge Wilson * root vdev. 
If it can't be opened, it indicates one or 26414b964adaSGeorge Wilson * more toplevel vdevs are faulted. 26424b964adaSGeorge Wilson */ 26434b964adaSGeorge Wilson if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) 2644be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 26454b964adaSGeorge Wilson 26461195e687SMark J Musante if (spa_check_logs(spa)) { 26471195e687SMark J Musante *ereport = FM_EREPORT_ZFS_LOG_REPLAY; 26481195e687SMark J Musante return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO)); 26491195e687SMark J Musante } 2650b24ab676SJeff Bonwick } 2651b24ab676SJeff Bonwick 2652ad135b5dSChristopher Siden if (missing_feat_write) { 2653ad135b5dSChristopher Siden ASSERT(state == SPA_LOAD_TRYIMPORT); 2654ad135b5dSChristopher Siden 2655ad135b5dSChristopher Siden /* 2656ad135b5dSChristopher Siden * At this point, we know that we can open the pool in 2657ad135b5dSChristopher Siden * read-only mode but not read-write mode. We now have enough 2658ad135b5dSChristopher Siden * information and can return to userland. 2659ad135b5dSChristopher Siden */ 2660ad135b5dSChristopher Siden return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); 2661ad135b5dSChristopher Siden } 2662ad135b5dSChristopher Siden 26634b964adaSGeorge Wilson /* 26644b964adaSGeorge Wilson * We've successfully opened the pool, verify that we're ready 26654b964adaSGeorge Wilson * to start pushing transactions. 
26664b964adaSGeorge Wilson */ 26674b964adaSGeorge Wilson if (state != SPA_LOAD_TRYIMPORT) { 26684b964adaSGeorge Wilson if (error = spa_load_verify(spa)) 26694b964adaSGeorge Wilson return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, 26704b964adaSGeorge Wilson error)); 26714b964adaSGeorge Wilson } 26724b964adaSGeorge Wilson 2673468c413aSTim Haley if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || 2674468c413aSTim Haley spa->spa_load_max_txg == UINT64_MAX)) { 26755dabedeeSbonwick dmu_tx_t *tx; 26760373e76bSbonwick int need_update = B_FALSE; 26778ad4d6ddSJeff Bonwick 26788ad4d6ddSJeff Bonwick ASSERT(state != SPA_LOAD_TRYIMPORT); 26795dabedeeSbonwick 26800373e76bSbonwick /* 26810373e76bSbonwick * Claim log blocks that haven't been committed yet. 26820373e76bSbonwick * This must all happen in a single txg. 2683b24ab676SJeff Bonwick * Note: spa_claim_max_txg is updated by spa_claim_notify(), 2684b24ab676SJeff Bonwick * invoked from zil_claim_log_block()'s i/o done callback. 2685468c413aSTim Haley * Price of rollback is that we abandon the log. 26860373e76bSbonwick */ 2687b24ab676SJeff Bonwick spa->spa_claiming = B_TRUE; 2688b24ab676SJeff Bonwick 26895dabedeeSbonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 2690fa9e4066Sahrens spa_first_txg(spa)); 2691e14bb325SJeff Bonwick (void) dmu_objset_find(spa_name(spa), 26920b69c2f0Sahrens zil_claim, tx, DS_FIND_CHILDREN); 2693fa9e4066Sahrens dmu_tx_commit(tx); 2694fa9e4066Sahrens 2695b24ab676SJeff Bonwick spa->spa_claiming = B_FALSE; 2696b24ab676SJeff Bonwick 26971195e687SMark J Musante spa_set_log_state(spa, SPA_LOG_GOOD); 2698fa9e4066Sahrens spa->spa_sync_on = B_TRUE; 2699fa9e4066Sahrens txg_sync_start(spa->spa_dsl_pool); 2700fa9e4066Sahrens 2701fa9e4066Sahrens /* 2702b24ab676SJeff Bonwick * Wait for all claims to sync. We sync up to the highest 2703b24ab676SJeff Bonwick * claimed log block birth time so that claimed log blocks 2704b24ab676SJeff Bonwick * don't appear to be from the future. 
spa_claim_max_txg 2705b24ab676SJeff Bonwick * will have been set for us by either zil_check_log_chain() 2706b24ab676SJeff Bonwick * (invoked from spa_check_logs()) or zil_claim() above. 2707fa9e4066Sahrens */ 2708b24ab676SJeff Bonwick txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); 27090e34b6a7Sbonwick 27100e34b6a7Sbonwick /* 27110373e76bSbonwick * If the config cache is stale, or we have uninitialized 27120373e76bSbonwick * metaslabs (see spa_vdev_add()), then update the config. 2713bc758434SLin Ling * 27144b964adaSGeorge Wilson * If this is a verbatim import, trust the current 2715bc758434SLin Ling * in-core spa_config and update the disk labels. 27160e34b6a7Sbonwick */ 27170373e76bSbonwick if (config_cache_txg != spa->spa_config_txg || 27184b964adaSGeorge Wilson state == SPA_LOAD_IMPORT || 27194b964adaSGeorge Wilson state == SPA_LOAD_RECOVER || 27204b964adaSGeorge Wilson (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) 27210373e76bSbonwick need_update = B_TRUE; 27220373e76bSbonwick 27238ad4d6ddSJeff Bonwick for (int c = 0; c < rvd->vdev_children; c++) 27240373e76bSbonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 27250373e76bSbonwick need_update = B_TRUE; 27260e34b6a7Sbonwick 27270e34b6a7Sbonwick /* 27280373e76bSbonwick * Update the config cache asychronously in case we're the 27290373e76bSbonwick * root pool, in which case the config cache isn't writable yet. 27300e34b6a7Sbonwick */ 27310373e76bSbonwick if (need_update) 27320373e76bSbonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 27338ad4d6ddSJeff Bonwick 27348ad4d6ddSJeff Bonwick /* 27358ad4d6ddSJeff Bonwick * Check all DTLs to see if anything needs resilvering. 
27368ad4d6ddSJeff Bonwick */ 27373f9d6ad7SLin Ling if (!dsl_scan_resilvering(spa->spa_dsl_pool) && 27383f9d6ad7SLin Ling vdev_resilver_needed(rvd, NULL, NULL)) 27398ad4d6ddSJeff Bonwick spa_async_request(spa, SPA_ASYNC_RESILVER); 2740503ad85cSMatthew Ahrens 27414445fffbSMatthew Ahrens /* 27424445fffbSMatthew Ahrens * Log the fact that we booted up (so that we can detect if 27434445fffbSMatthew Ahrens * we rebooted in the middle of an operation). 27444445fffbSMatthew Ahrens */ 27454445fffbSMatthew Ahrens spa_history_log_version(spa, "open"); 27464445fffbSMatthew Ahrens 2747503ad85cSMatthew Ahrens /* 2748503ad85cSMatthew Ahrens * Delete any inconsistent datasets. 2749503ad85cSMatthew Ahrens */ 2750503ad85cSMatthew Ahrens (void) dmu_objset_find(spa_name(spa), 2751503ad85cSMatthew Ahrens dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 2752ca45db41SChris Kirby 2753ca45db41SChris Kirby /* 2754ca45db41SChris Kirby * Clean up any stale temporary dataset userrefs. 2755ca45db41SChris Kirby */ 2756ca45db41SChris Kirby dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 2757fa9e4066Sahrens } 2758fa9e4066Sahrens 27591195e687SMark J Musante return (0); 2760fa9e4066Sahrens } 2761fa9e4066Sahrens 2762468c413aSTim Haley static int 2763468c413aSTim Haley spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) 2764468c413aSTim Haley { 2765f9af39baSGeorge Wilson int mode = spa->spa_mode; 2766f9af39baSGeorge Wilson 2767468c413aSTim Haley spa_unload(spa); 2768468c413aSTim Haley spa_deactivate(spa); 2769468c413aSTim Haley 2770e42d2059SMatthew Ahrens spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; 2771468c413aSTim Haley 2772f9af39baSGeorge Wilson spa_activate(spa, mode); 2773468c413aSTim Haley spa_async_suspend(spa); 2774468c413aSTim Haley 27751195e687SMark J Musante return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig)); 2776468c413aSTim Haley } 2777468c413aSTim Haley 2778ad135b5dSChristopher Siden /* 2779ad135b5dSChristopher Siden * If spa_load() fails this 
function will try loading prior txg's. If 2780ad135b5dSChristopher Siden * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool 2781ad135b5dSChristopher Siden * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this 2782ad135b5dSChristopher Siden * function will not rewind the pool and will return the same error as 2783ad135b5dSChristopher Siden * spa_load(). 2784ad135b5dSChristopher Siden */ 2785468c413aSTim Haley static int 2786468c413aSTim Haley spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, 2787c8ee1847SVictor Latushkin uint64_t max_request, int rewind_flags) 2788468c413aSTim Haley { 2789ad135b5dSChristopher Siden nvlist_t *loadinfo = NULL; 2790468c413aSTim Haley nvlist_t *config = NULL; 2791468c413aSTim Haley int load_error, rewind_error; 2792c8ee1847SVictor Latushkin uint64_t safe_rewind_txg; 2793468c413aSTim Haley uint64_t min_txg; 2794468c413aSTim Haley 2795a33cae98STim Haley if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { 2796468c413aSTim Haley spa->spa_load_max_txg = spa->spa_load_txg; 27971195e687SMark J Musante spa_set_log_state(spa, SPA_LOG_CLEAR); 2798a33cae98STim Haley } else { 2799468c413aSTim Haley spa->spa_load_max_txg = max_request; 2800e42d2059SMatthew Ahrens if (max_request != UINT64_MAX) 2801e42d2059SMatthew Ahrens spa->spa_extreme_rewind = B_TRUE; 2802a33cae98STim Haley } 2803468c413aSTim Haley 28041195e687SMark J Musante load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, 28051195e687SMark J Musante mosconfig); 2806468c413aSTim Haley if (load_error == 0) 2807468c413aSTim Haley return (0); 2808468c413aSTim Haley 2809468c413aSTim Haley if (spa->spa_root_vdev != NULL) 2810468c413aSTim Haley config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2811468c413aSTim Haley 2812468c413aSTim Haley spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; 2813468c413aSTim Haley spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; 2814468c413aSTim Haley 2815c8ee1847SVictor 
Latushkin if (rewind_flags & ZPOOL_NEVER_REWIND) { 2816468c413aSTim Haley nvlist_free(config); 2817468c413aSTim Haley return (load_error); 2818468c413aSTim Haley } 2819468c413aSTim Haley 2820ad135b5dSChristopher Siden if (state == SPA_LOAD_RECOVER) { 2821ad135b5dSChristopher Siden /* Price of rolling back is discarding txgs, including log */ 28221195e687SMark J Musante spa_set_log_state(spa, SPA_LOG_CLEAR); 2823ad135b5dSChristopher Siden } else { 2824ad135b5dSChristopher Siden /* 2825ad135b5dSChristopher Siden * If we aren't rolling back save the load info from our first 2826ad135b5dSChristopher Siden * import attempt so that we can restore it after attempting 2827ad135b5dSChristopher Siden * to rewind. 2828ad135b5dSChristopher Siden */ 2829ad135b5dSChristopher Siden loadinfo = spa->spa_load_info; 2830ad135b5dSChristopher Siden spa->spa_load_info = fnvlist_alloc(); 2831ad135b5dSChristopher Siden } 2832468c413aSTim Haley 2833c8ee1847SVictor Latushkin spa->spa_load_max_txg = spa->spa_last_ubsync_txg; 2834c8ee1847SVictor Latushkin safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; 2835c8ee1847SVictor Latushkin min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? 
2836c8ee1847SVictor Latushkin TXG_INITIAL : safe_rewind_txg; 2837468c413aSTim Haley 2838c8ee1847SVictor Latushkin /* 2839c8ee1847SVictor Latushkin * Continue as long as we're finding errors, we're still within 2840c8ee1847SVictor Latushkin * the acceptable rewind range, and we're still finding uberblocks 2841c8ee1847SVictor Latushkin */ 2842c8ee1847SVictor Latushkin while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && 2843c8ee1847SVictor Latushkin spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { 2844c8ee1847SVictor Latushkin if (spa->spa_load_max_txg < safe_rewind_txg) 2845468c413aSTim Haley spa->spa_extreme_rewind = B_TRUE; 2846468c413aSTim Haley rewind_error = spa_load_retry(spa, state, mosconfig); 2847468c413aSTim Haley } 2848468c413aSTim Haley 2849468c413aSTim Haley spa->spa_extreme_rewind = B_FALSE; 2850468c413aSTim Haley spa->spa_load_max_txg = UINT64_MAX; 2851468c413aSTim Haley 2852468c413aSTim Haley if (config && (rewind_error || state != SPA_LOAD_RECOVER)) 2853468c413aSTim Haley spa_config_set(spa, config); 2854468c413aSTim Haley 2855ad135b5dSChristopher Siden if (state == SPA_LOAD_RECOVER) { 2856ad135b5dSChristopher Siden ASSERT3P(loadinfo, ==, NULL); 2857ad135b5dSChristopher Siden return (rewind_error); 2858ad135b5dSChristopher Siden } else { 2859ad135b5dSChristopher Siden /* Store the rewind info as part of the initial load info */ 2860ad135b5dSChristopher Siden fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, 2861ad135b5dSChristopher Siden spa->spa_load_info); 2862ad135b5dSChristopher Siden 2863ad135b5dSChristopher Siden /* Restore the initial load info */ 2864ad135b5dSChristopher Siden fnvlist_free(spa->spa_load_info); 2865ad135b5dSChristopher Siden spa->spa_load_info = loadinfo; 2866ad135b5dSChristopher Siden 2867ad135b5dSChristopher Siden return (load_error); 2868ad135b5dSChristopher Siden } 2869468c413aSTim Haley } 2870468c413aSTim Haley 2871fa9e4066Sahrens /* 2872fa9e4066Sahrens * Pool Open/Import 2873fa9e4066Sahrens * 
2874fa9e4066Sahrens * The import case is identical to an open except that the configuration is sent 2875fa9e4066Sahrens * down from userland, instead of grabbed from the configuration cache. For the 2876fa9e4066Sahrens * case of an open, the pool configuration will exist in the 28773d7072f8Seschrock * POOL_STATE_UNINITIALIZED state. 2878fa9e4066Sahrens * 2879fa9e4066Sahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 2880fa9e4066Sahrens * the same time open the pool, without having to keep around the spa_t in some 2881fa9e4066Sahrens * ambiguous state. 2882fa9e4066Sahrens */ 2883fa9e4066Sahrens static int 2884468c413aSTim Haley spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, 2885468c413aSTim Haley nvlist_t **config) 2886fa9e4066Sahrens { 2887fa9e4066Sahrens spa_t *spa; 28884b964adaSGeorge Wilson spa_load_state_t state = SPA_LOAD_OPEN; 2889fa9e4066Sahrens int error; 2890fa9e4066Sahrens int locked = B_FALSE; 2891fa9e4066Sahrens 2892fa9e4066Sahrens *spapp = NULL; 2893fa9e4066Sahrens 2894fa9e4066Sahrens /* 2895fa9e4066Sahrens * As disgusting as this is, we need to support recursive calls to this 2896fa9e4066Sahrens * function because dsl_dir_open() is called during spa_load(), and ends 2897fa9e4066Sahrens * up calling spa_open() again. The real fix is to figure out how to 2898fa9e4066Sahrens * avoid dsl_dir_open() calling this in the first place. 
2899fa9e4066Sahrens */ 2900fa9e4066Sahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 2901fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 2902fa9e4066Sahrens locked = B_TRUE; 2903fa9e4066Sahrens } 2904fa9e4066Sahrens 2905fa9e4066Sahrens if ((spa = spa_lookup(pool)) == NULL) { 2906fa9e4066Sahrens if (locked) 2907fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2908be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 2909fa9e4066Sahrens } 2910468c413aSTim Haley 2911fa9e4066Sahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 29124b44c88cSTim Haley zpool_rewind_policy_t policy; 29134b44c88cSTim Haley 29144b44c88cSTim Haley zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config, 29154b44c88cSTim Haley &policy); 29164b44c88cSTim Haley if (policy.zrp_request & ZPOOL_DO_REWIND) 29174b44c88cSTim Haley state = SPA_LOAD_RECOVER; 2918fa9e4066Sahrens 29198ad4d6ddSJeff Bonwick spa_activate(spa, spa_mode_global); 2920fa9e4066Sahrens 2921468c413aSTim Haley if (state != SPA_LOAD_RECOVER) 2922468c413aSTim Haley spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; 2923468c413aSTim Haley 2924468c413aSTim Haley error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, 2925c8ee1847SVictor Latushkin policy.zrp_request); 2926fa9e4066Sahrens 2927fa9e4066Sahrens if (error == EBADF) { 2928fa9e4066Sahrens /* 2929560e6e96Seschrock * If vdev_validate() returns failure (indicated by 2930560e6e96Seschrock * EBADF), it indicates that one of the vdevs indicates 2931560e6e96Seschrock * that the pool has been exported or destroyed. If 2932560e6e96Seschrock * this is the case, the config cache is out of sync and 2933560e6e96Seschrock * we should remove the pool from the namespace. 
2934fa9e4066Sahrens */ 2935fa9e4066Sahrens spa_unload(spa); 2936fa9e4066Sahrens spa_deactivate(spa); 2937c5904d13Seschrock spa_config_sync(spa, B_TRUE, B_TRUE); 2938fa9e4066Sahrens spa_remove(spa); 2939fa9e4066Sahrens if (locked) 2940fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2941be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 2942ea8dc4b6Seschrock } 2943ea8dc4b6Seschrock 2944ea8dc4b6Seschrock if (error) { 2945fa9e4066Sahrens /* 2946fa9e4066Sahrens * We can't open the pool, but we still have useful 2947fa9e4066Sahrens * information: the state of each vdev after the 2948fa9e4066Sahrens * attempted vdev_open(). Return this to the user. 2949fa9e4066Sahrens */ 29504b964adaSGeorge Wilson if (config != NULL && spa->spa_config) { 2951468c413aSTim Haley VERIFY(nvlist_dup(spa->spa_config, config, 2952468c413aSTim Haley KM_SLEEP) == 0); 29534b964adaSGeorge Wilson VERIFY(nvlist_add_nvlist(*config, 29544b964adaSGeorge Wilson ZPOOL_CONFIG_LOAD_INFO, 29554b964adaSGeorge Wilson spa->spa_load_info) == 0); 29564b964adaSGeorge Wilson } 2957fa9e4066Sahrens spa_unload(spa); 2958fa9e4066Sahrens spa_deactivate(spa); 2959468c413aSTim Haley spa->spa_last_open_failed = error; 2960fa9e4066Sahrens if (locked) 2961fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2962fa9e4066Sahrens *spapp = NULL; 2963fa9e4066Sahrens return (error); 2964fa9e4066Sahrens } 2965fa9e4066Sahrens } 2966fa9e4066Sahrens 2967fa9e4066Sahrens spa_open_ref(spa, tag); 29683d7072f8Seschrock 2969468c413aSTim Haley if (config != NULL) 2970468c413aSTim Haley *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2971468c413aSTim Haley 29724b964adaSGeorge Wilson /* 29734b964adaSGeorge Wilson * If we've recovered the pool, pass back any information we 29744b964adaSGeorge Wilson * gathered while doing the load. 
29754b964adaSGeorge Wilson */ 29764b964adaSGeorge Wilson if (state == SPA_LOAD_RECOVER) { 29774b964adaSGeorge Wilson VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, 29784b964adaSGeorge Wilson spa->spa_load_info) == 0); 29794b964adaSGeorge Wilson } 29804b964adaSGeorge Wilson 2981a33cae98STim Haley if (locked) { 2982a33cae98STim Haley spa->spa_last_open_failed = 0; 2983a33cae98STim Haley spa->spa_last_ubsync_txg = 0; 2984a33cae98STim Haley spa->spa_load_txg = 0; 2985fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 2986a33cae98STim Haley } 2987fa9e4066Sahrens 2988fa9e4066Sahrens *spapp = spa; 2989fa9e4066Sahrens 2990fa9e4066Sahrens return (0); 2991fa9e4066Sahrens } 2992fa9e4066Sahrens 2993468c413aSTim Haley int 2994468c413aSTim Haley spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, 2995468c413aSTim Haley nvlist_t **config) 2996468c413aSTim Haley { 2997468c413aSTim Haley return (spa_open_common(name, spapp, tag, policy, config)); 2998468c413aSTim Haley } 2999468c413aSTim Haley 3000fa9e4066Sahrens int 3001fa9e4066Sahrens spa_open(const char *name, spa_t **spapp, void *tag) 3002fa9e4066Sahrens { 3003468c413aSTim Haley return (spa_open_common(name, spapp, tag, NULL, NULL)); 3004fa9e4066Sahrens } 3005fa9e4066Sahrens 3006ea8dc4b6Seschrock /* 3007ea8dc4b6Seschrock * Lookup the given spa_t, incrementing the inject count in the process, 3008ea8dc4b6Seschrock * preventing it from being exported or destroyed. 
3009ea8dc4b6Seschrock */ 3010ea8dc4b6Seschrock spa_t * 3011ea8dc4b6Seschrock spa_inject_addref(char *name) 3012ea8dc4b6Seschrock { 3013ea8dc4b6Seschrock spa_t *spa; 3014ea8dc4b6Seschrock 3015ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 3016ea8dc4b6Seschrock if ((spa = spa_lookup(name)) == NULL) { 3017ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 3018ea8dc4b6Seschrock return (NULL); 3019ea8dc4b6Seschrock } 3020ea8dc4b6Seschrock spa->spa_inject_ref++; 3021ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 3022ea8dc4b6Seschrock 3023ea8dc4b6Seschrock return (spa); 3024ea8dc4b6Seschrock } 3025ea8dc4b6Seschrock 3026ea8dc4b6Seschrock void 3027ea8dc4b6Seschrock spa_inject_delref(spa_t *spa) 3028ea8dc4b6Seschrock { 3029ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 3030ea8dc4b6Seschrock spa->spa_inject_ref--; 3031ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 3032ea8dc4b6Seschrock } 3033ea8dc4b6Seschrock 3034fa94a07fSbrendan /* 3035fa94a07fSbrendan * Add spares device information to the nvlist. 
3036fa94a07fSbrendan */ 303799653d4eSeschrock static void 303899653d4eSeschrock spa_add_spares(spa_t *spa, nvlist_t *config) 303999653d4eSeschrock { 304099653d4eSeschrock nvlist_t **spares; 304199653d4eSeschrock uint_t i, nspares; 304299653d4eSeschrock nvlist_t *nvroot; 304399653d4eSeschrock uint64_t guid; 304499653d4eSeschrock vdev_stat_t *vs; 304599653d4eSeschrock uint_t vsc; 304639c23413Seschrock uint64_t pool; 304799653d4eSeschrock 30486809eb4eSEric Schrock ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 30496809eb4eSEric Schrock 3050fa94a07fSbrendan if (spa->spa_spares.sav_count == 0) 305199653d4eSeschrock return; 305299653d4eSeschrock 305399653d4eSeschrock VERIFY(nvlist_lookup_nvlist(config, 305499653d4eSeschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 3055fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 305699653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 305799653d4eSeschrock if (nspares != 0) { 305899653d4eSeschrock VERIFY(nvlist_add_nvlist_array(nvroot, 305999653d4eSeschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 306099653d4eSeschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 306199653d4eSeschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 306299653d4eSeschrock 306399653d4eSeschrock /* 306499653d4eSeschrock * Go through and find any spares which have since been 306599653d4eSeschrock * repurposed as an active spare. If this is the case, update 306699653d4eSeschrock * their status appropriately. 
306799653d4eSeschrock */ 306899653d4eSeschrock for (i = 0; i < nspares; i++) { 306999653d4eSeschrock VERIFY(nvlist_lookup_uint64(spares[i], 307099653d4eSeschrock ZPOOL_CONFIG_GUID, &guid) == 0); 307189a89ebfSlling if (spa_spare_exists(guid, &pool, NULL) && 307289a89ebfSlling pool != 0ULL) { 307399653d4eSeschrock VERIFY(nvlist_lookup_uint64_array( 30743f9d6ad7SLin Ling spares[i], ZPOOL_CONFIG_VDEV_STATS, 307599653d4eSeschrock (uint64_t **)&vs, &vsc) == 0); 307699653d4eSeschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 307799653d4eSeschrock vs->vs_aux = VDEV_AUX_SPARED; 307899653d4eSeschrock } 307999653d4eSeschrock } 308099653d4eSeschrock } 308199653d4eSeschrock } 308299653d4eSeschrock 3083fa94a07fSbrendan /* 3084fa94a07fSbrendan * Add l2cache device information to the nvlist, including vdev stats. 3085fa94a07fSbrendan */ 3086fa94a07fSbrendan static void 3087fa94a07fSbrendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 3088fa94a07fSbrendan { 3089fa94a07fSbrendan nvlist_t **l2cache; 3090fa94a07fSbrendan uint_t i, j, nl2cache; 3091fa94a07fSbrendan nvlist_t *nvroot; 3092fa94a07fSbrendan uint64_t guid; 3093fa94a07fSbrendan vdev_t *vd; 3094fa94a07fSbrendan vdev_stat_t *vs; 3095fa94a07fSbrendan uint_t vsc; 3096fa94a07fSbrendan 30976809eb4eSEric Schrock ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 30986809eb4eSEric Schrock 3099fa94a07fSbrendan if (spa->spa_l2cache.sav_count == 0) 3100fa94a07fSbrendan return; 3101fa94a07fSbrendan 3102fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist(config, 3103fa94a07fSbrendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 3104fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 3105fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 3106fa94a07fSbrendan if (nl2cache != 0) { 3107fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(nvroot, 3108fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 3109fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 3110fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE, 
&l2cache, &nl2cache) == 0); 3111fa94a07fSbrendan 3112fa94a07fSbrendan /* 3113fa94a07fSbrendan * Update level 2 cache device stats. 3114fa94a07fSbrendan */ 3115fa94a07fSbrendan 3116fa94a07fSbrendan for (i = 0; i < nl2cache; i++) { 3117fa94a07fSbrendan VERIFY(nvlist_lookup_uint64(l2cache[i], 3118fa94a07fSbrendan ZPOOL_CONFIG_GUID, &guid) == 0); 3119fa94a07fSbrendan 3120fa94a07fSbrendan vd = NULL; 3121fa94a07fSbrendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 3122fa94a07fSbrendan if (guid == 3123fa94a07fSbrendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 3124fa94a07fSbrendan vd = spa->spa_l2cache.sav_vdevs[j]; 3125fa94a07fSbrendan break; 3126fa94a07fSbrendan } 3127fa94a07fSbrendan } 3128fa94a07fSbrendan ASSERT(vd != NULL); 3129fa94a07fSbrendan 3130fa94a07fSbrendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 31313f9d6ad7SLin Ling ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) 31323f9d6ad7SLin Ling == 0); 3133fa94a07fSbrendan vdev_get_stats(vd, vs); 3134fa94a07fSbrendan } 3135fa94a07fSbrendan } 3136fa94a07fSbrendan } 3137fa94a07fSbrendan 3138ad135b5dSChristopher Siden static void 3139ad135b5dSChristopher Siden spa_add_feature_stats(spa_t *spa, nvlist_t *config) 3140ad135b5dSChristopher Siden { 3141ad135b5dSChristopher Siden nvlist_t *features; 3142ad135b5dSChristopher Siden zap_cursor_t zc; 3143ad135b5dSChristopher Siden zap_attribute_t za; 3144ad135b5dSChristopher Siden 3145ad135b5dSChristopher Siden ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 3146ad135b5dSChristopher Siden VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0); 3147ad135b5dSChristopher Siden 3148ad135b5dSChristopher Siden if (spa->spa_feat_for_read_obj != 0) { 3149ad135b5dSChristopher Siden for (zap_cursor_init(&zc, spa->spa_meta_objset, 3150ad135b5dSChristopher Siden spa->spa_feat_for_read_obj); 3151ad135b5dSChristopher Siden zap_cursor_retrieve(&zc, &za) == 0; 3152ad135b5dSChristopher Siden zap_cursor_advance(&zc)) { 3153ad135b5dSChristopher Siden 
ASSERT(za.za_integer_length == sizeof (uint64_t) && 3154ad135b5dSChristopher Siden za.za_num_integers == 1); 3155b420f3adSRichard Lowe VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, 3156ad135b5dSChristopher Siden za.za_first_integer)); 3157ad135b5dSChristopher Siden } 3158ad135b5dSChristopher Siden zap_cursor_fini(&zc); 3159ad135b5dSChristopher Siden } 3160ad135b5dSChristopher Siden 3161ad135b5dSChristopher Siden if (spa->spa_feat_for_write_obj != 0) { 3162ad135b5dSChristopher Siden for (zap_cursor_init(&zc, spa->spa_meta_objset, 3163ad135b5dSChristopher Siden spa->spa_feat_for_write_obj); 3164ad135b5dSChristopher Siden zap_cursor_retrieve(&zc, &za) == 0; 3165ad135b5dSChristopher Siden zap_cursor_advance(&zc)) { 3166ad135b5dSChristopher Siden ASSERT(za.za_integer_length == sizeof (uint64_t) && 3167ad135b5dSChristopher Siden za.za_num_integers == 1); 3168b420f3adSRichard Lowe VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, 3169ad135b5dSChristopher Siden za.za_first_integer)); 3170ad135b5dSChristopher Siden } 3171ad135b5dSChristopher Siden zap_cursor_fini(&zc); 3172ad135b5dSChristopher Siden } 3173ad135b5dSChristopher Siden 3174ad135b5dSChristopher Siden VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, 3175ad135b5dSChristopher Siden features) == 0); 3176ad135b5dSChristopher Siden nvlist_free(features); 3177ad135b5dSChristopher Siden } 3178ad135b5dSChristopher Siden 3179fa9e4066Sahrens int 3180ad135b5dSChristopher Siden spa_get_stats(const char *name, nvlist_t **config, 3181ad135b5dSChristopher Siden char *altroot, size_t buflen) 3182fa9e4066Sahrens { 3183fa9e4066Sahrens int error; 3184fa9e4066Sahrens spa_t *spa; 3185fa9e4066Sahrens 3186fa9e4066Sahrens *config = NULL; 3187468c413aSTim Haley error = spa_open_common(name, &spa, FTAG, NULL, config); 3188fa9e4066Sahrens 31896809eb4eSEric Schrock if (spa != NULL) { 31906809eb4eSEric Schrock /* 31916809eb4eSEric Schrock * This still leaves a window of inconsistency where the spares 
31926809eb4eSEric Schrock * or l2cache devices could change and the config would be 31936809eb4eSEric Schrock * self-inconsistent. 31946809eb4eSEric Schrock */ 31956809eb4eSEric Schrock spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3196ea8dc4b6Seschrock 31976809eb4eSEric Schrock if (*config != NULL) { 319811027bc7STim Haley uint64_t loadtimes[2]; 319911027bc7STim Haley 320011027bc7STim Haley loadtimes[0] = spa->spa_loaded_ts.tv_sec; 320111027bc7STim Haley loadtimes[1] = spa->spa_loaded_ts.tv_nsec; 320211027bc7STim Haley VERIFY(nvlist_add_uint64_array(*config, 320311027bc7STim Haley ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); 320411027bc7STim Haley 3205e14bb325SJeff Bonwick VERIFY(nvlist_add_uint64(*config, 32066809eb4eSEric Schrock ZPOOL_CONFIG_ERRCOUNT, 32076809eb4eSEric Schrock spa_get_errlog_size(spa)) == 0); 3208e14bb325SJeff Bonwick 32096809eb4eSEric Schrock if (spa_suspended(spa)) 32106809eb4eSEric Schrock VERIFY(nvlist_add_uint64(*config, 32116809eb4eSEric Schrock ZPOOL_CONFIG_SUSPENDED, 32126809eb4eSEric Schrock spa->spa_failmode) == 0); 32136809eb4eSEric Schrock 32146809eb4eSEric Schrock spa_add_spares(spa, *config); 32156809eb4eSEric Schrock spa_add_l2cache(spa, *config); 3216ad135b5dSChristopher Siden spa_add_feature_stats(spa, *config); 32176809eb4eSEric Schrock } 321899653d4eSeschrock } 321999653d4eSeschrock 3220ea8dc4b6Seschrock /* 3221ea8dc4b6Seschrock * We want to get the alternate root even for faulted pools, so we cheat 3222ea8dc4b6Seschrock * and call spa_lookup() directly. 
3223ea8dc4b6Seschrock */ 3224ea8dc4b6Seschrock if (altroot) { 3225ea8dc4b6Seschrock if (spa == NULL) { 3226ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 3227ea8dc4b6Seschrock spa = spa_lookup(name); 3228ea8dc4b6Seschrock if (spa) 3229ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 3230ea8dc4b6Seschrock else 3231ea8dc4b6Seschrock altroot[0] = '\0'; 3232ea8dc4b6Seschrock spa = NULL; 3233ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 3234ea8dc4b6Seschrock } else { 3235ea8dc4b6Seschrock spa_altroot(spa, altroot, buflen); 3236ea8dc4b6Seschrock } 3237ea8dc4b6Seschrock } 3238ea8dc4b6Seschrock 32396809eb4eSEric Schrock if (spa != NULL) { 32406809eb4eSEric Schrock spa_config_exit(spa, SCL_CONFIG, FTAG); 3241fa9e4066Sahrens spa_close(spa, FTAG); 32426809eb4eSEric Schrock } 3243fa9e4066Sahrens 3244fa9e4066Sahrens return (error); 3245fa9e4066Sahrens } 3246fa9e4066Sahrens 324799653d4eSeschrock /* 3248fa94a07fSbrendan * Validate that the auxiliary device array is well formed. We must have an 3249fa94a07fSbrendan * array of nvlists, each which describes a valid leaf vdev. If this is an 3250fa94a07fSbrendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 3251fa94a07fSbrendan * specified, as long as they are well-formed. 325299653d4eSeschrock */ 325399653d4eSeschrock static int 3254fa94a07fSbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 3255fa94a07fSbrendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 3256fa94a07fSbrendan vdev_labeltype_t label) 325799653d4eSeschrock { 3258fa94a07fSbrendan nvlist_t **dev; 3259fa94a07fSbrendan uint_t i, ndev; 326099653d4eSeschrock vdev_t *vd; 326199653d4eSeschrock int error; 326299653d4eSeschrock 3263e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3264e14bb325SJeff Bonwick 326599653d4eSeschrock /* 3266fa94a07fSbrendan * It's acceptable to have no devs specified. 
326799653d4eSeschrock */ 3268fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 326999653d4eSeschrock return (0); 327099653d4eSeschrock 3271fa94a07fSbrendan if (ndev == 0) 3272be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 327399653d4eSeschrock 327499653d4eSeschrock /* 3275fa94a07fSbrendan * Make sure the pool is formatted with a version that supports this 3276fa94a07fSbrendan * device type. 327799653d4eSeschrock */ 3278fa94a07fSbrendan if (spa_version(spa) < version) 3279be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTSUP)); 328099653d4eSeschrock 328139c23413Seschrock /* 3282fa94a07fSbrendan * Set the pending device list so we correctly handle device in-use 328339c23413Seschrock * checking. 328439c23413Seschrock */ 3285fa94a07fSbrendan sav->sav_pending = dev; 3286fa94a07fSbrendan sav->sav_npending = ndev; 328739c23413Seschrock 3288fa94a07fSbrendan for (i = 0; i < ndev; i++) { 3289fa94a07fSbrendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 329099653d4eSeschrock mode)) != 0) 329139c23413Seschrock goto out; 329299653d4eSeschrock 329399653d4eSeschrock if (!vd->vdev_ops->vdev_op_leaf) { 329499653d4eSeschrock vdev_free(vd); 3295be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 329639c23413Seschrock goto out; 329799653d4eSeschrock } 329899653d4eSeschrock 3299fa94a07fSbrendan /* 3300e14bb325SJeff Bonwick * The L2ARC currently only supports disk devices in 3301e14bb325SJeff Bonwick * kernel context. For user-level testing, we allow it. 
3302fa94a07fSbrendan */ 3303e14bb325SJeff Bonwick #ifdef _KERNEL 3304fa94a07fSbrendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 3305fa94a07fSbrendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 3306be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTBLK); 3307cd0837ccSGeorge Wilson vdev_free(vd); 3308fa94a07fSbrendan goto out; 3309fa94a07fSbrendan } 3310e14bb325SJeff Bonwick #endif 331199653d4eSeschrock vd->vdev_top = vd; 331299653d4eSeschrock 331339c23413Seschrock if ((error = vdev_open(vd)) == 0 && 3314fa94a07fSbrendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 3315fa94a07fSbrendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 331639c23413Seschrock vd->vdev_guid) == 0); 331739c23413Seschrock } 331899653d4eSeschrock 331999653d4eSeschrock vdev_free(vd); 332039c23413Seschrock 3321fa94a07fSbrendan if (error && 3322fa94a07fSbrendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 332339c23413Seschrock goto out; 332439c23413Seschrock else 332539c23413Seschrock error = 0; 332699653d4eSeschrock } 332799653d4eSeschrock 332839c23413Seschrock out: 3329fa94a07fSbrendan sav->sav_pending = NULL; 3330fa94a07fSbrendan sav->sav_npending = 0; 333139c23413Seschrock return (error); 333299653d4eSeschrock } 333399653d4eSeschrock 3334fa94a07fSbrendan static int 3335fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 3336fa94a07fSbrendan { 3337fa94a07fSbrendan int error; 3338fa94a07fSbrendan 3339e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3340e14bb325SJeff Bonwick 3341fa94a07fSbrendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 3342fa94a07fSbrendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 3343fa94a07fSbrendan VDEV_LABEL_SPARE)) != 0) { 3344fa94a07fSbrendan return (error); 3345fa94a07fSbrendan } 3346fa94a07fSbrendan 3347fa94a07fSbrendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 3348fa94a07fSbrendan &spa->spa_l2cache, 
ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 3349fa94a07fSbrendan VDEV_LABEL_L2CACHE)); 3350fa94a07fSbrendan } 3351fa94a07fSbrendan 3352fa94a07fSbrendan static void 3353fa94a07fSbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 3354fa94a07fSbrendan const char *config) 3355fa94a07fSbrendan { 3356fa94a07fSbrendan int i; 3357fa94a07fSbrendan 3358fa94a07fSbrendan if (sav->sav_config != NULL) { 3359fa94a07fSbrendan nvlist_t **olddevs; 3360fa94a07fSbrendan uint_t oldndevs; 3361fa94a07fSbrendan nvlist_t **newdevs; 3362fa94a07fSbrendan 3363fa94a07fSbrendan /* 3364fa94a07fSbrendan * Generate new dev list by concatentating with the 3365fa94a07fSbrendan * current dev list. 3366fa94a07fSbrendan */ 3367fa94a07fSbrendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 3368fa94a07fSbrendan &olddevs, &oldndevs) == 0); 3369fa94a07fSbrendan 3370fa94a07fSbrendan newdevs = kmem_alloc(sizeof (void *) * 3371fa94a07fSbrendan (ndevs + oldndevs), KM_SLEEP); 3372fa94a07fSbrendan for (i = 0; i < oldndevs; i++) 3373fa94a07fSbrendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 3374fa94a07fSbrendan KM_SLEEP) == 0); 3375fa94a07fSbrendan for (i = 0; i < ndevs; i++) 3376fa94a07fSbrendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 3377fa94a07fSbrendan KM_SLEEP) == 0); 3378fa94a07fSbrendan 3379fa94a07fSbrendan VERIFY(nvlist_remove(sav->sav_config, config, 3380fa94a07fSbrendan DATA_TYPE_NVLIST_ARRAY) == 0); 3381fa94a07fSbrendan 3382fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 3383fa94a07fSbrendan config, newdevs, ndevs + oldndevs) == 0); 3384fa94a07fSbrendan for (i = 0; i < oldndevs + ndevs; i++) 3385fa94a07fSbrendan nvlist_free(newdevs[i]); 3386fa94a07fSbrendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 3387fa94a07fSbrendan } else { 3388fa94a07fSbrendan /* 3389fa94a07fSbrendan * Generate a new dev list. 
3390fa94a07fSbrendan */ 3391fa94a07fSbrendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 3392fa94a07fSbrendan KM_SLEEP) == 0); 3393fa94a07fSbrendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 3394fa94a07fSbrendan devs, ndevs) == 0); 3395fa94a07fSbrendan } 3396fa94a07fSbrendan } 3397fa94a07fSbrendan 3398fa94a07fSbrendan /* 3399fa94a07fSbrendan * Stop and drop level 2 ARC devices 3400fa94a07fSbrendan */ 3401fa94a07fSbrendan void 3402fa94a07fSbrendan spa_l2cache_drop(spa_t *spa) 3403fa94a07fSbrendan { 3404fa94a07fSbrendan vdev_t *vd; 3405fa94a07fSbrendan int i; 3406fa94a07fSbrendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 3407fa94a07fSbrendan 3408fa94a07fSbrendan for (i = 0; i < sav->sav_count; i++) { 3409fa94a07fSbrendan uint64_t pool; 3410fa94a07fSbrendan 3411fa94a07fSbrendan vd = sav->sav_vdevs[i]; 3412fa94a07fSbrendan ASSERT(vd != NULL); 3413fa94a07fSbrendan 34148ad4d6ddSJeff Bonwick if (spa_l2cache_exists(vd->vdev_guid, &pool) && 34158ad4d6ddSJeff Bonwick pool != 0ULL && l2arc_vdev_present(vd)) 3416fa94a07fSbrendan l2arc_remove_vdev(vd); 3417fa94a07fSbrendan } 3418fa94a07fSbrendan } 3419fa94a07fSbrendan 3420fa9e4066Sahrens /* 3421fa9e4066Sahrens * Pool Creation 3422fa9e4066Sahrens */ 3423fa9e4066Sahrens int 3424990b4856Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 34254445fffbSMatthew Ahrens nvlist_t *zplprops) 3426fa9e4066Sahrens { 3427fa9e4066Sahrens spa_t *spa; 3428990b4856Slling char *altroot = NULL; 34290373e76bSbonwick vdev_t *rvd; 3430fa9e4066Sahrens dsl_pool_t *dp; 3431fa9e4066Sahrens dmu_tx_t *tx; 3432573ca77eSGeorge Wilson int error = 0; 3433fa9e4066Sahrens uint64_t txg = TXG_INITIAL; 3434fa94a07fSbrendan nvlist_t **spares, **l2cache; 3435fa94a07fSbrendan uint_t nspares, nl2cache; 3436cde58dbcSMatthew Ahrens uint64_t version, obj; 3437ad135b5dSChristopher Siden boolean_t has_features; 3438fa9e4066Sahrens 3439fa9e4066Sahrens /* 3440fa9e4066Sahrens * If this pool already exists, return failure. 
/*
 * Pool Creation
 *
 * Create a brand-new pool named 'pool' from the vdev hierarchy described in
 * 'nvroot', applying any pool properties in 'props' and passing 'zplprops'
 * through to the root dataset.  Returns 0 on success or an errno value on
 * failure; on failure the partially-constructed spa is torn down and removed
 * from the namespace.  spa_namespace_lock is held for the entire operation.
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version, obj;
	boolean_t has_features;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (SET_ERROR(EEXIST));
	}

	/*
	 * Allocate a new spa_t structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, NULL, altroot);
	spa_activate(spa, spa_mode_global);

	if (props && (error = spa_prop_validate(spa, props))) {
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * If any feature property was requested, force the newest on-disk
	 * version; otherwise honor an explicit ZPOOL_PROP_VERSION, falling
	 * back to SPA_VERSION when none was supplied.
	 */
	has_features = B_FALSE;
	for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
	    elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
		if (zpool_prop_feature(nvpair_name(elem)))
			has_features = B_TRUE;
	}

	if (has_features || nvlist_lookup_uint64(props,
	    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) {
		version = SPA_VERSION;
	}
	ASSERT(SPA_VERSION_IS_SUPPORTED(version));

	/* ub_txg trails spa_first_txg so the first sync advances it. */
	spa->spa_first_txg = txg;
	spa->spa_uberblock.ub_txg = txg - 1;
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create "The Godfather" zio to hold all async IOs
	 */
	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	/* A pool must contain at least one device that can hold data. */
	if (error == 0 && !zfs_allocatable_devs(nvroot))
		error = SET_ERROR(EINVAL);

	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_aux(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (int c = 0; c < rvd->vdev_children; c++) {
			vdev_metaslab_set_size(rvd->vdev_child[c]);
			vdev_expand(rvd->vdev_child[c], txg);
		}
	}

	spa_config_exit(spa, SCL_ALL, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	/*
	 * Get the list of level 2 cache devices, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/* Create the DSL pool (and with it the MOS) for this spa. */
	spa->spa_is_initializing = B_TRUE;
	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;
	spa->spa_is_initializing = B_FALSE;

	/*
	 * Create DDTs (dedup tables).
	 */
	ddt_create(spa);

	spa_update_dspace(spa);

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	if (spa_version(spa) >= SPA_VERSION_FEATURES)
		spa_feature_create_zap_objects(spa, tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
	    sizeof (uint64_t), 1, &version, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool version");
	}

	/* Newly created pools with the right version are always deflated. */
	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
		spa->spa_deflate = TRUE;
		if (zap_add(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
			cmn_err(CE_PANIC, "failed to add deflate");
		}
	}

	/*
	 * Create the deferred-free bpobj.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset, obj,
	    ZIO_COMPRESS_OFF, tx);
	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ,
	    sizeof (uint64_t), 1, &obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bpobj");
	}
	VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj,
	    spa->spa_meta_objset, obj));

	/*
	 * Create the pool's history object.
	 */
	if (version >= SPA_VERSION_ZPOOL_HISTORY)
		spa_history_create_obj(spa, tx);

	/*
	 * Set pool properties.
	 */
	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);

	if (props != NULL) {
		spa_configfile_set(spa, props, B_FALSE);
		spa_sync_props(props, tx);
	}

	dmu_tx_commit(tx);

	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	spa_history_log_version(spa, "create");

	/*
	 * NOTE(review): spa_minref is snapshotted here, after creation is
	 * complete — presumably the baseline refcount an idle pool holds;
	 * confirm against spa_misc.c.
	 */
	spa->spa_minref = refcount_count(&spa->spa_refcount);

	mutex_exit(&spa_namespace_lock);

	return (0);
}
3657e7cbe64fSgw */ 365821ecdf64SLin Ling extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 365921ecdf64SLin Ling 366021ecdf64SLin Ling static nvlist_t * 366121ecdf64SLin Ling spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 3662e7cbe64fSgw { 366321ecdf64SLin Ling nvlist_t *config; 3664e7cbe64fSgw nvlist_t *nvtop, *nvroot; 3665e7cbe64fSgw uint64_t pgid; 3666e7cbe64fSgw 366721ecdf64SLin Ling if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 366821ecdf64SLin Ling return (NULL); 366921ecdf64SLin Ling 3670e7cbe64fSgw /* 3671e7cbe64fSgw * Add this top-level vdev to the child array. 3672e7cbe64fSgw */ 367321ecdf64SLin Ling VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 367421ecdf64SLin Ling &nvtop) == 0); 367521ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 367621ecdf64SLin Ling &pgid) == 0); 367721ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 3678e7cbe64fSgw 3679e7cbe64fSgw /* 3680e7cbe64fSgw * Put this pool's top-level vdevs into a root vdev. 3681e7cbe64fSgw */ 3682e7cbe64fSgw VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 368321ecdf64SLin Ling VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 368421ecdf64SLin Ling VDEV_TYPE_ROOT) == 0); 3685e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 3686e7cbe64fSgw VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 3687e7cbe64fSgw VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 3688e7cbe64fSgw &nvtop, 1) == 0); 3689e7cbe64fSgw 3690e7cbe64fSgw /* 3691e7cbe64fSgw * Replace the existing vdev_tree with the new root vdev in 3692e7cbe64fSgw * this pool's configuration (remove the old, add the new). 
3693e7cbe64fSgw */ 3694e7cbe64fSgw VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 3695e7cbe64fSgw nvlist_free(nvroot); 369621ecdf64SLin Ling return (config); 3697e7cbe64fSgw } 3698e7cbe64fSgw 3699e7cbe64fSgw /* 370021ecdf64SLin Ling * Walk the vdev tree and see if we can find a device with "better" 370121ecdf64SLin Ling * configuration. A configuration is "better" if the label on that 370221ecdf64SLin Ling * device has a more recent txg. 3703051aabe6Staylor */ 370421ecdf64SLin Ling static void 370521ecdf64SLin Ling spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 3706051aabe6Staylor { 3707573ca77eSGeorge Wilson for (int c = 0; c < vd->vdev_children; c++) 370821ecdf64SLin Ling spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 3709051aabe6Staylor 371021ecdf64SLin Ling if (vd->vdev_ops->vdev_op_leaf) { 371121ecdf64SLin Ling nvlist_t *label; 371221ecdf64SLin Ling uint64_t label_txg; 3713051aabe6Staylor 371421ecdf64SLin Ling if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 371521ecdf64SLin Ling &label) != 0) 371621ecdf64SLin Ling return; 3717051aabe6Staylor 371821ecdf64SLin Ling VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 371921ecdf64SLin Ling &label_txg) == 0); 3720051aabe6Staylor 372121ecdf64SLin Ling /* 372221ecdf64SLin Ling * Do we have a better boot device? 372321ecdf64SLin Ling */ 372421ecdf64SLin Ling if (label_txg > *txg) { 372521ecdf64SLin Ling *txg = label_txg; 372621ecdf64SLin Ling *avd = vd; 3727051aabe6Staylor } 372821ecdf64SLin Ling nvlist_free(label); 3729051aabe6Staylor } 3730051aabe6Staylor } 3731051aabe6Staylor 3732e7cbe64fSgw /* 3733e7cbe64fSgw * Import a root pool. 3734e7cbe64fSgw * 3735051aabe6Staylor * For x86. devpath_list will consist of devid and/or physpath name of 3736051aabe6Staylor * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 3737051aabe6Staylor * The GRUB "findroot" command will return the vdev we should boot. 
/*
 * Import a root pool.
 *
 * For x86. devpath_list will consist of devid and/or physpath name of
 * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
 * The GRUB "findroot" command will return the vdev we should boot.
 *
 * For Sparc, devpath_list consists the physpath name of the booting device
 * no matter the rootpool is a single device pool or a mirrored pool.
 * e.g.
 *	"/pci@1f,0/ide@d/disk@0,0:a"
 *
 * Returns 0 on success, or EIO/ENOENT/EINVAL/errno on failure.  Acquires
 * and releases spa_namespace_lock; on success the pool has been added to
 * the namespace with ZFS_IMPORT_VERBATIM set.
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	config = spa_generate_rootconf(devpath, devid, &guid);
#if defined(_OBP) && defined(_KERNEL)
	if (config == NULL) {
		if (strstr(devpath, "/iscsi/ssd") != NULL) {
			/* iscsi boot: retry with the iSCSI physpath. */
			get_iscsi_bootpath_phy(devpath);
			config = spa_generate_rootconf(devpath, devid, &guid);
		}
	}
#endif
	if (config == NULL) {
		cmn_err(CE_NOTE, "Cannot read the pool label from '%s'",
		    devpath);
		return (SET_ERROR(EIO));
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, config, NULL);
	spa->spa_is_root = B_TRUE;
	spa->spa_import_flags = ZFS_IMPORT_VERBATIM;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = SET_ERROR(ENOENT);
		goto out;
	}

	/*
	 * Determine if there is a better boot device.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = SET_ERROR(EINVAL);
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->
		    vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
		error = SET_ERROR(EINVAL);
		goto out;
	}

	error = 0;
out:
	/*
	 * The vdev tree parsed above was only needed for validation;
	 * free it on both success and failure paths.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}

#endif
/*
 * Import a non-root pool into the system.
 *
 * 'config' is the pool configuration supplied by the caller (it is trusted
 * over the on-disk MOS config), 'props' are optional pool properties, and
 * 'flags' are ZFS_IMPORT_* flags.  Returns 0 on success or an errno value;
 * on failure the spa is unloaded and removed from the namespace.
 * spa_namespace_lock is held for the duration.
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	uint64_t mode = spa_mode_global;
	uint64_t readonly = B_FALSE;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (SET_ERROR(EEXIST));
	}

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	(void) nvlist_lookup_uint64(props,
	    zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
	if (readonly)
		mode = FREAD;
	spa = spa_add(pool, config, altroot);
	spa->spa_import_flags = flags;

	/*
	 * Verbatim import - Take a pool and insert it into the namespace
	 * as if it had been loaded at boot.  No load/validation is done.
	 */
	if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
		if (props != NULL)
			spa_configfile_set(spa, props, B_FALSE);

		spa_config_sync(spa, B_FALSE, B_TRUE);

		mutex_exit(&spa_namespace_lock);
		return (0);
	}

	spa_activate(spa, mode);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/* An explicit rewind request switches us into recovery mode. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;

	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    policy.zrp_request);

	/*
	 * Propagate anything learned while loading the pool and pass it
	 * back to caller (i.e. rewind info, missing devices, etc).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
	    spa->spa_load_info) == 0);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/* Validate any aux devices present in the supplied config. */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, "import");

	return (0);
}
4070fa9e4066Sahrens */ 4071fa9e4066Sahrens if (spa->spa_root_vdev != NULL) { 4072fa9e4066Sahrens config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 4073fa9e4066Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 4074fa9e4066Sahrens poolname) == 0); 4075fa9e4066Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 4076fa9e4066Sahrens state) == 0); 407795173954Sek VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 407895173954Sek spa->spa_uberblock.ub_timestamp) == 0); 4079ad135b5dSChristopher Siden VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, 4080ad135b5dSChristopher Siden spa->spa_load_info) == 0); 408199653d4eSeschrock 4082e7cbe64fSgw /* 4083e7cbe64fSgw * If the bootfs property exists on this pool then we 4084e7cbe64fSgw * copy it out so that external consumers can tell which 4085e7cbe64fSgw * pools are bootable. 4086e7cbe64fSgw */ 40877b7154beSLin Ling if ((!error || error == EEXIST) && spa->spa_bootfs) { 4088e7cbe64fSgw char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4089e7cbe64fSgw 4090e7cbe64fSgw /* 4091e7cbe64fSgw * We have to play games with the name since the 4092e7cbe64fSgw * pool was opened as TRYIMPORT_NAME. 
4093e7cbe64fSgw */ 4094e14bb325SJeff Bonwick if (dsl_dsobj_to_dsname(spa_name(spa), 4095e7cbe64fSgw spa->spa_bootfs, tmpname) == 0) { 4096e7cbe64fSgw char *cp; 4097e7cbe64fSgw char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4098e7cbe64fSgw 4099e7cbe64fSgw cp = strchr(tmpname, '/'); 4100e7cbe64fSgw if (cp == NULL) { 4101e7cbe64fSgw (void) strlcpy(dsname, tmpname, 4102e7cbe64fSgw MAXPATHLEN); 4103e7cbe64fSgw } else { 4104e7cbe64fSgw (void) snprintf(dsname, MAXPATHLEN, 4105e7cbe64fSgw "%s/%s", poolname, ++cp); 4106e7cbe64fSgw } 4107e7cbe64fSgw VERIFY(nvlist_add_string(config, 4108e7cbe64fSgw ZPOOL_CONFIG_BOOTFS, dsname) == 0); 4109e7cbe64fSgw kmem_free(dsname, MAXPATHLEN); 4110e7cbe64fSgw } 4111e7cbe64fSgw kmem_free(tmpname, MAXPATHLEN); 4112e7cbe64fSgw } 4113e7cbe64fSgw 411499653d4eSeschrock /* 4115fa94a07fSbrendan * Add the list of hot spares and level 2 cache devices. 411699653d4eSeschrock */ 41176809eb4eSEric Schrock spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 411899653d4eSeschrock spa_add_spares(spa, config); 4119fa94a07fSbrendan spa_add_l2cache(spa, config); 41206809eb4eSEric Schrock spa_config_exit(spa, SCL_CONFIG, FTAG); 4121fa9e4066Sahrens } 4122fa9e4066Sahrens 4123fa9e4066Sahrens spa_unload(spa); 4124fa9e4066Sahrens spa_deactivate(spa); 4125fa9e4066Sahrens spa_remove(spa); 4126fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4127fa9e4066Sahrens 4128fa9e4066Sahrens return (config); 4129fa9e4066Sahrens } 4130fa9e4066Sahrens 4131fa9e4066Sahrens /* 4132fa9e4066Sahrens * Pool export/destroy 4133fa9e4066Sahrens * 4134fa9e4066Sahrens * The act of destroying or exporting a pool is very simple. We make sure there 4135fa9e4066Sahrens * is no more pending I/O and any references to the pool are gone. Then, we 4136fa9e4066Sahrens * update the pool state and sync all the labels to disk, removing the 4137394ab0cbSGeorge Wilson * configuration from the cache afterwards. 
If the 'hardforce' flag is set, then 4138394ab0cbSGeorge Wilson * we don't sync the labels or remove the configuration cache. 4139fa9e4066Sahrens */ 4140fa9e4066Sahrens static int 414189a89ebfSlling spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, 4142394ab0cbSGeorge Wilson boolean_t force, boolean_t hardforce) 4143fa9e4066Sahrens { 4144fa9e4066Sahrens spa_t *spa; 4145fa9e4066Sahrens 414644cd46caSbillm if (oldconfig) 414744cd46caSbillm *oldconfig = NULL; 414844cd46caSbillm 41498ad4d6ddSJeff Bonwick if (!(spa_mode_global & FWRITE)) 4150be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 4151fa9e4066Sahrens 4152fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 4153fa9e4066Sahrens if ((spa = spa_lookup(pool)) == NULL) { 4154fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4155be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 4156fa9e4066Sahrens } 4157fa9e4066Sahrens 4158ea8dc4b6Seschrock /* 4159ea8dc4b6Seschrock * Put a hold on the pool, drop the namespace lock, stop async tasks, 4160ea8dc4b6Seschrock * reacquire the namespace lock, and see if we can export. 4161ea8dc4b6Seschrock */ 4162ea8dc4b6Seschrock spa_open_ref(spa, FTAG); 4163ea8dc4b6Seschrock mutex_exit(&spa_namespace_lock); 4164ea8dc4b6Seschrock spa_async_suspend(spa); 4165ea8dc4b6Seschrock mutex_enter(&spa_namespace_lock); 4166ea8dc4b6Seschrock spa_close(spa, FTAG); 4167ea8dc4b6Seschrock 4168fa9e4066Sahrens /* 4169fa9e4066Sahrens * The pool will be in core if it's openable, 4170fa9e4066Sahrens * in which case we can modify its state. 4171fa9e4066Sahrens */ 4172fa9e4066Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 4173fa9e4066Sahrens /* 4174fa9e4066Sahrens * Objsets may be open only because they're dirty, so we 4175fa9e4066Sahrens * have to force it to sync before checking spa_refcnt. 
4176fa9e4066Sahrens */ 4177fa9e4066Sahrens txg_wait_synced(spa->spa_dsl_pool, 0); 4178fa9e4066Sahrens 4179ea8dc4b6Seschrock /* 4180ea8dc4b6Seschrock * A pool cannot be exported or destroyed if there are active 4181ea8dc4b6Seschrock * references. If we are resetting a pool, allow references by 4182ea8dc4b6Seschrock * fault injection handlers. 4183ea8dc4b6Seschrock */ 4184ea8dc4b6Seschrock if (!spa_refcount_zero(spa) || 4185ea8dc4b6Seschrock (spa->spa_inject_ref != 0 && 4186ea8dc4b6Seschrock new_state != POOL_STATE_UNINITIALIZED)) { 4187ea8dc4b6Seschrock spa_async_resume(spa); 4188fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4189be6fd75aSMatthew Ahrens return (SET_ERROR(EBUSY)); 4190fa9e4066Sahrens } 4191fa9e4066Sahrens 419289a89ebfSlling /* 419389a89ebfSlling * A pool cannot be exported if it has an active shared spare. 419489a89ebfSlling * This is to prevent other pools stealing the active spare 419589a89ebfSlling * from an exported pool. At user's own will, such pool can 419689a89ebfSlling * be forcedly exported. 419789a89ebfSlling */ 419889a89ebfSlling if (!force && new_state == POOL_STATE_EXPORTED && 419989a89ebfSlling spa_has_active_shared_spare(spa)) { 420089a89ebfSlling spa_async_resume(spa); 420189a89ebfSlling mutex_exit(&spa_namespace_lock); 4202be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 420389a89ebfSlling } 420489a89ebfSlling 4205fa9e4066Sahrens /* 4206fa9e4066Sahrens * We want this to be reflected on every label, 4207fa9e4066Sahrens * so mark them all dirty. spa_unload() will do the 4208fa9e4066Sahrens * final sync that pushes these changes out. 
4209fa9e4066Sahrens */ 4210394ab0cbSGeorge Wilson if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { 4211e14bb325SJeff Bonwick spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4212ea8dc4b6Seschrock spa->spa_state = new_state; 42133f9d6ad7SLin Ling spa->spa_final_txg = spa_last_synced_txg(spa) + 42143f9d6ad7SLin Ling TXG_DEFER_SIZE + 1; 4215ea8dc4b6Seschrock vdev_config_dirty(spa->spa_root_vdev); 4216e14bb325SJeff Bonwick spa_config_exit(spa, SCL_ALL, FTAG); 4217ea8dc4b6Seschrock } 4218fa9e4066Sahrens } 4219fa9e4066Sahrens 42203d7072f8Seschrock spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 42213d7072f8Seschrock 4222fa9e4066Sahrens if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4223fa9e4066Sahrens spa_unload(spa); 4224fa9e4066Sahrens spa_deactivate(spa); 4225fa9e4066Sahrens } 4226fa9e4066Sahrens 422744cd46caSbillm if (oldconfig && spa->spa_config) 422844cd46caSbillm VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 422944cd46caSbillm 4230ea8dc4b6Seschrock if (new_state != POOL_STATE_UNINITIALIZED) { 4231394ab0cbSGeorge Wilson if (!hardforce) 4232394ab0cbSGeorge Wilson spa_config_sync(spa, B_TRUE, B_TRUE); 4233ea8dc4b6Seschrock spa_remove(spa); 4234ea8dc4b6Seschrock } 4235fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 4236fa9e4066Sahrens 4237fa9e4066Sahrens return (0); 4238fa9e4066Sahrens } 4239fa9e4066Sahrens 4240fa9e4066Sahrens /* 4241fa9e4066Sahrens * Destroy a storage pool. 4242fa9e4066Sahrens */ 4243fa9e4066Sahrens int 4244fa9e4066Sahrens spa_destroy(char *pool) 4245fa9e4066Sahrens { 4246394ab0cbSGeorge Wilson return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, 4247394ab0cbSGeorge Wilson B_FALSE, B_FALSE)); 4248fa9e4066Sahrens } 4249fa9e4066Sahrens 4250fa9e4066Sahrens /* 4251fa9e4066Sahrens * Export a storage pool. 
4252fa9e4066Sahrens */ 4253fa9e4066Sahrens int 4254394ab0cbSGeorge Wilson spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, 4255394ab0cbSGeorge Wilson boolean_t hardforce) 4256fa9e4066Sahrens { 4257394ab0cbSGeorge Wilson return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, 4258394ab0cbSGeorge Wilson force, hardforce)); 4259fa9e4066Sahrens } 4260fa9e4066Sahrens 4261ea8dc4b6Seschrock /* 4262ea8dc4b6Seschrock * Similar to spa_export(), this unloads the spa_t without actually removing it 4263ea8dc4b6Seschrock * from the namespace in any way. 4264ea8dc4b6Seschrock */ 4265ea8dc4b6Seschrock int 4266ea8dc4b6Seschrock spa_reset(char *pool) 4267ea8dc4b6Seschrock { 426889a89ebfSlling return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, 4269394ab0cbSGeorge Wilson B_FALSE, B_FALSE)); 4270ea8dc4b6Seschrock } 4271ea8dc4b6Seschrock 4272fa9e4066Sahrens /* 4273fa9e4066Sahrens * ========================================================================== 4274fa9e4066Sahrens * Device manipulation 4275fa9e4066Sahrens * ========================================================================== 4276fa9e4066Sahrens */ 4277fa9e4066Sahrens 4278fa9e4066Sahrens /* 42798654d025Sperrin * Add a device to a storage pool. 
4280fa9e4066Sahrens */ 4281fa9e4066Sahrens int 4282fa9e4066Sahrens spa_vdev_add(spa_t *spa, nvlist_t *nvroot) 4283fa9e4066Sahrens { 428488ecc943SGeorge Wilson uint64_t txg, id; 42858ad4d6ddSJeff Bonwick int error; 4286fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 42870e34b6a7Sbonwick vdev_t *vd, *tvd; 4288fa94a07fSbrendan nvlist_t **spares, **l2cache; 4289fa94a07fSbrendan uint_t nspares, nl2cache; 4290fa9e4066Sahrens 4291f9af39baSGeorge Wilson ASSERT(spa_writeable(spa)); 4292f9af39baSGeorge Wilson 4293fa9e4066Sahrens txg = spa_vdev_enter(spa); 4294fa9e4066Sahrens 429599653d4eSeschrock if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 429699653d4eSeschrock VDEV_ALLOC_ADD)) != 0) 429799653d4eSeschrock return (spa_vdev_exit(spa, NULL, txg, error)); 4298fa9e4066Sahrens 4299e14bb325SJeff Bonwick spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ 430099653d4eSeschrock 4301fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 4302fa94a07fSbrendan &nspares) != 0) 430399653d4eSeschrock nspares = 0; 430499653d4eSeschrock 4305fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, 4306fa94a07fSbrendan &nl2cache) != 0) 4307fa94a07fSbrendan nl2cache = 0; 4308fa94a07fSbrendan 4309e14bb325SJeff Bonwick if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) 4310fa9e4066Sahrens return (spa_vdev_exit(spa, vd, txg, EINVAL)); 4311fa9e4066Sahrens 4312e14bb325SJeff Bonwick if (vd->vdev_children != 0 && 4313e14bb325SJeff Bonwick (error = vdev_create(vd, txg, B_FALSE)) != 0) 4314e14bb325SJeff Bonwick return (spa_vdev_exit(spa, vd, txg, error)); 431599653d4eSeschrock 431639c23413Seschrock /* 4317fa94a07fSbrendan * We must validate the spares and l2cache devices after checking the 4318fa94a07fSbrendan * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 
431939c23413Seschrock */ 4320e14bb325SJeff Bonwick if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) 432139c23413Seschrock return (spa_vdev_exit(spa, vd, txg, error)); 432239c23413Seschrock 432339c23413Seschrock /* 432439c23413Seschrock * Transfer each new top-level vdev from vd to rvd. 432539c23413Seschrock */ 43268ad4d6ddSJeff Bonwick for (int c = 0; c < vd->vdev_children; c++) { 432788ecc943SGeorge Wilson 432888ecc943SGeorge Wilson /* 432988ecc943SGeorge Wilson * Set the vdev id to the first hole, if one exists. 433088ecc943SGeorge Wilson */ 433188ecc943SGeorge Wilson for (id = 0; id < rvd->vdev_children; id++) { 433288ecc943SGeorge Wilson if (rvd->vdev_child[id]->vdev_ishole) { 433388ecc943SGeorge Wilson vdev_free(rvd->vdev_child[id]); 433488ecc943SGeorge Wilson break; 433588ecc943SGeorge Wilson } 433688ecc943SGeorge Wilson } 433739c23413Seschrock tvd = vd->vdev_child[c]; 433839c23413Seschrock vdev_remove_child(vd, tvd); 433988ecc943SGeorge Wilson tvd->vdev_id = id; 434039c23413Seschrock vdev_add_child(rvd, tvd); 434139c23413Seschrock vdev_config_dirty(tvd); 434239c23413Seschrock } 434339c23413Seschrock 434499653d4eSeschrock if (nspares != 0) { 4345fa94a07fSbrendan spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, 4346fa94a07fSbrendan ZPOOL_CONFIG_SPARES); 434799653d4eSeschrock spa_load_spares(spa); 4348fa94a07fSbrendan spa->spa_spares.sav_sync = B_TRUE; 4349fa94a07fSbrendan } 4350fa94a07fSbrendan 4351fa94a07fSbrendan if (nl2cache != 0) { 4352fa94a07fSbrendan spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, 4353fa94a07fSbrendan ZPOOL_CONFIG_L2CACHE); 4354fa94a07fSbrendan spa_load_l2cache(spa); 4355fa94a07fSbrendan spa->spa_l2cache.sav_sync = B_TRUE; 4356fa9e4066Sahrens } 4357fa9e4066Sahrens 4358fa9e4066Sahrens /* 43590e34b6a7Sbonwick * We have to be careful when adding new vdevs to an existing pool. 
43600e34b6a7Sbonwick * If other threads start allocating from these vdevs before we 43610e34b6a7Sbonwick * sync the config cache, and we lose power, then upon reboot we may 43620e34b6a7Sbonwick * fail to open the pool because there are DVAs that the config cache 43630e34b6a7Sbonwick * can't translate. Therefore, we first add the vdevs without 43640e34b6a7Sbonwick * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 43650373e76bSbonwick * and then let spa_config_update() initialize the new metaslabs. 43660e34b6a7Sbonwick * 43670e34b6a7Sbonwick * spa_load() checks for added-but-not-initialized vdevs, so that 43680e34b6a7Sbonwick * if we lose power at any point in this sequence, the remaining 43690e34b6a7Sbonwick * steps will be completed the next time we load the pool. 43700e34b6a7Sbonwick */ 43710373e76bSbonwick (void) spa_vdev_exit(spa, vd, txg, 0); 43720e34b6a7Sbonwick 43730373e76bSbonwick mutex_enter(&spa_namespace_lock); 43740373e76bSbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 43750373e76bSbonwick mutex_exit(&spa_namespace_lock); 4376fa9e4066Sahrens 43770373e76bSbonwick return (0); 4378fa9e4066Sahrens } 4379fa9e4066Sahrens 4380fa9e4066Sahrens /* 4381fa9e4066Sahrens * Attach a device to a mirror. The arguments are the path to any device 4382fa9e4066Sahrens * in the mirror, and the nvroot for the new device. If the path specifies 4383fa9e4066Sahrens * a device that is not mirrored, we automatically insert the mirror vdev. 
4384fa9e4066Sahrens * 4385fa9e4066Sahrens * If 'replacing' is specified, the new device is intended to replace the 4386fa9e4066Sahrens * existing device; in this case the two devices are made into their own 43873d7072f8Seschrock * mirror using the 'replacing' vdev, which is functionally identical to 4388fa9e4066Sahrens * the mirror vdev (it actually reuses all the same ops) but has a few 4389fa9e4066Sahrens * extra rules: you can't attach to it after it's been created, and upon 4390fa9e4066Sahrens * completion of resilvering, the first disk (the one being replaced) 4391fa9e4066Sahrens * is automatically detached. 4392fa9e4066Sahrens */ 4393fa9e4066Sahrens int 4394ea8dc4b6Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 4395fa9e4066Sahrens { 43963f9d6ad7SLin Ling uint64_t txg, dtl_max_txg; 4397fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 4398fa9e4066Sahrens vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 439999653d4eSeschrock vdev_ops_t *pvops; 44009b3f6b42SEric Kustarz char *oldvdpath, *newvdpath; 44019b3f6b42SEric Kustarz int newvd_isspare; 44029b3f6b42SEric Kustarz int error; 4403fa9e4066Sahrens 4404f9af39baSGeorge Wilson ASSERT(spa_writeable(spa)); 4405f9af39baSGeorge Wilson 4406fa9e4066Sahrens txg = spa_vdev_enter(spa); 4407fa9e4066Sahrens 4408c5904d13Seschrock oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 4409fa9e4066Sahrens 4410fa9e4066Sahrens if (oldvd == NULL) 4411fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 4412fa9e4066Sahrens 44130e34b6a7Sbonwick if (!oldvd->vdev_ops->vdev_op_leaf) 44140e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 44150e34b6a7Sbonwick 4416fa9e4066Sahrens pvd = oldvd->vdev_parent; 4417fa9e4066Sahrens 441899653d4eSeschrock if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 4419cd0837ccSGeorge Wilson VDEV_ALLOC_ATTACH)) != 0) 44203d7072f8Seschrock return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 44213d7072f8Seschrock 44223d7072f8Seschrock if 
(newrootvd->vdev_children != 1) 4423fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 4424fa9e4066Sahrens 4425fa9e4066Sahrens newvd = newrootvd->vdev_child[0]; 4426fa9e4066Sahrens 4427fa9e4066Sahrens if (!newvd->vdev_ops->vdev_op_leaf) 4428fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 4429fa9e4066Sahrens 443099653d4eSeschrock if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 4431fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, error)); 4432fa9e4066Sahrens 44338654d025Sperrin /* 44348654d025Sperrin * Spares can't replace logs 44358654d025Sperrin */ 4436ee0eb9f2SEric Schrock if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) 44378654d025Sperrin return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 44388654d025Sperrin 443999653d4eSeschrock if (!replacing) { 444099653d4eSeschrock /* 444199653d4eSeschrock * For attach, the only allowable parent is a mirror or the root 444299653d4eSeschrock * vdev. 444399653d4eSeschrock */ 444499653d4eSeschrock if (pvd->vdev_ops != &vdev_mirror_ops && 444599653d4eSeschrock pvd->vdev_ops != &vdev_root_ops) 444699653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 444799653d4eSeschrock 444899653d4eSeschrock pvops = &vdev_mirror_ops; 444999653d4eSeschrock } else { 445099653d4eSeschrock /* 445199653d4eSeschrock * Active hot spares can only be replaced by inactive hot 445299653d4eSeschrock * spares. 445399653d4eSeschrock */ 445499653d4eSeschrock if (pvd->vdev_ops == &vdev_spare_ops && 4455cb04b873SMark J Musante oldvd->vdev_isspare && 445699653d4eSeschrock !spa_has_spare(spa, newvd->vdev_guid)) 445799653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 445899653d4eSeschrock 445999653d4eSeschrock /* 446099653d4eSeschrock * If the source is a hot spare, and the parent isn't already a 446199653d4eSeschrock * spare, then we want to create a new hot spare. Otherwise, we 446239c23413Seschrock * want to create a replacing vdev. 
The user is not allowed to 446339c23413Seschrock * attach to a spared vdev child unless the 'isspare' state is 446439c23413Seschrock * the same (spare replaces spare, non-spare replaces 446539c23413Seschrock * non-spare). 446699653d4eSeschrock */ 4467cb04b873SMark J Musante if (pvd->vdev_ops == &vdev_replacing_ops && 4468cb04b873SMark J Musante spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { 446999653d4eSeschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 4470cb04b873SMark J Musante } else if (pvd->vdev_ops == &vdev_spare_ops && 4471cb04b873SMark J Musante newvd->vdev_isspare != oldvd->vdev_isspare) { 447239c23413Seschrock return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 4473cb04b873SMark J Musante } 4474cb04b873SMark J Musante 4475cb04b873SMark J Musante if (newvd->vdev_isspare) 447699653d4eSeschrock pvops = &vdev_spare_ops; 447799653d4eSeschrock else 447899653d4eSeschrock pvops = &vdev_replacing_ops; 447999653d4eSeschrock } 448099653d4eSeschrock 44812a79c5feSlling /* 4482573ca77eSGeorge Wilson * Make sure the new device is big enough. 44832a79c5feSlling */ 4484573ca77eSGeorge Wilson if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) 4485fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 4486fa9e4066Sahrens 4487ecc2d604Sbonwick /* 4488ecc2d604Sbonwick * The new device cannot have a higher alignment requirement 4489ecc2d604Sbonwick * than the top-level vdev. 4490ecc2d604Sbonwick */ 4491ecc2d604Sbonwick if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 4492fa9e4066Sahrens return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 4493fa9e4066Sahrens 4494fa9e4066Sahrens /* 4495fa9e4066Sahrens * If this is an in-place replacement, update oldvd's path and devid 4496fa9e4066Sahrens * to make it distinguishable from newvd, and unopenable from now on. 
4497fa9e4066Sahrens */ 4498fa9e4066Sahrens if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 4499fa9e4066Sahrens spa_strfree(oldvd->vdev_path); 4500fa9e4066Sahrens oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 4501fa9e4066Sahrens KM_SLEEP); 4502fa9e4066Sahrens (void) sprintf(oldvd->vdev_path, "%s/%s", 4503fa9e4066Sahrens newvd->vdev_path, "old"); 4504fa9e4066Sahrens if (oldvd->vdev_devid != NULL) { 4505fa9e4066Sahrens spa_strfree(oldvd->vdev_devid); 4506fa9e4066Sahrens oldvd->vdev_devid = NULL; 4507fa9e4066Sahrens } 4508fa9e4066Sahrens } 4509fa9e4066Sahrens 4510cb04b873SMark J Musante /* mark the device being resilvered */ 4511b4952e17SGeorge Wilson newvd->vdev_resilver_txg = txg; 4512cb04b873SMark J Musante 4513fa9e4066Sahrens /* 451499653d4eSeschrock * If the parent is not a mirror, or if we're replacing, insert the new 451599653d4eSeschrock * mirror/replacing/spare vdev above oldvd. 4516fa9e4066Sahrens */ 4517fa9e4066Sahrens if (pvd->vdev_ops != pvops) 4518fa9e4066Sahrens pvd = vdev_add_parent(oldvd, pvops); 4519fa9e4066Sahrens 4520fa9e4066Sahrens ASSERT(pvd->vdev_top->vdev_parent == rvd); 4521fa9e4066Sahrens ASSERT(pvd->vdev_ops == pvops); 4522fa9e4066Sahrens ASSERT(oldvd->vdev_parent == pvd); 4523fa9e4066Sahrens 4524fa9e4066Sahrens /* 4525fa9e4066Sahrens * Extract the new device from its root and add it to pvd. 
4526fa9e4066Sahrens */ 4527fa9e4066Sahrens vdev_remove_child(newrootvd, newvd); 4528fa9e4066Sahrens newvd->vdev_id = pvd->vdev_children; 452988ecc943SGeorge Wilson newvd->vdev_crtxg = oldvd->vdev_crtxg; 4530fa9e4066Sahrens vdev_add_child(pvd, newvd); 4531fa9e4066Sahrens 4532fa9e4066Sahrens tvd = newvd->vdev_top; 4533fa9e4066Sahrens ASSERT(pvd->vdev_top == tvd); 4534fa9e4066Sahrens ASSERT(tvd->vdev_parent == rvd); 4535fa9e4066Sahrens 4536fa9e4066Sahrens vdev_config_dirty(tvd); 4537fa9e4066Sahrens 4538fa9e4066Sahrens /* 45393f9d6ad7SLin Ling * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account 45403f9d6ad7SLin Ling * for any dmu_sync-ed blocks. It will propagate upward when 45413f9d6ad7SLin Ling * spa_vdev_exit() calls vdev_dtl_reassess(). 4542fa9e4066Sahrens */ 45433f9d6ad7SLin Ling dtl_max_txg = txg + TXG_CONCURRENT_STATES; 4544fa9e4066Sahrens 45453f9d6ad7SLin Ling vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, 45463f9d6ad7SLin Ling dtl_max_txg - TXG_INITIAL); 4547fa9e4066Sahrens 45486809eb4eSEric Schrock if (newvd->vdev_isspare) { 454939c23413Seschrock spa_spare_activate(newvd); 45506809eb4eSEric Schrock spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); 45516809eb4eSEric Schrock } 45526809eb4eSEric Schrock 4553e14bb325SJeff Bonwick oldvdpath = spa_strdup(oldvd->vdev_path); 4554e14bb325SJeff Bonwick newvdpath = spa_strdup(newvd->vdev_path); 45559b3f6b42SEric Kustarz newvd_isspare = newvd->vdev_isspare; 4556ea8dc4b6Seschrock 4557fa9e4066Sahrens /* 4558fa9e4066Sahrens * Mark newvd's DTL dirty in this txg. 4559fa9e4066Sahrens */ 4560ecc2d604Sbonwick vdev_dirty(tvd, VDD_DTL, newvd, txg); 4561fa9e4066Sahrens 45623f9d6ad7SLin Ling /* 45630713e232SGeorge Wilson * Schedule the resilver to restart in the future. We do this to 45640713e232SGeorge Wilson * ensure that dmu_sync-ed blocks have been stitched into the 45650713e232SGeorge Wilson * respective datasets. 
45663f9d6ad7SLin Ling */ 45673f9d6ad7SLin Ling dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); 45683f9d6ad7SLin Ling 45693f9d6ad7SLin Ling /* 45703f9d6ad7SLin Ling * Commit the config 45713f9d6ad7SLin Ling */ 45723f9d6ad7SLin Ling (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); 4573fa9e4066Sahrens 45744445fffbSMatthew Ahrens spa_history_log_internal(spa, "vdev attach", NULL, 45753f9d6ad7SLin Ling "%s vdev=%s %s vdev=%s", 4576c8e1f6d2SMark J Musante replacing && newvd_isspare ? "spare in" : 4577c8e1f6d2SMark J Musante replacing ? "replace" : "attach", newvdpath, 4578c8e1f6d2SMark J Musante replacing ? "for" : "to", oldvdpath); 45799b3f6b42SEric Kustarz 45809b3f6b42SEric Kustarz spa_strfree(oldvdpath); 45819b3f6b42SEric Kustarz spa_strfree(newvdpath); 45829b3f6b42SEric Kustarz 4583943e9869SLori Alt if (spa->spa_bootfs) 4584943e9869SLori Alt spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH); 4585943e9869SLori Alt 4586fa9e4066Sahrens return (0); 4587fa9e4066Sahrens } 4588fa9e4066Sahrens 4589fa9e4066Sahrens /* 4590fa9e4066Sahrens * Detach a device from a mirror or replacing vdev. 4591f7170741SWill Andrews * 4592fa9e4066Sahrens * If 'replace_done' is specified, only detach if the parent 4593fa9e4066Sahrens * is a replacing vdev. 
4594fa9e4066Sahrens */ 4595fa9e4066Sahrens int 45968ad4d6ddSJeff Bonwick spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 4597fa9e4066Sahrens { 4598fa9e4066Sahrens uint64_t txg; 45998ad4d6ddSJeff Bonwick int error; 4600fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 4601fa9e4066Sahrens vdev_t *vd, *pvd, *cvd, *tvd; 460299653d4eSeschrock boolean_t unspare = B_FALSE; 4603d5285caeSGeorge Wilson uint64_t unspare_guid = 0; 46041195e687SMark J Musante char *vdpath; 4605fa9e4066Sahrens 4606f9af39baSGeorge Wilson ASSERT(spa_writeable(spa)); 4607f9af39baSGeorge Wilson 4608fa9e4066Sahrens txg = spa_vdev_enter(spa); 4609fa9e4066Sahrens 4610c5904d13Seschrock vd = spa_lookup_by_guid(spa, guid, B_FALSE); 4611fa9e4066Sahrens 4612fa9e4066Sahrens if (vd == NULL) 4613fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 4614fa9e4066Sahrens 46150e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 46160e34b6a7Sbonwick return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 46170e34b6a7Sbonwick 4618fa9e4066Sahrens pvd = vd->vdev_parent; 4619fa9e4066Sahrens 46208ad4d6ddSJeff Bonwick /* 46218ad4d6ddSJeff Bonwick * If the parent/child relationship is not as expected, don't do it. 46228ad4d6ddSJeff Bonwick * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 46238ad4d6ddSJeff Bonwick * vdev that's replacing B with C. The user's intent in replacing 46248ad4d6ddSJeff Bonwick * is to go from M(A,B) to M(A,C). If the user decides to cancel 46258ad4d6ddSJeff Bonwick * the replace by detaching C, the expected behavior is to end up 46268ad4d6ddSJeff Bonwick * M(A,B). But suppose that right after deciding to detach C, 46278ad4d6ddSJeff Bonwick * the replacement of B completes. We would have M(A,C), and then 46288ad4d6ddSJeff Bonwick * ask to detach C, which would leave us with just A -- not what 46298ad4d6ddSJeff Bonwick * the user wanted. 
To prevent this, we make sure that the 46308ad4d6ddSJeff Bonwick * parent/child relationship hasn't changed -- in this example, 46318ad4d6ddSJeff Bonwick * that C's parent is still the replacing vdev R. 46328ad4d6ddSJeff Bonwick */ 46338ad4d6ddSJeff Bonwick if (pvd->vdev_guid != pguid && pguid != 0) 46348ad4d6ddSJeff Bonwick return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 46358ad4d6ddSJeff Bonwick 4636fa9e4066Sahrens /* 4637cb04b873SMark J Musante * Only 'replacing' or 'spare' vdevs can be replaced. 463899653d4eSeschrock */ 4639cb04b873SMark J Musante if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && 4640cb04b873SMark J Musante pvd->vdev_ops != &vdev_spare_ops) 4641cb04b873SMark J Musante return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 464299653d4eSeschrock 464399653d4eSeschrock ASSERT(pvd->vdev_ops != &vdev_spare_ops || 4644e7437265Sahrens spa_version(spa) >= SPA_VERSION_SPARES); 4645fa9e4066Sahrens 4646fa9e4066Sahrens /* 464799653d4eSeschrock * Only mirror, replacing, and spare vdevs support detach. 4648fa9e4066Sahrens */ 4649fa9e4066Sahrens if (pvd->vdev_ops != &vdev_replacing_ops && 465099653d4eSeschrock pvd->vdev_ops != &vdev_mirror_ops && 465199653d4eSeschrock pvd->vdev_ops != &vdev_spare_ops) 4652fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 4653fa9e4066Sahrens 4654fa9e4066Sahrens /* 46558ad4d6ddSJeff Bonwick * If this device has the only valid copy of some data, 46568ad4d6ddSJeff Bonwick * we cannot safely detach it. 4657fa9e4066Sahrens */ 46588ad4d6ddSJeff Bonwick if (vdev_dtl_required(vd)) 4659fa9e4066Sahrens return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 4660fa9e4066Sahrens 46618ad4d6ddSJeff Bonwick ASSERT(pvd->vdev_children >= 2); 4662fa9e4066Sahrens 4663bf82a41bSeschrock /* 4664bf82a41bSeschrock * If we are detaching the second disk from a replacing vdev, then 4665bf82a41bSeschrock * check to see if we changed the original vdev's path to have "/old" 4666bf82a41bSeschrock * at the end in spa_vdev_attach(). 
If so, undo that change now. 4667bf82a41bSeschrock */ 4668cb04b873SMark J Musante if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && 4669cb04b873SMark J Musante vd->vdev_path != NULL) { 4670cb04b873SMark J Musante size_t len = strlen(vd->vdev_path); 4671cb04b873SMark J Musante 4672cb04b873SMark J Musante for (int c = 0; c < pvd->vdev_children; c++) { 4673cb04b873SMark J Musante cvd = pvd->vdev_child[c]; 4674cb04b873SMark J Musante 4675cb04b873SMark J Musante if (cvd == vd || cvd->vdev_path == NULL) 4676cb04b873SMark J Musante continue; 4677cb04b873SMark J Musante 4678cb04b873SMark J Musante if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 4679cb04b873SMark J Musante strcmp(cvd->vdev_path + len, "/old") == 0) { 4680cb04b873SMark J Musante spa_strfree(cvd->vdev_path); 4681cb04b873SMark J Musante cvd->vdev_path = spa_strdup(vd->vdev_path); 4682cb04b873SMark J Musante break; 4683cb04b873SMark J Musante } 4684bf82a41bSeschrock } 4685bf82a41bSeschrock } 4686bf82a41bSeschrock 468799653d4eSeschrock /* 468899653d4eSeschrock * If we are detaching the original disk from a spare, then it implies 468999653d4eSeschrock * that the spare should become a real disk, and be removed from the 469099653d4eSeschrock * active spare list for the pool. 469199653d4eSeschrock */ 469299653d4eSeschrock if (pvd->vdev_ops == &vdev_spare_ops && 4693cb04b873SMark J Musante vd->vdev_id == 0 && 4694cb04b873SMark J Musante pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) 469599653d4eSeschrock unspare = B_TRUE; 469699653d4eSeschrock 4697fa9e4066Sahrens /* 4698fa9e4066Sahrens * Erase the disk labels so the disk can be used for other things. 4699fa9e4066Sahrens * This must be done after all other error cases are handled, 4700fa9e4066Sahrens * but before we disembowel vd (so we can still do I/O to it). 
4701fa9e4066Sahrens * But if we can't do it, don't treat the error as fatal -- 4702fa9e4066Sahrens * it may be that the unwritability of the disk is the reason 4703fa9e4066Sahrens * it's being detached! 4704fa9e4066Sahrens */ 470539c23413Seschrock error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 4706fa9e4066Sahrens 4707fa9e4066Sahrens /* 4708fa9e4066Sahrens * Remove vd from its parent and compact the parent's children. 4709fa9e4066Sahrens */ 4710fa9e4066Sahrens vdev_remove_child(pvd, vd); 4711fa9e4066Sahrens vdev_compact_children(pvd); 4712fa9e4066Sahrens 4713fa9e4066Sahrens /* 4714fa9e4066Sahrens * Remember one of the remaining children so we can get tvd below. 4715fa9e4066Sahrens */ 4716cb04b873SMark J Musante cvd = pvd->vdev_child[pvd->vdev_children - 1]; 4717fa9e4066Sahrens 471899653d4eSeschrock /* 471999653d4eSeschrock * If we need to remove the remaining child from the list of hot spares, 47208ad4d6ddSJeff Bonwick * do it now, marking the vdev as no longer a spare in the process. 47218ad4d6ddSJeff Bonwick * We must do this before vdev_remove_parent(), because that can 47228ad4d6ddSJeff Bonwick * change the GUID if it creates a new toplevel GUID. For a similar 47238ad4d6ddSJeff Bonwick * reason, we must remove the spare now, in the same txg as the detach; 47248ad4d6ddSJeff Bonwick * otherwise someone could attach a new sibling, change the GUID, and 47258ad4d6ddSJeff Bonwick * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
472699653d4eSeschrock */ 472799653d4eSeschrock if (unspare) { 472899653d4eSeschrock ASSERT(cvd->vdev_isspare); 472939c23413Seschrock spa_spare_remove(cvd); 473099653d4eSeschrock unspare_guid = cvd->vdev_guid; 47318ad4d6ddSJeff Bonwick (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 4732cb04b873SMark J Musante cvd->vdev_unspare = B_TRUE; 473399653d4eSeschrock } 473499653d4eSeschrock 4735fa9e4066Sahrens /* 4736fa9e4066Sahrens * If the parent mirror/replacing vdev only has one child, 4737fa9e4066Sahrens * the parent is no longer needed. Remove it from the tree. 4738fa9e4066Sahrens */ 4739cb04b873SMark J Musante if (pvd->vdev_children == 1) { 4740cb04b873SMark J Musante if (pvd->vdev_ops == &vdev_spare_ops) 4741cb04b873SMark J Musante cvd->vdev_unspare = B_FALSE; 4742fa9e4066Sahrens vdev_remove_parent(cvd); 4743cb04b873SMark J Musante } 4744cb04b873SMark J Musante 4745fa9e4066Sahrens 4746fa9e4066Sahrens /* 4747fa9e4066Sahrens * We don't set tvd until now because the parent we just removed 4748fa9e4066Sahrens * may have been the previous top-level vdev. 4749fa9e4066Sahrens */ 4750fa9e4066Sahrens tvd = cvd->vdev_top; 4751fa9e4066Sahrens ASSERT(tvd->vdev_parent == rvd); 4752fa9e4066Sahrens 4753fa9e4066Sahrens /* 475439c23413Seschrock * Reevaluate the parent vdev state. 4755fa9e4066Sahrens */ 47563d7072f8Seschrock vdev_propagate_state(cvd); 4757fa9e4066Sahrens 4758fa9e4066Sahrens /* 4759573ca77eSGeorge Wilson * If the 'autoexpand' property is set on the pool then automatically 4760573ca77eSGeorge Wilson * try to expand the size of the pool. For example if the device we 4761573ca77eSGeorge Wilson * just detached was smaller than the others, it may be possible to 4762573ca77eSGeorge Wilson * add metaslabs (i.e. grow the pool). We need to reopen the vdev 4763573ca77eSGeorge Wilson * first so that we can obtain the updated sizes of the leaf vdevs. 
4764fa9e4066Sahrens */ 4765573ca77eSGeorge Wilson if (spa->spa_autoexpand) { 4766573ca77eSGeorge Wilson vdev_reopen(tvd); 4767573ca77eSGeorge Wilson vdev_expand(tvd, txg); 4768573ca77eSGeorge Wilson } 4769fa9e4066Sahrens 4770fa9e4066Sahrens vdev_config_dirty(tvd); 4771fa9e4066Sahrens 4772fa9e4066Sahrens /* 477339c23413Seschrock * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 477439c23413Seschrock * vd->vdev_detached is set and free vd's DTL object in syncing context. 477539c23413Seschrock * But first make sure we're not on any *other* txg's DTL list, to 477639c23413Seschrock * prevent vd from being accessed after it's freed. 4777fa9e4066Sahrens */ 47781195e687SMark J Musante vdpath = spa_strdup(vd->vdev_path); 47798ad4d6ddSJeff Bonwick for (int t = 0; t < TXG_SIZE; t++) 4780fa9e4066Sahrens (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 4781ecc2d604Sbonwick vd->vdev_detached = B_TRUE; 4782ecc2d604Sbonwick vdev_dirty(tvd, VDD_DTL, vd, txg); 4783fa9e4066Sahrens 47843d7072f8Seschrock spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 47853d7072f8Seschrock 4786cb04b873SMark J Musante /* hang on to the spa before we release the lock */ 4787cb04b873SMark J Musante spa_open_ref(spa, FTAG); 4788cb04b873SMark J Musante 478999653d4eSeschrock error = spa_vdev_exit(spa, vd, txg, 0); 479099653d4eSeschrock 47914445fffbSMatthew Ahrens spa_history_log_internal(spa, "detach", NULL, 47921195e687SMark J Musante "vdev=%s", vdpath); 47931195e687SMark J Musante spa_strfree(vdpath); 47941195e687SMark J Musante 479599653d4eSeschrock /* 479639c23413Seschrock * If this was the removal of the original device in a hot spare vdev, 479739c23413Seschrock * then we want to go through and remove the device from the hot spare 479839c23413Seschrock * list of every other pool. 
479999653d4eSeschrock */ 480099653d4eSeschrock if (unspare) { 4801cb04b873SMark J Musante spa_t *altspa = NULL; 4802cb04b873SMark J Musante 480399653d4eSeschrock mutex_enter(&spa_namespace_lock); 4804cb04b873SMark J Musante while ((altspa = spa_next(altspa)) != NULL) { 4805cb04b873SMark J Musante if (altspa->spa_state != POOL_STATE_ACTIVE || 4806cb04b873SMark J Musante altspa == spa) 480799653d4eSeschrock continue; 4808cb04b873SMark J Musante 4809cb04b873SMark J Musante spa_open_ref(altspa, FTAG); 48109af0a4dfSJeff Bonwick mutex_exit(&spa_namespace_lock); 4811cb04b873SMark J Musante (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); 48129af0a4dfSJeff Bonwick mutex_enter(&spa_namespace_lock); 4813cb04b873SMark J Musante spa_close(altspa, FTAG); 481499653d4eSeschrock } 481599653d4eSeschrock mutex_exit(&spa_namespace_lock); 4816cb04b873SMark J Musante 4817cb04b873SMark J Musante /* search the rest of the vdevs for spares to remove */ 4818cb04b873SMark J Musante spa_vdev_resilver_done(spa); 481999653d4eSeschrock } 482099653d4eSeschrock 4821cb04b873SMark J Musante /* all done with the spa; OK to release */ 4822cb04b873SMark J Musante mutex_enter(&spa_namespace_lock); 4823cb04b873SMark J Musante spa_close(spa, FTAG); 4824cb04b873SMark J Musante mutex_exit(&spa_namespace_lock); 4825cb04b873SMark J Musante 482699653d4eSeschrock return (error); 482799653d4eSeschrock } 482899653d4eSeschrock 48291195e687SMark J Musante /* 48301195e687SMark J Musante * Split a set of devices from their mirrors, and create a new pool from them. 
48311195e687SMark J Musante */ 48321195e687SMark J Musante int 48331195e687SMark J Musante spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, 48341195e687SMark J Musante nvlist_t *props, boolean_t exp) 48351195e687SMark J Musante { 48361195e687SMark J Musante int error = 0; 48371195e687SMark J Musante uint64_t txg, *glist; 48381195e687SMark J Musante spa_t *newspa; 48391195e687SMark J Musante uint_t c, children, lastlog; 48401195e687SMark J Musante nvlist_t **child, *nvl, *tmp; 48411195e687SMark J Musante dmu_tx_t *tx; 48421195e687SMark J Musante char *altroot = NULL; 48431195e687SMark J Musante vdev_t *rvd, **vml = NULL; /* vdev modify list */ 48441195e687SMark J Musante boolean_t activate_slog; 48451195e687SMark J Musante 4846f9af39baSGeorge Wilson ASSERT(spa_writeable(spa)); 48471195e687SMark J Musante 48481195e687SMark J Musante txg = spa_vdev_enter(spa); 48491195e687SMark J Musante 48501195e687SMark J Musante /* clear the log and flush everything up to now */ 48511195e687SMark J Musante activate_slog = spa_passivate_log(spa); 48521195e687SMark J Musante (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 48531195e687SMark J Musante error = spa_offline_log(spa); 48541195e687SMark J Musante txg = spa_vdev_config_enter(spa); 48551195e687SMark J Musante 48561195e687SMark J Musante if (activate_slog) 48571195e687SMark J Musante spa_activate_log(spa); 48581195e687SMark J Musante 48591195e687SMark J Musante if (error != 0) 48601195e687SMark J Musante return (spa_vdev_exit(spa, NULL, txg, error)); 48611195e687SMark J Musante 48621195e687SMark J Musante /* check new spa name before going any further */ 48631195e687SMark J Musante if (spa_lookup(newname) != NULL) 48641195e687SMark J Musante return (spa_vdev_exit(spa, NULL, txg, EEXIST)); 48651195e687SMark J Musante 48661195e687SMark J Musante /* 48671195e687SMark J Musante * scan through all the children to ensure they're all mirrors 48681195e687SMark J Musante */ 48691195e687SMark J Musante if 
(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || 48701195e687SMark J Musante nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, 48711195e687SMark J Musante &children) != 0) 48721195e687SMark J Musante return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 48731195e687SMark J Musante 48741195e687SMark J Musante /* first, check to ensure we've got the right child count */ 48751195e687SMark J Musante rvd = spa->spa_root_vdev; 48761195e687SMark J Musante lastlog = 0; 48771195e687SMark J Musante for (c = 0; c < rvd->vdev_children; c++) { 48781195e687SMark J Musante vdev_t *vd = rvd->vdev_child[c]; 48791195e687SMark J Musante 48801195e687SMark J Musante /* don't count the holes & logs as children */ 48811195e687SMark J Musante if (vd->vdev_islog || vd->vdev_ishole) { 48821195e687SMark J Musante if (lastlog == 0) 48831195e687SMark J Musante lastlog = c; 48841195e687SMark J Musante continue; 48851195e687SMark J Musante } 48861195e687SMark J Musante 48871195e687SMark J Musante lastlog = 0; 48881195e687SMark J Musante } 48891195e687SMark J Musante if (children != (lastlog != 0 ? 
lastlog : rvd->vdev_children)) 48901195e687SMark J Musante return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 48911195e687SMark J Musante 48921195e687SMark J Musante /* next, ensure no spare or cache devices are part of the split */ 48931195e687SMark J Musante if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || 48941195e687SMark J Musante nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) 48951195e687SMark J Musante return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 48961195e687SMark J Musante 48971195e687SMark J Musante vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); 48981195e687SMark J Musante glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); 48991195e687SMark J Musante 49001195e687SMark J Musante /* then, loop over each vdev and validate it */ 49011195e687SMark J Musante for (c = 0; c < children; c++) { 49021195e687SMark J Musante uint64_t is_hole = 0; 49031195e687SMark J Musante 49041195e687SMark J Musante (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, 49051195e687SMark J Musante &is_hole); 49061195e687SMark J Musante 49071195e687SMark J Musante if (is_hole != 0) { 49081195e687SMark J Musante if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || 49091195e687SMark J Musante spa->spa_root_vdev->vdev_child[c]->vdev_islog) { 49101195e687SMark J Musante continue; 49111195e687SMark J Musante } else { 4912be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 49131195e687SMark J Musante break; 49141195e687SMark J Musante } 49151195e687SMark J Musante } 49161195e687SMark J Musante 49171195e687SMark J Musante /* which disk is going to be split? 
*/ 49181195e687SMark J Musante if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, 49191195e687SMark J Musante &glist[c]) != 0) { 4920be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 49211195e687SMark J Musante break; 49221195e687SMark J Musante } 49231195e687SMark J Musante 49241195e687SMark J Musante /* look it up in the spa */ 49251195e687SMark J Musante vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); 49261195e687SMark J Musante if (vml[c] == NULL) { 4927be6fd75aSMatthew Ahrens error = SET_ERROR(ENODEV); 49281195e687SMark J Musante break; 49291195e687SMark J Musante } 49301195e687SMark J Musante 49311195e687SMark J Musante /* make sure there's nothing stopping the split */ 49321195e687SMark J Musante if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || 49331195e687SMark J Musante vml[c]->vdev_islog || 49341195e687SMark J Musante vml[c]->vdev_ishole || 49351195e687SMark J Musante vml[c]->vdev_isspare || 49361195e687SMark J Musante vml[c]->vdev_isl2cache || 49371195e687SMark J Musante !vdev_writeable(vml[c]) || 4938d41c4376SMark J Musante vml[c]->vdev_children != 0 || 49391195e687SMark J Musante vml[c]->vdev_state != VDEV_STATE_HEALTHY || 49401195e687SMark J Musante c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { 4941be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 49421195e687SMark J Musante break; 49431195e687SMark J Musante } 49441195e687SMark J Musante 49451195e687SMark J Musante if (vdev_dtl_required(vml[c])) { 4946be6fd75aSMatthew Ahrens error = SET_ERROR(EBUSY); 49471195e687SMark J Musante break; 49481195e687SMark J Musante } 49491195e687SMark J Musante 49501195e687SMark J Musante /* we need certain info from the top level */ 49511195e687SMark J Musante VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, 49521195e687SMark J Musante vml[c]->vdev_top->vdev_ms_array) == 0); 49531195e687SMark J Musante VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, 49541195e687SMark J Musante vml[c]->vdev_top->vdev_ms_shift) == 0); 
49551195e687SMark J Musante VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, 49561195e687SMark J Musante vml[c]->vdev_top->vdev_asize) == 0); 49571195e687SMark J Musante VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, 49581195e687SMark J Musante vml[c]->vdev_top->vdev_ashift) == 0); 49591195e687SMark J Musante } 49601195e687SMark J Musante 49611195e687SMark J Musante if (error != 0) { 49621195e687SMark J Musante kmem_free(vml, children * sizeof (vdev_t *)); 49631195e687SMark J Musante kmem_free(glist, children * sizeof (uint64_t)); 49641195e687SMark J Musante return (spa_vdev_exit(spa, NULL, txg, error)); 49651195e687SMark J Musante } 49661195e687SMark J Musante 49671195e687SMark J Musante /* stop writers from using the disks */ 49681195e687SMark J Musante for (c = 0; c < children; c++) { 49691195e687SMark J Musante if (vml[c] != NULL) 49701195e687SMark J Musante vml[c]->vdev_offline = B_TRUE; 49711195e687SMark J Musante } 49721195e687SMark J Musante vdev_reopen(spa->spa_root_vdev); 49731195e687SMark J Musante 49741195e687SMark J Musante /* 49751195e687SMark J Musante * Temporarily record the splitting vdevs in the spa config. This 49761195e687SMark J Musante * will disappear once the config is regenerated. 
49771195e687SMark J Musante */ 49781195e687SMark J Musante VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 49791195e687SMark J Musante VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, 49801195e687SMark J Musante glist, children) == 0); 49811195e687SMark J Musante kmem_free(glist, children * sizeof (uint64_t)); 49821195e687SMark J Musante 498398295d61SMark J Musante mutex_enter(&spa->spa_props_lock); 49841195e687SMark J Musante VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, 49851195e687SMark J Musante nvl) == 0); 498698295d61SMark J Musante mutex_exit(&spa->spa_props_lock); 49871195e687SMark J Musante spa->spa_config_splitting = nvl; 49881195e687SMark J Musante vdev_config_dirty(spa->spa_root_vdev); 49891195e687SMark J Musante 49901195e687SMark J Musante /* configure and create the new pool */ 49911195e687SMark J Musante VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); 49921195e687SMark J Musante VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 49931195e687SMark J Musante exp ? 
POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); 49941195e687SMark J Musante VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, 49951195e687SMark J Musante spa_version(spa)) == 0); 49961195e687SMark J Musante VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, 49971195e687SMark J Musante spa->spa_config_txg) == 0); 49981195e687SMark J Musante VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, 49991195e687SMark J Musante spa_generate_guid(NULL)) == 0); 50001195e687SMark J Musante (void) nvlist_lookup_string(props, 50011195e687SMark J Musante zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 50021195e687SMark J Musante 5003d41c4376SMark J Musante /* add the new pool to the namespace */ 50041195e687SMark J Musante newspa = spa_add(newname, config, altroot); 50051195e687SMark J Musante newspa->spa_config_txg = spa->spa_config_txg; 50061195e687SMark J Musante spa_set_log_state(newspa, SPA_LOG_CLEAR); 50071195e687SMark J Musante 50081195e687SMark J Musante /* release the spa config lock, retaining the namespace lock */ 50091195e687SMark J Musante spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 50101195e687SMark J Musante 50111195e687SMark J Musante if (zio_injection_enabled) 50121195e687SMark J Musante zio_handle_panic_injection(spa, FTAG, 1); 50131195e687SMark J Musante 50141195e687SMark J Musante spa_activate(newspa, spa_mode_global); 50151195e687SMark J Musante spa_async_suspend(newspa); 50161195e687SMark J Musante 50171195e687SMark J Musante /* create the new pool from the disks of the original pool */ 50181195e687SMark J Musante error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); 50191195e687SMark J Musante if (error) 50201195e687SMark J Musante goto out; 50211195e687SMark J Musante 50221195e687SMark J Musante /* if that worked, generate a real config for the new pool */ 50231195e687SMark J Musante if (newspa->spa_root_vdev != NULL) { 50241195e687SMark J Musante VERIFY(nvlist_alloc(&newspa->spa_config_splitting, 50251195e687SMark J 
Musante NV_UNIQUE_NAME, KM_SLEEP) == 0); 50261195e687SMark J Musante VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, 50271195e687SMark J Musante ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); 50281195e687SMark J Musante spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, 50291195e687SMark J Musante B_TRUE)); 50301195e687SMark J Musante } 50311195e687SMark J Musante 50321195e687SMark J Musante /* set the props */ 50331195e687SMark J Musante if (props != NULL) { 50341195e687SMark J Musante spa_configfile_set(newspa, props, B_FALSE); 50351195e687SMark J Musante error = spa_prop_set(newspa, props); 50361195e687SMark J Musante if (error) 50371195e687SMark J Musante goto out; 50381195e687SMark J Musante } 50391195e687SMark J Musante 50401195e687SMark J Musante /* flush everything */ 50411195e687SMark J Musante txg = spa_vdev_config_enter(newspa); 50421195e687SMark J Musante vdev_config_dirty(newspa->spa_root_vdev); 50431195e687SMark J Musante (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); 50441195e687SMark J Musante 50451195e687SMark J Musante if (zio_injection_enabled) 50461195e687SMark J Musante zio_handle_panic_injection(spa, FTAG, 2); 50471195e687SMark J Musante 50481195e687SMark J Musante spa_async_resume(newspa); 50491195e687SMark J Musante 50501195e687SMark J Musante /* finally, update the original pool's config */ 50511195e687SMark J Musante txg = spa_vdev_config_enter(spa); 50521195e687SMark J Musante tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 50531195e687SMark J Musante error = dmu_tx_assign(tx, TXG_WAIT); 50541195e687SMark J Musante if (error != 0) 50551195e687SMark J Musante dmu_tx_abort(tx); 50561195e687SMark J Musante for (c = 0; c < children; c++) { 50571195e687SMark J Musante if (vml[c] != NULL) { 50581195e687SMark J Musante vdev_split(vml[c]); 50591195e687SMark J Musante if (error == 0) 50604445fffbSMatthew Ahrens spa_history_log_internal(spa, "detach", tx, 50614445fffbSMatthew Ahrens "vdev=%s", vml[c]->vdev_path); 
50621195e687SMark J Musante vdev_free(vml[c]); 50631195e687SMark J Musante } 50641195e687SMark J Musante } 50651195e687SMark J Musante vdev_config_dirty(spa->spa_root_vdev); 50661195e687SMark J Musante spa->spa_config_splitting = NULL; 50671195e687SMark J Musante nvlist_free(nvl); 50681195e687SMark J Musante if (error == 0) 50691195e687SMark J Musante dmu_tx_commit(tx); 50701195e687SMark J Musante (void) spa_vdev_exit(spa, NULL, txg, 0); 50711195e687SMark J Musante 50721195e687SMark J Musante if (zio_injection_enabled) 50731195e687SMark J Musante zio_handle_panic_injection(spa, FTAG, 3); 50741195e687SMark J Musante 50751195e687SMark J Musante /* split is complete; log a history record */ 50764445fffbSMatthew Ahrens spa_history_log_internal(newspa, "split", NULL, 50774445fffbSMatthew Ahrens "from pool %s", spa_name(spa)); 50781195e687SMark J Musante 50791195e687SMark J Musante kmem_free(vml, children * sizeof (vdev_t *)); 50801195e687SMark J Musante 50811195e687SMark J Musante /* if we're not going to mount the filesystems in userland, export */ 50821195e687SMark J Musante if (exp) 50831195e687SMark J Musante error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, 50841195e687SMark J Musante B_FALSE, B_FALSE); 50851195e687SMark J Musante 50861195e687SMark J Musante return (error); 50871195e687SMark J Musante 50881195e687SMark J Musante out: 50891195e687SMark J Musante spa_unload(newspa); 50901195e687SMark J Musante spa_deactivate(newspa); 50911195e687SMark J Musante spa_remove(newspa); 50921195e687SMark J Musante 50931195e687SMark J Musante txg = spa_vdev_config_enter(spa); 509498295d61SMark J Musante 509598295d61SMark J Musante /* re-online all offlined disks */ 509698295d61SMark J Musante for (c = 0; c < children; c++) { 509798295d61SMark J Musante if (vml[c] != NULL) 509898295d61SMark J Musante vml[c]->vdev_offline = B_FALSE; 509998295d61SMark J Musante } 510098295d61SMark J Musante vdev_reopen(spa->spa_root_vdev); 510198295d61SMark J Musante 
51021195e687SMark J Musante nvlist_free(spa->spa_config_splitting); 51031195e687SMark J Musante spa->spa_config_splitting = NULL; 5104d41c4376SMark J Musante (void) spa_vdev_exit(spa, NULL, txg, error); 51051195e687SMark J Musante 51061195e687SMark J Musante kmem_free(vml, children * sizeof (vdev_t *)); 51071195e687SMark J Musante return (error); 51081195e687SMark J Musante } 51091195e687SMark J Musante 5110e14bb325SJeff Bonwick static nvlist_t * 5111e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 511299653d4eSeschrock { 5113e14bb325SJeff Bonwick for (int i = 0; i < count; i++) { 5114e14bb325SJeff Bonwick uint64_t guid; 511599653d4eSeschrock 5116e14bb325SJeff Bonwick VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 5117e14bb325SJeff Bonwick &guid) == 0); 511899653d4eSeschrock 5119e14bb325SJeff Bonwick if (guid == target_guid) 5120e14bb325SJeff Bonwick return (nvpp[i]); 512199653d4eSeschrock } 512299653d4eSeschrock 5123e14bb325SJeff Bonwick return (NULL); 5124fa94a07fSbrendan } 5125fa94a07fSbrendan 5126e14bb325SJeff Bonwick static void 5127e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 5128e14bb325SJeff Bonwick nvlist_t *dev_to_remove) 5129fa94a07fSbrendan { 5130e14bb325SJeff Bonwick nvlist_t **newdev = NULL; 5131fa94a07fSbrendan 5132e14bb325SJeff Bonwick if (count > 1) 5133e14bb325SJeff Bonwick newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); 5134fa94a07fSbrendan 5135e14bb325SJeff Bonwick for (int i = 0, j = 0; i < count; i++) { 5136e14bb325SJeff Bonwick if (dev[i] == dev_to_remove) 5137e14bb325SJeff Bonwick continue; 5138e14bb325SJeff Bonwick VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 5139fa94a07fSbrendan } 5140fa94a07fSbrendan 5141e14bb325SJeff Bonwick VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 5142e14bb325SJeff Bonwick VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 
5143fa94a07fSbrendan 5144e14bb325SJeff Bonwick for (int i = 0; i < count - 1; i++) 5145e14bb325SJeff Bonwick nvlist_free(newdev[i]); 5146fa94a07fSbrendan 5147e14bb325SJeff Bonwick if (count > 1) 5148e14bb325SJeff Bonwick kmem_free(newdev, (count - 1) * sizeof (void *)); 5149fa94a07fSbrendan } 5150fa94a07fSbrendan 515188ecc943SGeorge Wilson /* 515288ecc943SGeorge Wilson * Evacuate the device. 515388ecc943SGeorge Wilson */ 51543f9d6ad7SLin Ling static int 515588ecc943SGeorge Wilson spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 515688ecc943SGeorge Wilson { 515788ecc943SGeorge Wilson uint64_t txg; 51583f9d6ad7SLin Ling int error = 0; 515988ecc943SGeorge Wilson 516088ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 516188ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 5162b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 516388ecc943SGeorge Wilson 516488ecc943SGeorge Wilson /* 516588ecc943SGeorge Wilson * Evacuate the device. We don't hold the config lock as writer 516688ecc943SGeorge Wilson * since we need to do I/O but we do keep the 516788ecc943SGeorge Wilson * spa_namespace_lock held. Once this completes the device 516888ecc943SGeorge Wilson * should no longer have any blocks allocated on it. 516988ecc943SGeorge Wilson */ 517088ecc943SGeorge Wilson if (vd->vdev_islog) { 51713f9d6ad7SLin Ling if (vd->vdev_stat.vs_alloc != 0) 51723f9d6ad7SLin Ling error = spa_offline_log(spa); 5173a1521560SJeff Bonwick } else { 5174be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTSUP); 517588ecc943SGeorge Wilson } 517688ecc943SGeorge Wilson 5177a1521560SJeff Bonwick if (error) 5178a1521560SJeff Bonwick return (error); 5179a1521560SJeff Bonwick 518088ecc943SGeorge Wilson /* 5181a1521560SJeff Bonwick * The evacuation succeeded. Remove any remaining MOS metadata 5182a1521560SJeff Bonwick * associated with this vdev, and wait for these changes to sync. 
518388ecc943SGeorge Wilson */ 5184fb09f5aaSMadhav Suresh ASSERT0(vd->vdev_stat.vs_alloc); 518588ecc943SGeorge Wilson txg = spa_vdev_config_enter(spa); 518688ecc943SGeorge Wilson vd->vdev_removing = B_TRUE; 51870713e232SGeorge Wilson vdev_dirty_leaves(vd, VDD_DTL, txg); 518888ecc943SGeorge Wilson vdev_config_dirty(vd); 518988ecc943SGeorge Wilson spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 519088ecc943SGeorge Wilson 519188ecc943SGeorge Wilson return (0); 519288ecc943SGeorge Wilson } 519388ecc943SGeorge Wilson 519488ecc943SGeorge Wilson /* 519588ecc943SGeorge Wilson * Complete the removal by cleaning up the namespace. 519688ecc943SGeorge Wilson */ 51973f9d6ad7SLin Ling static void 5198a1521560SJeff Bonwick spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) 519988ecc943SGeorge Wilson { 520088ecc943SGeorge Wilson vdev_t *rvd = spa->spa_root_vdev; 520188ecc943SGeorge Wilson uint64_t id = vd->vdev_id; 520288ecc943SGeorge Wilson boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 520388ecc943SGeorge Wilson 520488ecc943SGeorge Wilson ASSERT(MUTEX_HELD(&spa_namespace_lock)); 520588ecc943SGeorge Wilson ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 5206b24ab676SJeff Bonwick ASSERT(vd == vd->vdev_top); 520788ecc943SGeorge Wilson 52083f9d6ad7SLin Ling /* 52093f9d6ad7SLin Ling * Only remove any devices which are empty. 
52103f9d6ad7SLin Ling */ 52113f9d6ad7SLin Ling if (vd->vdev_stat.vs_alloc != 0) 52123f9d6ad7SLin Ling return; 52133f9d6ad7SLin Ling 521488ecc943SGeorge Wilson (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 5215b24ab676SJeff Bonwick 5216b24ab676SJeff Bonwick if (list_link_active(&vd->vdev_state_dirty_node)) 5217b24ab676SJeff Bonwick vdev_state_clean(vd); 5218b24ab676SJeff Bonwick if (list_link_active(&vd->vdev_config_dirty_node)) 5219b24ab676SJeff Bonwick vdev_config_clean(vd); 5220b24ab676SJeff Bonwick 522188ecc943SGeorge Wilson vdev_free(vd); 522288ecc943SGeorge Wilson 522388ecc943SGeorge Wilson if (last_vdev) { 522488ecc943SGeorge Wilson vdev_compact_children(rvd); 522588ecc943SGeorge Wilson } else { 522688ecc943SGeorge Wilson vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 522788ecc943SGeorge Wilson vdev_add_child(rvd, vd); 522888ecc943SGeorge Wilson } 5229fcbfa62bSLin Ling vdev_config_dirty(rvd); 5230fcbfa62bSLin Ling 5231fcbfa62bSLin Ling /* 5232fcbfa62bSLin Ling * Reassess the health of our root vdev. 5233fcbfa62bSLin Ling */ 5234fcbfa62bSLin Ling vdev_reopen(rvd); 523588ecc943SGeorge Wilson } 523688ecc943SGeorge Wilson 52373f9d6ad7SLin Ling /* 52383f9d6ad7SLin Ling * Remove a device from the pool - 52393f9d6ad7SLin Ling * 52403f9d6ad7SLin Ling * Removing a device from the vdev namespace requires several steps 52413f9d6ad7SLin Ling * and can take a significant amount of time. As a result we use 52423f9d6ad7SLin Ling * the spa_vdev_config_[enter/exit] functions which allow us to 52433f9d6ad7SLin Ling * grab and release the spa_config_lock while still holding the namespace 52443f9d6ad7SLin Ling * lock. During each step the configuration is synced out. 5245f7170741SWill Andrews * 5246f7170741SWill Andrews * Currently, this supports removing only hot spares, slogs, and level 2 ARC 5247f7170741SWill Andrews * devices. 
 */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	metaslab_group_t *mg;
	nvlist_t **spares, **l2cache, *nv;
	uint64_t txg = 0;
	uint_t nspares, nl2cache;
	int error = 0;
	/*
	 * If the caller already holds spa_namespace_lock we must not do
	 * our own spa_vdev_enter()/spa_vdev_exit() bracketing below.
	 */
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	ASSERT(spa_writeable(spa));

	if (!locked)
		txg = spa_vdev_enter(spa);

	/*
	 * vd is NULL when the guid is not in the main vdev tree -- e.g.
	 * when it names an unused hot spare or cache device, which are
	 * handled by the aux-device branches below.
	 */
	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.
		 */
		if (vd == NULL || unspare) {
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = SET_ERROR(EBUSY);
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL && vd->vdev_islog) {
		/*
		 * Log device removal: requires the evacuation dance below,
		 * which temporarily drops the vdev config lock, so it is
		 * incompatible with a caller that already holds the
		 * namespace lock.
		 */
		ASSERT(!locked);
		ASSERT(vd == vd->vdev_top);

		mg = vd->vdev_mg;

		/*
		 * Stop allocating from this vdev.
		 */
		metaslab_group_passivate(mg);

		/*
		 * Wait for the youngest allocations and frees to sync,
		 * and then wait for the deferral of those frees to finish.
		 */
		spa_vdev_config_exit(spa, NULL,
		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);

		/*
		 * Attempt to evacuate the vdev.
		 */
		error = spa_vdev_remove_evacuate(spa, vd);

		txg = spa_vdev_config_enter(spa);

		/*
		 * If we couldn't evacuate the vdev, unwind.
		 */
		if (error) {
			metaslab_group_activate(mg);
			return (spa_vdev_exit(spa, NULL, txg, error));
		}

		/*
		 * Clean up the vdev namespace.
		 */
		spa_vdev_remove_from_namespace(spa, vd);

	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = SET_ERROR(ENOTSUP);
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = SET_ERROR(ENOENT);
	}

	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}

/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * currently spared, so we can detach it.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;

	/* Depth-first: report the first detachable descendant found. */
	for (int c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement. We always consider the first
	 * vdev in the list to be the oldest vdev, and the last one to be
	 * the newest (see spa_vdev_attach() for how that works). In
	 * the case where the newest vdev is faulted, we will not automatically
	 * remove it after a resilver completes. This is OK as it will require
	 * user intervention to determine which disk the admin wishes to keep.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops) {
		ASSERT(vd->vdev_children > 1);

		newvd = vd->vdev_child[vd->vdev_children - 1];
		oldvd = vd->vdev_child[0];

		/*
		 * The old device may be detached only if the new one has a
		 * complete copy of the data (no missing/outage DTLs) and the
		 * old one is no longer required for correctness.
		 */
		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    vdev_dtl_empty(newvd, DTL_OUTAGE) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 */
	if (vd->vdev_ops == &vdev_spare_ops) {
		vdev_t *first = vd->vdev_child[0];
		vdev_t *last = vd->vdev_child[vd->vdev_children - 1];

		if (last->vdev_unspare) {
			oldvd = first;
			newvd = last;
		} else if (first->vdev_unspare) {
			oldvd = last;
			newvd = first;
		} else {
			oldvd = NULL;
		}

		if (oldvd != NULL &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    vdev_dtl_empty(newvd, DTL_OUTAGE) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);

		/*
		 * If there are more than two spares attached to a disk,
		 * and those spares are not required, then we want to
		 * attempt to free them up now so that they can be used
		 * by other pools.  Once we're back down to a single
		 * disk+spare, we stop removing them.
		 */
		if (vd->vdev_children > 2) {
			newvd = vd->vdev_child[1];

			if (newvd->vdev_isspare && last->vdev_isspare &&
			    vdev_dtl_empty(last, DTL_MISSING) &&
			    vdev_dtl_empty(last, DTL_OUTAGE) &&
			    !vdev_dtl_required(newvd))
				return (newvd);
		}
	}

	return (NULL);
}

/*
 * Detach every device that spa_vdev_resilver_done_hunt() reports as done
 * replacing (or spared-out with 'unspare' set).  The SCL_ALL config lock
 * is dropped around each spa_vdev_detach() call, so the hunt restarts
 * from the root vdev after every successful detach.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		/*
		 * Capture the guids before dropping the config lock; the
		 * vdev pointers are not safe to use once it is released.
		 */
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 &&
		    ppvd->vdev_children == 2) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd));

		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * Update the stored path or FRU for this vdev.
5469c67d9675Seschrock */ 5470c67d9675Seschrock int 54716809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 54726809eb4eSEric Schrock boolean_t ispath) 5473c67d9675Seschrock { 5474c5904d13Seschrock vdev_t *vd; 5475208044b8SGeorge Wilson boolean_t sync = B_FALSE; 5476c67d9675Seschrock 5477f9af39baSGeorge Wilson ASSERT(spa_writeable(spa)); 5478f9af39baSGeorge Wilson 5479b3388e4fSEric Taylor spa_vdev_state_enter(spa, SCL_ALL); 5480c67d9675Seschrock 54816809eb4eSEric Schrock if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 5482b3388e4fSEric Taylor return (spa_vdev_state_exit(spa, NULL, ENOENT)); 5483c67d9675Seschrock 54840e34b6a7Sbonwick if (!vd->vdev_ops->vdev_op_leaf) 5485b3388e4fSEric Taylor return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); 54860e34b6a7Sbonwick 54876809eb4eSEric Schrock if (ispath) { 5488208044b8SGeorge Wilson if (strcmp(value, vd->vdev_path) != 0) { 5489208044b8SGeorge Wilson spa_strfree(vd->vdev_path); 5490208044b8SGeorge Wilson vd->vdev_path = spa_strdup(value); 5491208044b8SGeorge Wilson sync = B_TRUE; 5492208044b8SGeorge Wilson } 54936809eb4eSEric Schrock } else { 5494208044b8SGeorge Wilson if (vd->vdev_fru == NULL) { 5495208044b8SGeorge Wilson vd->vdev_fru = spa_strdup(value); 5496208044b8SGeorge Wilson sync = B_TRUE; 5497208044b8SGeorge Wilson } else if (strcmp(value, vd->vdev_fru) != 0) { 54986809eb4eSEric Schrock spa_strfree(vd->vdev_fru); 5499208044b8SGeorge Wilson vd->vdev_fru = spa_strdup(value); 5500208044b8SGeorge Wilson sync = B_TRUE; 5501208044b8SGeorge Wilson } 55026809eb4eSEric Schrock } 5503c67d9675Seschrock 5504208044b8SGeorge Wilson return (spa_vdev_state_exit(spa, sync ? 
vd : NULL, 0)); 5505c67d9675Seschrock } 5506c67d9675Seschrock 55076809eb4eSEric Schrock int 55086809eb4eSEric Schrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 55096809eb4eSEric Schrock { 55106809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 55116809eb4eSEric Schrock } 55126809eb4eSEric Schrock 55136809eb4eSEric Schrock int 55146809eb4eSEric Schrock spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 55156809eb4eSEric Schrock { 55166809eb4eSEric Schrock return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 55176809eb4eSEric Schrock } 55186809eb4eSEric Schrock 5519fa9e4066Sahrens /* 5520fa9e4066Sahrens * ========================================================================== 55213f9d6ad7SLin Ling * SPA Scanning 5522fa9e4066Sahrens * ========================================================================== 5523fa9e4066Sahrens */ 5524fa9e4066Sahrens 5525ea8dc4b6Seschrock int 55263f9d6ad7SLin Ling spa_scan_stop(spa_t *spa) 5527fa9e4066Sahrens { 5528e14bb325SJeff Bonwick ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 55293f9d6ad7SLin Ling if (dsl_scan_resilvering(spa->spa_dsl_pool)) 5530be6fd75aSMatthew Ahrens return (SET_ERROR(EBUSY)); 55313f9d6ad7SLin Ling return (dsl_scan_cancel(spa->spa_dsl_pool)); 55323f9d6ad7SLin Ling } 5533bb8b5132Sek 55343f9d6ad7SLin Ling int 55353f9d6ad7SLin Ling spa_scan(spa_t *spa, pool_scan_func_t func) 55363f9d6ad7SLin Ling { 55373f9d6ad7SLin Ling ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 55383f9d6ad7SLin Ling 55393f9d6ad7SLin Ling if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) 5540be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTSUP)); 5541fa9e4066Sahrens 5542fa9e4066Sahrens /* 5543088f3894Sahrens * If a resilver was requested, but there is no DTL on a 5544088f3894Sahrens * writeable leaf device, we have nothing to do. 
	 */
	if (func == POOL_SCAN_RESILVER &&
	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
		return (0);
	}

	return (dsl_scan(spa->spa_dsl_pool, func));
}

/*
 * ==========================================================================
 * SPA async task processing
 * ==========================================================================
 */

/*
 * Service SPA_ASYNC_REMOVE: recursively transition every vdev flagged
 * vdev_remove_wanted to the REMOVED state and dirty its top-level vdev.
 */
static void
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = B_FALSE;
		vd->vdev_delayed_close = B_FALSE;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);

		/*
		 * We want to clear the stats, but we don't want to do a full
		 * vdev_clear() as that will cause us to throw away
		 * degraded/faulted state as well as attempt to reopen the
		 * device, all of which is a waste.
	 */
		vd->vdev_stat.vs_read_errors = 0;
		vd->vdev_stat.vs_write_errors = 0;
		vd->vdev_stat.vs_checksum_errors = 0;

		vdev_state_dirty(vd->vdev_top);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}

/*
 * Service SPA_ASYNC_PROBE: recursively reopen every vdev flagged
 * vdev_probe_wanted.
 */
static void
spa_async_probe(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_probe_wanted) {
		vd->vdev_probe_wanted = B_FALSE;
		vdev_reopen(vd);	/* vdev_open() does the actual probe */
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_probe(spa, vd->vdev_child[c]);
}

/*
 * Service SPA_ASYNC_AUTOEXPAND: post a DLE sysevent for every leaf vdev
 * with a known physical path, so the device size can be re-evaluated.
 * No-op unless the pool's autoexpand property is set.
 */
static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
	sysevent_id_t eid;
	nvlist_t *attr;
	char *physpath;

	if (!spa->spa_autoexpand)
		return;

	for (int c = 0; c < vd->vdev_children; c++) {
		vdev_t *cvd = vd->vdev_child[c];
		spa_async_autoexpand(spa, cvd);
	}

	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
		return;

	physpath =
	    kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);

	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);

	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);

	nvlist_free(attr);
	kmem_free(physpath, MAXPATHLEN);
}

/*
 * Async worker thread: atomically snapshot and clear the pending task
 * bitmask, service each requested task in turn, then clear
 * spa_async_thread, wake any waiters and exit.
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		uint64_t old_space, new_space;

		mutex_enter(&spa_namespace_lock);
		old_space = metaslab_class_get_space(spa_normal_class(spa));
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		new_space = metaslab_class_get_space(spa_normal_class(spa));
		mutex_exit(&spa_namespace_lock);

		/*
		 * If the pool grew as a result of the config update,
		 * then log an internal history event.
		 */
		if (new_space != old_space) {
			spa_history_log_internal(spa, "vdev online", NULL,
			    "pool '%s' size: %llu(+%llu)",
			    spa_name(spa), new_space, new_space - old_space);
		}
	}

	/*
	 * See if any devices need to be marked REMOVED.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/* Autoexpand is skipped while the pool's I/O is suspended. */
	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_async_autoexpand(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		dsl_resilver_restart(spa->spa_dsl_pool, 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}

/*
 * Block new async-task dispatch and wait for a running async worker, if
 * any, to finish.  Suspensions nest (counted); pair with
 * spa_async_resume().
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}

/* Undo one level of spa_async_suspend(). */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Decide whether there is async work worth dispatching a thread for.
 * A pending config update is ignored while we are inside the retry
 * back-off window that began at spa_ccw_fail_time.
 */
static boolean_t
spa_async_tasks_pending(spa_t *spa)
{
	uint_t non_config_tasks;
	uint_t config_task;
	boolean_t config_task_suspended;

	non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE;
	config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE;
	if (spa->spa_ccw_fail_time == 0) {
		config_task_suspended = B_FALSE;
	} else {
		config_task_suspended =
		    (gethrtime() - spa->spa_ccw_fail_time) <
		    (zfs_ccw_retry_interval * NANOSEC);
	}

	return (non_config_tasks || (config_task && !config_task_suspended));
}

/*
 * Spawn the async worker thread if there is pending work, dispatch is
 * not suspended, no worker is already running, and the root filesystem
 * is available (rootdir != NULL -- presumably not yet true during early
 * boot; confirm against callers).
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa_async_tasks_pending(spa) &&
	    !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL)
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}

/*
 * Record an async task request in the task bitmask; it will be serviced
 * the next time the async worker is dispatched.
 */
void
spa_async_request(spa_t *spa, int task)
{
	zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task);
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
}

/*
 * ==========================================================================
 * SPA syncing routines
 * ==========================================================================
 */

/* bplist/bpobj iteration callback: append the block pointer to a bpobj. */
static int
bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	bpobj_t *bpo = arg;
	bpobj_enqueue(bpo, bp, tx);
	return (0);
}

/* Iteration callback: issue an async free of the block under a root zio. */
static int
spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	zio_t *zio = arg;

	zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
	    zio->io_flags));
	return (0);
}

/*
 * Note: this simple function is not inlined to make it easier to dtrace the
 * amount of time spent syncing frees.
 */
static void
spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx)
{
	zio_t *zio = zio_root(spa, NULL, NULL, 0);
	bplist_iterate(bpl, spa_free_sync_cb, zio, tx);
	VERIFY(zio_wait(zio) == 0);
}

/*
 * Note: this simple function is not inlined to make it easier to dtrace the
 * amount of time spent syncing deferred frees.
 */
static void
spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx)
{
	zio_t *zio = zio_root(spa, NULL, NULL, 0);
	VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
	    spa_free_sync_cb, zio, tx), ==, 0);
	VERIFY0(zio_wait(zio));
}


/*
 * Pack 'nv' in XDR form and write it to MOS object 'obj', padded out to
 * whole SPA_CONFIG_BLOCKSIZE blocks; the packed size is recorded in the
 * object's bonus buffer.
 */
static void
spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
	char *packed = NULL;
	size_t bufsize;
	size_t nvsize = 0;
	dmu_buf_t *db;

	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);

	/*
	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
	 * information.  This avoids the dmu_buf_will_dirty() path and
	 * saves us a pre-read to get data we don't actually care about.
	 */
	bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
	packed = kmem_alloc(bufsize, KM_SLEEP);

	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
	    KM_SLEEP) == 0);
	/* Zero the pad so we never write uninitialized memory to disk. */
	bzero(packed + nvsize, bufsize - nvsize);

	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);

	kmem_free(packed, bufsize);

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = nvsize;
	dmu_buf_rele(db, FTAG);
}

/*
 * Sync the MOS object for one aux-device class (spares or l2cache),
 * creating it on first use.  No-op unless the class is flagged dirty
 * via sav_sync.
 */
static void
spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
    const char *config, const char *entry)
{
	nvlist_t *nvroot;
	nvlist_t **list;
	int i;

	if (!sav->sav_sync)
		return;

	/*
	 * Update the MOS nvlist describing the list of available devices.
	 * spa_validate_aux() will have already made sure this nvlist is
	 * valid and the vdevs are labeled appropriately.
	 */
	if (sav->sav_object == 0) {
		/* First sync of this class: create the packed-nvlist object
		 * and record it in the MOS directory under 'entry'. */
		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_update(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
		    &sav->sav_object, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	if (sav->sav_count == 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
	} else {
		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
		for (i = 0; i < sav->sav_count; i++)
			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
			    B_FALSE, VDEV_CONFIG_L2CACHE);
		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
		    sav->sav_count) == 0);
		for (i = 0; i < sav->sav_count; i++)
			nvlist_free(list[i]);
		kmem_free(list, sav->sav_count * sizeof (void *));
	}

	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
	nvlist_free(nvroot);

	sav->sav_sync = B_FALSE;
}

/*
 * Regenerate the pool config nvlist and write it to the MOS config
 * object.  No-op when no vdev configs are dirty.  The generated config
 * is kept in spa_config_syncing until the txg completes.
 */
static void
spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
{
	nvlist_t *config;

	if (list_is_empty(&spa->spa_config_dirty_list))
		return;

	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

	config = spa_config_generate(spa, spa->spa_root_vdev,
	    dmu_tx_get_txg(tx), B_FALSE);

	/*
	 * If we're upgrading the spa version then make sure that
	 * the config object gets updated with the correct version.
	 */
	if (spa->spa_ubsync.ub_version < spa->spa_uberblock.ub_version)
		fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
		    spa->spa_uberblock.ub_version);

	spa_config_exit(spa, SCL_STATE, FTAG);

	if (spa->spa_config_syncing)
		nvlist_free(spa->spa_config_syncing);
	spa->spa_config_syncing = config;

	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
}

/*
 * dsl_sync_task callback: raise the on-disk SPA version and dirty the
 * root vdev config so all labels pick up the new version.
 */
static void
spa_sync_version(void *arg, dmu_tx_t *tx)
{
	uint64_t *versionp = arg;
	uint64_t version = *versionp;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	/*
	 * Setting the version is special cased when first creating the pool.
5942ad135b5dSChristopher Siden */ 5943ad135b5dSChristopher Siden ASSERT(tx->tx_txg != TXG_INITIAL); 5944ad135b5dSChristopher Siden 594562eae887SRichard Yao ASSERT(SPA_VERSION_IS_SUPPORTED(version)); 5946ad135b5dSChristopher Siden ASSERT(version >= spa_version(spa)); 5947ad135b5dSChristopher Siden 5948ad135b5dSChristopher Siden spa->spa_uberblock.ub_version = version; 5949ad135b5dSChristopher Siden vdev_config_dirty(spa->spa_root_vdev); 59504445fffbSMatthew Ahrens spa_history_log_internal(spa, "set", tx, "version=%lld", version); 5951ad135b5dSChristopher Siden } 5952ad135b5dSChristopher Siden 5953990b4856Slling /* 5954990b4856Slling * Set zpool properties. 5955990b4856Slling */ 5956b1b8ab34Slling static void 59573b2aab18SMatthew Ahrens spa_sync_props(void *arg, dmu_tx_t *tx) 5958b1b8ab34Slling { 59593b2aab18SMatthew Ahrens nvlist_t *nvp = arg; 59603b2aab18SMatthew Ahrens spa_t *spa = dmu_tx_pool(tx)->dp_spa; 5961b1b8ab34Slling objset_t *mos = spa->spa_meta_objset; 5962ad135b5dSChristopher Siden nvpair_t *elem = NULL; 5963b1b8ab34Slling 5964e14bb325SJeff Bonwick mutex_enter(&spa->spa_props_lock); 5965e14bb325SJeff Bonwick 5966990b4856Slling while ((elem = nvlist_next_nvpair(nvp, elem))) { 5967ad135b5dSChristopher Siden uint64_t intval; 5968ad135b5dSChristopher Siden char *strval, *fname; 5969ad135b5dSChristopher Siden zpool_prop_t prop; 5970ad135b5dSChristopher Siden const char *propname; 5971ad135b5dSChristopher Siden zprop_type_t proptype; 59722acef22dSMatthew Ahrens spa_feature_t fid; 5973ad135b5dSChristopher Siden 5974990b4856Slling switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 5975ad135b5dSChristopher Siden case ZPROP_INVAL: 5976ad135b5dSChristopher Siden /* 5977ad135b5dSChristopher Siden * We checked this earlier in spa_prop_validate(). 
5978ad135b5dSChristopher Siden */ 5979ad135b5dSChristopher Siden ASSERT(zpool_prop_feature(nvpair_name(elem))); 5980ad135b5dSChristopher Siden 5981ad135b5dSChristopher Siden fname = strchr(nvpair_name(elem), '@') + 1; 59822acef22dSMatthew Ahrens VERIFY0(zfeature_lookup_name(fname, &fid)); 5983ad135b5dSChristopher Siden 59842acef22dSMatthew Ahrens spa_feature_enable(spa, fid, tx); 59854445fffbSMatthew Ahrens spa_history_log_internal(spa, "set", tx, 59864445fffbSMatthew Ahrens "%s=enabled", nvpair_name(elem)); 5987ad135b5dSChristopher Siden break; 5988ad135b5dSChristopher Siden 5989990b4856Slling case ZPOOL_PROP_VERSION: 59900713e232SGeorge Wilson intval = fnvpair_value_uint64(elem); 5991990b4856Slling /* 5992ad135b5dSChristopher Siden * The version is synced seperatly before other 5993ad135b5dSChristopher Siden * properties and should be correct by now. 5994990b4856Slling */ 5995ad135b5dSChristopher Siden ASSERT3U(spa_version(spa), >=, intval); 5996ecd6cf80Smarks break; 5997990b4856Slling 5998990b4856Slling case ZPOOL_PROP_ALTROOT: 5999990b4856Slling /* 6000990b4856Slling * 'altroot' is a non-persistent property. It should 6001990b4856Slling * have been set temporarily at creation or import time. 6002990b4856Slling */ 6003990b4856Slling ASSERT(spa->spa_root != NULL); 6004b1b8ab34Slling break; 60053d7072f8Seschrock 6006f9af39baSGeorge Wilson case ZPOOL_PROP_READONLY: 60072f8aaab3Seschrock case ZPOOL_PROP_CACHEFILE: 6008990b4856Slling /* 6009f9af39baSGeorge Wilson * 'readonly' and 'cachefile' are also non-persisitent 6010f9af39baSGeorge Wilson * properties. 
6011990b4856Slling */ 60123d7072f8Seschrock break; 60138704186eSDan McDonald case ZPOOL_PROP_COMMENT: 60140713e232SGeorge Wilson strval = fnvpair_value_string(elem); 60158704186eSDan McDonald if (spa->spa_comment != NULL) 60168704186eSDan McDonald spa_strfree(spa->spa_comment); 60178704186eSDan McDonald spa->spa_comment = spa_strdup(strval); 60188704186eSDan McDonald /* 60198704186eSDan McDonald * We need to dirty the configuration on all the vdevs 60208704186eSDan McDonald * so that their labels get updated. It's unnecessary 60218704186eSDan McDonald * to do this for pool creation since the vdev's 60228704186eSDan McDonald * configuratoin has already been dirtied. 60238704186eSDan McDonald */ 60248704186eSDan McDonald if (tx->tx_txg != TXG_INITIAL) 60258704186eSDan McDonald vdev_config_dirty(spa->spa_root_vdev); 60264445fffbSMatthew Ahrens spa_history_log_internal(spa, "set", tx, 60274445fffbSMatthew Ahrens "%s=%s", nvpair_name(elem), strval); 60288704186eSDan McDonald break; 6029990b4856Slling default: 6030990b4856Slling /* 6031990b4856Slling * Set pool property values in the poolprops mos object. 
6032990b4856Slling */ 6033990b4856Slling if (spa->spa_pool_props_object == 0) { 6034ad135b5dSChristopher Siden spa->spa_pool_props_object = 6035ad135b5dSChristopher Siden zap_create_link(mos, DMU_OT_POOL_PROPS, 6036990b4856Slling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 6037ad135b5dSChristopher Siden tx); 6038990b4856Slling } 6039990b4856Slling 6040990b4856Slling /* normalize the property name */ 6041990b4856Slling propname = zpool_prop_to_name(prop); 6042990b4856Slling proptype = zpool_prop_get_type(prop); 6043990b4856Slling 6044990b4856Slling if (nvpair_type(elem) == DATA_TYPE_STRING) { 6045990b4856Slling ASSERT(proptype == PROP_TYPE_STRING); 60460713e232SGeorge Wilson strval = fnvpair_value_string(elem); 60470713e232SGeorge Wilson VERIFY0(zap_update(mos, 6048990b4856Slling spa->spa_pool_props_object, propname, 60490713e232SGeorge Wilson 1, strlen(strval) + 1, strval, tx)); 60504445fffbSMatthew Ahrens spa_history_log_internal(spa, "set", tx, 60514445fffbSMatthew Ahrens "%s=%s", nvpair_name(elem), strval); 6052990b4856Slling } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 60530713e232SGeorge Wilson intval = fnvpair_value_uint64(elem); 6054990b4856Slling 6055990b4856Slling if (proptype == PROP_TYPE_INDEX) { 6056990b4856Slling const char *unused; 60570713e232SGeorge Wilson VERIFY0(zpool_prop_index_to_string( 60580713e232SGeorge Wilson prop, intval, &unused)); 6059990b4856Slling } 60600713e232SGeorge Wilson VERIFY0(zap_update(mos, 6061990b4856Slling spa->spa_pool_props_object, propname, 60620713e232SGeorge Wilson 8, 1, &intval, tx)); 60634445fffbSMatthew Ahrens spa_history_log_internal(spa, "set", tx, 60644445fffbSMatthew Ahrens "%s=%lld", nvpair_name(elem), intval); 6065990b4856Slling } else { 6066990b4856Slling ASSERT(0); /* not allowed */ 6067990b4856Slling } 6068990b4856Slling 60690a4e9518Sgw switch (prop) { 60700a4e9518Sgw case ZPOOL_PROP_DELEGATION: 6071990b4856Slling spa->spa_delegation = intval; 60720a4e9518Sgw break; 60730a4e9518Sgw case 
ZPOOL_PROP_BOOTFS: 6074990b4856Slling spa->spa_bootfs = intval; 60750a4e9518Sgw break; 60760a4e9518Sgw case ZPOOL_PROP_FAILUREMODE: 60770a4e9518Sgw spa->spa_failmode = intval; 60780a4e9518Sgw break; 6079573ca77eSGeorge Wilson case ZPOOL_PROP_AUTOEXPAND: 6080573ca77eSGeorge Wilson spa->spa_autoexpand = intval; 6081b98131cfSEric Taylor if (tx->tx_txg != TXG_INITIAL) 6082b98131cfSEric Taylor spa_async_request(spa, 6083b98131cfSEric Taylor SPA_ASYNC_AUTOEXPAND); 6084573ca77eSGeorge Wilson break; 6085b24ab676SJeff Bonwick case ZPOOL_PROP_DEDUPDITTO: 6086b24ab676SJeff Bonwick spa->spa_dedup_ditto = intval; 6087b24ab676SJeff Bonwick break; 60880a4e9518Sgw default: 60890a4e9518Sgw break; 60900a4e9518Sgw } 6091990b4856Slling } 6092990b4856Slling 6093b1b8ab34Slling } 6094e14bb325SJeff Bonwick 6095e14bb325SJeff Bonwick mutex_exit(&spa->spa_props_lock); 6096b1b8ab34Slling } 6097b1b8ab34Slling 6098cde58dbcSMatthew Ahrens /* 6099cde58dbcSMatthew Ahrens * Perform one-time upgrade on-disk changes. spa_version() does not 6100cde58dbcSMatthew Ahrens * reflect the new version this txg, so there must be no changes this 6101cde58dbcSMatthew Ahrens * txg to anything that the upgrade code depends on after it executes. 6102cde58dbcSMatthew Ahrens * Therefore this must be called after dsl_pool_sync() does the sync 6103cde58dbcSMatthew Ahrens * tasks. 
 */
static void
spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;

	ASSERT(spa->spa_sync_pass == 1);

	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);

	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) {
		dsl_pool_upgrade_dir_clones(dp, tx);

		/* Keeping the freedir open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
		spa_feature_create_zap_objects(spa, tx);
	}

	/*
	 * LZ4_COMPRESS feature's behaviour was changed to activate_on_enable
	 * when the possibility to use lz4 compression for metadata was added.
	 * Old pools that have this feature enabled must be upgraded to have
	 * this feature active.
	 */
	if (spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
		boolean_t lz4_en = spa_feature_is_enabled(spa,
		    SPA_FEATURE_LZ4_COMPRESS);
		boolean_t lz4_ac = spa_feature_is_active(spa,
		    SPA_FEATURE_LZ4_COMPRESS);

		if (lz4_en && !lz4_ac)
			spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx);
	}
	rrw_exit(&dp->dp_config_rwlock, FTAG);
}

/*
 * Sync the specified transaction group.  New blocks may be dirtied as
 * part of the process, so we iterate until it converges.
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int error;

	VERIFY(spa_writeable(spa));

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Arm the deadman cyclic: it fires if this sync runs longer than
	 * spa_deadman_synctime past the recorded start time.
	 */
	spa->spa_sync_starttime = gethrtime();
	VERIFY(cyclic_reprogram(spa->spa_deadman_cycid,
	    spa->spa_sync_starttime + spa->spa_deadman_synctime));

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	/*
	 * If anything has changed in this txg, or if someone is waiting
	 * for this txg to sync (eg, spa_vdev_remove()), push the
	 * deferred frees from the previous txg.  If not, leave them
	 * alone so that we don't generate work on an otherwise idle
	 * system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg) ||
	    ((dsl_scan_active(dp->dp_scan) ||
	    txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
		spa_sync_deferred_frees(spa, tx);
	}

	/*
	 * Iterate to convergence.
	 */
	do {
		int pass = ++spa->spa_sync_pass;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		/*
		 * Early passes free blocks directly; later passes defer
		 * them to the deferred bpobj to bound convergence time.
		 */
		if (pass < zfs_sync_pass_deferred_free) {
			spa_sync_frees(spa, free_bpl, tx);
		} else {
			bplist_iterate(free_bpl, bpobj_enqueue_cb,
			    &spa->spa_deferred_bpobj, tx);
		}

		ddt_sync(spa, txg);
		dsl_scan_sync(dp, tx);

		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
			vdev_sync(vd, txg);

		if (pass == 1)
			spa_sync_upgrades(spa, tx);

	} while (dmu_objset_is_dirty(mos, txg));

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);

			for (int c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			/* Retry once with the "try harder" flag on failure. */
			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(svd, svdcount, txg,
				    B_TRUE);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(rvd->vdev_child,
				    rvd->vdev_children, txg, B_TRUE);
		}

		if (error == 0)
			spa->spa_last_synced_guid = rvd->vdev_guid;

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/* Label writes failed: suspend I/O and wait before retrying. */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/* Disarm the deadman cyclic now that the sync has committed. */
	VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY));

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	spa->spa_ubsync = spa->spa_uberblock;

	dsl_pool_sync_done(dp, txg);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	spa_update_dspace(spa);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));

	spa->spa_sync_pass = 0;

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	spa_handle_ignored_writes(spa);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}

/*
 * Sync all pools.  We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		if (spa_state(spa) != POOL_STATE_ACTIVE ||
		    !spa_writeable(spa) || spa_suspended(spa))
			continue;
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * ==========================================================================
 * Miscellaneous routines
 *
 * ==========================================================================
 */

/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state.  All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks.  The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}

/*
 * Look up a vdev by guid in the pool's root vdev tree.  If 'aux' is set,
 * also search the l2cache and spare aux vdev lists.  Returns NULL if the
 * guid is not found.
 */
vdev_t *
spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
{
	vdev_t *vd;
	int i;

	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
		return (vd);

	if (aux) {
		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
			vd = spa->spa_l2cache.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}

		for (i = 0; i < spa->spa_spares.sav_count; i++) {
			vd = spa->spa_spares.sav_vdevs[i];
			if (vd->vdev_guid == guid)
				return (vd);
		}
	}

	return (NULL);
}

void
spa_upgrade(spa_t *spa, uint64_t version)
{
	ASSERT(spa_writeable(spa));

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version));
	ASSERT3U(version, >=, spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	/* Wait for the dirtied config (and new version) to reach disk. */
	txg_wait_synced(spa_get_dsl(spa), 0);
}

/*
 * Return B_TRUE if 'guid' matches one of the pool's configured spares,
 * or one of the spares currently pending addition.
 */
boolean_t
spa_has_spare(spa_t *spa, uint64_t guid)
{
	int i;
	uint64_t spareguid;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++)
		if (sav->sav_vdevs[i]->vdev_guid == guid)
			return (B_TRUE);

	for (i = 0; i < sav->sav_npending; i++) {
		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
		    &spareguid) == 0 && spareguid == guid)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Check if a pool has an active shared spare device.
 * Note: the reference count of an active spare is 2, as a spare and as a
 * replacement; hence the "refcnt > 2" test for a spare shared with another
 * pool.
 */
static boolean_t
spa_has_active_shared_spare(spa_t *spa)
{
	int i, refcnt;
	uint64_t pool;
	spa_aux_vdev_t *sav = &spa->spa_spares;

	for (i = 0; i < sav->sav_count; i++) {
		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
		    refcnt > 2)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Post a sysevent corresponding to the given event.  The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
 * filled in from the spa and (optionally) the vdev.  This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t *ev;
	sysevent_attr_list_t *attr = NULL;
	sysevent_value_t value;
	sysevent_id_t eid;

	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	/* Ownership of attr passed to ev; don't free it below. */
	attr = NULL;

	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}