1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*441d80aaSlling * Common Development and Distribution License (the "License"). 6*441d80aaSlling * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 2234f18512Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/zfs_context.h> 29fa9e4066Sahrens #include <sys/spa.h> 30fa9e4066Sahrens #include <sys/spa_impl.h> 31fa9e4066Sahrens #include <sys/dmu.h> 32fa9e4066Sahrens #include <sys/dmu_tx.h> 33fa9e4066Sahrens #include <sys/vdev_impl.h> 34fa9e4066Sahrens #include <sys/uberblock_impl.h> 35fa9e4066Sahrens #include <sys/metaslab.h> 36fa9e4066Sahrens #include <sys/metaslab_impl.h> 37fa9e4066Sahrens #include <sys/space_map.h> 38fa9e4066Sahrens #include <sys/zio.h> 39fa9e4066Sahrens #include <sys/zap.h> 40fa9e4066Sahrens #include <sys/fs/zfs.h> 41fa9e4066Sahrens 42fa9e4066Sahrens /* 43fa9e4066Sahrens * Virtual device management. 44fa9e4066Sahrens */ 45fa9e4066Sahrens 46fa9e4066Sahrens static vdev_ops_t *vdev_ops_table[] = { 47fa9e4066Sahrens &vdev_root_ops, 48fa9e4066Sahrens &vdev_raidz_ops, 49fa9e4066Sahrens &vdev_mirror_ops, 50fa9e4066Sahrens &vdev_replacing_ops, 51fa9e4066Sahrens &vdev_disk_ops, 52fa9e4066Sahrens &vdev_file_ops, 53fa9e4066Sahrens &vdev_missing_ops, 54fa9e4066Sahrens NULL 55fa9e4066Sahrens }; 56fa9e4066Sahrens 57fa9e4066Sahrens /* 58fa9e4066Sahrens * Given a vdev type, return the appropriate ops vector. 59fa9e4066Sahrens */ 60fa9e4066Sahrens static vdev_ops_t * 61fa9e4066Sahrens vdev_getops(const char *type) 62fa9e4066Sahrens { 63fa9e4066Sahrens vdev_ops_t *ops, **opspp; 64fa9e4066Sahrens 65fa9e4066Sahrens for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++) 66fa9e4066Sahrens if (strcmp(ops->vdev_op_type, type) == 0) 67fa9e4066Sahrens break; 68fa9e4066Sahrens 69fa9e4066Sahrens return (ops); 70fa9e4066Sahrens } 71fa9e4066Sahrens 72fa9e4066Sahrens /* 73fa9e4066Sahrens * Default asize function: return the MAX of psize with the asize of 74fa9e4066Sahrens * all children. This is what's used by anything other than RAID-Z. 75fa9e4066Sahrens */ 76fa9e4066Sahrens uint64_t 77fa9e4066Sahrens vdev_default_asize(vdev_t *vd, uint64_t psize) 78fa9e4066Sahrens { 79fa9e4066Sahrens uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_ashift); 80fa9e4066Sahrens uint64_t csize; 81fa9e4066Sahrens uint64_t c; 82fa9e4066Sahrens 83fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) { 84fa9e4066Sahrens csize = vdev_psize_to_asize(vd->vdev_child[c], psize); 85fa9e4066Sahrens asize = MAX(asize, csize); 86fa9e4066Sahrens } 87fa9e4066Sahrens 88fa9e4066Sahrens return (asize); 89fa9e4066Sahrens } 90fa9e4066Sahrens 912a79c5feSlling /* 922a79c5feSlling * Get the replaceable or attachable device size. 932a79c5feSlling * If the parent is a mirror or raidz, the replaceable size is the minimum 942a79c5feSlling * psize of all its children. For the rest, just return our own psize. 952a79c5feSlling * 962a79c5feSlling * e.g. 972a79c5feSlling * psize rsize 982a79c5feSlling * root - - 992a79c5feSlling * mirror/raidz - - 1002a79c5feSlling * disk1 20g 20g 1012a79c5feSlling * disk2 40g 20g 1022a79c5feSlling * disk3 80g 80g 1032a79c5feSlling */ 1042a79c5feSlling uint64_t 1052a79c5feSlling vdev_get_rsize(vdev_t *vd) 1062a79c5feSlling { 1072a79c5feSlling vdev_t *pvd, *cvd; 1082a79c5feSlling uint64_t c, rsize; 1092a79c5feSlling 1102a79c5feSlling pvd = vd->vdev_parent; 1112a79c5feSlling 1122a79c5feSlling /* 1132a79c5feSlling * If our parent is NULL or the root, just return our own psize. 1142a79c5feSlling */ 1152a79c5feSlling if (pvd == NULL || pvd->vdev_parent == NULL) 1162a79c5feSlling return (vd->vdev_psize); 1172a79c5feSlling 1182a79c5feSlling rsize = 0; 1192a79c5feSlling 1202a79c5feSlling for (c = 0; c < pvd->vdev_children; c++) { 1212a79c5feSlling cvd = pvd->vdev_child[c]; 1222a79c5feSlling rsize = MIN(rsize - 1, cvd->vdev_psize - 1) + 1; 1232a79c5feSlling } 1242a79c5feSlling 1252a79c5feSlling return (rsize); 1262a79c5feSlling } 1272a79c5feSlling 128fa9e4066Sahrens vdev_t * 129fa9e4066Sahrens vdev_lookup_top(spa_t *spa, uint64_t vdev) 130fa9e4066Sahrens { 131fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 132fa9e4066Sahrens 133fa9e4066Sahrens if (vdev < rvd->vdev_children) 134fa9e4066Sahrens return (rvd->vdev_child[vdev]); 135fa9e4066Sahrens 136fa9e4066Sahrens return (NULL); 137fa9e4066Sahrens } 138fa9e4066Sahrens 139fa9e4066Sahrens vdev_t * 140fa9e4066Sahrens vdev_lookup_by_path(vdev_t *vd, const char *path) 141fa9e4066Sahrens { 142fa9e4066Sahrens int c; 143fa9e4066Sahrens vdev_t *mvd; 144fa9e4066Sahrens 14534f18512Seschrock if (vd->vdev_path != NULL) { 14634f18512Seschrock if (vd->vdev_wholedisk == 1) { 14734f18512Seschrock /* 14834f18512Seschrock * For whole disks, the internal path has 's0', but the 14934f18512Seschrock * path passed in by the user doesn't. 15034f18512Seschrock */ 15134f18512Seschrock if (strlen(path) == strlen(vd->vdev_path) - 2 && 15234f18512Seschrock strncmp(path, vd->vdev_path, strlen(path)) == 0) 15334f18512Seschrock return (vd); 15434f18512Seschrock } else if (strcmp(path, vd->vdev_path) == 0) { 15534f18512Seschrock return (vd); 15634f18512Seschrock } 15734f18512Seschrock } 158fa9e4066Sahrens 159fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 160fa9e4066Sahrens if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 161fa9e4066Sahrens NULL) 162fa9e4066Sahrens return (mvd); 163fa9e4066Sahrens 164fa9e4066Sahrens return (NULL); 165fa9e4066Sahrens } 166fa9e4066Sahrens 167fa9e4066Sahrens vdev_t * 168fa9e4066Sahrens vdev_lookup_by_guid(vdev_t *vd, uint64_t guid) 169fa9e4066Sahrens { 170fa9e4066Sahrens int c; 171fa9e4066Sahrens vdev_t *mvd; 172fa9e4066Sahrens 173fa9e4066Sahrens if (vd->vdev_children == 0 && vd->vdev_guid == guid) 174fa9e4066Sahrens return (vd); 175fa9e4066Sahrens 176fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 177fa9e4066Sahrens if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) != 178fa9e4066Sahrens NULL) 179fa9e4066Sahrens return (mvd); 180fa9e4066Sahrens 181fa9e4066Sahrens return (NULL); 182fa9e4066Sahrens } 183fa9e4066Sahrens 184fa9e4066Sahrens void 185fa9e4066Sahrens vdev_add_child(vdev_t *pvd, vdev_t *cvd) 186fa9e4066Sahrens { 187fa9e4066Sahrens size_t oldsize, newsize; 188fa9e4066Sahrens uint64_t id = cvd->vdev_id; 189fa9e4066Sahrens vdev_t **newchild; 190fa9e4066Sahrens 191fa9e4066Sahrens ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER)); 192fa9e4066Sahrens ASSERT(cvd->vdev_parent == NULL); 193fa9e4066Sahrens 194fa9e4066Sahrens cvd->vdev_parent = pvd; 195fa9e4066Sahrens 196fa9e4066Sahrens if (pvd == NULL) 197fa9e4066Sahrens return; 198fa9e4066Sahrens 199fa9e4066Sahrens ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL); 200fa9e4066Sahrens 201fa9e4066Sahrens oldsize = pvd->vdev_children * sizeof (vdev_t *); 202fa9e4066Sahrens pvd->vdev_children = MAX(pvd->vdev_children, id + 1); 203fa9e4066Sahrens newsize = pvd->vdev_children * sizeof (vdev_t *); 204fa9e4066Sahrens 205fa9e4066Sahrens newchild = kmem_zalloc(newsize, KM_SLEEP); 206fa9e4066Sahrens if (pvd->vdev_child != NULL) { 207fa9e4066Sahrens bcopy(pvd->vdev_child, newchild, oldsize); 208fa9e4066Sahrens kmem_free(pvd->vdev_child, oldsize); 209fa9e4066Sahrens } 210fa9e4066Sahrens 211fa9e4066Sahrens pvd->vdev_child = newchild; 212fa9e4066Sahrens pvd->vdev_child[id] = cvd; 213fa9e4066Sahrens 214fa9e4066Sahrens cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd); 215fa9e4066Sahrens ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL); 216fa9e4066Sahrens 217fa9e4066Sahrens /* 218fa9e4066Sahrens * Walk up all ancestors to update guid sum. 219fa9e4066Sahrens */ 220fa9e4066Sahrens for (; pvd != NULL; pvd = pvd->vdev_parent) 221fa9e4066Sahrens pvd->vdev_guid_sum += cvd->vdev_guid_sum; 222fa9e4066Sahrens } 223fa9e4066Sahrens 224fa9e4066Sahrens void 225fa9e4066Sahrens vdev_remove_child(vdev_t *pvd, vdev_t *cvd) 226fa9e4066Sahrens { 227fa9e4066Sahrens int c; 228fa9e4066Sahrens uint_t id = cvd->vdev_id; 229fa9e4066Sahrens 230fa9e4066Sahrens ASSERT(cvd->vdev_parent == pvd); 231fa9e4066Sahrens 232fa9e4066Sahrens if (pvd == NULL) 233fa9e4066Sahrens return; 234fa9e4066Sahrens 235fa9e4066Sahrens ASSERT(id < pvd->vdev_children); 236fa9e4066Sahrens ASSERT(pvd->vdev_child[id] == cvd); 237fa9e4066Sahrens 238fa9e4066Sahrens pvd->vdev_child[id] = NULL; 239fa9e4066Sahrens cvd->vdev_parent = NULL; 240fa9e4066Sahrens 241fa9e4066Sahrens for (c = 0; c < pvd->vdev_children; c++) 242fa9e4066Sahrens if (pvd->vdev_child[c]) 243fa9e4066Sahrens break; 244fa9e4066Sahrens 245fa9e4066Sahrens if (c == pvd->vdev_children) { 246fa9e4066Sahrens kmem_free(pvd->vdev_child, c * sizeof (vdev_t *)); 247fa9e4066Sahrens pvd->vdev_child = NULL; 248fa9e4066Sahrens pvd->vdev_children = 0; 249fa9e4066Sahrens } 250fa9e4066Sahrens 251fa9e4066Sahrens /* 252fa9e4066Sahrens * Walk up all ancestors to update guid sum. 253fa9e4066Sahrens */ 254fa9e4066Sahrens for (; pvd != NULL; pvd = pvd->vdev_parent) 255fa9e4066Sahrens pvd->vdev_guid_sum -= cvd->vdev_guid_sum; 256fa9e4066Sahrens } 257fa9e4066Sahrens 258fa9e4066Sahrens /* 259fa9e4066Sahrens * Remove any holes in the child array. 260fa9e4066Sahrens */ 261fa9e4066Sahrens void 262fa9e4066Sahrens vdev_compact_children(vdev_t *pvd) 263fa9e4066Sahrens { 264fa9e4066Sahrens vdev_t **newchild, *cvd; 265fa9e4066Sahrens int oldc = pvd->vdev_children; 266fa9e4066Sahrens int newc, c; 267fa9e4066Sahrens 268fa9e4066Sahrens ASSERT(spa_config_held(pvd->vdev_spa, RW_WRITER)); 269fa9e4066Sahrens 270fa9e4066Sahrens for (c = newc = 0; c < oldc; c++) 271fa9e4066Sahrens if (pvd->vdev_child[c]) 272fa9e4066Sahrens newc++; 273fa9e4066Sahrens 274fa9e4066Sahrens newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP); 275fa9e4066Sahrens 276fa9e4066Sahrens for (c = newc = 0; c < oldc; c++) { 277fa9e4066Sahrens if ((cvd = pvd->vdev_child[c]) != NULL) { 278fa9e4066Sahrens newchild[newc] = cvd; 279fa9e4066Sahrens cvd->vdev_id = newc++; 280fa9e4066Sahrens } 281fa9e4066Sahrens } 282fa9e4066Sahrens 283fa9e4066Sahrens kmem_free(pvd->vdev_child, oldc * sizeof (vdev_t *)); 284fa9e4066Sahrens pvd->vdev_child = newchild; 285fa9e4066Sahrens pvd->vdev_children = newc; 286fa9e4066Sahrens } 287fa9e4066Sahrens 288fa9e4066Sahrens /* 289fa9e4066Sahrens * Allocate and minimally initialize a vdev_t. 290fa9e4066Sahrens */ 291fa9e4066Sahrens static vdev_t * 292fa9e4066Sahrens vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) 293fa9e4066Sahrens { 294fa9e4066Sahrens vdev_t *vd; 295fa9e4066Sahrens 296fa9e4066Sahrens while (guid == 0) 297fa9e4066Sahrens guid = spa_get_random(-1ULL); 298fa9e4066Sahrens 299fa9e4066Sahrens vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); 300fa9e4066Sahrens 301fa9e4066Sahrens vd->vdev_spa = spa; 302fa9e4066Sahrens vd->vdev_id = id; 303fa9e4066Sahrens vd->vdev_guid = guid; 304fa9e4066Sahrens vd->vdev_guid_sum = guid; 305fa9e4066Sahrens vd->vdev_ops = ops; 306fa9e4066Sahrens vd->vdev_state = VDEV_STATE_CLOSED; 307fa9e4066Sahrens 308fa9e4066Sahrens mutex_init(&vd->vdev_io_lock, NULL, MUTEX_DEFAULT, NULL); 309fa9e4066Sahrens cv_init(&vd->vdev_io_cv, NULL, CV_DEFAULT, NULL); 310fa9e4066Sahrens list_create(&vd->vdev_io_pending, sizeof (zio_t), 311fa9e4066Sahrens offsetof(zio_t, io_pending)); 312fa9e4066Sahrens mutex_init(&vd->vdev_dirty_lock, NULL, MUTEX_DEFAULT, NULL); 313fa9e4066Sahrens mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL); 314fa9e4066Sahrens space_map_create(&vd->vdev_dtl_map, 0, -1ULL, 0, &vd->vdev_dtl_lock); 315fa9e4066Sahrens space_map_create(&vd->vdev_dtl_scrub, 0, -1ULL, 0, &vd->vdev_dtl_lock); 316fa9e4066Sahrens txg_list_create(&vd->vdev_ms_list, 317fa9e4066Sahrens offsetof(struct metaslab, ms_txg_node)); 318fa9e4066Sahrens txg_list_create(&vd->vdev_dtl_list, 319fa9e4066Sahrens offsetof(struct vdev, vdev_dtl_node)); 320fa9e4066Sahrens vd->vdev_stat.vs_timestamp = gethrtime(); 321fa9e4066Sahrens 322fa9e4066Sahrens return (vd); 323fa9e4066Sahrens } 324fa9e4066Sahrens 325fa9e4066Sahrens /* 326fa9e4066Sahrens * Free a vdev_t that has been removed from service. 327fa9e4066Sahrens */ 328fa9e4066Sahrens static void 329fa9e4066Sahrens vdev_free_common(vdev_t *vd) 330fa9e4066Sahrens { 331fa9e4066Sahrens if (vd->vdev_path) 332fa9e4066Sahrens spa_strfree(vd->vdev_path); 333fa9e4066Sahrens if (vd->vdev_devid) 334fa9e4066Sahrens spa_strfree(vd->vdev_devid); 335fa9e4066Sahrens 336fa9e4066Sahrens txg_list_destroy(&vd->vdev_ms_list); 337fa9e4066Sahrens txg_list_destroy(&vd->vdev_dtl_list); 338fa9e4066Sahrens mutex_enter(&vd->vdev_dtl_lock); 339fa9e4066Sahrens space_map_vacate(&vd->vdev_dtl_map, NULL, NULL); 340fa9e4066Sahrens space_map_destroy(&vd->vdev_dtl_map); 341fa9e4066Sahrens space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); 342fa9e4066Sahrens space_map_destroy(&vd->vdev_dtl_scrub); 343fa9e4066Sahrens mutex_exit(&vd->vdev_dtl_lock); 344fa9e4066Sahrens mutex_destroy(&vd->vdev_dtl_lock); 345fa9e4066Sahrens mutex_destroy(&vd->vdev_dirty_lock); 346fa9e4066Sahrens list_destroy(&vd->vdev_io_pending); 347fa9e4066Sahrens mutex_destroy(&vd->vdev_io_lock); 348fa9e4066Sahrens cv_destroy(&vd->vdev_io_cv); 349fa9e4066Sahrens 350fa9e4066Sahrens kmem_free(vd, sizeof (vdev_t)); 351fa9e4066Sahrens } 352fa9e4066Sahrens 353fa9e4066Sahrens /* 354fa9e4066Sahrens * Allocate a new vdev. The 'alloctype' is used to control whether we are 355fa9e4066Sahrens * creating a new vdev or loading an existing one - the behavior is slightly 356fa9e4066Sahrens * different for each case. 357fa9e4066Sahrens */ 358fa9e4066Sahrens vdev_t * 359fa9e4066Sahrens vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) 360fa9e4066Sahrens { 361fa9e4066Sahrens vdev_ops_t *ops; 362fa9e4066Sahrens char *type; 363*441d80aaSlling uint64_t guid = 0, offline = 0; 364fa9e4066Sahrens vdev_t *vd; 365fa9e4066Sahrens 366fa9e4066Sahrens ASSERT(spa_config_held(spa, RW_WRITER)); 367fa9e4066Sahrens 368fa9e4066Sahrens if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) 369fa9e4066Sahrens return (NULL); 370fa9e4066Sahrens 371fa9e4066Sahrens if ((ops = vdev_getops(type)) == NULL) 372fa9e4066Sahrens return (NULL); 373fa9e4066Sahrens 374fa9e4066Sahrens /* 375fa9e4066Sahrens * If this is a load, get the vdev guid from the nvlist. 376fa9e4066Sahrens * Otherwise, vdev_alloc_common() will generate one for us. 377fa9e4066Sahrens */ 378fa9e4066Sahrens if (alloctype == VDEV_ALLOC_LOAD) { 379fa9e4066Sahrens uint64_t label_id; 380fa9e4066Sahrens 381fa9e4066Sahrens if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) || 382fa9e4066Sahrens label_id != id) 383fa9e4066Sahrens return (NULL); 384fa9e4066Sahrens 385fa9e4066Sahrens if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) 386fa9e4066Sahrens return (NULL); 387fa9e4066Sahrens } 388fa9e4066Sahrens 389fa9e4066Sahrens vd = vdev_alloc_common(spa, id, guid, ops); 390fa9e4066Sahrens 391fa9e4066Sahrens if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) 392fa9e4066Sahrens vd->vdev_path = spa_strdup(vd->vdev_path); 393fa9e4066Sahrens if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0) 394fa9e4066Sahrens vd->vdev_devid = spa_strdup(vd->vdev_devid); 395fa9e4066Sahrens 396afefbcddSeschrock /* 397afefbcddSeschrock * Set the whole_disk property. If it's not specified, leave the value 398afefbcddSeschrock * as -1. 399afefbcddSeschrock */ 400afefbcddSeschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 401afefbcddSeschrock &vd->vdev_wholedisk) != 0) 402afefbcddSeschrock vd->vdev_wholedisk = -1ULL; 403afefbcddSeschrock 404fa9e4066Sahrens /* 405fa9e4066Sahrens * If we're a top-level vdev, try to load the allocation parameters. 406fa9e4066Sahrens */ 407fa9e4066Sahrens if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) { 408fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, 409fa9e4066Sahrens &vd->vdev_ms_array); 410fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, 411fa9e4066Sahrens &vd->vdev_ms_shift); 412fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, 413fa9e4066Sahrens &vd->vdev_ashift); 414fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE, 415fa9e4066Sahrens &vd->vdev_asize); 416fa9e4066Sahrens } 417fa9e4066Sahrens 418fa9e4066Sahrens /* 419*441d80aaSlling * If we're a leaf vdev, try to load the DTL object 420*441d80aaSlling * and the offline state. 421fa9e4066Sahrens */ 422*441d80aaSlling vd->vdev_offline = B_FALSE; 423fa9e4066Sahrens if (vd->vdev_ops->vdev_op_leaf && alloctype == VDEV_ALLOC_LOAD) { 424fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL, 425fa9e4066Sahrens &vd->vdev_dtl.smo_object); 426*441d80aaSlling 427*441d80aaSlling if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &offline) 428*441d80aaSlling == 0) 429*441d80aaSlling vd->vdev_offline = offline; 430fa9e4066Sahrens } 431fa9e4066Sahrens 432fa9e4066Sahrens /* 433fa9e4066Sahrens * Add ourselves to the parent's list of children. 434fa9e4066Sahrens */ 435fa9e4066Sahrens vdev_add_child(parent, vd); 436fa9e4066Sahrens 437fa9e4066Sahrens return (vd); 438fa9e4066Sahrens } 439fa9e4066Sahrens 440fa9e4066Sahrens void 441fa9e4066Sahrens vdev_free(vdev_t *vd) 442fa9e4066Sahrens { 443fa9e4066Sahrens int c; 444fa9e4066Sahrens 445fa9e4066Sahrens /* 446fa9e4066Sahrens * vdev_free() implies closing the vdev first. This is simpler than 447fa9e4066Sahrens * trying to ensure complicated semantics for all callers. 448fa9e4066Sahrens */ 449fa9e4066Sahrens vdev_close(vd); 450fa9e4066Sahrens 451fa9e4066Sahrens /* 452fa9e4066Sahrens * It's possible to free a vdev that's been added to the dirty 453fa9e4066Sahrens * list when in the middle of spa_vdev_add(). Handle that case 454fa9e4066Sahrens * correctly here. 455fa9e4066Sahrens */ 456fa9e4066Sahrens if (vd->vdev_is_dirty) 457fa9e4066Sahrens vdev_config_clean(vd); 458fa9e4066Sahrens 459fa9e4066Sahrens /* 460fa9e4066Sahrens * Free all children. 461fa9e4066Sahrens */ 462fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 463fa9e4066Sahrens vdev_free(vd->vdev_child[c]); 464fa9e4066Sahrens 465fa9e4066Sahrens ASSERT(vd->vdev_child == NULL); 466fa9e4066Sahrens ASSERT(vd->vdev_guid_sum == vd->vdev_guid); 467fa9e4066Sahrens 468fa9e4066Sahrens /* 469fa9e4066Sahrens * Discard allocation state. 470fa9e4066Sahrens */ 471fa9e4066Sahrens if (vd == vd->vdev_top) 472fa9e4066Sahrens vdev_metaslab_fini(vd); 473fa9e4066Sahrens 474fa9e4066Sahrens ASSERT3U(vd->vdev_stat.vs_space, ==, 0); 475fa9e4066Sahrens ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); 476fa9e4066Sahrens 477fa9e4066Sahrens /* 478fa9e4066Sahrens * Remove this vdev from its parent's child list. 479fa9e4066Sahrens */ 480fa9e4066Sahrens vdev_remove_child(vd->vdev_parent, vd); 481fa9e4066Sahrens 482fa9e4066Sahrens ASSERT(vd->vdev_parent == NULL); 483fa9e4066Sahrens 484fa9e4066Sahrens vdev_free_common(vd); 485fa9e4066Sahrens } 486fa9e4066Sahrens 487fa9e4066Sahrens /* 488fa9e4066Sahrens * Transfer top-level vdev state from svd to tvd. 489fa9e4066Sahrens */ 490fa9e4066Sahrens static void 491fa9e4066Sahrens vdev_top_transfer(vdev_t *svd, vdev_t *tvd) 492fa9e4066Sahrens { 493fa9e4066Sahrens spa_t *spa = svd->vdev_spa; 494fa9e4066Sahrens metaslab_t *msp; 495fa9e4066Sahrens vdev_t *vd; 496fa9e4066Sahrens int t; 497fa9e4066Sahrens 498fa9e4066Sahrens ASSERT(tvd == tvd->vdev_top); 499fa9e4066Sahrens 500fa9e4066Sahrens tvd->vdev_ms_array = svd->vdev_ms_array; 501fa9e4066Sahrens tvd->vdev_ms_shift = svd->vdev_ms_shift; 502fa9e4066Sahrens tvd->vdev_ms_count = svd->vdev_ms_count; 503fa9e4066Sahrens 504fa9e4066Sahrens svd->vdev_ms_array = 0; 505fa9e4066Sahrens svd->vdev_ms_shift = 0; 506fa9e4066Sahrens svd->vdev_ms_count = 0; 507fa9e4066Sahrens 508fa9e4066Sahrens tvd->vdev_mg = svd->vdev_mg; 509fa9e4066Sahrens tvd->vdev_mg->mg_vd = tvd; 510fa9e4066Sahrens tvd->vdev_ms = svd->vdev_ms; 511fa9e4066Sahrens tvd->vdev_smo = svd->vdev_smo; 512fa9e4066Sahrens 513fa9e4066Sahrens svd->vdev_mg = NULL; 514fa9e4066Sahrens svd->vdev_ms = NULL; 515fa9e4066Sahrens svd->vdev_smo = NULL; 516fa9e4066Sahrens 517fa9e4066Sahrens tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc; 518fa9e4066Sahrens tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space; 519fa9e4066Sahrens 520fa9e4066Sahrens svd->vdev_stat.vs_alloc = 0; 521fa9e4066Sahrens svd->vdev_stat.vs_space = 0; 522fa9e4066Sahrens 523fa9e4066Sahrens for (t = 0; t < TXG_SIZE; t++) { 524fa9e4066Sahrens while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL) 525fa9e4066Sahrens (void) txg_list_add(&tvd->vdev_ms_list, msp, t); 526fa9e4066Sahrens while ((vd = txg_list_remove(&svd->vdev_dtl_list, t)) != NULL) 527fa9e4066Sahrens (void) txg_list_add(&tvd->vdev_dtl_list, vd, t); 528fa9e4066Sahrens if (txg_list_remove_this(&spa->spa_vdev_txg_list, svd, t)) 529fa9e4066Sahrens (void) txg_list_add(&spa->spa_vdev_txg_list, tvd, t); 530fa9e4066Sahrens tvd->vdev_dirty[t] = svd->vdev_dirty[t]; 531fa9e4066Sahrens svd->vdev_dirty[t] = 0; 532fa9e4066Sahrens } 533fa9e4066Sahrens 534fa9e4066Sahrens if (svd->vdev_is_dirty) { 535fa9e4066Sahrens vdev_config_clean(svd); 536fa9e4066Sahrens vdev_config_dirty(tvd); 537fa9e4066Sahrens } 538fa9e4066Sahrens 539fa9e4066Sahrens ASSERT(svd->vdev_io_retry == NULL); 540fa9e4066Sahrens ASSERT(list_is_empty(&svd->vdev_io_pending)); 541fa9e4066Sahrens } 542fa9e4066Sahrens 543fa9e4066Sahrens static void 544fa9e4066Sahrens vdev_top_update(vdev_t *tvd, vdev_t *vd) 545fa9e4066Sahrens { 546fa9e4066Sahrens int c; 547fa9e4066Sahrens 548fa9e4066Sahrens if (vd == NULL) 549fa9e4066Sahrens return; 550fa9e4066Sahrens 551fa9e4066Sahrens vd->vdev_top = tvd; 552fa9e4066Sahrens 553fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 554fa9e4066Sahrens vdev_top_update(tvd, vd->vdev_child[c]); 555fa9e4066Sahrens } 556fa9e4066Sahrens 557fa9e4066Sahrens /* 558fa9e4066Sahrens * Add a mirror/replacing vdev above an existing vdev. 559fa9e4066Sahrens */ 560fa9e4066Sahrens vdev_t * 561fa9e4066Sahrens vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) 562fa9e4066Sahrens { 563fa9e4066Sahrens spa_t *spa = cvd->vdev_spa; 564fa9e4066Sahrens vdev_t *pvd = cvd->vdev_parent; 565fa9e4066Sahrens vdev_t *mvd; 566fa9e4066Sahrens 567fa9e4066Sahrens ASSERT(spa_config_held(spa, RW_WRITER)); 568fa9e4066Sahrens 569fa9e4066Sahrens mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops); 570fa9e4066Sahrens vdev_remove_child(pvd, cvd); 571fa9e4066Sahrens vdev_add_child(pvd, mvd); 572fa9e4066Sahrens cvd->vdev_id = mvd->vdev_children; 573fa9e4066Sahrens vdev_add_child(mvd, cvd); 574fa9e4066Sahrens vdev_top_update(cvd->vdev_top, cvd->vdev_top); 575fa9e4066Sahrens 576fa9e4066Sahrens mvd->vdev_asize = cvd->vdev_asize; 577fa9e4066Sahrens mvd->vdev_ashift = cvd->vdev_ashift; 578fa9e4066Sahrens mvd->vdev_state = cvd->vdev_state; 579fa9e4066Sahrens 580fa9e4066Sahrens if (mvd == mvd->vdev_top) 581fa9e4066Sahrens vdev_top_transfer(cvd, mvd); 582fa9e4066Sahrens 583fa9e4066Sahrens return (mvd); 584fa9e4066Sahrens } 585fa9e4066Sahrens 586fa9e4066Sahrens /* 587fa9e4066Sahrens * Remove a 1-way mirror/replacing vdev from the tree. 588fa9e4066Sahrens */ 589fa9e4066Sahrens void 590fa9e4066Sahrens vdev_remove_parent(vdev_t *cvd) 591fa9e4066Sahrens { 592fa9e4066Sahrens vdev_t *mvd = cvd->vdev_parent; 593fa9e4066Sahrens vdev_t *pvd = mvd->vdev_parent; 594fa9e4066Sahrens 595fa9e4066Sahrens ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER)); 596fa9e4066Sahrens 597fa9e4066Sahrens ASSERT(mvd->vdev_children == 1); 598fa9e4066Sahrens ASSERT(mvd->vdev_ops == &vdev_mirror_ops || 599fa9e4066Sahrens mvd->vdev_ops == &vdev_replacing_ops); 600fa9e4066Sahrens 601fa9e4066Sahrens vdev_remove_child(mvd, cvd); 602fa9e4066Sahrens vdev_remove_child(pvd, mvd); 603fa9e4066Sahrens cvd->vdev_id = mvd->vdev_id; 604fa9e4066Sahrens vdev_add_child(pvd, cvd); 605fa9e4066Sahrens vdev_top_update(cvd->vdev_top, cvd->vdev_top); 606fa9e4066Sahrens 607fa9e4066Sahrens if (cvd == cvd->vdev_top) 608fa9e4066Sahrens vdev_top_transfer(mvd, cvd); 609fa9e4066Sahrens 610fa9e4066Sahrens ASSERT(mvd->vdev_children == 0); 611fa9e4066Sahrens vdev_free(mvd); 612fa9e4066Sahrens } 613fa9e4066Sahrens 614fa9e4066Sahrens void 615fa9e4066Sahrens vdev_metaslab_init(vdev_t *vd, uint64_t txg) 616fa9e4066Sahrens { 617fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 618fa9e4066Sahrens metaslab_class_t *mc = spa_metaslab_class_select(spa); 619fa9e4066Sahrens uint64_t c; 620fa9e4066Sahrens uint64_t oldc = vd->vdev_ms_count; 621fa9e4066Sahrens uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift; 622fa9e4066Sahrens space_map_obj_t *smo = vd->vdev_smo; 623fa9e4066Sahrens metaslab_t **mspp = vd->vdev_ms; 624fa9e4066Sahrens 625fa9e4066Sahrens dprintf("%s oldc %llu newc %llu\n", vdev_description(vd), oldc, newc); 626fa9e4066Sahrens 627fa9e4066Sahrens ASSERT(oldc <= newc); 628fa9e4066Sahrens 629fa9e4066Sahrens vd->vdev_smo = kmem_zalloc(newc * sizeof (*smo), KM_SLEEP); 630fa9e4066Sahrens vd->vdev_ms = kmem_zalloc(newc * sizeof (*mspp), KM_SLEEP); 631fa9e4066Sahrens vd->vdev_ms_count = newc; 632fa9e4066Sahrens 633fa9e4066Sahrens if (vd->vdev_mg == NULL) { 634fa9e4066Sahrens if (txg == 0) { 635fa9e4066Sahrens dmu_buf_t *db; 636fa9e4066Sahrens uint64_t *ms_array; 637fa9e4066Sahrens 638fa9e4066Sahrens ms_array = kmem_zalloc(newc * sizeof (uint64_t), 639fa9e4066Sahrens KM_SLEEP); 640fa9e4066Sahrens 641fa9e4066Sahrens dmu_read(spa->spa_meta_objset, vd->vdev_ms_array, 642fa9e4066Sahrens 0, newc * sizeof (uint64_t), ms_array); 643fa9e4066Sahrens 644fa9e4066Sahrens for (c = 0; c < newc; c++) { 645fa9e4066Sahrens if (ms_array[c] == 0) 646fa9e4066Sahrens continue; 647fa9e4066Sahrens db = dmu_bonus_hold(spa->spa_meta_objset, 648fa9e4066Sahrens ms_array[c]); 649fa9e4066Sahrens dmu_buf_read(db); 650fa9e4066Sahrens ASSERT3U(db->db_size, ==, sizeof (*smo)); 651fa9e4066Sahrens bcopy(db->db_data, &vd->vdev_smo[c], 652fa9e4066Sahrens db->db_size); 653fa9e4066Sahrens ASSERT3U(vd->vdev_smo[c].smo_object, ==, 654fa9e4066Sahrens ms_array[c]); 655fa9e4066Sahrens dmu_buf_rele(db); 656fa9e4066Sahrens } 657fa9e4066Sahrens kmem_free(ms_array, newc * sizeof (uint64_t)); 658fa9e4066Sahrens } 659fa9e4066Sahrens vd->vdev_mg = metaslab_group_create(mc, vd); 660fa9e4066Sahrens } 661fa9e4066Sahrens 662fa9e4066Sahrens for (c = 0; c < oldc; c++) { 663fa9e4066Sahrens vd->vdev_smo[c] = smo[c]; 664fa9e4066Sahrens vd->vdev_ms[c] = mspp[c]; 665fa9e4066Sahrens mspp[c]->ms_smo = &vd->vdev_smo[c]; 666fa9e4066Sahrens } 667fa9e4066Sahrens 668fa9e4066Sahrens for (c = oldc; c < newc; c++) 669fa9e4066Sahrens metaslab_init(vd->vdev_mg, &vd->vdev_smo[c], &vd->vdev_ms[c], 670fa9e4066Sahrens c << vd->vdev_ms_shift, 1ULL << vd->vdev_ms_shift, txg); 671fa9e4066Sahrens 672fa9e4066Sahrens if (oldc != 0) { 673fa9e4066Sahrens kmem_free(smo, oldc * sizeof (*smo)); 674fa9e4066Sahrens kmem_free(mspp, oldc * sizeof (*mspp)); 675fa9e4066Sahrens } 676fa9e4066Sahrens 677fa9e4066Sahrens } 678fa9e4066Sahrens 679fa9e4066Sahrens void 680fa9e4066Sahrens vdev_metaslab_fini(vdev_t *vd) 681fa9e4066Sahrens { 682fa9e4066Sahrens uint64_t m; 683fa9e4066Sahrens uint64_t count = vd->vdev_ms_count; 684fa9e4066Sahrens 685fa9e4066Sahrens if (vd->vdev_ms != NULL) { 686fa9e4066Sahrens for (m = 0; m < count; m++) 687fa9e4066Sahrens metaslab_fini(vd->vdev_ms[m]); 688fa9e4066Sahrens kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *)); 689fa9e4066Sahrens vd->vdev_ms = NULL; 690fa9e4066Sahrens } 691fa9e4066Sahrens 692fa9e4066Sahrens if (vd->vdev_smo != NULL) { 693fa9e4066Sahrens kmem_free(vd->vdev_smo, count * sizeof (space_map_obj_t)); 694fa9e4066Sahrens vd->vdev_smo = NULL; 695fa9e4066Sahrens } 696fa9e4066Sahrens } 697fa9e4066Sahrens 698fa9e4066Sahrens /* 699fa9e4066Sahrens * Prepare a virtual device for access. 700fa9e4066Sahrens */ 701fa9e4066Sahrens int 702fa9e4066Sahrens vdev_open(vdev_t *vd) 703fa9e4066Sahrens { 704fa9e4066Sahrens int error; 705fa9e4066Sahrens vdev_knob_t *vk; 706fa9e4066Sahrens int c; 707fa9e4066Sahrens uint64_t osize = 0; 708fa9e4066Sahrens uint64_t asize, psize; 709fa9e4066Sahrens uint64_t ashift = -1ULL; 710fa9e4066Sahrens 711fa9e4066Sahrens ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || 712fa9e4066Sahrens vd->vdev_state == VDEV_STATE_CANT_OPEN || 713fa9e4066Sahrens vd->vdev_state == VDEV_STATE_OFFLINE); 714fa9e4066Sahrens 715fa9e4066Sahrens if (vd->vdev_fault_mode == VDEV_FAULT_COUNT) 716fa9e4066Sahrens vd->vdev_fault_arg >>= 1; 717fa9e4066Sahrens else 718fa9e4066Sahrens vd->vdev_fault_mode = VDEV_FAULT_NONE; 719fa9e4066Sahrens 720fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_NONE; 721fa9e4066Sahrens 722fa9e4066Sahrens for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) { 723fa9e4066Sahrens uint64_t *valp = (uint64_t *)((char *)vd + vk->vk_offset); 724fa9e4066Sahrens 725fa9e4066Sahrens *valp = vk->vk_default; 726fa9e4066Sahrens *valp = MAX(*valp, vk->vk_min); 727fa9e4066Sahrens *valp = MIN(*valp, vk->vk_max); 728fa9e4066Sahrens } 729fa9e4066Sahrens 730fa9e4066Sahrens if (vd->vdev_ops->vdev_op_leaf) { 731fa9e4066Sahrens vdev_cache_init(vd); 732fa9e4066Sahrens vdev_queue_init(vd); 733fa9e4066Sahrens vd->vdev_cache_active = B_TRUE; 734fa9e4066Sahrens } 735fa9e4066Sahrens 736fa9e4066Sahrens if (vd->vdev_offline) { 737fa9e4066Sahrens ASSERT(vd->vdev_children == 0); 738fa9e4066Sahrens dprintf("OFFLINE: %s = ENXIO\n", vdev_description(vd)); 739fa9e4066Sahrens vd->vdev_state = VDEV_STATE_OFFLINE; 740fa9e4066Sahrens return (ENXIO); 741fa9e4066Sahrens } 742fa9e4066Sahrens 743fa9e4066Sahrens error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift); 744fa9e4066Sahrens 745fa9e4066Sahrens dprintf("%s = %d, osize %llu, state = %d\n", 746fa9e4066Sahrens vdev_description(vd), error, osize, vd->vdev_state); 747fa9e4066Sahrens 748fa9e4066Sahrens if (error) { 749fa9e4066Sahrens dprintf("%s in %s failed to open, error %d, aux %d\n", 750fa9e4066Sahrens vdev_description(vd), 751fa9e4066Sahrens vdev_description(vd->vdev_parent), 752fa9e4066Sahrens error, 753fa9e4066Sahrens vd->vdev_stat.vs_aux); 754fa9e4066Sahrens 755fa9e4066Sahrens vd->vdev_state = VDEV_STATE_CANT_OPEN; 756fa9e4066Sahrens return (error); 757fa9e4066Sahrens } 758fa9e4066Sahrens 759fa9e4066Sahrens vd->vdev_state = VDEV_STATE_HEALTHY; 760fa9e4066Sahrens 761fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 762fa9e4066Sahrens if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) 763fa9e4066Sahrens vd->vdev_state = VDEV_STATE_DEGRADED; 764fa9e4066Sahrens 765fa9e4066Sahrens osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t)); 766fa9e4066Sahrens 767fa9e4066Sahrens if (vd->vdev_children == 0) { 768fa9e4066Sahrens if (osize < SPA_MINDEVSIZE) { 769fa9e4066Sahrens vd->vdev_state = VDEV_STATE_CANT_OPEN; 770fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_TOO_SMALL; 771fa9e4066Sahrens return (EOVERFLOW); 772fa9e4066Sahrens } 773fa9e4066Sahrens psize = osize; 774fa9e4066Sahrens asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE); 775fa9e4066Sahrens } else { 776fa9e4066Sahrens if (osize < SPA_MINDEVSIZE - 777fa9e4066Sahrens (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) { 778fa9e4066Sahrens vd->vdev_state = VDEV_STATE_CANT_OPEN; 779fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_TOO_SMALL; 780fa9e4066Sahrens return (EOVERFLOW); 781fa9e4066Sahrens } 782fa9e4066Sahrens psize = 0; 783fa9e4066Sahrens asize = osize; 784fa9e4066Sahrens } 785fa9e4066Sahrens 786fa9e4066Sahrens vd->vdev_psize = psize; 787fa9e4066Sahrens 788fa9e4066Sahrens if (vd->vdev_asize == 0) { 789fa9e4066Sahrens /* 790fa9e4066Sahrens * This is the first-ever open, so use the computed values. 791fa9e4066Sahrens */ 792fa9e4066Sahrens vd->vdev_asize = asize; 793fa9e4066Sahrens vd->vdev_ashift = ashift; 794fa9e4066Sahrens } else { 795fa9e4066Sahrens /* 796fa9e4066Sahrens * Make sure the alignment requirement hasn't increased. 797fa9e4066Sahrens */ 798fa9e4066Sahrens if (ashift > vd->vdev_ashift) { 799fa9e4066Sahrens dprintf("%s: ashift grew\n", vdev_description(vd)); 800fa9e4066Sahrens vd->vdev_state = VDEV_STATE_CANT_OPEN; 801fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 802fa9e4066Sahrens return (EINVAL); 803fa9e4066Sahrens } 804fa9e4066Sahrens 805fa9e4066Sahrens /* 806fa9e4066Sahrens * Make sure the device hasn't shrunk. 807fa9e4066Sahrens */ 808fa9e4066Sahrens if (asize < vd->vdev_asize) { 809fa9e4066Sahrens dprintf("%s: device shrank\n", vdev_description(vd)); 810fa9e4066Sahrens vd->vdev_state = VDEV_STATE_CANT_OPEN; 811fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 812fa9e4066Sahrens return (EINVAL); 813fa9e4066Sahrens } 814fa9e4066Sahrens 815fa9e4066Sahrens /* 816fa9e4066Sahrens * If all children are healthy and the asize has increased, 817fa9e4066Sahrens * then we've experienced dynamic LUN growth. 818fa9e4066Sahrens */ 819fa9e4066Sahrens if (vd->vdev_state == VDEV_STATE_HEALTHY && 820fa9e4066Sahrens asize > vd->vdev_asize) { 821fa9e4066Sahrens dprintf("%s: device grew\n", vdev_description(vd)); 822fa9e4066Sahrens vd->vdev_asize = asize; 823fa9e4066Sahrens } 824fa9e4066Sahrens } 825fa9e4066Sahrens 826fa9e4066Sahrens return (0); 827fa9e4066Sahrens } 828fa9e4066Sahrens 829fa9e4066Sahrens /* 830fa9e4066Sahrens * Close a virtual device. 831fa9e4066Sahrens */ 832fa9e4066Sahrens void 833fa9e4066Sahrens vdev_close(vdev_t *vd) 834fa9e4066Sahrens { 835fa9e4066Sahrens ASSERT3P(list_head(&vd->vdev_io_pending), ==, NULL); 836fa9e4066Sahrens 837fa9e4066Sahrens vd->vdev_ops->vdev_op_close(vd); 838fa9e4066Sahrens 839fa9e4066Sahrens if (vd->vdev_cache_active) { 840fa9e4066Sahrens vdev_cache_fini(vd); 841fa9e4066Sahrens vdev_queue_fini(vd); 842fa9e4066Sahrens vd->vdev_cache_active = B_FALSE; 843fa9e4066Sahrens } 844fa9e4066Sahrens 845fa9e4066Sahrens if (vd->vdev_offline) 846fa9e4066Sahrens vd->vdev_state = VDEV_STATE_OFFLINE; 847fa9e4066Sahrens else 848fa9e4066Sahrens vd->vdev_state = VDEV_STATE_CLOSED; 849fa9e4066Sahrens } 850fa9e4066Sahrens 851fa9e4066Sahrens void 852fa9e4066Sahrens vdev_reopen(vdev_t *vd, zio_t **rq) 853fa9e4066Sahrens { 854fa9e4066Sahrens vdev_t *rvd = vd->vdev_spa->spa_root_vdev; 855fa9e4066Sahrens int c; 856fa9e4066Sahrens 857fa9e4066Sahrens if (vd == rvd) { 858fa9e4066Sahrens ASSERT(rq == NULL); 859fa9e4066Sahrens for (c = 0; c < rvd->vdev_children; c++) 860fa9e4066Sahrens vdev_reopen(rvd->vdev_child[c], NULL); 861fa9e4066Sahrens return; 862fa9e4066Sahrens } 863fa9e4066Sahrens 864fa9e4066Sahrens /* only valid for top-level vdevs */ 865fa9e4066Sahrens ASSERT3P(vd, ==, vd->vdev_top); 866fa9e4066Sahrens 867fa9e4066Sahrens /* 868fa9e4066Sahrens * vdev_state can change when spa_config_lock is held as writer, 869fa9e4066Sahrens * or when it's held as reader and we're doing a vdev_reopen(). 870fa9e4066Sahrens * To handle the latter case, we grab rvd's io_lock to serialize 871fa9e4066Sahrens * reopens. This ensures that there's never more than one vdev 872fa9e4066Sahrens * state changer active at a time. 873fa9e4066Sahrens */ 874fa9e4066Sahrens mutex_enter(&rvd->vdev_io_lock); 875fa9e4066Sahrens 876fa9e4066Sahrens mutex_enter(&vd->vdev_io_lock); 877fa9e4066Sahrens while (list_head(&vd->vdev_io_pending) != NULL) 878fa9e4066Sahrens cv_wait(&vd->vdev_io_cv, &vd->vdev_io_lock); 879fa9e4066Sahrens vdev_close(vd); 880fa9e4066Sahrens (void) vdev_open(vd); 881fa9e4066Sahrens if (rq != NULL) { 882fa9e4066Sahrens *rq = vd->vdev_io_retry; 883fa9e4066Sahrens vd->vdev_io_retry = NULL; 884fa9e4066Sahrens } 885fa9e4066Sahrens mutex_exit(&vd->vdev_io_lock); 886fa9e4066Sahrens 887fa9e4066Sahrens /* 888fa9e4066Sahrens * Reassess root vdev's health. 889fa9e4066Sahrens */ 890fa9e4066Sahrens rvd->vdev_state = VDEV_STATE_HEALTHY; 891fa9e4066Sahrens for (c = 0; c < rvd->vdev_children; c++) { 892fa9e4066Sahrens uint64_t state = rvd->vdev_child[c]->vdev_state; 893fa9e4066Sahrens rvd->vdev_state = MIN(rvd->vdev_state, state); 894fa9e4066Sahrens } 895fa9e4066Sahrens 896fa9e4066Sahrens mutex_exit(&rvd->vdev_io_lock); 897fa9e4066Sahrens } 898fa9e4066Sahrens 899fa9e4066Sahrens int 900fa9e4066Sahrens vdev_create(vdev_t *vd, uint64_t txg) 901fa9e4066Sahrens { 902fa9e4066Sahrens int error; 903fa9e4066Sahrens 904fa9e4066Sahrens /* 905fa9e4066Sahrens * Normally, partial opens (e.g. of a mirror) are allowed. 906fa9e4066Sahrens * For a create, however, we want to fail the request if 907fa9e4066Sahrens * there are any components we can't open. 908fa9e4066Sahrens */ 909fa9e4066Sahrens error = vdev_open(vd); 910fa9e4066Sahrens 911fa9e4066Sahrens if (error || vd->vdev_state != VDEV_STATE_HEALTHY) { 912fa9e4066Sahrens vdev_close(vd); 913fa9e4066Sahrens return (error ? error : ENXIO); 914fa9e4066Sahrens } 915fa9e4066Sahrens 916fa9e4066Sahrens /* 917fa9e4066Sahrens * Recursively initialize all labels. 918fa9e4066Sahrens */ 919fa9e4066Sahrens if ((error = vdev_label_init(vd, txg)) != 0) { 920fa9e4066Sahrens vdev_close(vd); 921fa9e4066Sahrens return (error); 922fa9e4066Sahrens } 923fa9e4066Sahrens 924fa9e4066Sahrens return (0); 925fa9e4066Sahrens } 926fa9e4066Sahrens 927fa9e4066Sahrens /* 928fa9e4066Sahrens * The is the latter half of vdev_create(). It is distinct because it 929fa9e4066Sahrens * involves initiating transactions in order to do metaslab creation. 930fa9e4066Sahrens * For creation, we want to try to create all vdevs at once and then undo it 931fa9e4066Sahrens * if anything fails; this is much harder if we have pending transactions. 932fa9e4066Sahrens */ 933fa9e4066Sahrens void 934fa9e4066Sahrens vdev_init(vdev_t *vd, uint64_t txg) 935fa9e4066Sahrens { 936fa9e4066Sahrens /* 937fa9e4066Sahrens * Aim for roughly 200 metaslabs per vdev. 938fa9e4066Sahrens */ 939fa9e4066Sahrens vd->vdev_ms_shift = highbit(vd->vdev_asize / 200); 940fa9e4066Sahrens vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); 941fa9e4066Sahrens 942fa9e4066Sahrens /* 943fa9e4066Sahrens * Initialize the vdev's metaslabs. 944fa9e4066Sahrens */ 945fa9e4066Sahrens vdev_metaslab_init(vd, txg); 946fa9e4066Sahrens } 947fa9e4066Sahrens 948fa9e4066Sahrens void 949fa9e4066Sahrens vdev_dirty(vdev_t *vd, uint8_t flags, uint64_t txg) 950fa9e4066Sahrens { 951fa9e4066Sahrens vdev_t *tvd = vd->vdev_top; 952fa9e4066Sahrens 953fa9e4066Sahrens mutex_enter(&tvd->vdev_dirty_lock); 954fa9e4066Sahrens if ((tvd->vdev_dirty[txg & TXG_MASK] & flags) != flags) { 955fa9e4066Sahrens tvd->vdev_dirty[txg & TXG_MASK] |= flags; 956fa9e4066Sahrens (void) txg_list_add(&tvd->vdev_spa->spa_vdev_txg_list, 957fa9e4066Sahrens tvd, txg); 958fa9e4066Sahrens } 959fa9e4066Sahrens mutex_exit(&tvd->vdev_dirty_lock); 960fa9e4066Sahrens } 961fa9e4066Sahrens 962fa9e4066Sahrens void 963fa9e4066Sahrens vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size) 964fa9e4066Sahrens { 965fa9e4066Sahrens mutex_enter(sm->sm_lock); 966fa9e4066Sahrens if (!space_map_contains(sm, txg, size)) 967fa9e4066Sahrens space_map_add(sm, txg, size); 968fa9e4066Sahrens mutex_exit(sm->sm_lock); 969fa9e4066Sahrens } 970fa9e4066Sahrens 971fa9e4066Sahrens int 972fa9e4066Sahrens vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size) 973fa9e4066Sahrens { 974fa9e4066Sahrens int dirty; 975fa9e4066Sahrens 976fa9e4066Sahrens /* 977fa9e4066Sahrens * Quick test without the lock -- covers the common case that 978fa9e4066Sahrens * there are no dirty time segments. 979fa9e4066Sahrens */ 980fa9e4066Sahrens if (sm->sm_space == 0) 981fa9e4066Sahrens return (0); 982fa9e4066Sahrens 983fa9e4066Sahrens mutex_enter(sm->sm_lock); 984fa9e4066Sahrens dirty = space_map_contains(sm, txg, size); 985fa9e4066Sahrens mutex_exit(sm->sm_lock); 986fa9e4066Sahrens 987fa9e4066Sahrens return (dirty); 988fa9e4066Sahrens } 989fa9e4066Sahrens 990fa9e4066Sahrens /* 991fa9e4066Sahrens * Reassess DTLs after a config change or scrub completion. 992fa9e4066Sahrens */ 993fa9e4066Sahrens void 994fa9e4066Sahrens vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) 995fa9e4066Sahrens { 996fa9e4066Sahrens int c; 997fa9e4066Sahrens 998fa9e4066Sahrens ASSERT(spa_config_held(vd->vdev_spa, RW_WRITER)); 999fa9e4066Sahrens 1000fa9e4066Sahrens if (vd->vdev_children == 0) { 1001fa9e4066Sahrens mutex_enter(&vd->vdev_dtl_lock); 1002fa9e4066Sahrens /* 1003fa9e4066Sahrens * We're successfully scrubbed everything up to scrub_txg. 1004fa9e4066Sahrens * Therefore, excise all old DTLs up to that point, then 1005fa9e4066Sahrens * fold in the DTLs for everything we couldn't scrub. 1006fa9e4066Sahrens */ 1007fa9e4066Sahrens if (scrub_txg != 0) { 1008fa9e4066Sahrens space_map_excise(&vd->vdev_dtl_map, 0, scrub_txg); 1009fa9e4066Sahrens space_map_union(&vd->vdev_dtl_map, &vd->vdev_dtl_scrub); 1010fa9e4066Sahrens } 1011fa9e4066Sahrens if (scrub_done) 1012fa9e4066Sahrens space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); 1013fa9e4066Sahrens mutex_exit(&vd->vdev_dtl_lock); 1014fa9e4066Sahrens if (txg != 0) { 1015fa9e4066Sahrens vdev_t *tvd = vd->vdev_top; 1016fa9e4066Sahrens vdev_dirty(tvd, VDD_DTL, txg); 1017fa9e4066Sahrens (void) txg_list_add(&tvd->vdev_dtl_list, vd, txg); 1018fa9e4066Sahrens } 1019fa9e4066Sahrens return; 1020fa9e4066Sahrens } 1021fa9e4066Sahrens 1022fa9e4066Sahrens mutex_enter(&vd->vdev_dtl_lock); 1023fa9e4066Sahrens space_map_vacate(&vd->vdev_dtl_map, NULL, NULL); 1024fa9e4066Sahrens space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); 1025fa9e4066Sahrens mutex_exit(&vd->vdev_dtl_lock); 1026fa9e4066Sahrens 1027fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) { 1028fa9e4066Sahrens vdev_t *cvd = vd->vdev_child[c]; 1029fa9e4066Sahrens vdev_dtl_reassess(cvd, txg, scrub_txg, scrub_done); 1030fa9e4066Sahrens mutex_enter(&vd->vdev_dtl_lock); 1031fa9e4066Sahrens space_map_union(&vd->vdev_dtl_map, &cvd->vdev_dtl_map); 1032fa9e4066Sahrens space_map_union(&vd->vdev_dtl_scrub, &cvd->vdev_dtl_scrub); 1033fa9e4066Sahrens mutex_exit(&vd->vdev_dtl_lock); 1034fa9e4066Sahrens } 1035fa9e4066Sahrens } 1036fa9e4066Sahrens 1037fa9e4066Sahrens static int 1038fa9e4066Sahrens vdev_dtl_load(vdev_t *vd) 1039fa9e4066Sahrens { 1040fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 1041fa9e4066Sahrens space_map_obj_t *smo = &vd->vdev_dtl; 1042fa9e4066Sahrens dmu_buf_t *db; 1043fa9e4066Sahrens int error; 1044fa9e4066Sahrens 1045fa9e4066Sahrens ASSERT(vd->vdev_children == 0); 1046fa9e4066Sahrens 1047fa9e4066Sahrens if (smo->smo_object == 0) 1048fa9e4066Sahrens return (0); 1049fa9e4066Sahrens 1050fa9e4066Sahrens db = dmu_bonus_hold(spa->spa_meta_objset, smo->smo_object); 1051fa9e4066Sahrens dmu_buf_read(db); 1052fa9e4066Sahrens ASSERT3U(db->db_size, ==, sizeof (*smo)); 1053fa9e4066Sahrens bcopy(db->db_data, smo, db->db_size); 1054fa9e4066Sahrens dmu_buf_rele(db); 1055fa9e4066Sahrens 1056fa9e4066Sahrens mutex_enter(&vd->vdev_dtl_lock); 1057fa9e4066Sahrens error = space_map_load(&vd->vdev_dtl_map, smo, SM_ALLOC, 1058fa9e4066Sahrens spa->spa_meta_objset, smo->smo_objsize, smo->smo_alloc); 1059fa9e4066Sahrens mutex_exit(&vd->vdev_dtl_lock); 1060fa9e4066Sahrens 1061fa9e4066Sahrens return (error); 1062fa9e4066Sahrens } 1063fa9e4066Sahrens 1064fa9e4066Sahrens void 1065fa9e4066Sahrens vdev_dtl_sync(vdev_t *vd, uint64_t txg) 1066fa9e4066Sahrens { 1067fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 1068fa9e4066Sahrens space_map_obj_t *smo = &vd->vdev_dtl; 1069fa9e4066Sahrens space_map_t *sm = &vd->vdev_dtl_map; 1070fa9e4066Sahrens space_map_t smsync; 1071fa9e4066Sahrens kmutex_t smlock; 1072fa9e4066Sahrens avl_tree_t *t = &sm->sm_root; 1073fa9e4066Sahrens space_seg_t *ss; 1074fa9e4066Sahrens dmu_buf_t *db; 1075fa9e4066Sahrens dmu_tx_t *tx; 1076fa9e4066Sahrens 1077fa9e4066Sahrens dprintf("%s in txg %llu pass %d\n", 1078fa9e4066Sahrens vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa)); 1079fa9e4066Sahrens 1080fa9e4066Sahrens tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 1081fa9e4066Sahrens 1082fa9e4066Sahrens if (vd->vdev_detached) { 1083fa9e4066Sahrens if (smo->smo_object != 0) { 1084fa9e4066Sahrens int err = dmu_object_free(spa->spa_meta_objset, 1085fa9e4066Sahrens smo->smo_object, tx); 1086fa9e4066Sahrens ASSERT3U(err, ==, 0); 1087fa9e4066Sahrens smo->smo_object = 0; 1088fa9e4066Sahrens } 1089fa9e4066Sahrens dmu_tx_commit(tx); 1090fa9e4066Sahrens return; 1091fa9e4066Sahrens } 1092fa9e4066Sahrens 1093fa9e4066Sahrens if (smo->smo_object == 0) { 1094fa9e4066Sahrens ASSERT(smo->smo_objsize == 0); 1095fa9e4066Sahrens ASSERT(smo->smo_alloc == 0); 1096fa9e4066Sahrens smo->smo_object = dmu_object_alloc(spa->spa_meta_objset, 1097fa9e4066Sahrens DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, 1098fa9e4066Sahrens DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); 1099fa9e4066Sahrens ASSERT(smo->smo_object != 0); 1100fa9e4066Sahrens vdev_config_dirty(vd->vdev_top); 1101fa9e4066Sahrens } 1102fa9e4066Sahrens 1103fa9e4066Sahrens dmu_free_range(spa->spa_meta_objset, smo->smo_object, 1104fa9e4066Sahrens 0, smo->smo_objsize, tx); 1105fa9e4066Sahrens 1106fa9e4066Sahrens mutex_init(&smlock, NULL, MUTEX_DEFAULT, NULL); 1107fa9e4066Sahrens 1108fa9e4066Sahrens space_map_create(&smsync, sm->sm_start, sm->sm_size, sm->sm_shift, 1109fa9e4066Sahrens &smlock); 1110fa9e4066Sahrens 1111fa9e4066Sahrens mutex_enter(&smlock); 1112fa9e4066Sahrens 1113fa9e4066Sahrens mutex_enter(&vd->vdev_dtl_lock); 1114fa9e4066Sahrens for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) 1115fa9e4066Sahrens space_map_add(&smsync, ss->ss_start, ss->ss_end - ss->ss_start); 1116fa9e4066Sahrens mutex_exit(&vd->vdev_dtl_lock); 1117fa9e4066Sahrens 1118fa9e4066Sahrens smo->smo_objsize = 0; 1119fa9e4066Sahrens smo->smo_alloc = smsync.sm_space; 1120fa9e4066Sahrens 1121fa9e4066Sahrens space_map_sync(&smsync, NULL, smo, SM_ALLOC, spa->spa_meta_objset, tx); 1122fa9e4066Sahrens space_map_destroy(&smsync); 1123fa9e4066Sahrens 1124fa9e4066Sahrens mutex_exit(&smlock); 1125fa9e4066Sahrens mutex_destroy(&smlock); 1126fa9e4066Sahrens 1127fa9e4066Sahrens db = dmu_bonus_hold(spa->spa_meta_objset, smo->smo_object); 1128fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 1129fa9e4066Sahrens ASSERT3U(db->db_size, ==, sizeof (*smo)); 1130fa9e4066Sahrens bcopy(smo, db->db_data, db->db_size); 1131fa9e4066Sahrens dmu_buf_rele(db); 1132fa9e4066Sahrens 1133fa9e4066Sahrens dmu_tx_commit(tx); 1134fa9e4066Sahrens } 1135fa9e4066Sahrens 1136fa9e4066Sahrens int 1137fa9e4066Sahrens vdev_load(vdev_t *vd, int import) 1138fa9e4066Sahrens { 1139fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 1140fa9e4066Sahrens int c, error; 1141fa9e4066Sahrens nvlist_t *label; 1142fa9e4066Sahrens uint64_t guid, state; 1143fa9e4066Sahrens 1144fa9e4066Sahrens dprintf("loading %s\n", vdev_description(vd)); 1145fa9e4066Sahrens 1146fa9e4066Sahrens /* 1147fa9e4066Sahrens * Recursively load all children. 1148fa9e4066Sahrens */ 1149fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 1150fa9e4066Sahrens if ((error = vdev_load(vd->vdev_child[c], import)) != 0) 1151fa9e4066Sahrens return (error); 1152fa9e4066Sahrens 1153fa9e4066Sahrens /* 1154fa9e4066Sahrens * If this is a leaf vdev, make sure its agrees with its disk labels. 1155fa9e4066Sahrens */ 1156fa9e4066Sahrens if (vd->vdev_ops->vdev_op_leaf) { 1157fa9e4066Sahrens 1158fa9e4066Sahrens if (vdev_is_dead(vd)) 1159fa9e4066Sahrens return (0); 1160fa9e4066Sahrens 1161fa9e4066Sahrens /* 1162fa9e4066Sahrens * XXX state transitions don't propagate to parent here. 1163fa9e4066Sahrens * Also, merely setting the state isn't sufficient because 1164fa9e4066Sahrens * it's not persistent; a vdev_reopen() would make us 1165fa9e4066Sahrens * forget all about it. 1166fa9e4066Sahrens */ 1167fa9e4066Sahrens if ((label = vdev_label_read_config(vd)) == NULL) { 1168fa9e4066Sahrens dprintf("can't load label config\n"); 1169fa9e4066Sahrens vdev_set_state(vd, VDEV_STATE_CANT_OPEN, 1170fa9e4066Sahrens VDEV_AUX_CORRUPT_DATA); 1171fa9e4066Sahrens return (0); 1172fa9e4066Sahrens } 1173fa9e4066Sahrens 1174fa9e4066Sahrens if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, 1175fa9e4066Sahrens &guid) != 0 || guid != spa_guid(spa)) { 1176fa9e4066Sahrens dprintf("bad or missing pool GUID (%llu)\n", guid); 1177fa9e4066Sahrens vdev_set_state(vd, VDEV_STATE_CANT_OPEN, 1178fa9e4066Sahrens VDEV_AUX_CORRUPT_DATA); 1179fa9e4066Sahrens nvlist_free(label); 1180fa9e4066Sahrens return (0); 1181fa9e4066Sahrens } 1182fa9e4066Sahrens 1183fa9e4066Sahrens if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) || 1184fa9e4066Sahrens guid != vd->vdev_guid) { 1185fa9e4066Sahrens dprintf("bad or missing vdev guid (%llu != %llu)\n", 1186fa9e4066Sahrens guid, vd->vdev_guid); 1187fa9e4066Sahrens vdev_set_state(vd, VDEV_STATE_CANT_OPEN, 1188fa9e4066Sahrens VDEV_AUX_CORRUPT_DATA); 1189fa9e4066Sahrens nvlist_free(label); 1190fa9e4066Sahrens return (0); 1191fa9e4066Sahrens } 1192fa9e4066Sahrens 1193fa9e4066Sahrens /* 1194fa9e4066Sahrens * If we find a vdev with a matching pool guid and vdev guid, 1195fa9e4066Sahrens * but the pool state is not active, it indicates that the user 1196fa9e4066Sahrens * exported or destroyed the pool without affecting the config 1197fa9e4066Sahrens * cache (if / was mounted readonly, for example). In this 1198fa9e4066Sahrens * case, immediately return EBADF so the caller can remove it 1199fa9e4066Sahrens * from the config. 1200fa9e4066Sahrens */ 1201fa9e4066Sahrens if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, 1202fa9e4066Sahrens &state)) { 1203fa9e4066Sahrens dprintf("missing pool state\n"); 1204fa9e4066Sahrens vdev_set_state(vd, VDEV_STATE_CANT_OPEN, 1205fa9e4066Sahrens VDEV_AUX_CORRUPT_DATA); 1206fa9e4066Sahrens nvlist_free(label); 1207fa9e4066Sahrens return (0); 1208fa9e4066Sahrens } 1209fa9e4066Sahrens 1210fa9e4066Sahrens if (state != POOL_STATE_ACTIVE && 1211fa9e4066Sahrens (!import || state != POOL_STATE_EXPORTED)) { 1212fa9e4066Sahrens dprintf("pool state not active (%llu)\n", state); 1213fa9e4066Sahrens nvlist_free(label); 1214fa9e4066Sahrens return (EBADF); 1215fa9e4066Sahrens } 1216fa9e4066Sahrens 1217fa9e4066Sahrens nvlist_free(label); 1218fa9e4066Sahrens } 1219fa9e4066Sahrens 1220fa9e4066Sahrens /* 1221fa9e4066Sahrens * If this is a top-level vdev, make sure its allocation parameters 1222fa9e4066Sahrens * exist and initialize its metaslabs. 1223fa9e4066Sahrens */ 1224fa9e4066Sahrens if (vd == vd->vdev_top) { 1225fa9e4066Sahrens 1226fa9e4066Sahrens if (vd->vdev_ms_array == 0 || 1227fa9e4066Sahrens vd->vdev_ms_shift == 0 || 1228fa9e4066Sahrens vd->vdev_ashift == 0 || 1229fa9e4066Sahrens vd->vdev_asize == 0) { 1230fa9e4066Sahrens vdev_set_state(vd, VDEV_STATE_CANT_OPEN, 1231fa9e4066Sahrens VDEV_AUX_CORRUPT_DATA); 1232fa9e4066Sahrens return (0); 1233fa9e4066Sahrens } 1234fa9e4066Sahrens 1235fa9e4066Sahrens vdev_metaslab_init(vd, 0); 1236fa9e4066Sahrens } 1237fa9e4066Sahrens 1238fa9e4066Sahrens /* 1239fa9e4066Sahrens * If this is a leaf vdev, load its DTL. 1240fa9e4066Sahrens */ 1241fa9e4066Sahrens if (vd->vdev_ops->vdev_op_leaf) { 1242fa9e4066Sahrens error = vdev_dtl_load(vd); 1243fa9e4066Sahrens if (error) { 1244fa9e4066Sahrens dprintf("can't load DTL for %s, error %d\n", 1245fa9e4066Sahrens vdev_description(vd), error); 1246fa9e4066Sahrens vdev_set_state(vd, VDEV_STATE_CANT_OPEN, 1247fa9e4066Sahrens VDEV_AUX_CORRUPT_DATA); 1248fa9e4066Sahrens return (0); 1249fa9e4066Sahrens } 1250fa9e4066Sahrens } 1251fa9e4066Sahrens 1252fa9e4066Sahrens return (0); 1253fa9e4066Sahrens } 1254fa9e4066Sahrens 1255fa9e4066Sahrens void 1256fa9e4066Sahrens vdev_sync_done(vdev_t *vd, uint64_t txg) 1257fa9e4066Sahrens { 1258fa9e4066Sahrens metaslab_t *msp; 1259fa9e4066Sahrens 1260fa9e4066Sahrens dprintf("%s txg %llu\n", vdev_description(vd), txg); 1261fa9e4066Sahrens 1262fa9e4066Sahrens while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) 1263fa9e4066Sahrens metaslab_sync_done(msp, txg); 1264fa9e4066Sahrens } 1265fa9e4066Sahrens 1266fa9e4066Sahrens void 1267fa9e4066Sahrens vdev_add_sync(vdev_t *vd, uint64_t txg) 1268fa9e4066Sahrens { 1269fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 1270fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 1271fa9e4066Sahrens 1272fa9e4066Sahrens ASSERT(vd == vd->vdev_top); 1273fa9e4066Sahrens 1274fa9e4066Sahrens if (vd->vdev_ms_array == 0) 1275fa9e4066Sahrens vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset, 1276fa9e4066Sahrens DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx); 1277fa9e4066Sahrens 1278fa9e4066Sahrens ASSERT(vd->vdev_ms_array != 0); 1279fa9e4066Sahrens 1280fa9e4066Sahrens vdev_config_dirty(vd); 1281fa9e4066Sahrens 1282fa9e4066Sahrens dmu_tx_commit(tx); 1283fa9e4066Sahrens } 1284fa9e4066Sahrens 1285fa9e4066Sahrens void 1286fa9e4066Sahrens vdev_sync(vdev_t *vd, uint64_t txg) 1287fa9e4066Sahrens { 1288fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 1289fa9e4066Sahrens vdev_t *lvd; 1290fa9e4066Sahrens metaslab_t *msp; 1291fa9e4066Sahrens uint8_t *dirtyp = &vd->vdev_dirty[txg & TXG_MASK]; 1292fa9e4066Sahrens uint8_t dirty = *dirtyp; 1293fa9e4066Sahrens 1294fa9e4066Sahrens mutex_enter(&vd->vdev_dirty_lock); 1295fa9e4066Sahrens *dirtyp &= ~(VDD_ALLOC | VDD_FREE | VDD_ADD | VDD_DTL); 1296fa9e4066Sahrens mutex_exit(&vd->vdev_dirty_lock); 1297fa9e4066Sahrens 1298fa9e4066Sahrens dprintf("%s txg %llu pass %d\n", 1299fa9e4066Sahrens vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa)); 1300fa9e4066Sahrens 1301fa9e4066Sahrens if (dirty & VDD_ADD) 1302fa9e4066Sahrens vdev_add_sync(vd, txg); 1303fa9e4066Sahrens 1304fa9e4066Sahrens while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) 1305fa9e4066Sahrens metaslab_sync(msp, txg); 1306fa9e4066Sahrens 1307fa9e4066Sahrens while ((lvd = txg_list_remove(&vd->vdev_dtl_list, txg)) != NULL) 1308fa9e4066Sahrens vdev_dtl_sync(lvd, txg); 1309fa9e4066Sahrens 1310fa9e4066Sahrens (void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)); 1311fa9e4066Sahrens } 1312fa9e4066Sahrens 1313fa9e4066Sahrens uint64_t 1314fa9e4066Sahrens vdev_psize_to_asize(vdev_t *vd, uint64_t psize) 1315fa9e4066Sahrens { 1316fa9e4066Sahrens return (vd->vdev_ops->vdev_op_asize(vd, psize)); 1317fa9e4066Sahrens } 1318fa9e4066Sahrens 1319fa9e4066Sahrens void 1320fa9e4066Sahrens vdev_io_start(zio_t *zio) 1321fa9e4066Sahrens { 1322fa9e4066Sahrens zio->io_vd->vdev_ops->vdev_op_io_start(zio); 1323fa9e4066Sahrens } 1324fa9e4066Sahrens 1325fa9e4066Sahrens void 1326fa9e4066Sahrens vdev_io_done(zio_t *zio) 1327fa9e4066Sahrens { 1328fa9e4066Sahrens zio->io_vd->vdev_ops->vdev_op_io_done(zio); 1329fa9e4066Sahrens } 1330fa9e4066Sahrens 1331fa9e4066Sahrens const char * 1332fa9e4066Sahrens vdev_description(vdev_t *vd) 1333fa9e4066Sahrens { 1334fa9e4066Sahrens if (vd == NULL || vd->vdev_ops == NULL) 1335fa9e4066Sahrens return ("<unknown>"); 1336fa9e4066Sahrens 1337fa9e4066Sahrens if (vd->vdev_path != NULL) 1338fa9e4066Sahrens return (vd->vdev_path); 1339fa9e4066Sahrens 1340fa9e4066Sahrens if (vd->vdev_parent == NULL) 1341fa9e4066Sahrens return (spa_name(vd->vdev_spa)); 1342fa9e4066Sahrens 1343fa9e4066Sahrens return (vd->vdev_ops->vdev_op_type); 1344fa9e4066Sahrens } 1345fa9e4066Sahrens 1346fa9e4066Sahrens int 1347fa9e4066Sahrens vdev_online(spa_t *spa, const char *path) 1348fa9e4066Sahrens { 1349*441d80aaSlling vdev_t *rvd, *vd; 1350*441d80aaSlling uint64_t txg; 1351fa9e4066Sahrens 1352*441d80aaSlling txg = spa_vdev_enter(spa); 1353fa9e4066Sahrens 1354*441d80aaSlling rvd = spa->spa_root_vdev; 1355*441d80aaSlling if ((vd = vdev_lookup_by_path(rvd, path)) == NULL) 1356*441d80aaSlling return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 1357fa9e4066Sahrens 1358fa9e4066Sahrens dprintf("ONLINE: %s\n", vdev_description(vd)); 1359fa9e4066Sahrens 1360fa9e4066Sahrens vd->vdev_offline = B_FALSE; 1361*441d80aaSlling vd->vdev_tmpoffline = B_FALSE; 1362fa9e4066Sahrens 1363fa9e4066Sahrens /* 1364fa9e4066Sahrens * Clear the error counts. The idea is that you expect to see all 1365fa9e4066Sahrens * zeroes when everything is working, so if you've just onlined a 1366fa9e4066Sahrens * device, you don't want to keep hearing about errors from before. 1367fa9e4066Sahrens */ 1368fa9e4066Sahrens vd->vdev_stat.vs_read_errors = 0; 1369fa9e4066Sahrens vd->vdev_stat.vs_write_errors = 0; 1370fa9e4066Sahrens vd->vdev_stat.vs_checksum_errors = 0; 1371fa9e4066Sahrens 1372fa9e4066Sahrens vdev_reopen(vd->vdev_top, NULL); 1373fa9e4066Sahrens 1374*441d80aaSlling spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); 1375*441d80aaSlling 1376*441d80aaSlling vdev_config_dirty(vd->vdev_top); 1377*441d80aaSlling 1378*441d80aaSlling (void) spa_vdev_exit(spa, NULL, txg, 0); 1379fa9e4066Sahrens 1380fa9e4066Sahrens VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); 1381fa9e4066Sahrens 1382fa9e4066Sahrens return (0); 1383fa9e4066Sahrens } 1384fa9e4066Sahrens 1385fa9e4066Sahrens int 1386*441d80aaSlling vdev_offline(spa_t *spa, const char *path, int istmp) 1387fa9e4066Sahrens { 1388*441d80aaSlling vdev_t *rvd, *vd; 1389*441d80aaSlling uint64_t txg; 1390fa9e4066Sahrens 1391*441d80aaSlling txg = spa_vdev_enter(spa); 1392fa9e4066Sahrens 1393*441d80aaSlling rvd = spa->spa_root_vdev; 1394*441d80aaSlling if ((vd = vdev_lookup_by_path(rvd, path)) == NULL) 1395*441d80aaSlling return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 1396fa9e4066Sahrens 1397fa9e4066Sahrens dprintf("OFFLINE: %s\n", vdev_description(vd)); 1398fa9e4066Sahrens 1399*441d80aaSlling /* vdev is already offlined, do nothing */ 1400*441d80aaSlling if (vd->vdev_offline) 1401*441d80aaSlling return (spa_vdev_exit(spa, NULL, txg, 0)); 1402*441d80aaSlling 1403fa9e4066Sahrens /* 1404fa9e4066Sahrens * If this device's top-level vdev has a non-empty DTL, 1405fa9e4066Sahrens * don't allow the device to be offlined. 1406fa9e4066Sahrens * 1407fa9e4066Sahrens * XXX -- we should make this more precise by allowing the offline 1408fa9e4066Sahrens * as long as the remaining devices don't have any DTL holes. 1409fa9e4066Sahrens */ 1410*441d80aaSlling if (vd->vdev_top->vdev_dtl_map.sm_space != 0) 1411*441d80aaSlling return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 1412fa9e4066Sahrens 1413fa9e4066Sahrens /* 1414fa9e4066Sahrens * Set this device to offline state and reopen its top-level vdev. 1415fa9e4066Sahrens * If this action results in the top-level vdev becoming unusable, 1416fa9e4066Sahrens * undo it and fail the request. 1417fa9e4066Sahrens */ 1418fa9e4066Sahrens vd->vdev_offline = B_TRUE; 1419fa9e4066Sahrens vdev_reopen(vd->vdev_top, NULL); 1420fa9e4066Sahrens if (vdev_is_dead(vd->vdev_top)) { 1421fa9e4066Sahrens vd->vdev_offline = B_FALSE; 1422fa9e4066Sahrens vdev_reopen(vd->vdev_top, NULL); 1423*441d80aaSlling return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 1424fa9e4066Sahrens } 1425fa9e4066Sahrens 1426*441d80aaSlling vd->vdev_tmpoffline = istmp; 1427*441d80aaSlling if (istmp) 1428*441d80aaSlling return (spa_vdev_exit(spa, NULL, txg, 0)); 1429fa9e4066Sahrens 1430*441d80aaSlling spa_config_set(spa, spa_config_generate(spa, rvd, txg, 0)); 1431*441d80aaSlling 1432*441d80aaSlling vdev_config_dirty(vd->vdev_top); 1433*441d80aaSlling 1434*441d80aaSlling return (spa_vdev_exit(spa, NULL, txg, 0)); 1435fa9e4066Sahrens } 1436fa9e4066Sahrens 1437fa9e4066Sahrens int 1438fa9e4066Sahrens vdev_error_setup(spa_t *spa, const char *path, int mode, int mask, uint64_t arg) 1439fa9e4066Sahrens { 1440fa9e4066Sahrens vdev_t *vd; 1441fa9e4066Sahrens 1442fa9e4066Sahrens spa_config_enter(spa, RW_WRITER); 1443fa9e4066Sahrens 1444fa9e4066Sahrens if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, path)) == NULL) { 1445fa9e4066Sahrens spa_config_exit(spa); 1446fa9e4066Sahrens return (ENODEV); 1447fa9e4066Sahrens } 1448fa9e4066Sahrens 1449fa9e4066Sahrens vd->vdev_fault_mode = mode; 1450fa9e4066Sahrens vd->vdev_fault_mask = mask; 1451fa9e4066Sahrens vd->vdev_fault_arg = arg; 1452fa9e4066Sahrens 1453fa9e4066Sahrens spa_config_exit(spa); 1454fa9e4066Sahrens 1455fa9e4066Sahrens return (0); 1456fa9e4066Sahrens } 1457fa9e4066Sahrens 1458fa9e4066Sahrens int 1459fa9e4066Sahrens vdev_is_dead(vdev_t *vd) 1460fa9e4066Sahrens { 1461fa9e4066Sahrens return (vd->vdev_state <= VDEV_STATE_CANT_OPEN); 1462fa9e4066Sahrens } 1463fa9e4066Sahrens 1464fa9e4066Sahrens int 1465fa9e4066Sahrens vdev_error_inject(vdev_t *vd, zio_t *zio) 1466fa9e4066Sahrens { 1467fa9e4066Sahrens int error = 0; 1468fa9e4066Sahrens 1469fa9e4066Sahrens if (vd->vdev_fault_mode == VDEV_FAULT_NONE) 1470fa9e4066Sahrens return (0); 1471fa9e4066Sahrens 1472fa9e4066Sahrens if (((1ULL << zio->io_type) & vd->vdev_fault_mask) == 0) 1473fa9e4066Sahrens return (0); 1474fa9e4066Sahrens 1475fa9e4066Sahrens switch (vd->vdev_fault_mode) { 1476fa9e4066Sahrens case VDEV_FAULT_RANDOM: 1477fa9e4066Sahrens if (spa_get_random(vd->vdev_fault_arg) == 0) 1478fa9e4066Sahrens error = EIO; 1479fa9e4066Sahrens break; 1480fa9e4066Sahrens 1481fa9e4066Sahrens case VDEV_FAULT_COUNT: 1482fa9e4066Sahrens if ((int64_t)--vd->vdev_fault_arg <= 0) 1483fa9e4066Sahrens vd->vdev_fault_mode = VDEV_FAULT_NONE; 1484fa9e4066Sahrens error = EIO; 1485fa9e4066Sahrens break; 1486fa9e4066Sahrens } 1487fa9e4066Sahrens 1488fa9e4066Sahrens if (error != 0) { 1489fa9e4066Sahrens dprintf("returning %d for type %d on %s state %d offset %llx\n", 1490fa9e4066Sahrens error, zio->io_type, vdev_description(vd), 1491fa9e4066Sahrens vd->vdev_state, zio->io_offset); 1492fa9e4066Sahrens } 1493fa9e4066Sahrens 1494fa9e4066Sahrens return (error); 1495fa9e4066Sahrens } 1496fa9e4066Sahrens 1497fa9e4066Sahrens /* 1498fa9e4066Sahrens * Get statistics for the given vdev. 1499fa9e4066Sahrens */ 1500fa9e4066Sahrens void 1501fa9e4066Sahrens vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) 1502fa9e4066Sahrens { 1503fa9e4066Sahrens vdev_t *rvd = vd->vdev_spa->spa_root_vdev; 1504fa9e4066Sahrens int c, t; 1505fa9e4066Sahrens 1506fa9e4066Sahrens mutex_enter(&vd->vdev_stat_lock); 1507fa9e4066Sahrens bcopy(&vd->vdev_stat, vs, sizeof (*vs)); 1508fa9e4066Sahrens vs->vs_timestamp = gethrtime() - vs->vs_timestamp; 1509fa9e4066Sahrens vs->vs_state = vd->vdev_state; 15102a79c5feSlling vs->vs_rsize = vdev_get_rsize(vd); 1511fa9e4066Sahrens mutex_exit(&vd->vdev_stat_lock); 1512fa9e4066Sahrens 1513fa9e4066Sahrens /* 1514fa9e4066Sahrens * If we're getting stats on the root vdev, aggregate the I/O counts 1515fa9e4066Sahrens * over all top-level vdevs (i.e. the direct children of the root). 1516fa9e4066Sahrens */ 1517fa9e4066Sahrens if (vd == rvd) { 1518fa9e4066Sahrens for (c = 0; c < rvd->vdev_children; c++) { 1519fa9e4066Sahrens vdev_t *cvd = rvd->vdev_child[c]; 1520fa9e4066Sahrens vdev_stat_t *cvs = &cvd->vdev_stat; 1521fa9e4066Sahrens 1522fa9e4066Sahrens mutex_enter(&vd->vdev_stat_lock); 1523fa9e4066Sahrens for (t = 0; t < ZIO_TYPES; t++) { 1524fa9e4066Sahrens vs->vs_ops[t] += cvs->vs_ops[t]; 1525fa9e4066Sahrens vs->vs_bytes[t] += cvs->vs_bytes[t]; 1526fa9e4066Sahrens } 1527fa9e4066Sahrens vs->vs_read_errors += cvs->vs_read_errors; 1528fa9e4066Sahrens vs->vs_write_errors += cvs->vs_write_errors; 1529fa9e4066Sahrens vs->vs_checksum_errors += cvs->vs_checksum_errors; 1530fa9e4066Sahrens vs->vs_scrub_examined += cvs->vs_scrub_examined; 1531fa9e4066Sahrens vs->vs_scrub_errors += cvs->vs_scrub_errors; 1532fa9e4066Sahrens mutex_exit(&vd->vdev_stat_lock); 1533fa9e4066Sahrens } 1534fa9e4066Sahrens } 1535fa9e4066Sahrens } 1536fa9e4066Sahrens 1537fa9e4066Sahrens void 1538fa9e4066Sahrens vdev_stat_update(zio_t *zio) 1539fa9e4066Sahrens { 1540fa9e4066Sahrens vdev_t *vd = zio->io_vd; 1541fa9e4066Sahrens vdev_t *pvd; 1542fa9e4066Sahrens uint64_t txg = zio->io_txg; 1543fa9e4066Sahrens vdev_stat_t *vs = &vd->vdev_stat; 1544fa9e4066Sahrens zio_type_t type = zio->io_type; 1545fa9e4066Sahrens int flags = zio->io_flags; 1546fa9e4066Sahrens 1547fa9e4066Sahrens if (zio->io_error == 0) { 1548fa9e4066Sahrens if (!(flags & ZIO_FLAG_IO_BYPASS)) { 1549fa9e4066Sahrens mutex_enter(&vd->vdev_stat_lock); 1550fa9e4066Sahrens vs->vs_ops[type]++; 1551fa9e4066Sahrens vs->vs_bytes[type] += zio->io_size; 1552fa9e4066Sahrens mutex_exit(&vd->vdev_stat_lock); 1553fa9e4066Sahrens } 1554fa9e4066Sahrens if ((flags & ZIO_FLAG_IO_REPAIR) && 1555fa9e4066Sahrens zio->io_delegate_list == NULL) { 1556fa9e4066Sahrens mutex_enter(&vd->vdev_stat_lock); 1557fa9e4066Sahrens if (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)) 1558fa9e4066Sahrens vs->vs_scrub_repaired += zio->io_size; 1559fa9e4066Sahrens else 1560fa9e4066Sahrens vs->vs_self_healed += zio->io_size; 1561fa9e4066Sahrens mutex_exit(&vd->vdev_stat_lock); 1562fa9e4066Sahrens } 1563fa9e4066Sahrens return; 1564fa9e4066Sahrens } 1565fa9e4066Sahrens 1566fa9e4066Sahrens if (flags & ZIO_FLAG_SPECULATIVE) 1567fa9e4066Sahrens return; 1568fa9e4066Sahrens 1569fa9e4066Sahrens if (!vdev_is_dead(vd)) { 1570fa9e4066Sahrens mutex_enter(&vd->vdev_stat_lock); 1571fa9e4066Sahrens if (type == ZIO_TYPE_READ) { 1572fa9e4066Sahrens if (zio->io_error == ECKSUM) 1573fa9e4066Sahrens vs->vs_checksum_errors++; 1574fa9e4066Sahrens else 1575fa9e4066Sahrens vs->vs_read_errors++; 1576fa9e4066Sahrens } 1577fa9e4066Sahrens if (type == ZIO_TYPE_WRITE) 1578fa9e4066Sahrens vs->vs_write_errors++; 1579fa9e4066Sahrens mutex_exit(&vd->vdev_stat_lock); 1580fa9e4066Sahrens } 1581fa9e4066Sahrens 1582fa9e4066Sahrens if (type == ZIO_TYPE_WRITE) { 1583fa9e4066Sahrens if (txg == 0 || vd->vdev_children != 0) 1584fa9e4066Sahrens return; 1585fa9e4066Sahrens if (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)) { 1586fa9e4066Sahrens ASSERT(flags & ZIO_FLAG_IO_REPAIR); 1587fa9e4066Sahrens for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) 1588fa9e4066Sahrens vdev_dtl_dirty(&pvd->vdev_dtl_scrub, txg, 1); 1589fa9e4066Sahrens } 1590fa9e4066Sahrens if (!(flags & ZIO_FLAG_IO_REPAIR)) { 1591fa9e4066Sahrens vdev_t *tvd = vd->vdev_top; 1592fa9e4066Sahrens if (vdev_dtl_contains(&vd->vdev_dtl_map, txg, 1)) 1593fa9e4066Sahrens return; 1594fa9e4066Sahrens vdev_dirty(tvd, VDD_DTL, txg); 1595fa9e4066Sahrens (void) txg_list_add(&tvd->vdev_dtl_list, vd, txg); 1596fa9e4066Sahrens for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) 1597fa9e4066Sahrens vdev_dtl_dirty(&pvd->vdev_dtl_map, txg, 1); 1598fa9e4066Sahrens } 1599fa9e4066Sahrens } 1600fa9e4066Sahrens } 1601fa9e4066Sahrens 1602fa9e4066Sahrens void 1603fa9e4066Sahrens vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete) 1604fa9e4066Sahrens { 1605fa9e4066Sahrens int c; 1606fa9e4066Sahrens vdev_stat_t *vs = &vd->vdev_stat; 1607fa9e4066Sahrens 1608fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 1609fa9e4066Sahrens vdev_scrub_stat_update(vd->vdev_child[c], type, complete); 1610fa9e4066Sahrens 1611fa9e4066Sahrens mutex_enter(&vd->vdev_stat_lock); 1612fa9e4066Sahrens 1613fa9e4066Sahrens if (type == POOL_SCRUB_NONE) { 1614fa9e4066Sahrens /* 1615fa9e4066Sahrens * Update completion and end time. Leave everything else alone 1616fa9e4066Sahrens * so we can report what happened during the previous scrub. 1617fa9e4066Sahrens */ 1618fa9e4066Sahrens vs->vs_scrub_complete = complete; 1619fa9e4066Sahrens vs->vs_scrub_end = gethrestime_sec(); 1620fa9e4066Sahrens } else { 1621fa9e4066Sahrens vs->vs_scrub_type = type; 1622fa9e4066Sahrens vs->vs_scrub_complete = 0; 1623fa9e4066Sahrens vs->vs_scrub_examined = 0; 1624fa9e4066Sahrens vs->vs_scrub_repaired = 0; 1625fa9e4066Sahrens vs->vs_scrub_errors = 0; 1626fa9e4066Sahrens vs->vs_scrub_start = gethrestime_sec(); 1627fa9e4066Sahrens vs->vs_scrub_end = 0; 1628fa9e4066Sahrens } 1629fa9e4066Sahrens 1630fa9e4066Sahrens mutex_exit(&vd->vdev_stat_lock); 1631fa9e4066Sahrens } 1632fa9e4066Sahrens 1633fa9e4066Sahrens /* 1634fa9e4066Sahrens * Report checksum errors that a vdev that didn't realize it made. 1635fa9e4066Sahrens * This can happen, for example, when RAID-Z combinatorial reconstruction 1636fa9e4066Sahrens * infers that one of its components returned bad data. 1637fa9e4066Sahrens */ 1638fa9e4066Sahrens void 1639fa9e4066Sahrens vdev_checksum_error(zio_t *zio, vdev_t *vd) 1640fa9e4066Sahrens { 1641fa9e4066Sahrens dprintf_bp(zio->io_bp, "imputed checksum error on %s: ", 1642fa9e4066Sahrens vdev_description(vd)); 1643fa9e4066Sahrens 1644fa9e4066Sahrens if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 1645fa9e4066Sahrens mutex_enter(&vd->vdev_stat_lock); 1646fa9e4066Sahrens vd->vdev_stat.vs_checksum_errors++; 1647fa9e4066Sahrens mutex_exit(&vd->vdev_stat_lock); 1648fa9e4066Sahrens } 1649fa9e4066Sahrens } 1650fa9e4066Sahrens 1651fa9e4066Sahrens /* 1652fa9e4066Sahrens * Update the in-core space usage stats for this vdev and the root vdev. 1653fa9e4066Sahrens */ 1654fa9e4066Sahrens void 1655fa9e4066Sahrens vdev_space_update(vdev_t *vd, uint64_t space_delta, uint64_t alloc_delta) 1656fa9e4066Sahrens { 1657fa9e4066Sahrens ASSERT(vd == vd->vdev_top); 1658fa9e4066Sahrens 1659fa9e4066Sahrens do { 1660fa9e4066Sahrens mutex_enter(&vd->vdev_stat_lock); 1661fa9e4066Sahrens vd->vdev_stat.vs_space += space_delta; 1662fa9e4066Sahrens vd->vdev_stat.vs_alloc += alloc_delta; 1663fa9e4066Sahrens mutex_exit(&vd->vdev_stat_lock); 1664fa9e4066Sahrens } while ((vd = vd->vdev_parent) != NULL); 1665fa9e4066Sahrens } 1666fa9e4066Sahrens 1667fa9e4066Sahrens /* 1668fa9e4066Sahrens * Various knobs to tune a vdev. 1669fa9e4066Sahrens */ 1670fa9e4066Sahrens static vdev_knob_t vdev_knob[] = { 1671fa9e4066Sahrens { 1672fa9e4066Sahrens "cache_size", 1673fa9e4066Sahrens "size of the read-ahead cache", 1674fa9e4066Sahrens 0, 1675fa9e4066Sahrens 1ULL << 30, 1676fa9e4066Sahrens 10ULL << 20, 1677fa9e4066Sahrens offsetof(struct vdev, vdev_cache.vc_size) 1678fa9e4066Sahrens }, 1679fa9e4066Sahrens { 1680fa9e4066Sahrens "cache_bshift", 1681fa9e4066Sahrens "log2 of cache blocksize", 1682fa9e4066Sahrens SPA_MINBLOCKSHIFT, 1683fa9e4066Sahrens SPA_MAXBLOCKSHIFT, 1684fa9e4066Sahrens 16, 1685fa9e4066Sahrens offsetof(struct vdev, vdev_cache.vc_bshift) 1686fa9e4066Sahrens }, 1687fa9e4066Sahrens { 1688fa9e4066Sahrens "cache_max", 1689fa9e4066Sahrens "largest block size to cache", 1690fa9e4066Sahrens 0, 1691fa9e4066Sahrens SPA_MAXBLOCKSIZE, 1692fa9e4066Sahrens 1ULL << 14, 1693fa9e4066Sahrens offsetof(struct vdev, vdev_cache.vc_max) 1694fa9e4066Sahrens }, 1695fa9e4066Sahrens { 1696fa9e4066Sahrens "min_pending", 1697fa9e4066Sahrens "minimum pending I/Os to the disk", 1698fa9e4066Sahrens 1, 1699fa9e4066Sahrens 10000, 1700fa9e4066Sahrens 2, 1701fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_min_pending) 1702fa9e4066Sahrens }, 1703fa9e4066Sahrens { 1704fa9e4066Sahrens "max_pending", 1705fa9e4066Sahrens "maximum pending I/Os to the disk", 1706fa9e4066Sahrens 1, 1707fa9e4066Sahrens 10000, 1708fa9e4066Sahrens 35, 1709fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_max_pending) 1710fa9e4066Sahrens }, 1711fa9e4066Sahrens { 1712fa9e4066Sahrens "agg_limit", 1713fa9e4066Sahrens "maximum size of aggregated I/Os", 1714fa9e4066Sahrens 0, 1715fa9e4066Sahrens SPA_MAXBLOCKSIZE, 1716fa9e4066Sahrens SPA_MAXBLOCKSIZE, 1717fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_agg_limit) 1718fa9e4066Sahrens }, 1719fa9e4066Sahrens { 1720fa9e4066Sahrens "time_shift", 1721fa9e4066Sahrens "deadline = pri + (lbolt >> time_shift)", 1722fa9e4066Sahrens 0, 1723fa9e4066Sahrens 63, 1724fa9e4066Sahrens 4, 1725fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_time_shift) 1726fa9e4066Sahrens }, 1727fa9e4066Sahrens { 1728fa9e4066Sahrens "ramp_rate", 1729fa9e4066Sahrens "exponential I/O issue ramp-up rate", 1730fa9e4066Sahrens 1, 1731fa9e4066Sahrens 10000, 1732fa9e4066Sahrens 2, 1733fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_ramp_rate) 1734fa9e4066Sahrens }, 1735fa9e4066Sahrens }; 1736fa9e4066Sahrens 1737fa9e4066Sahrens vdev_knob_t * 1738fa9e4066Sahrens vdev_knob_next(vdev_knob_t *vk) 1739fa9e4066Sahrens { 1740fa9e4066Sahrens if (vk == NULL) 1741fa9e4066Sahrens return (vdev_knob); 1742fa9e4066Sahrens 1743fa9e4066Sahrens if (++vk == vdev_knob + sizeof (vdev_knob) / sizeof (vdev_knob_t)) 1744fa9e4066Sahrens return (NULL); 1745fa9e4066Sahrens 1746fa9e4066Sahrens return (vk); 1747fa9e4066Sahrens } 1748fa9e4066Sahrens 1749fa9e4066Sahrens /* 1750fa9e4066Sahrens * Mark a top-level vdev's config as dirty, placing it on the dirty list 1751fa9e4066Sahrens * so that it will be written out next time the vdev configuration is synced. 1752fa9e4066Sahrens * If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs. 1753fa9e4066Sahrens */ 1754fa9e4066Sahrens void 1755fa9e4066Sahrens vdev_config_dirty(vdev_t *vd) 1756fa9e4066Sahrens { 1757fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 1758fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 1759fa9e4066Sahrens int c; 1760fa9e4066Sahrens 1761fa9e4066Sahrens if (vd == rvd) { 1762fa9e4066Sahrens for (c = 0; c < rvd->vdev_children; c++) 1763fa9e4066Sahrens vdev_config_dirty(rvd->vdev_child[c]); 1764fa9e4066Sahrens } else { 1765fa9e4066Sahrens ASSERT(vd == vd->vdev_top); 1766fa9e4066Sahrens 1767fa9e4066Sahrens if (!vd->vdev_is_dirty) { 1768fa9e4066Sahrens list_insert_head(&spa->spa_dirty_list, vd); 1769fa9e4066Sahrens vd->vdev_is_dirty = B_TRUE; 1770fa9e4066Sahrens } 1771fa9e4066Sahrens } 1772fa9e4066Sahrens } 1773fa9e4066Sahrens 1774fa9e4066Sahrens void 1775fa9e4066Sahrens vdev_config_clean(vdev_t *vd) 1776fa9e4066Sahrens { 1777fa9e4066Sahrens ASSERT(vd->vdev_is_dirty); 1778fa9e4066Sahrens 1779fa9e4066Sahrens list_remove(&vd->vdev_spa->spa_dirty_list, vd); 1780fa9e4066Sahrens vd->vdev_is_dirty = B_FALSE; 1781fa9e4066Sahrens } 1782fa9e4066Sahrens 1783fa9e4066Sahrens /* 1784fa9e4066Sahrens * Set a vdev's state, updating any parent's state as well. 1785fa9e4066Sahrens */ 1786fa9e4066Sahrens void 1787fa9e4066Sahrens vdev_set_state(vdev_t *vd, vdev_state_t state, vdev_aux_t aux) 1788fa9e4066Sahrens { 1789fa9e4066Sahrens if (state == vd->vdev_state) 1790fa9e4066Sahrens return; 1791fa9e4066Sahrens 1792fa9e4066Sahrens vd->vdev_state = state; 1793fa9e4066Sahrens vd->vdev_stat.vs_aux = aux; 1794fa9e4066Sahrens 1795fa9e4066Sahrens if (vd->vdev_parent != NULL) { 1796fa9e4066Sahrens int c; 1797fa9e4066Sahrens int degraded = 0, faulted = 0; 1798fa9e4066Sahrens vdev_t *parent, *child; 1799fa9e4066Sahrens 1800fa9e4066Sahrens parent = vd->vdev_parent; 1801fa9e4066Sahrens for (c = 0; c < parent->vdev_children; c++) { 1802fa9e4066Sahrens child = parent->vdev_child[c]; 1803fa9e4066Sahrens if (child->vdev_state <= VDEV_STATE_CANT_OPEN) 1804fa9e4066Sahrens faulted++; 1805fa9e4066Sahrens else if (child->vdev_state == VDEV_STATE_DEGRADED) 1806fa9e4066Sahrens degraded++; 1807fa9e4066Sahrens } 1808fa9e4066Sahrens 1809fa9e4066Sahrens vd->vdev_parent->vdev_ops->vdev_op_state_change( 1810fa9e4066Sahrens vd->vdev_parent, faulted, degraded); 1811fa9e4066Sahrens } 1812fa9e4066Sahrens } 1813