1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5fa9e4066Sahrens * Common Development and Distribution License, Version 1.0 only 6fa9e4066Sahrens * (the "License"). You may not use this file except in compliance 7fa9e4066Sahrens * with the License. 8fa9e4066Sahrens * 9fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 11fa9e4066Sahrens * See the License for the specific language governing permissions 12fa9e4066Sahrens * and limitations under the License. 13fa9e4066Sahrens * 14fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 17fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19fa9e4066Sahrens * 20fa9e4066Sahrens * CDDL HEADER END 21fa9e4066Sahrens */ 22fa9e4066Sahrens /* 23fa9e4066Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24fa9e4066Sahrens * Use is subject to license terms. 
25fa9e4066Sahrens */ 26fa9e4066Sahrens 27fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28fa9e4066Sahrens 29fa9e4066Sahrens #include <sys/zfs_context.h> 30fa9e4066Sahrens #include <sys/spa.h> 31fa9e4066Sahrens #include <sys/spa_impl.h> 32fa9e4066Sahrens #include <sys/dmu.h> 33fa9e4066Sahrens #include <sys/dmu_tx.h> 34fa9e4066Sahrens #include <sys/vdev_impl.h> 35fa9e4066Sahrens #include <sys/uberblock_impl.h> 36fa9e4066Sahrens #include <sys/metaslab.h> 37fa9e4066Sahrens #include <sys/metaslab_impl.h> 38fa9e4066Sahrens #include <sys/space_map.h> 39fa9e4066Sahrens #include <sys/zio.h> 40fa9e4066Sahrens #include <sys/zap.h> 41fa9e4066Sahrens #include <sys/fs/zfs.h> 42fa9e4066Sahrens 43fa9e4066Sahrens /* 44fa9e4066Sahrens * Virtual device management. 45fa9e4066Sahrens */ 46fa9e4066Sahrens 47fa9e4066Sahrens static vdev_ops_t *vdev_ops_table[] = { 48fa9e4066Sahrens &vdev_root_ops, 49fa9e4066Sahrens &vdev_raidz_ops, 50fa9e4066Sahrens &vdev_mirror_ops, 51fa9e4066Sahrens &vdev_replacing_ops, 52fa9e4066Sahrens &vdev_disk_ops, 53fa9e4066Sahrens &vdev_file_ops, 54fa9e4066Sahrens &vdev_missing_ops, 55fa9e4066Sahrens NULL 56fa9e4066Sahrens }; 57fa9e4066Sahrens 58fa9e4066Sahrens /* 59fa9e4066Sahrens * Given a vdev type, return the appropriate ops vector. 60fa9e4066Sahrens */ 61fa9e4066Sahrens static vdev_ops_t * 62fa9e4066Sahrens vdev_getops(const char *type) 63fa9e4066Sahrens { 64fa9e4066Sahrens vdev_ops_t *ops, **opspp; 65fa9e4066Sahrens 66fa9e4066Sahrens for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++) 67fa9e4066Sahrens if (strcmp(ops->vdev_op_type, type) == 0) 68fa9e4066Sahrens break; 69fa9e4066Sahrens 70fa9e4066Sahrens return (ops); 71fa9e4066Sahrens } 72fa9e4066Sahrens 73fa9e4066Sahrens /* 74fa9e4066Sahrens * Default asize function: return the MAX of psize with the asize of 75fa9e4066Sahrens * all children. This is what's used by anything other than RAID-Z. 
76fa9e4066Sahrens */ 77fa9e4066Sahrens uint64_t 78fa9e4066Sahrens vdev_default_asize(vdev_t *vd, uint64_t psize) 79fa9e4066Sahrens { 80fa9e4066Sahrens uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_ashift); 81fa9e4066Sahrens uint64_t csize; 82fa9e4066Sahrens uint64_t c; 83fa9e4066Sahrens 84fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) { 85fa9e4066Sahrens csize = vdev_psize_to_asize(vd->vdev_child[c], psize); 86fa9e4066Sahrens asize = MAX(asize, csize); 87fa9e4066Sahrens } 88fa9e4066Sahrens 89fa9e4066Sahrens return (asize); 90fa9e4066Sahrens } 91fa9e4066Sahrens 92*2a79c5feSlling /* 93*2a79c5feSlling * Get the replaceable or attachable device size. 94*2a79c5feSlling * If the parent is a mirror or raidz, the replaceable size is the minimum 95*2a79c5feSlling * psize of all its children. For the rest, just return our own psize. 96*2a79c5feSlling * 97*2a79c5feSlling * e.g. 98*2a79c5feSlling * psize rsize 99*2a79c5feSlling * root - - 100*2a79c5feSlling * mirror/raidz - - 101*2a79c5feSlling * disk1 20g 20g 102*2a79c5feSlling * disk2 40g 20g 103*2a79c5feSlling * disk3 80g 80g 104*2a79c5feSlling */ 105*2a79c5feSlling uint64_t 106*2a79c5feSlling vdev_get_rsize(vdev_t *vd) 107*2a79c5feSlling { 108*2a79c5feSlling vdev_t *pvd, *cvd; 109*2a79c5feSlling uint64_t c, rsize; 110*2a79c5feSlling 111*2a79c5feSlling pvd = vd->vdev_parent; 112*2a79c5feSlling 113*2a79c5feSlling /* 114*2a79c5feSlling * If our parent is NULL or the root, just return our own psize. 
115*2a79c5feSlling */ 116*2a79c5feSlling if (pvd == NULL || pvd->vdev_parent == NULL) 117*2a79c5feSlling return (vd->vdev_psize); 118*2a79c5feSlling 119*2a79c5feSlling rsize = 0; 120*2a79c5feSlling 121*2a79c5feSlling for (c = 0; c < pvd->vdev_children; c++) { 122*2a79c5feSlling cvd = pvd->vdev_child[c]; 123*2a79c5feSlling rsize = MIN(rsize - 1, cvd->vdev_psize - 1) + 1; 124*2a79c5feSlling } 125*2a79c5feSlling 126*2a79c5feSlling return (rsize); 127*2a79c5feSlling } 128*2a79c5feSlling 129fa9e4066Sahrens vdev_t * 130fa9e4066Sahrens vdev_lookup_top(spa_t *spa, uint64_t vdev) 131fa9e4066Sahrens { 132fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 133fa9e4066Sahrens 134fa9e4066Sahrens if (vdev < rvd->vdev_children) 135fa9e4066Sahrens return (rvd->vdev_child[vdev]); 136fa9e4066Sahrens 137fa9e4066Sahrens return (NULL); 138fa9e4066Sahrens } 139fa9e4066Sahrens 140fa9e4066Sahrens vdev_t * 141fa9e4066Sahrens vdev_lookup_by_path(vdev_t *vd, const char *path) 142fa9e4066Sahrens { 143fa9e4066Sahrens int c; 144fa9e4066Sahrens vdev_t *mvd; 145fa9e4066Sahrens 146fa9e4066Sahrens if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) 147fa9e4066Sahrens return (vd); 148fa9e4066Sahrens 149fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 150fa9e4066Sahrens if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 151fa9e4066Sahrens NULL) 152fa9e4066Sahrens return (mvd); 153fa9e4066Sahrens 154fa9e4066Sahrens return (NULL); 155fa9e4066Sahrens } 156fa9e4066Sahrens 157fa9e4066Sahrens vdev_t * 158fa9e4066Sahrens vdev_lookup_by_guid(vdev_t *vd, uint64_t guid) 159fa9e4066Sahrens { 160fa9e4066Sahrens int c; 161fa9e4066Sahrens vdev_t *mvd; 162fa9e4066Sahrens 163fa9e4066Sahrens if (vd->vdev_children == 0 && vd->vdev_guid == guid) 164fa9e4066Sahrens return (vd); 165fa9e4066Sahrens 166fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 167fa9e4066Sahrens if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) != 168fa9e4066Sahrens NULL) 169fa9e4066Sahrens return (mvd); 
170fa9e4066Sahrens 171fa9e4066Sahrens return (NULL); 172fa9e4066Sahrens } 173fa9e4066Sahrens 174fa9e4066Sahrens void 175fa9e4066Sahrens vdev_add_child(vdev_t *pvd, vdev_t *cvd) 176fa9e4066Sahrens { 177fa9e4066Sahrens size_t oldsize, newsize; 178fa9e4066Sahrens uint64_t id = cvd->vdev_id; 179fa9e4066Sahrens vdev_t **newchild; 180fa9e4066Sahrens 181fa9e4066Sahrens ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER)); 182fa9e4066Sahrens ASSERT(cvd->vdev_parent == NULL); 183fa9e4066Sahrens 184fa9e4066Sahrens cvd->vdev_parent = pvd; 185fa9e4066Sahrens 186fa9e4066Sahrens if (pvd == NULL) 187fa9e4066Sahrens return; 188fa9e4066Sahrens 189fa9e4066Sahrens ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL); 190fa9e4066Sahrens 191fa9e4066Sahrens oldsize = pvd->vdev_children * sizeof (vdev_t *); 192fa9e4066Sahrens pvd->vdev_children = MAX(pvd->vdev_children, id + 1); 193fa9e4066Sahrens newsize = pvd->vdev_children * sizeof (vdev_t *); 194fa9e4066Sahrens 195fa9e4066Sahrens newchild = kmem_zalloc(newsize, KM_SLEEP); 196fa9e4066Sahrens if (pvd->vdev_child != NULL) { 197fa9e4066Sahrens bcopy(pvd->vdev_child, newchild, oldsize); 198fa9e4066Sahrens kmem_free(pvd->vdev_child, oldsize); 199fa9e4066Sahrens } 200fa9e4066Sahrens 201fa9e4066Sahrens pvd->vdev_child = newchild; 202fa9e4066Sahrens pvd->vdev_child[id] = cvd; 203fa9e4066Sahrens 204fa9e4066Sahrens cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd); 205fa9e4066Sahrens ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL); 206fa9e4066Sahrens 207fa9e4066Sahrens /* 208fa9e4066Sahrens * Walk up all ancestors to update guid sum. 
209fa9e4066Sahrens */ 210fa9e4066Sahrens for (; pvd != NULL; pvd = pvd->vdev_parent) 211fa9e4066Sahrens pvd->vdev_guid_sum += cvd->vdev_guid_sum; 212fa9e4066Sahrens } 213fa9e4066Sahrens 214fa9e4066Sahrens void 215fa9e4066Sahrens vdev_remove_child(vdev_t *pvd, vdev_t *cvd) 216fa9e4066Sahrens { 217fa9e4066Sahrens int c; 218fa9e4066Sahrens uint_t id = cvd->vdev_id; 219fa9e4066Sahrens 220fa9e4066Sahrens ASSERT(cvd->vdev_parent == pvd); 221fa9e4066Sahrens 222fa9e4066Sahrens if (pvd == NULL) 223fa9e4066Sahrens return; 224fa9e4066Sahrens 225fa9e4066Sahrens ASSERT(id < pvd->vdev_children); 226fa9e4066Sahrens ASSERT(pvd->vdev_child[id] == cvd); 227fa9e4066Sahrens 228fa9e4066Sahrens pvd->vdev_child[id] = NULL; 229fa9e4066Sahrens cvd->vdev_parent = NULL; 230fa9e4066Sahrens 231fa9e4066Sahrens for (c = 0; c < pvd->vdev_children; c++) 232fa9e4066Sahrens if (pvd->vdev_child[c]) 233fa9e4066Sahrens break; 234fa9e4066Sahrens 235fa9e4066Sahrens if (c == pvd->vdev_children) { 236fa9e4066Sahrens kmem_free(pvd->vdev_child, c * sizeof (vdev_t *)); 237fa9e4066Sahrens pvd->vdev_child = NULL; 238fa9e4066Sahrens pvd->vdev_children = 0; 239fa9e4066Sahrens } 240fa9e4066Sahrens 241fa9e4066Sahrens /* 242fa9e4066Sahrens * Walk up all ancestors to update guid sum. 243fa9e4066Sahrens */ 244fa9e4066Sahrens for (; pvd != NULL; pvd = pvd->vdev_parent) 245fa9e4066Sahrens pvd->vdev_guid_sum -= cvd->vdev_guid_sum; 246fa9e4066Sahrens } 247fa9e4066Sahrens 248fa9e4066Sahrens /* 249fa9e4066Sahrens * Remove any holes in the child array. 
250fa9e4066Sahrens */ 251fa9e4066Sahrens void 252fa9e4066Sahrens vdev_compact_children(vdev_t *pvd) 253fa9e4066Sahrens { 254fa9e4066Sahrens vdev_t **newchild, *cvd; 255fa9e4066Sahrens int oldc = pvd->vdev_children; 256fa9e4066Sahrens int newc, c; 257fa9e4066Sahrens 258fa9e4066Sahrens ASSERT(spa_config_held(pvd->vdev_spa, RW_WRITER)); 259fa9e4066Sahrens 260fa9e4066Sahrens for (c = newc = 0; c < oldc; c++) 261fa9e4066Sahrens if (pvd->vdev_child[c]) 262fa9e4066Sahrens newc++; 263fa9e4066Sahrens 264fa9e4066Sahrens newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP); 265fa9e4066Sahrens 266fa9e4066Sahrens for (c = newc = 0; c < oldc; c++) { 267fa9e4066Sahrens if ((cvd = pvd->vdev_child[c]) != NULL) { 268fa9e4066Sahrens newchild[newc] = cvd; 269fa9e4066Sahrens cvd->vdev_id = newc++; 270fa9e4066Sahrens } 271fa9e4066Sahrens } 272fa9e4066Sahrens 273fa9e4066Sahrens kmem_free(pvd->vdev_child, oldc * sizeof (vdev_t *)); 274fa9e4066Sahrens pvd->vdev_child = newchild; 275fa9e4066Sahrens pvd->vdev_children = newc; 276fa9e4066Sahrens } 277fa9e4066Sahrens 278fa9e4066Sahrens /* 279fa9e4066Sahrens * Allocate and minimally initialize a vdev_t. 
280fa9e4066Sahrens */ 281fa9e4066Sahrens static vdev_t * 282fa9e4066Sahrens vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) 283fa9e4066Sahrens { 284fa9e4066Sahrens vdev_t *vd; 285fa9e4066Sahrens 286fa9e4066Sahrens while (guid == 0) 287fa9e4066Sahrens guid = spa_get_random(-1ULL); 288fa9e4066Sahrens 289fa9e4066Sahrens vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); 290fa9e4066Sahrens 291fa9e4066Sahrens vd->vdev_spa = spa; 292fa9e4066Sahrens vd->vdev_id = id; 293fa9e4066Sahrens vd->vdev_guid = guid; 294fa9e4066Sahrens vd->vdev_guid_sum = guid; 295fa9e4066Sahrens vd->vdev_ops = ops; 296fa9e4066Sahrens vd->vdev_state = VDEV_STATE_CLOSED; 297fa9e4066Sahrens 298fa9e4066Sahrens mutex_init(&vd->vdev_io_lock, NULL, MUTEX_DEFAULT, NULL); 299fa9e4066Sahrens cv_init(&vd->vdev_io_cv, NULL, CV_DEFAULT, NULL); 300fa9e4066Sahrens list_create(&vd->vdev_io_pending, sizeof (zio_t), 301fa9e4066Sahrens offsetof(zio_t, io_pending)); 302fa9e4066Sahrens mutex_init(&vd->vdev_dirty_lock, NULL, MUTEX_DEFAULT, NULL); 303fa9e4066Sahrens mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL); 304fa9e4066Sahrens space_map_create(&vd->vdev_dtl_map, 0, -1ULL, 0, &vd->vdev_dtl_lock); 305fa9e4066Sahrens space_map_create(&vd->vdev_dtl_scrub, 0, -1ULL, 0, &vd->vdev_dtl_lock); 306fa9e4066Sahrens txg_list_create(&vd->vdev_ms_list, 307fa9e4066Sahrens offsetof(struct metaslab, ms_txg_node)); 308fa9e4066Sahrens txg_list_create(&vd->vdev_dtl_list, 309fa9e4066Sahrens offsetof(struct vdev, vdev_dtl_node)); 310fa9e4066Sahrens vd->vdev_stat.vs_timestamp = gethrtime(); 311fa9e4066Sahrens 312fa9e4066Sahrens return (vd); 313fa9e4066Sahrens } 314fa9e4066Sahrens 315fa9e4066Sahrens /* 316fa9e4066Sahrens * Free a vdev_t that has been removed from service. 
317fa9e4066Sahrens */ 318fa9e4066Sahrens static void 319fa9e4066Sahrens vdev_free_common(vdev_t *vd) 320fa9e4066Sahrens { 321fa9e4066Sahrens if (vd->vdev_path) 322fa9e4066Sahrens spa_strfree(vd->vdev_path); 323fa9e4066Sahrens if (vd->vdev_devid) 324fa9e4066Sahrens spa_strfree(vd->vdev_devid); 325fa9e4066Sahrens 326fa9e4066Sahrens txg_list_destroy(&vd->vdev_ms_list); 327fa9e4066Sahrens txg_list_destroy(&vd->vdev_dtl_list); 328fa9e4066Sahrens mutex_enter(&vd->vdev_dtl_lock); 329fa9e4066Sahrens space_map_vacate(&vd->vdev_dtl_map, NULL, NULL); 330fa9e4066Sahrens space_map_destroy(&vd->vdev_dtl_map); 331fa9e4066Sahrens space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); 332fa9e4066Sahrens space_map_destroy(&vd->vdev_dtl_scrub); 333fa9e4066Sahrens mutex_exit(&vd->vdev_dtl_lock); 334fa9e4066Sahrens mutex_destroy(&vd->vdev_dtl_lock); 335fa9e4066Sahrens mutex_destroy(&vd->vdev_dirty_lock); 336fa9e4066Sahrens list_destroy(&vd->vdev_io_pending); 337fa9e4066Sahrens mutex_destroy(&vd->vdev_io_lock); 338fa9e4066Sahrens cv_destroy(&vd->vdev_io_cv); 339fa9e4066Sahrens 340fa9e4066Sahrens kmem_free(vd, sizeof (vdev_t)); 341fa9e4066Sahrens } 342fa9e4066Sahrens 343fa9e4066Sahrens /* 344fa9e4066Sahrens * Allocate a new vdev. The 'alloctype' is used to control whether we are 345fa9e4066Sahrens * creating a new vdev or loading an existing one - the behavior is slightly 346fa9e4066Sahrens * different for each case. 
347fa9e4066Sahrens */ 348fa9e4066Sahrens vdev_t * 349fa9e4066Sahrens vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) 350fa9e4066Sahrens { 351fa9e4066Sahrens vdev_ops_t *ops; 352fa9e4066Sahrens char *type; 353fa9e4066Sahrens uint64_t guid = 0; 354fa9e4066Sahrens vdev_t *vd; 355fa9e4066Sahrens 356fa9e4066Sahrens ASSERT(spa_config_held(spa, RW_WRITER)); 357fa9e4066Sahrens 358fa9e4066Sahrens if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) 359fa9e4066Sahrens return (NULL); 360fa9e4066Sahrens 361fa9e4066Sahrens if ((ops = vdev_getops(type)) == NULL) 362fa9e4066Sahrens return (NULL); 363fa9e4066Sahrens 364fa9e4066Sahrens /* 365fa9e4066Sahrens * If this is a load, get the vdev guid from the nvlist. 366fa9e4066Sahrens * Otherwise, vdev_alloc_common() will generate one for us. 367fa9e4066Sahrens */ 368fa9e4066Sahrens if (alloctype == VDEV_ALLOC_LOAD) { 369fa9e4066Sahrens uint64_t label_id; 370fa9e4066Sahrens 371fa9e4066Sahrens if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) || 372fa9e4066Sahrens label_id != id) 373fa9e4066Sahrens return (NULL); 374fa9e4066Sahrens 375fa9e4066Sahrens if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) 376fa9e4066Sahrens return (NULL); 377fa9e4066Sahrens } 378fa9e4066Sahrens 379fa9e4066Sahrens vd = vdev_alloc_common(spa, id, guid, ops); 380fa9e4066Sahrens 381fa9e4066Sahrens if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) 382fa9e4066Sahrens vd->vdev_path = spa_strdup(vd->vdev_path); 383fa9e4066Sahrens if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0) 384fa9e4066Sahrens vd->vdev_devid = spa_strdup(vd->vdev_devid); 385fa9e4066Sahrens 386afefbcddSeschrock /* 387afefbcddSeschrock * Set the whole_disk property. If it's not specified, leave the value 388afefbcddSeschrock * as -1. 
389afefbcddSeschrock */ 390afefbcddSeschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 391afefbcddSeschrock &vd->vdev_wholedisk) != 0) 392afefbcddSeschrock vd->vdev_wholedisk = -1ULL; 393afefbcddSeschrock 394fa9e4066Sahrens /* 395fa9e4066Sahrens * If we're a top-level vdev, try to load the allocation parameters. 396fa9e4066Sahrens */ 397fa9e4066Sahrens if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) { 398fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, 399fa9e4066Sahrens &vd->vdev_ms_array); 400fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, 401fa9e4066Sahrens &vd->vdev_ms_shift); 402fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, 403fa9e4066Sahrens &vd->vdev_ashift); 404fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE, 405fa9e4066Sahrens &vd->vdev_asize); 406fa9e4066Sahrens } 407fa9e4066Sahrens 408fa9e4066Sahrens /* 409fa9e4066Sahrens * If we're a leaf vdev, try to load the DTL object. 410fa9e4066Sahrens */ 411fa9e4066Sahrens if (vd->vdev_ops->vdev_op_leaf && alloctype == VDEV_ALLOC_LOAD) { 412fa9e4066Sahrens (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL, 413fa9e4066Sahrens &vd->vdev_dtl.smo_object); 414fa9e4066Sahrens } 415fa9e4066Sahrens 416fa9e4066Sahrens /* 417fa9e4066Sahrens * Add ourselves to the parent's list of children. 418fa9e4066Sahrens */ 419fa9e4066Sahrens vdev_add_child(parent, vd); 420fa9e4066Sahrens 421fa9e4066Sahrens return (vd); 422fa9e4066Sahrens } 423fa9e4066Sahrens 424fa9e4066Sahrens void 425fa9e4066Sahrens vdev_free(vdev_t *vd) 426fa9e4066Sahrens { 427fa9e4066Sahrens int c; 428fa9e4066Sahrens 429fa9e4066Sahrens /* 430fa9e4066Sahrens * vdev_free() implies closing the vdev first. This is simpler than 431fa9e4066Sahrens * trying to ensure complicated semantics for all callers. 
432fa9e4066Sahrens */ 433fa9e4066Sahrens vdev_close(vd); 434fa9e4066Sahrens 435fa9e4066Sahrens /* 436fa9e4066Sahrens * It's possible to free a vdev that's been added to the dirty 437fa9e4066Sahrens * list when in the middle of spa_vdev_add(). Handle that case 438fa9e4066Sahrens * correctly here. 439fa9e4066Sahrens */ 440fa9e4066Sahrens if (vd->vdev_is_dirty) 441fa9e4066Sahrens vdev_config_clean(vd); 442fa9e4066Sahrens 443fa9e4066Sahrens /* 444fa9e4066Sahrens * Free all children. 445fa9e4066Sahrens */ 446fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 447fa9e4066Sahrens vdev_free(vd->vdev_child[c]); 448fa9e4066Sahrens 449fa9e4066Sahrens ASSERT(vd->vdev_child == NULL); 450fa9e4066Sahrens ASSERT(vd->vdev_guid_sum == vd->vdev_guid); 451fa9e4066Sahrens 452fa9e4066Sahrens /* 453fa9e4066Sahrens * Discard allocation state. 454fa9e4066Sahrens */ 455fa9e4066Sahrens if (vd == vd->vdev_top) 456fa9e4066Sahrens vdev_metaslab_fini(vd); 457fa9e4066Sahrens 458fa9e4066Sahrens ASSERT3U(vd->vdev_stat.vs_space, ==, 0); 459fa9e4066Sahrens ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); 460fa9e4066Sahrens 461fa9e4066Sahrens /* 462fa9e4066Sahrens * Remove this vdev from its parent's child list. 463fa9e4066Sahrens */ 464fa9e4066Sahrens vdev_remove_child(vd->vdev_parent, vd); 465fa9e4066Sahrens 466fa9e4066Sahrens ASSERT(vd->vdev_parent == NULL); 467fa9e4066Sahrens 468fa9e4066Sahrens vdev_free_common(vd); 469fa9e4066Sahrens } 470fa9e4066Sahrens 471fa9e4066Sahrens /* 472fa9e4066Sahrens * Transfer top-level vdev state from svd to tvd. 
473fa9e4066Sahrens */ 474fa9e4066Sahrens static void 475fa9e4066Sahrens vdev_top_transfer(vdev_t *svd, vdev_t *tvd) 476fa9e4066Sahrens { 477fa9e4066Sahrens spa_t *spa = svd->vdev_spa; 478fa9e4066Sahrens metaslab_t *msp; 479fa9e4066Sahrens vdev_t *vd; 480fa9e4066Sahrens int t; 481fa9e4066Sahrens 482fa9e4066Sahrens ASSERT(tvd == tvd->vdev_top); 483fa9e4066Sahrens 484fa9e4066Sahrens tvd->vdev_ms_array = svd->vdev_ms_array; 485fa9e4066Sahrens tvd->vdev_ms_shift = svd->vdev_ms_shift; 486fa9e4066Sahrens tvd->vdev_ms_count = svd->vdev_ms_count; 487fa9e4066Sahrens 488fa9e4066Sahrens svd->vdev_ms_array = 0; 489fa9e4066Sahrens svd->vdev_ms_shift = 0; 490fa9e4066Sahrens svd->vdev_ms_count = 0; 491fa9e4066Sahrens 492fa9e4066Sahrens tvd->vdev_mg = svd->vdev_mg; 493fa9e4066Sahrens tvd->vdev_mg->mg_vd = tvd; 494fa9e4066Sahrens tvd->vdev_ms = svd->vdev_ms; 495fa9e4066Sahrens tvd->vdev_smo = svd->vdev_smo; 496fa9e4066Sahrens 497fa9e4066Sahrens svd->vdev_mg = NULL; 498fa9e4066Sahrens svd->vdev_ms = NULL; 499fa9e4066Sahrens svd->vdev_smo = NULL; 500fa9e4066Sahrens 501fa9e4066Sahrens tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc; 502fa9e4066Sahrens tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space; 503fa9e4066Sahrens 504fa9e4066Sahrens svd->vdev_stat.vs_alloc = 0; 505fa9e4066Sahrens svd->vdev_stat.vs_space = 0; 506fa9e4066Sahrens 507fa9e4066Sahrens for (t = 0; t < TXG_SIZE; t++) { 508fa9e4066Sahrens while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL) 509fa9e4066Sahrens (void) txg_list_add(&tvd->vdev_ms_list, msp, t); 510fa9e4066Sahrens while ((vd = txg_list_remove(&svd->vdev_dtl_list, t)) != NULL) 511fa9e4066Sahrens (void) txg_list_add(&tvd->vdev_dtl_list, vd, t); 512fa9e4066Sahrens if (txg_list_remove_this(&spa->spa_vdev_txg_list, svd, t)) 513fa9e4066Sahrens (void) txg_list_add(&spa->spa_vdev_txg_list, tvd, t); 514fa9e4066Sahrens tvd->vdev_dirty[t] = svd->vdev_dirty[t]; 515fa9e4066Sahrens svd->vdev_dirty[t] = 0; 516fa9e4066Sahrens } 517fa9e4066Sahrens 
518fa9e4066Sahrens if (svd->vdev_is_dirty) { 519fa9e4066Sahrens vdev_config_clean(svd); 520fa9e4066Sahrens vdev_config_dirty(tvd); 521fa9e4066Sahrens } 522fa9e4066Sahrens 523fa9e4066Sahrens ASSERT(svd->vdev_io_retry == NULL); 524fa9e4066Sahrens ASSERT(list_is_empty(&svd->vdev_io_pending)); 525fa9e4066Sahrens } 526fa9e4066Sahrens 527fa9e4066Sahrens static void 528fa9e4066Sahrens vdev_top_update(vdev_t *tvd, vdev_t *vd) 529fa9e4066Sahrens { 530fa9e4066Sahrens int c; 531fa9e4066Sahrens 532fa9e4066Sahrens if (vd == NULL) 533fa9e4066Sahrens return; 534fa9e4066Sahrens 535fa9e4066Sahrens vd->vdev_top = tvd; 536fa9e4066Sahrens 537fa9e4066Sahrens for (c = 0; c < vd->vdev_children; c++) 538fa9e4066Sahrens vdev_top_update(tvd, vd->vdev_child[c]); 539fa9e4066Sahrens } 540fa9e4066Sahrens 541fa9e4066Sahrens /* 542fa9e4066Sahrens * Add a mirror/replacing vdev above an existing vdev. 543fa9e4066Sahrens */ 544fa9e4066Sahrens vdev_t * 545fa9e4066Sahrens vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) 546fa9e4066Sahrens { 547fa9e4066Sahrens spa_t *spa = cvd->vdev_spa; 548fa9e4066Sahrens vdev_t *pvd = cvd->vdev_parent; 549fa9e4066Sahrens vdev_t *mvd; 550fa9e4066Sahrens 551fa9e4066Sahrens ASSERT(spa_config_held(spa, RW_WRITER)); 552fa9e4066Sahrens 553fa9e4066Sahrens mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops); 554fa9e4066Sahrens vdev_remove_child(pvd, cvd); 555fa9e4066Sahrens vdev_add_child(pvd, mvd); 556fa9e4066Sahrens cvd->vdev_id = mvd->vdev_children; 557fa9e4066Sahrens vdev_add_child(mvd, cvd); 558fa9e4066Sahrens vdev_top_update(cvd->vdev_top, cvd->vdev_top); 559fa9e4066Sahrens 560fa9e4066Sahrens mvd->vdev_asize = cvd->vdev_asize; 561fa9e4066Sahrens mvd->vdev_ashift = cvd->vdev_ashift; 562fa9e4066Sahrens mvd->vdev_state = cvd->vdev_state; 563fa9e4066Sahrens 564fa9e4066Sahrens if (mvd == mvd->vdev_top) 565fa9e4066Sahrens vdev_top_transfer(cvd, mvd); 566fa9e4066Sahrens 567fa9e4066Sahrens return (mvd); 568fa9e4066Sahrens } 569fa9e4066Sahrens 570fa9e4066Sahrens /* 
571fa9e4066Sahrens * Remove a 1-way mirror/replacing vdev from the tree. 572fa9e4066Sahrens */ 573fa9e4066Sahrens void 574fa9e4066Sahrens vdev_remove_parent(vdev_t *cvd) 575fa9e4066Sahrens { 576fa9e4066Sahrens vdev_t *mvd = cvd->vdev_parent; 577fa9e4066Sahrens vdev_t *pvd = mvd->vdev_parent; 578fa9e4066Sahrens 579fa9e4066Sahrens ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER)); 580fa9e4066Sahrens 581fa9e4066Sahrens ASSERT(mvd->vdev_children == 1); 582fa9e4066Sahrens ASSERT(mvd->vdev_ops == &vdev_mirror_ops || 583fa9e4066Sahrens mvd->vdev_ops == &vdev_replacing_ops); 584fa9e4066Sahrens 585fa9e4066Sahrens vdev_remove_child(mvd, cvd); 586fa9e4066Sahrens vdev_remove_child(pvd, mvd); 587fa9e4066Sahrens cvd->vdev_id = mvd->vdev_id; 588fa9e4066Sahrens vdev_add_child(pvd, cvd); 589fa9e4066Sahrens vdev_top_update(cvd->vdev_top, cvd->vdev_top); 590fa9e4066Sahrens 591fa9e4066Sahrens if (cvd == cvd->vdev_top) 592fa9e4066Sahrens vdev_top_transfer(mvd, cvd); 593fa9e4066Sahrens 594fa9e4066Sahrens ASSERT(mvd->vdev_children == 0); 595fa9e4066Sahrens vdev_free(mvd); 596fa9e4066Sahrens } 597fa9e4066Sahrens 598fa9e4066Sahrens void 599fa9e4066Sahrens vdev_metaslab_init(vdev_t *vd, uint64_t txg) 600fa9e4066Sahrens { 601fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 602fa9e4066Sahrens metaslab_class_t *mc = spa_metaslab_class_select(spa); 603fa9e4066Sahrens uint64_t c; 604fa9e4066Sahrens uint64_t oldc = vd->vdev_ms_count; 605fa9e4066Sahrens uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift; 606fa9e4066Sahrens space_map_obj_t *smo = vd->vdev_smo; 607fa9e4066Sahrens metaslab_t **mspp = vd->vdev_ms; 608fa9e4066Sahrens 609fa9e4066Sahrens dprintf("%s oldc %llu newc %llu\n", vdev_description(vd), oldc, newc); 610fa9e4066Sahrens 611fa9e4066Sahrens ASSERT(oldc <= newc); 612fa9e4066Sahrens 613fa9e4066Sahrens vd->vdev_smo = kmem_zalloc(newc * sizeof (*smo), KM_SLEEP); 614fa9e4066Sahrens vd->vdev_ms = kmem_zalloc(newc * sizeof (*mspp), KM_SLEEP); 615fa9e4066Sahrens vd->vdev_ms_count = newc; 
616fa9e4066Sahrens 617fa9e4066Sahrens if (vd->vdev_mg == NULL) { 618fa9e4066Sahrens if (txg == 0) { 619fa9e4066Sahrens dmu_buf_t *db; 620fa9e4066Sahrens uint64_t *ms_array; 621fa9e4066Sahrens 622fa9e4066Sahrens ms_array = kmem_zalloc(newc * sizeof (uint64_t), 623fa9e4066Sahrens KM_SLEEP); 624fa9e4066Sahrens 625fa9e4066Sahrens dmu_read(spa->spa_meta_objset, vd->vdev_ms_array, 626fa9e4066Sahrens 0, newc * sizeof (uint64_t), ms_array); 627fa9e4066Sahrens 628fa9e4066Sahrens for (c = 0; c < newc; c++) { 629fa9e4066Sahrens if (ms_array[c] == 0) 630fa9e4066Sahrens continue; 631fa9e4066Sahrens db = dmu_bonus_hold(spa->spa_meta_objset, 632fa9e4066Sahrens ms_array[c]); 633fa9e4066Sahrens dmu_buf_read(db); 634fa9e4066Sahrens ASSERT3U(db->db_size, ==, sizeof (*smo)); 635fa9e4066Sahrens bcopy(db->db_data, &vd->vdev_smo[c], 636fa9e4066Sahrens db->db_size); 637fa9e4066Sahrens ASSERT3U(vd->vdev_smo[c].smo_object, ==, 638fa9e4066Sahrens ms_array[c]); 639fa9e4066Sahrens dmu_buf_rele(db); 640fa9e4066Sahrens } 641fa9e4066Sahrens kmem_free(ms_array, newc * sizeof (uint64_t)); 642fa9e4066Sahrens } 643fa9e4066Sahrens vd->vdev_mg = metaslab_group_create(mc, vd); 644fa9e4066Sahrens } 645fa9e4066Sahrens 646fa9e4066Sahrens for (c = 0; c < oldc; c++) { 647fa9e4066Sahrens vd->vdev_smo[c] = smo[c]; 648fa9e4066Sahrens vd->vdev_ms[c] = mspp[c]; 649fa9e4066Sahrens mspp[c]->ms_smo = &vd->vdev_smo[c]; 650fa9e4066Sahrens } 651fa9e4066Sahrens 652fa9e4066Sahrens for (c = oldc; c < newc; c++) 653fa9e4066Sahrens metaslab_init(vd->vdev_mg, &vd->vdev_smo[c], &vd->vdev_ms[c], 654fa9e4066Sahrens c << vd->vdev_ms_shift, 1ULL << vd->vdev_ms_shift, txg); 655fa9e4066Sahrens 656fa9e4066Sahrens if (oldc != 0) { 657fa9e4066Sahrens kmem_free(smo, oldc * sizeof (*smo)); 658fa9e4066Sahrens kmem_free(mspp, oldc * sizeof (*mspp)); 659fa9e4066Sahrens } 660fa9e4066Sahrens 661fa9e4066Sahrens } 662fa9e4066Sahrens 663fa9e4066Sahrens void 664fa9e4066Sahrens vdev_metaslab_fini(vdev_t *vd) 665fa9e4066Sahrens { 
666fa9e4066Sahrens uint64_t m; 667fa9e4066Sahrens uint64_t count = vd->vdev_ms_count; 668fa9e4066Sahrens 669fa9e4066Sahrens if (vd->vdev_ms != NULL) { 670fa9e4066Sahrens for (m = 0; m < count; m++) 671fa9e4066Sahrens metaslab_fini(vd->vdev_ms[m]); 672fa9e4066Sahrens kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *)); 673fa9e4066Sahrens vd->vdev_ms = NULL; 674fa9e4066Sahrens } 675fa9e4066Sahrens 676fa9e4066Sahrens if (vd->vdev_smo != NULL) { 677fa9e4066Sahrens kmem_free(vd->vdev_smo, count * sizeof (space_map_obj_t)); 678fa9e4066Sahrens vd->vdev_smo = NULL; 679fa9e4066Sahrens } 680fa9e4066Sahrens } 681fa9e4066Sahrens 682fa9e4066Sahrens /* 683fa9e4066Sahrens * Prepare a virtual device for access. 684fa9e4066Sahrens */ 685fa9e4066Sahrens int 686fa9e4066Sahrens vdev_open(vdev_t *vd) 687fa9e4066Sahrens { 688fa9e4066Sahrens int error; 689fa9e4066Sahrens vdev_knob_t *vk; 690fa9e4066Sahrens int c; 691fa9e4066Sahrens uint64_t osize = 0; 692fa9e4066Sahrens uint64_t asize, psize; 693fa9e4066Sahrens uint64_t ashift = -1ULL; 694fa9e4066Sahrens 695fa9e4066Sahrens ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || 696fa9e4066Sahrens vd->vdev_state == VDEV_STATE_CANT_OPEN || 697fa9e4066Sahrens vd->vdev_state == VDEV_STATE_OFFLINE); 698fa9e4066Sahrens 699fa9e4066Sahrens if (vd->vdev_fault_mode == VDEV_FAULT_COUNT) 700fa9e4066Sahrens vd->vdev_fault_arg >>= 1; 701fa9e4066Sahrens else 702fa9e4066Sahrens vd->vdev_fault_mode = VDEV_FAULT_NONE; 703fa9e4066Sahrens 704fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_NONE; 705fa9e4066Sahrens 706fa9e4066Sahrens for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) { 707fa9e4066Sahrens uint64_t *valp = (uint64_t *)((char *)vd + vk->vk_offset); 708fa9e4066Sahrens 709fa9e4066Sahrens *valp = vk->vk_default; 710fa9e4066Sahrens *valp = MAX(*valp, vk->vk_min); 711fa9e4066Sahrens *valp = MIN(*valp, vk->vk_max); 712fa9e4066Sahrens } 713fa9e4066Sahrens 714fa9e4066Sahrens if (vd->vdev_ops->vdev_op_leaf) { 715fa9e4066Sahrens 
		vdev_cache_init(vd);
		vdev_queue_init(vd);
		vd->vdev_cache_active = B_TRUE;
	}

	/*
	 * An offline leaf is never opened; report ENXIO immediately.
	 */
	if (vd->vdev_offline) {
		ASSERT(vd->vdev_children == 0);
		dprintf("OFFLINE: %s = ENXIO\n", vdev_description(vd));
		vd->vdev_state = VDEV_STATE_OFFLINE;
		return (ENXIO);
	}

	error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);

	dprintf("%s = %d, osize %llu, state = %d\n",
	    vdev_description(vd), error, osize, vd->vdev_state);

	if (error) {
		dprintf("%s in %s failed to open, error %d, aux %d\n",
		    vdev_description(vd),
		    vdev_description(vd->vdev_parent),
		    error,
		    vd->vdev_stat.vs_aux);

		vd->vdev_state = VDEV_STATE_CANT_OPEN;
		return (error);
	}

	vd->vdev_state = VDEV_STATE_HEALTHY;

	/*
	 * An interior vdev is only degraded, not faulted, when some
	 * children are unhealthy.
	 */
	for (c = 0; c < vd->vdev_children; c++)
		if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY)
			vd->vdev_state = VDEV_STATE_DEGRADED;

	osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t));

	if (vd->vdev_children == 0) {
		if (osize < SPA_MINDEVSIZE) {
			vd->vdev_state = VDEV_STATE_CANT_OPEN;
			vd->vdev_stat.vs_aux = VDEV_AUX_TOO_SMALL;
			return (EOVERFLOW);
		}
		psize = osize;
		asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE);
	} else {
		if (osize < SPA_MINDEVSIZE -
		    (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) {
			vd->vdev_state = VDEV_STATE_CANT_OPEN;
			vd->vdev_stat.vs_aux = VDEV_AUX_TOO_SMALL;
			return (EOVERFLOW);
		}
		psize = 0;
		asize = osize;
	}

	vd->vdev_psize = psize;

	if (vd->vdev_asize == 0) {
		/*
		 * This is the first-ever open, so use the computed values.
		 */
		vd->vdev_asize = asize;
		vd->vdev_ashift = ashift;
	} else {
		/*
		 * Make sure the alignment requirement hasn't increased.
		 */
		if (ashift > vd->vdev_ashift) {
			dprintf("%s: ashift grew\n", vdev_description(vd));
			vd->vdev_state = VDEV_STATE_CANT_OPEN;
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (EINVAL);
		}

		/*
		 * Make sure the device hasn't shrunk.
		 */
		if (asize < vd->vdev_asize) {
			dprintf("%s: device shrank\n", vdev_description(vd));
			vd->vdev_state = VDEV_STATE_CANT_OPEN;
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (EINVAL);
		}

		/*
		 * If all children are healthy and the asize has increased,
		 * then we've experienced dynamic LUN growth.
		 */
		if (vd->vdev_state == VDEV_STATE_HEALTHY &&
		    asize > vd->vdev_asize) {
			dprintf("%s: device grew\n", vdev_description(vd));
			vd->vdev_asize = asize;
		}
	}

	return (0);
}

/*
 * Close a virtual device.  Callers must have drained all pending I/O
 * first (asserted below).  Tears down the per-vdev cache and queue if
 * they were set up at open time, then records the final state.
 */
void
vdev_close(vdev_t *vd)
{
	ASSERT3P(list_head(&vd->vdev_io_pending), ==, NULL);

	vd->vdev_ops->vdev_op_close(vd);

	if (vd->vdev_cache_active) {
		vdev_cache_fini(vd);
		vdev_queue_fini(vd);
		vd->vdev_cache_active = B_FALSE;
	}

	if (vd->vdev_offline)
		vd->vdev_state = VDEV_STATE_OFFLINE;
	else
		vd->vdev_state = VDEV_STATE_CLOSED;
}

/*
 * Close and immediately reopen a vdev.  When called on the root vdev,
 * recursively reopens each top-level child instead (rq must be NULL in
 * that case).  For a top-level vdev, waits for pending I/O to drain,
 * reopens, and optionally hands back the retry list through *rq.
 * Afterwards the root vdev's state is recomputed as the MIN of its
 * children's states.
 */
void
vdev_reopen(vdev_t *vd, zio_t **rq)
{
	vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
	int c;

	if (vd == rvd) {
		ASSERT(rq == NULL);
		for (c = 0; c < rvd->vdev_children; c++)
			vdev_reopen(rvd->vdev_child[c], NULL);
		return;
	}

	/* only valid for top-level vdevs */
	ASSERT3P(vd, ==, vd->vdev_top);

	/*
	 * vdev_state can change when spa_config_lock is held as writer,
	 * or when it's held as reader and we're doing a vdev_reopen().
	 * To handle the latter case, we grab rvd's io_lock to serialize
	 * reopens.  This ensures that there's never more than one vdev
	 * state changer active at a time.
	 */
	mutex_enter(&rvd->vdev_io_lock);

	mutex_enter(&vd->vdev_io_lock);
	while (list_head(&vd->vdev_io_pending) != NULL)
		cv_wait(&vd->vdev_io_cv, &vd->vdev_io_lock);
	vdev_close(vd);
	(void) vdev_open(vd);
	if (rq != NULL) {
		*rq = vd->vdev_io_retry;
		vd->vdev_io_retry = NULL;
	}
	mutex_exit(&vd->vdev_io_lock);

	/*
	 * Reassess root vdev's health.
	 */
	rvd->vdev_state = VDEV_STATE_HEALTHY;
	for (c = 0; c < rvd->vdev_children; c++) {
		uint64_t state = rvd->vdev_child[c]->vdev_state;
		rvd->vdev_state = MIN(rvd->vdev_state, state);
	}

	mutex_exit(&rvd->vdev_io_lock);
}

/*
 * Open a vdev tree being created and initialize its labels.
 * Returns 0 on success; on any failure the vdev is closed and an
 * errno is returned.
 */
int
vdev_create(vdev_t *vd, uint64_t txg)
{
	int error;

	/*
	 * Normally, partial opens (e.g. of a mirror) are allowed.
	 * For a create, however, we want to fail the request if
	 * there are any components we can't open.
	 */
	error = vdev_open(vd);

	if (error || vd->vdev_state != VDEV_STATE_HEALTHY) {
		vdev_close(vd);
		return (error ? error : ENXIO);
	}

	/*
	 * Recursively initialize all labels.
	 */
	if ((error = vdev_label_init(vd, txg)) != 0) {
		vdev_close(vd);
		return (error);
	}

	return (0);
}

/*
 * This is the latter half of vdev_create().  It is distinct because it
 * involves initiating transactions in order to do metaslab creation.
 * For creation, we want to try to create all vdevs at once and then undo it
 * if anything fails; this is much harder if we have pending transactions.
 */
void
vdev_init(vdev_t *vd, uint64_t txg)
{
	/*
	 * Aim for roughly 200 metaslabs per vdev.
	 */
	vd->vdev_ms_shift = highbit(vd->vdev_asize / 200);
	vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT);

	/*
	 * Initialize the vdev's metaslabs.
	 */
	vdev_metaslab_init(vd, txg);
}

/*
 * Mark the given dirty flags on vd's top-level vdev for txg, and queue
 * that top-level vdev on the pool's per-txg dirty-vdev list the first
 * time any of those flags are newly set for the txg.
 */
void
vdev_dirty(vdev_t *vd, uint8_t flags, uint64_t txg)
{
	vdev_t *tvd = vd->vdev_top;

	mutex_enter(&tvd->vdev_dirty_lock);
	if ((tvd->vdev_dirty[txg & TXG_MASK] & flags) != flags) {
		tvd->vdev_dirty[txg & TXG_MASK] |= flags;
		(void) txg_list_add(&tvd->vdev_spa->spa_vdev_txg_list,
		    tvd, txg);
	}
	mutex_exit(&tvd->vdev_dirty_lock);
}

/*
 * Add [txg, txg + size) to a DTL (dirty time log) space map, unless the
 * range is already present.
 */
void
vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size)
{
	mutex_enter(sm->sm_lock);
	if (!space_map_contains(sm, txg, size))
		space_map_add(sm, txg, size);
	mutex_exit(sm->sm_lock);
}

/*
 * Return nonzero if the DTL space map contains [txg, txg + size).
 */
int
vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size)
{
	int dirty;

	/*
	 * Quick test without the lock -- covers the common case that
	 * there are no dirty time segments.
	 */
	if (sm->sm_space == 0)
		return (0);

	mutex_enter(sm->sm_lock);
	dirty = space_map_contains(sm, txg, size);
	mutex_exit(sm->sm_lock);

	return (dirty);
}

/*
 * Reassess DTLs after a config change or scrub completion.
 */
void
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
{
	int c;

	ASSERT(spa_config_held(vd->vdev_spa, RW_WRITER));

	if (vd->vdev_children == 0) {
		mutex_enter(&vd->vdev_dtl_lock);
		/*
		 * We've successfully scrubbed everything up to scrub_txg.
		 * Therefore, excise all old DTLs up to that point, then
		 * fold in the DTLs for everything we couldn't scrub.
		 */
		if (scrub_txg != 0) {
			space_map_excise(&vd->vdev_dtl_map, 0, scrub_txg);
			space_map_union(&vd->vdev_dtl_map, &vd->vdev_dtl_scrub);
		}
		if (scrub_done)
			space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
		mutex_exit(&vd->vdev_dtl_lock);
		if (txg != 0) {
			vdev_t *tvd = vd->vdev_top;
			vdev_dirty(tvd, VDD_DTL, txg);
			(void) txg_list_add(&tvd->vdev_dtl_list, vd, txg);
		}
		return;
	}

	/*
	 * Interior vdevs: rebuild our DTLs as the union of the children's.
	 */
	mutex_enter(&vd->vdev_dtl_lock);
	space_map_vacate(&vd->vdev_dtl_map, NULL, NULL);
	space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
	mutex_exit(&vd->vdev_dtl_lock);

	for (c = 0; c < vd->vdev_children; c++) {
		vdev_t *cvd = vd->vdev_child[c];
		vdev_dtl_reassess(cvd, txg, scrub_txg, scrub_done);
		mutex_enter(&vd->vdev_dtl_lock);
		space_map_union(&vd->vdev_dtl_map, &cvd->vdev_dtl_map);
		space_map_union(&vd->vdev_dtl_scrub, &cvd->vdev_dtl_scrub);
		mutex_exit(&vd->vdev_dtl_lock);
	}
}

/*
 * Load a leaf vdev's persistent DTL from its space map object in the
 * MOS, if one exists.  Returns 0 if there is nothing to load, or the
 * error from space_map_load().
 */
static int
vdev_dtl_load(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;
	space_map_obj_t *smo = &vd->vdev_dtl;
	dmu_buf_t *db;
	int error;

	ASSERT(vd->vdev_children == 0);

	if (smo->smo_object == 0)
		return (0);

	db = dmu_bonus_hold(spa->spa_meta_objset, smo->smo_object);
	dmu_buf_read(db);
	ASSERT3U(db->db_size, ==, sizeof (*smo));
	bcopy(db->db_data, smo, db->db_size);
	dmu_buf_rele(db);

	mutex_enter(&vd->vdev_dtl_lock);
	error = space_map_load(&vd->vdev_dtl_map, smo, SM_ALLOC,
	    spa->spa_meta_objset, smo->smo_objsize, smo->smo_alloc);
	mutex_exit(&vd->vdev_dtl_lock);

	return (error);
}

/*
 * Write a vdev's DTL to its space map object in the MOS for this txg.
 * A detached vdev instead frees its space map object.  The in-core DTL
 * is snapshotted into a private space map (under smlock) so the real
 * DTL lock is held only briefly.
 */
void
vdev_dtl_sync(vdev_t *vd, uint64_t txg)
{
	spa_t *spa = vd->vdev_spa;
	space_map_obj_t *smo = &vd->vdev_dtl;
	space_map_t *sm = &vd->vdev_dtl_map;
	space_map_t smsync;
	kmutex_t smlock;
	avl_tree_t *t = &sm->sm_root;
	space_seg_t *ss;
	dmu_buf_t *db;
	dmu_tx_t *tx;

	dprintf("%s in txg %llu pass %d\n",
	    vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa));

	tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);

	if (vd->vdev_detached) {
		if (smo->smo_object != 0) {
			int err = dmu_object_free(spa->spa_meta_objset,
			    smo->smo_object, tx);
			ASSERT3U(err, ==, 0);
			smo->smo_object = 0;
		}
		dmu_tx_commit(tx);
		return;
	}

	if (smo->smo_object == 0) {
		ASSERT(smo->smo_objsize == 0);
		ASSERT(smo->smo_alloc == 0);
		smo->smo_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT,
		    DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx);
		ASSERT(smo->smo_object != 0);
		vdev_config_dirty(vd->vdev_top);
	}

	/* Rewrite the object from scratch each sync. */
	dmu_free_range(spa->spa_meta_objset, smo->smo_object,
	    0, smo->smo_objsize, tx);

	mutex_init(&smlock, NULL, MUTEX_DEFAULT, NULL);

	space_map_create(&smsync, sm->sm_start, sm->sm_size, sm->sm_shift,
	    &smlock);

	mutex_enter(&smlock);

	mutex_enter(&vd->vdev_dtl_lock);
	for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss))
		space_map_add(&smsync, ss->ss_start, ss->ss_end - ss->ss_start);
	mutex_exit(&vd->vdev_dtl_lock);

	smo->smo_objsize = 0;
	smo->smo_alloc = smsync.sm_space;

	space_map_sync(&smsync, NULL, smo, SM_ALLOC, spa->spa_meta_objset, tx);
	space_map_destroy(&smsync);

	mutex_exit(&smlock);
	mutex_destroy(&smlock);

	/* Persist the updated space map header in the bonus buffer. */
	db = dmu_bonus_hold(spa->spa_meta_objset, smo->smo_object);
	dmu_buf_will_dirty(db, tx);
	ASSERT3U(db->db_size, ==, sizeof (*smo));
	bcopy(smo, db->db_data, db->db_size);
	dmu_buf_rele(db);

	dmu_tx_commit(tx);
}

/*
 * Load the on-disk state for a vdev tree: validate leaf labels against
 * the expected pool/vdev GUIDs and pool state, initialize top-level
 * metaslabs, and load leaf DTLs.  Most label problems set the vdev to
 * CANT_OPEN/CORRUPT_DATA and return 0 (so the rest of the tree still
 * loads); an exported/destroyed pool returns EBADF so the caller can
 * remove it from the config.
 */
int
vdev_load(vdev_t *vd, int import)
{
	spa_t *spa = vd->vdev_spa;
	int c, error;
	nvlist_t *label;
	uint64_t guid, state;

	dprintf("loading %s\n", vdev_description(vd));

	/*
	 * Recursively load all children.
	 */
	for (c = 0; c < vd->vdev_children; c++)
		if ((error = vdev_load(vd->vdev_child[c], import)) != 0)
			return (error);

	/*
	 * If this is a leaf vdev, make sure it agrees with its disk labels.
	 */
	if (vd->vdev_ops->vdev_op_leaf) {

		if (vdev_is_dead(vd))
			return (0);

		/*
		 * XXX state transitions don't propagate to parent here.
		 * Also, merely setting the state isn't sufficient because
		 * it's not persistent; a vdev_reopen() would make us
		 * forget all about it.
		 */
		if ((label = vdev_label_read_config(vd)) == NULL) {
			dprintf("can't load label config\n");
			vdev_set_state(vd, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			return (0);
		}

		if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
		    &guid) != 0 || guid != spa_guid(spa)) {
			dprintf("bad or missing pool GUID (%llu)\n", guid);
			vdev_set_state(vd, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			nvlist_free(label);
			return (0);
		}

		if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) ||
		    guid != vd->vdev_guid) {
			dprintf("bad or missing vdev guid (%llu != %llu)\n",
			    guid, vd->vdev_guid);
			vdev_set_state(vd, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			nvlist_free(label);
			return (0);
		}

		/*
		 * If we find a vdev with a matching pool guid and vdev guid,
		 * but the pool state is not active, it indicates that the user
		 * exported or destroyed the pool without affecting the config
		 * cache (if / was mounted readonly, for example).  In this
		 * case, immediately return EBADF so the caller can remove it
		 * from the config.
		 */
		if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE,
		    &state)) {
			dprintf("missing pool state\n");
			vdev_set_state(vd, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			nvlist_free(label);
			return (0);
		}

		if (state != POOL_STATE_ACTIVE &&
		    (!import || state != POOL_STATE_EXPORTED)) {
			dprintf("pool state not active (%llu)\n", state);
			nvlist_free(label);
			return (EBADF);
		}

		nvlist_free(label);
	}

	/*
	 * If this is a top-level vdev, make sure its allocation parameters
	 * exist and initialize its metaslabs.
	 */
	if (vd == vd->vdev_top) {

		if (vd->vdev_ms_array == 0 ||
		    vd->vdev_ms_shift == 0 ||
		    vd->vdev_ashift == 0 ||
		    vd->vdev_asize == 0) {
			vdev_set_state(vd, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			return (0);
		}

		vdev_metaslab_init(vd, 0);
	}

	/*
	 * If this is a leaf vdev, load its DTL.
	 */
	if (vd->vdev_ops->vdev_op_leaf) {
		error = vdev_dtl_load(vd);
		if (error) {
			dprintf("can't load DTL for %s, error %d\n",
			    vdev_description(vd), error);
			vdev_set_state(vd, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			return (0);
		}
	}

	return (0);
}

/*
 * Called after a txg has synced: run metaslab_sync_done() on every
 * metaslab queued on this vdev's clean list for the txg.
 */
void
vdev_sync_done(vdev_t *vd, uint64_t txg)
{
	metaslab_t *msp;

	dprintf("%s txg %llu\n", vdev_description(vd), txg);

	while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))
		metaslab_sync_done(msp, txg);
}

/*
 * Sync-time work for a newly added top-level vdev: allocate its
 * metaslab object array in the MOS (if not already allocated) and
 * dirty the config.
 */
void
vdev_add_sync(vdev_t *vd, uint64_t txg)
{
	spa_t *spa = vd->vdev_spa;
	dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);

	ASSERT(vd == vd->vdev_top);

	if (vd->vdev_ms_array == 0)
		vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx);

	ASSERT(vd->vdev_ms_array != 0);

	vdev_config_dirty(vd);

	dmu_tx_commit(tx);
}

/*
 * Sync a top-level vdev for the given txg: clear its dirty flags,
 * handle VDD_ADD work, sync dirty metaslabs and queued leaf DTLs, then
 * requeue the vdev for the clean phase.
 */
void
vdev_sync(vdev_t *vd, uint64_t txg)
{
	spa_t *spa = vd->vdev_spa;
	vdev_t *lvd;
	metaslab_t *msp;
	uint8_t *dirtyp = &vd->vdev_dirty[txg & TXG_MASK];
	uint8_t dirty = *dirtyp;

	mutex_enter(&vd->vdev_dirty_lock);
	*dirtyp &= ~(VDD_ALLOC | VDD_FREE | VDD_ADD | VDD_DTL);
	mutex_exit(&vd->vdev_dirty_lock);

	dprintf("%s txg %llu pass %d\n",
	    vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa));

	if (dirty & VDD_ADD)
		vdev_add_sync(vd, txg);

	while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL)
		metaslab_sync(msp, txg);

	while ((lvd = txg_list_remove(&vd->vdev_dtl_list, txg)) != NULL)
		vdev_dtl_sync(lvd, txg);

	(void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg));
}

/*
 * Convert a physical size to an allocatable size via the vdev's ops
 * vector (e.g. RAID-Z inflates for parity).
 */
uint64_t
vdev_psize_to_asize(vdev_t *vd, uint64_t psize)
{
	return (vd->vdev_ops->vdev_op_asize(vd, psize));
}

/*
 * Dispatch I/O start to the vdev-type-specific implementation.
 */
void
vdev_io_start(zio_t *zio)
{
	zio->io_vd->vdev_ops->vdev_op_io_start(zio);
}

/*
 * Dispatch I/O completion to the vdev-type-specific implementation.
 */
void
vdev_io_done(zio_t *zio)
{
	zio->io_vd->vdev_ops->vdev_op_io_done(zio);
}

/*
 * Best human-readable name for a vdev: its path if it has one, the
 * pool name for the root vdev, otherwise the vdev type.
 */
const char *
vdev_description(vdev_t *vd)
{
	if (vd == NULL || vd->vdev_ops == NULL)
		return ("<unknown>");

	if (vd->vdev_path != NULL)
		return (vd->vdev_path);

	if (vd->vdev_parent == NULL)
		return (spa_name(vd->vdev_spa));

	return (vd->vdev_ops->vdev_op_type);
}

/*
 * Bring the vdev at the given path back online, reopen its top-level
 * vdev, and kick off a resilver.  Returns ENODEV if no vdev matches.
 */
int
vdev_online(spa_t *spa, const char *path)
{
	vdev_t *vd;

	spa_config_enter(spa, RW_WRITER);

	if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, path)) == NULL) {
		spa_config_exit(spa);
		return (ENODEV);
	}

	dprintf("ONLINE: %s\n", vdev_description(vd));

	vd->vdev_offline = B_FALSE;

	/*
	 * Clear the error counts.  The idea is that you expect to see all
	 * zeroes when everything is working, so if you've just onlined a
	 * device, you don't want to keep hearing about errors from before.
	 */
	vd->vdev_stat.vs_read_errors = 0;
	vd->vdev_stat.vs_write_errors = 0;
	vd->vdev_stat.vs_checksum_errors = 0;

	vdev_reopen(vd->vdev_top, NULL);

	spa_config_exit(spa);

	VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);

	return (0);
}

/*
 * Take the vdev at the given path offline.  Refused (EBUSY) when the
 * top-level vdev has a non-empty DTL or when offlining would leave the
 * top-level vdev unusable.  Returns ENODEV if no vdev matches.
 */
int
vdev_offline(spa_t *spa, const char *path)
{
	vdev_t *vd;

	spa_config_enter(spa, RW_WRITER);

	if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, path)) == NULL) {
		spa_config_exit(spa);
		return (ENODEV);
	}

	dprintf("OFFLINE: %s\n", vdev_description(vd));

	/*
	 * If this device's top-level vdev has a non-empty DTL,
	 * don't allow the device to be offlined.
	 *
	 * XXX -- we should make this more precise by allowing the offline
	 * as long as the remaining devices don't have any DTL holes.
	 */
	if (vd->vdev_top->vdev_dtl_map.sm_space != 0) {
		spa_config_exit(spa);
		return (EBUSY);
	}

	/*
	 * Set this device to offline state and reopen its top-level vdev.
	 * If this action results in the top-level vdev becoming unusable,
	 * undo it and fail the request.
	 */
	vd->vdev_offline = B_TRUE;
	vdev_reopen(vd->vdev_top, NULL);
	if (vdev_is_dead(vd->vdev_top)) {
		vd->vdev_offline = B_FALSE;
		vdev_reopen(vd->vdev_top, NULL);
		spa_config_exit(spa);
		return (EBUSY);
	}

	spa_config_exit(spa);

	return (0);
}

/*
 * Configure fault injection for the vdev at the given path (mode,
 * I/O-type mask, and mode-specific argument); consumed by
 * vdev_error_inject().  Returns ENODEV if no vdev matches.
 */
int
vdev_error_setup(spa_t *spa, const char *path, int mode, int mask, uint64_t arg)
{
	vdev_t *vd;

	spa_config_enter(spa, RW_WRITER);

	if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, path)) == NULL) {
		spa_config_exit(spa);
		return (ENODEV);
	}

	vd->vdev_fault_mode = mode;
	vd->vdev_fault_mask = mask;
	vd->vdev_fault_arg = arg;

	spa_config_exit(spa);

	return (0);
}

/*
 * A vdev is considered dead when its state is CANT_OPEN or worse.
 */
int
vdev_is_dead(vdev_t *vd)
{
	return (vd->vdev_state <= VDEV_STATE_CANT_OPEN);
}

/*
 * Possibly inject an EIO for this zio according to the vdev's fault
 * settings: VDEV_FAULT_RANDOM fails roughly 1-in-fault_arg I/Os;
 * VDEV_FAULT_COUNT fails the next fault_arg I/Os, then disarms itself.
 * Only I/O types selected by the fault mask are affected.
 */
int
vdev_error_inject(vdev_t *vd, zio_t *zio)
{
	int error = 0;

	if (vd->vdev_fault_mode == VDEV_FAULT_NONE)
		return (0);

	if (((1ULL << zio->io_type) & vd->vdev_fault_mask) == 0)
		return (0);

	switch (vd->vdev_fault_mode) {
	case VDEV_FAULT_RANDOM:
		if (spa_get_random(vd->vdev_fault_arg) == 0)
			error = EIO;
		break;

	case VDEV_FAULT_COUNT:
		if ((int64_t)--vd->vdev_fault_arg <= 0)
			vd->vdev_fault_mode = VDEV_FAULT_NONE;
		error = EIO;
		break;
	}

	if (error != 0) {
		dprintf("returning %d for type %d on %s state %d offset %llx\n",
		    error, zio->io_type, vdev_description(vd),
		    vd->vdev_state, zio->io_offset);
	}

	return (error);
}

/*
 * Get statistics for the given vdev.
 */
void
vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
{
	vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
	int c, t;

	mutex_enter(&vd->vdev_stat_lock);
	bcopy(&vd->vdev_stat, vs, sizeof (*vs));
	vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
	vs->vs_state = vd->vdev_state;
	vs->vs_rsize = vdev_get_rsize(vd);
	mutex_exit(&vd->vdev_stat_lock);

	/*
	 * If we're getting stats on the root vdev, aggregate the I/O counts
	 * over all top-level vdevs (i.e. the direct children of the root).
	 */
	if (vd == rvd) {
		for (c = 0; c < rvd->vdev_children; c++) {
			vdev_t *cvd = rvd->vdev_child[c];
			vdev_stat_t *cvs = &cvd->vdev_stat;

			mutex_enter(&vd->vdev_stat_lock);
			for (t = 0; t < ZIO_TYPES; t++) {
				vs->vs_ops[t] += cvs->vs_ops[t];
				vs->vs_bytes[t] += cvs->vs_bytes[t];
			}
			vs->vs_read_errors += cvs->vs_read_errors;
			vs->vs_write_errors += cvs->vs_write_errors;
			vs->vs_checksum_errors += cvs->vs_checksum_errors;
			vs->vs_scrub_examined += cvs->vs_scrub_examined;
			vs->vs_scrub_errors += cvs->vs_scrub_errors;
			mutex_exit(&vd->vdev_stat_lock);
		}
	}
}

/*
 * Update a vdev's statistics from a completed zio: op/byte counts and
 * repair stats on success, error counts on failure, and DTL updates
 * for failed leaf writes (so the missed txg is resilvered later).
 */
void
vdev_stat_update(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_t *pvd;
	uint64_t txg = zio->io_txg;
	vdev_stat_t *vs = &vd->vdev_stat;
	zio_type_t type = zio->io_type;
	int flags = zio->io_flags;

	if (zio->io_error == 0) {
		if (!(flags & ZIO_FLAG_IO_BYPASS)) {
			mutex_enter(&vd->vdev_stat_lock);
			vs->vs_ops[type]++;
			vs->vs_bytes[type] += zio->io_size;
			mutex_exit(&vd->vdev_stat_lock);
		}
		if ((flags & ZIO_FLAG_IO_REPAIR) &&
		    zio->io_delegate_list == NULL) {
			mutex_enter(&vd->vdev_stat_lock);
			if (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))
				vs->vs_scrub_repaired += zio->io_size;
			else
				vs->vs_self_healed += zio->io_size;
			mutex_exit(&vd->vdev_stat_lock);
		}
		return;
	}

	/* Speculative I/O failures don't count against the device. */
	if (flags & ZIO_FLAG_SPECULATIVE)
		return;

	if (!vdev_is_dead(vd)) {
		mutex_enter(&vd->vdev_stat_lock);
		if (type == ZIO_TYPE_READ) {
			if (zio->io_error == ECKSUM)
				vs->vs_checksum_errors++;
			else
				vs->vs_read_errors++;
		}
		if (type == ZIO_TYPE_WRITE)
			vs->vs_write_errors++;
		mutex_exit(&vd->vdev_stat_lock);
	}

	if (type == ZIO_TYPE_WRITE) {
		if (txg == 0 || vd->vdev_children != 0)
			return;
		if (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)) {
			ASSERT(flags & ZIO_FLAG_IO_REPAIR);
			for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
				vdev_dtl_dirty(&pvd->vdev_dtl_scrub, txg, 1);
		}
		if (!(flags & ZIO_FLAG_IO_REPAIR)) {
			vdev_t *tvd = vd->vdev_top;
			if (vdev_dtl_contains(&vd->vdev_dtl_map, txg, 1))
				return;
			vdev_dirty(tvd, VDD_DTL, txg);
			(void) txg_list_add(&tvd->vdev_dtl_list, vd, txg);
			for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
				vdev_dtl_dirty(&pvd->vdev_dtl_map, txg, 1);
		}
	}
}

/*
 * Record the start (type != POOL_SCRUB_NONE) or end (POOL_SCRUB_NONE)
 * of a scrub/resilver in the vdev tree's scrub statistics, recursively
 * for all children.
 */
void
vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete)
{
	int c;
	vdev_stat_t *vs = &vd->vdev_stat;

	for (c = 0; c < vd->vdev_children; c++)
		vdev_scrub_stat_update(vd->vdev_child[c], type, complete);

	mutex_enter(&vd->vdev_stat_lock);

	if (type == POOL_SCRUB_NONE) {
		/*
		 * Update completion and end time.  Leave everything else alone
		 * so we can report what happened during the previous scrub.
		 */
		vs->vs_scrub_complete = complete;
		vs->vs_scrub_end = gethrestime_sec();
	} else {
		vs->vs_scrub_type = type;
		vs->vs_scrub_complete = 0;
		vs->vs_scrub_examined = 0;
		vs->vs_scrub_repaired = 0;
		vs->vs_scrub_errors = 0;
		vs->vs_scrub_start = gethrestime_sec();
		vs->vs_scrub_end = 0;
	}

	mutex_exit(&vd->vdev_stat_lock);
}

/*
 * Report checksum errors that a vdev didn't realize it made.
 * This can happen, for example, when RAID-Z combinatorial reconstruction
 * infers that one of its components returned bad data.
 */
void
vdev_checksum_error(zio_t *zio, vdev_t *vd)
{
	dprintf_bp(zio->io_bp, "imputed checksum error on %s: ",
	    vdev_description(vd));

	if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
		mutex_enter(&vd->vdev_stat_lock);
		vd->vdev_stat.vs_checksum_errors++;
		mutex_exit(&vd->vdev_stat_lock);
	}
}

/*
 * Update the in-core space usage stats for this vdev and the root vdev.
 */
void
vdev_space_update(vdev_t *vd, uint64_t space_delta, uint64_t alloc_delta)
{
	ASSERT(vd == vd->vdev_top);

	do {
		mutex_enter(&vd->vdev_stat_lock);
		vd->vdev_stat.vs_space += space_delta;
		vd->vdev_stat.vs_alloc += alloc_delta;
		mutex_exit(&vd->vdev_stat_lock);
	} while ((vd = vd->vdev_parent) != NULL);
}

/*
 * Various knobs to tune a vdev.
1641fa9e4066Sahrens */ 1642fa9e4066Sahrens static vdev_knob_t vdev_knob[] = { 1643fa9e4066Sahrens { 1644fa9e4066Sahrens "cache_size", 1645fa9e4066Sahrens "size of the read-ahead cache", 1646fa9e4066Sahrens 0, 1647fa9e4066Sahrens 1ULL << 30, 1648fa9e4066Sahrens 10ULL << 20, 1649fa9e4066Sahrens offsetof(struct vdev, vdev_cache.vc_size) 1650fa9e4066Sahrens }, 1651fa9e4066Sahrens { 1652fa9e4066Sahrens "cache_bshift", 1653fa9e4066Sahrens "log2 of cache blocksize", 1654fa9e4066Sahrens SPA_MINBLOCKSHIFT, 1655fa9e4066Sahrens SPA_MAXBLOCKSHIFT, 1656fa9e4066Sahrens 16, 1657fa9e4066Sahrens offsetof(struct vdev, vdev_cache.vc_bshift) 1658fa9e4066Sahrens }, 1659fa9e4066Sahrens { 1660fa9e4066Sahrens "cache_max", 1661fa9e4066Sahrens "largest block size to cache", 1662fa9e4066Sahrens 0, 1663fa9e4066Sahrens SPA_MAXBLOCKSIZE, 1664fa9e4066Sahrens 1ULL << 14, 1665fa9e4066Sahrens offsetof(struct vdev, vdev_cache.vc_max) 1666fa9e4066Sahrens }, 1667fa9e4066Sahrens { 1668fa9e4066Sahrens "min_pending", 1669fa9e4066Sahrens "minimum pending I/Os to the disk", 1670fa9e4066Sahrens 1, 1671fa9e4066Sahrens 10000, 1672fa9e4066Sahrens 2, 1673fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_min_pending) 1674fa9e4066Sahrens }, 1675fa9e4066Sahrens { 1676fa9e4066Sahrens "max_pending", 1677fa9e4066Sahrens "maximum pending I/Os to the disk", 1678fa9e4066Sahrens 1, 1679fa9e4066Sahrens 10000, 1680fa9e4066Sahrens 35, 1681fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_max_pending) 1682fa9e4066Sahrens }, 1683fa9e4066Sahrens { 1684fa9e4066Sahrens "agg_limit", 1685fa9e4066Sahrens "maximum size of aggregated I/Os", 1686fa9e4066Sahrens 0, 1687fa9e4066Sahrens SPA_MAXBLOCKSIZE, 1688fa9e4066Sahrens SPA_MAXBLOCKSIZE, 1689fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_agg_limit) 1690fa9e4066Sahrens }, 1691fa9e4066Sahrens { 1692fa9e4066Sahrens "time_shift", 1693fa9e4066Sahrens "deadline = pri + (lbolt >> time_shift)", 1694fa9e4066Sahrens 0, 1695fa9e4066Sahrens 63, 1696fa9e4066Sahrens 4, 1697fa9e4066Sahrens 
offsetof(struct vdev, vdev_queue.vq_time_shift) 1698fa9e4066Sahrens }, 1699fa9e4066Sahrens { 1700fa9e4066Sahrens "ramp_rate", 1701fa9e4066Sahrens "exponential I/O issue ramp-up rate", 1702fa9e4066Sahrens 1, 1703fa9e4066Sahrens 10000, 1704fa9e4066Sahrens 2, 1705fa9e4066Sahrens offsetof(struct vdev, vdev_queue.vq_ramp_rate) 1706fa9e4066Sahrens }, 1707fa9e4066Sahrens }; 1708fa9e4066Sahrens 1709fa9e4066Sahrens vdev_knob_t * 1710fa9e4066Sahrens vdev_knob_next(vdev_knob_t *vk) 1711fa9e4066Sahrens { 1712fa9e4066Sahrens if (vk == NULL) 1713fa9e4066Sahrens return (vdev_knob); 1714fa9e4066Sahrens 1715fa9e4066Sahrens if (++vk == vdev_knob + sizeof (vdev_knob) / sizeof (vdev_knob_t)) 1716fa9e4066Sahrens return (NULL); 1717fa9e4066Sahrens 1718fa9e4066Sahrens return (vk); 1719fa9e4066Sahrens } 1720fa9e4066Sahrens 1721fa9e4066Sahrens /* 1722fa9e4066Sahrens * Mark a top-level vdev's config as dirty, placing it on the dirty list 1723fa9e4066Sahrens * so that it will be written out next time the vdev configuration is synced. 1724fa9e4066Sahrens * If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs. 
1725fa9e4066Sahrens */ 1726fa9e4066Sahrens void 1727fa9e4066Sahrens vdev_config_dirty(vdev_t *vd) 1728fa9e4066Sahrens { 1729fa9e4066Sahrens spa_t *spa = vd->vdev_spa; 1730fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 1731fa9e4066Sahrens int c; 1732fa9e4066Sahrens 1733fa9e4066Sahrens if (vd == rvd) { 1734fa9e4066Sahrens for (c = 0; c < rvd->vdev_children; c++) 1735fa9e4066Sahrens vdev_config_dirty(rvd->vdev_child[c]); 1736fa9e4066Sahrens } else { 1737fa9e4066Sahrens ASSERT(vd == vd->vdev_top); 1738fa9e4066Sahrens 1739fa9e4066Sahrens if (!vd->vdev_is_dirty) { 1740fa9e4066Sahrens list_insert_head(&spa->spa_dirty_list, vd); 1741fa9e4066Sahrens vd->vdev_is_dirty = B_TRUE; 1742fa9e4066Sahrens } 1743fa9e4066Sahrens } 1744fa9e4066Sahrens } 1745fa9e4066Sahrens 1746fa9e4066Sahrens void 1747fa9e4066Sahrens vdev_config_clean(vdev_t *vd) 1748fa9e4066Sahrens { 1749fa9e4066Sahrens ASSERT(vd->vdev_is_dirty); 1750fa9e4066Sahrens 1751fa9e4066Sahrens list_remove(&vd->vdev_spa->spa_dirty_list, vd); 1752fa9e4066Sahrens vd->vdev_is_dirty = B_FALSE; 1753fa9e4066Sahrens } 1754fa9e4066Sahrens 1755fa9e4066Sahrens /* 1756fa9e4066Sahrens * Set a vdev's state, updating any parent's state as well. 
1757fa9e4066Sahrens */ 1758fa9e4066Sahrens void 1759fa9e4066Sahrens vdev_set_state(vdev_t *vd, vdev_state_t state, vdev_aux_t aux) 1760fa9e4066Sahrens { 1761fa9e4066Sahrens if (state == vd->vdev_state) 1762fa9e4066Sahrens return; 1763fa9e4066Sahrens 1764fa9e4066Sahrens vd->vdev_state = state; 1765fa9e4066Sahrens vd->vdev_stat.vs_aux = aux; 1766fa9e4066Sahrens 1767fa9e4066Sahrens if (vd->vdev_parent != NULL) { 1768fa9e4066Sahrens int c; 1769fa9e4066Sahrens int degraded = 0, faulted = 0; 1770fa9e4066Sahrens vdev_t *parent, *child; 1771fa9e4066Sahrens 1772fa9e4066Sahrens parent = vd->vdev_parent; 1773fa9e4066Sahrens for (c = 0; c < parent->vdev_children; c++) { 1774fa9e4066Sahrens child = parent->vdev_child[c]; 1775fa9e4066Sahrens if (child->vdev_state <= VDEV_STATE_CANT_OPEN) 1776fa9e4066Sahrens faulted++; 1777fa9e4066Sahrens else if (child->vdev_state == VDEV_STATE_DEGRADED) 1778fa9e4066Sahrens degraded++; 1779fa9e4066Sahrens } 1780fa9e4066Sahrens 1781fa9e4066Sahrens vd->vdev_parent->vdev_ops->vdev_op_state_change( 1782fa9e4066Sahrens vd->vdev_parent, faulted, degraded); 1783fa9e4066Sahrens } 1784fa9e4066Sahrens } 1785