/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
25fa9e4066Sahrens */ 26fa9e4066Sahrens 27fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28fa9e4066Sahrens 29fa9e4066Sahrens #include <sys/spa.h> 30fa9e4066Sahrens #include <sys/spa_impl.h> 31fa9e4066Sahrens #include <sys/nvpair.h> 32fa9e4066Sahrens #include <sys/uio.h> 33fa9e4066Sahrens #include <sys/fs/zfs.h> 34fa9e4066Sahrens #include <sys/vdev_impl.h> 35fa9e4066Sahrens #include <sys/zfs_ioctl.h> 3695173954Sek #include <sys/utsname.h> 3795173954Sek #include <sys/systeminfo.h> 3895173954Sek #include <sys/sunddi.h> 39ea8dc4b6Seschrock #ifdef _KERNEL 40ea8dc4b6Seschrock #include <sys/kobj.h> 41ea8dc4b6Seschrock #endif 42ea8dc4b6Seschrock 43fa9e4066Sahrens /* 44fa9e4066Sahrens * Pool configuration repository. 45fa9e4066Sahrens * 46fa9e4066Sahrens * The configuration for all pools, in addition to being stored on disk, is 470373e76bSbonwick * stored in /etc/zfs/zpool.cache as a packed nvlist. The kernel maintains 48fa9e4066Sahrens * this list as pools are created, destroyed, or modified. 49fa9e4066Sahrens * 50fa9e4066Sahrens * We have a single nvlist which holds all the configuration information. When 51fa9e4066Sahrens * the module loads, we read this information from the cache and populate the 52fa9e4066Sahrens * SPA namespace. This namespace is maintained independently in spa.c. 53fa9e4066Sahrens * Whenever the namespace is modified, or the configuration of a pool is 54fa9e4066Sahrens * changed, we call spa_config_sync(), which walks through all the active pools 55fa9e4066Sahrens * and writes the configuration to disk. 56fa9e4066Sahrens */ 57fa9e4066Sahrens 58fa9e4066Sahrens static uint64_t spa_config_generation = 1; 59fa9e4066Sahrens 60fa9e4066Sahrens /* 61fa9e4066Sahrens * This can be overridden in userland to preserve an alternate namespace for 62fa9e4066Sahrens * userland pools when doing testing. 
63fa9e4066Sahrens */ 64fa9e4066Sahrens const char *spa_config_dir = ZPOOL_CACHE_DIR; 65fa9e4066Sahrens 66fa9e4066Sahrens /* 67fa9e4066Sahrens * Called when the module is first loaded, this routine loads the configuration 68fa9e4066Sahrens * file into the SPA namespace. It does not actually open or load the pools; it 69fa9e4066Sahrens * only populates the namespace. 70fa9e4066Sahrens */ 71fa9e4066Sahrens void 72fa9e4066Sahrens spa_config_load(void) 73fa9e4066Sahrens { 74fa9e4066Sahrens void *buf = NULL; 75fa9e4066Sahrens nvlist_t *nvlist, *child; 76fa9e4066Sahrens nvpair_t *nvpair; 77fa9e4066Sahrens spa_t *spa; 78fa9e4066Sahrens char pathname[128]; 79ea8dc4b6Seschrock struct _buf *file; 80b1b8ab34Slling uint64_t fsize; 81fa9e4066Sahrens 82fa9e4066Sahrens /* 83fa9e4066Sahrens * Open the configuration file. 84fa9e4066Sahrens */ 85ea8dc4b6Seschrock (void) snprintf(pathname, sizeof (pathname), "%s%s/%s", 860373e76bSbonwick (rootdir != NULL) ? "./" : "", spa_config_dir, ZPOOL_CACHE_FILE); 87ea8dc4b6Seschrock 88ea8dc4b6Seschrock file = kobj_open_file(pathname); 89ea8dc4b6Seschrock if (file == (struct _buf *)-1) 90fa9e4066Sahrens return; 91fa9e4066Sahrens 92b1b8ab34Slling if (kobj_get_filesize(file, &fsize) != 0) 93fa9e4066Sahrens goto out; 94fa9e4066Sahrens 95b1b8ab34Slling buf = kmem_alloc(fsize, KM_SLEEP); 96fa9e4066Sahrens 97ea8dc4b6Seschrock /* 98ea8dc4b6Seschrock * Read the nvlist from the file. 99ea8dc4b6Seschrock */ 100b1b8ab34Slling if (kobj_read_file(file, buf, fsize, 0) < 0) 101fa9e4066Sahrens goto out; 102fa9e4066Sahrens 103fa9e4066Sahrens /* 104fa9e4066Sahrens * Unpack the nvlist. 105fa9e4066Sahrens */ 106b1b8ab34Slling if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) 107fa9e4066Sahrens goto out; 108fa9e4066Sahrens 109fa9e4066Sahrens /* 110fa9e4066Sahrens * Iterate over all elements in the nvlist, creating a new spa_t for 111fa9e4066Sahrens * each one with the specified configuration. 
112fa9e4066Sahrens */ 113fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 114fa9e4066Sahrens nvpair = NULL; 115fa9e4066Sahrens while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { 116fa9e4066Sahrens 117fa9e4066Sahrens if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) 118fa9e4066Sahrens continue; 119fa9e4066Sahrens 120fa9e4066Sahrens VERIFY(nvpair_value_nvlist(nvpair, &child) == 0); 121fa9e4066Sahrens 122fa9e4066Sahrens if (spa_lookup(nvpair_name(nvpair)) != NULL) 123fa9e4066Sahrens continue; 1240373e76bSbonwick spa = spa_add(nvpair_name(nvpair), NULL); 125fa9e4066Sahrens 126fa9e4066Sahrens /* 127fa9e4066Sahrens * We blindly duplicate the configuration here. If it's 128fa9e4066Sahrens * invalid, we will catch it when the pool is first opened. 129fa9e4066Sahrens */ 130fa9e4066Sahrens VERIFY(nvlist_dup(child, &spa->spa_config, 0) == 0); 131fa9e4066Sahrens } 132fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 133fa9e4066Sahrens 134fa9e4066Sahrens nvlist_free(nvlist); 135fa9e4066Sahrens 136fa9e4066Sahrens out: 137fa9e4066Sahrens if (buf != NULL) 138b1b8ab34Slling kmem_free(buf, fsize); 139fa9e4066Sahrens 140ea8dc4b6Seschrock kobj_close_file(file); 141fa9e4066Sahrens } 142fa9e4066Sahrens 143fa9e4066Sahrens /* 144fa9e4066Sahrens * Synchronize all pools to disk. This must be called with the namespace lock 145fa9e4066Sahrens * held. 
146fa9e4066Sahrens */ 147fa9e4066Sahrens void 148fa9e4066Sahrens spa_config_sync(void) 149fa9e4066Sahrens { 150fa9e4066Sahrens spa_t *spa = NULL; 151fa9e4066Sahrens nvlist_t *config; 152fa9e4066Sahrens size_t buflen; 153fa9e4066Sahrens char *buf; 154fa9e4066Sahrens vnode_t *vp; 155fa9e4066Sahrens int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; 156fa9e4066Sahrens char pathname[128]; 157fa9e4066Sahrens char pathname2[128]; 158fa9e4066Sahrens 159fa9e4066Sahrens ASSERT(MUTEX_HELD(&spa_namespace_lock)); 160fa9e4066Sahrens 161ea8dc4b6Seschrock VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); 162fa9e4066Sahrens 163fa9e4066Sahrens /* 164fa9e4066Sahrens * Add all known pools to the configuration list, ignoring those with 165fa9e4066Sahrens * alternate root paths. 166fa9e4066Sahrens */ 167fa9e4066Sahrens spa = NULL; 168fa9e4066Sahrens while ((spa = spa_next(spa)) != NULL) { 169fa9e4066Sahrens mutex_enter(&spa->spa_config_cache_lock); 170fa9e4066Sahrens if (spa->spa_config && spa->spa_name && spa->spa_root == NULL) 171fa9e4066Sahrens VERIFY(nvlist_add_nvlist(config, spa->spa_name, 172fa9e4066Sahrens spa->spa_config) == 0); 173fa9e4066Sahrens mutex_exit(&spa->spa_config_cache_lock); 174fa9e4066Sahrens } 175fa9e4066Sahrens 176fa9e4066Sahrens /* 177fa9e4066Sahrens * Pack the configuration into a buffer. 178fa9e4066Sahrens */ 179fa9e4066Sahrens VERIFY(nvlist_size(config, &buflen, NV_ENCODE_XDR) == 0); 180fa9e4066Sahrens 181fa9e4066Sahrens buf = kmem_alloc(buflen, KM_SLEEP); 182fa9e4066Sahrens 183ea8dc4b6Seschrock VERIFY(nvlist_pack(config, &buf, &buflen, NV_ENCODE_XDR, 184ea8dc4b6Seschrock KM_SLEEP) == 0); 185fa9e4066Sahrens 186fa9e4066Sahrens /* 187fa9e4066Sahrens * Write the configuration to disk. We need to do the traditional 188fa9e4066Sahrens * 'write to temporary file, sync, move over original' to make sure we 189fa9e4066Sahrens * always have a consistent view of the data. 
190fa9e4066Sahrens */ 191fa9e4066Sahrens (void) snprintf(pathname, sizeof (pathname), "%s/%s", spa_config_dir, 192fa9e4066Sahrens ZPOOL_CACHE_TMP); 193fa9e4066Sahrens 194fa9e4066Sahrens if (vn_open(pathname, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) != 0) 195fa9e4066Sahrens goto out; 196fa9e4066Sahrens 197fa9e4066Sahrens if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, 198fa9e4066Sahrens 0, RLIM64_INFINITY, kcred, NULL) == 0 && 199fa9e4066Sahrens VOP_FSYNC(vp, FSYNC, kcred) == 0) { 200fa9e4066Sahrens (void) snprintf(pathname2, sizeof (pathname2), "%s/%s", 201fa9e4066Sahrens spa_config_dir, ZPOOL_CACHE_FILE); 202fa9e4066Sahrens (void) vn_rename(pathname, pathname2, UIO_SYSSPACE); 203fa9e4066Sahrens } 204fa9e4066Sahrens 205fa9e4066Sahrens (void) VOP_CLOSE(vp, oflags, 1, 0, kcred); 206fa9e4066Sahrens VN_RELE(vp); 207fa9e4066Sahrens 208fa9e4066Sahrens out: 209fa9e4066Sahrens (void) vn_remove(pathname, UIO_SYSSPACE, RMFILE); 210fa9e4066Sahrens spa_config_generation++; 211fa9e4066Sahrens 212fa9e4066Sahrens kmem_free(buf, buflen); 213fa9e4066Sahrens nvlist_free(config); 214fa9e4066Sahrens } 215fa9e4066Sahrens 216fa9e4066Sahrens /* 2170373e76bSbonwick * Sigh. Inside a local zone, we don't have access to /etc/zfs/zpool.cache, 218fa9e4066Sahrens * and we don't want to allow the local zone to see all the pools anyway. 219fa9e4066Sahrens * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration 220fa9e4066Sahrens * information for all pool visible within the zone. 
221fa9e4066Sahrens */ 222fa9e4066Sahrens nvlist_t * 223fa9e4066Sahrens spa_all_configs(uint64_t *generation) 224fa9e4066Sahrens { 225fa9e4066Sahrens nvlist_t *pools; 226fa9e4066Sahrens spa_t *spa; 227fa9e4066Sahrens 228fa9e4066Sahrens if (*generation == spa_config_generation) 229fa9e4066Sahrens return (NULL); 230fa9e4066Sahrens 231ea8dc4b6Seschrock VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0); 232fa9e4066Sahrens 233fa9e4066Sahrens spa = NULL; 234fa9e4066Sahrens mutex_enter(&spa_namespace_lock); 235fa9e4066Sahrens while ((spa = spa_next(spa)) != NULL) { 236fa9e4066Sahrens if (INGLOBALZONE(curproc) || 237fa9e4066Sahrens zone_dataset_visible(spa_name(spa), NULL)) { 238fa9e4066Sahrens mutex_enter(&spa->spa_config_cache_lock); 239fa9e4066Sahrens VERIFY(nvlist_add_nvlist(pools, spa_name(spa), 240fa9e4066Sahrens spa->spa_config) == 0); 241fa9e4066Sahrens mutex_exit(&spa->spa_config_cache_lock); 242fa9e4066Sahrens } 243fa9e4066Sahrens } 244fa9e4066Sahrens mutex_exit(&spa_namespace_lock); 245fa9e4066Sahrens 246fa9e4066Sahrens *generation = spa_config_generation; 247fa9e4066Sahrens 248fa9e4066Sahrens return (pools); 249fa9e4066Sahrens } 250fa9e4066Sahrens 251fa9e4066Sahrens void 252fa9e4066Sahrens spa_config_set(spa_t *spa, nvlist_t *config) 253fa9e4066Sahrens { 254fa9e4066Sahrens mutex_enter(&spa->spa_config_cache_lock); 255fa9e4066Sahrens if (spa->spa_config != NULL) 256fa9e4066Sahrens nvlist_free(spa->spa_config); 257fa9e4066Sahrens spa->spa_config = config; 258fa9e4066Sahrens mutex_exit(&spa->spa_config_cache_lock); 259fa9e4066Sahrens } 260fa9e4066Sahrens 261fa9e4066Sahrens /* 262fa9e4066Sahrens * Generate the pool's configuration based on the current in-core state. 263fa9e4066Sahrens * We infer whether to generate a complete config or just one top-level config 264fa9e4066Sahrens * based on whether vd is the root vdev. 
265fa9e4066Sahrens */ 266fa9e4066Sahrens nvlist_t * 267fa9e4066Sahrens spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) 268fa9e4066Sahrens { 269fa9e4066Sahrens nvlist_t *config, *nvroot; 270fa9e4066Sahrens vdev_t *rvd = spa->spa_root_vdev; 27195173954Sek unsigned long hostid = 0; 272fa9e4066Sahrens 2730373e76bSbonwick ASSERT(spa_config_held(spa, RW_READER)); 2740373e76bSbonwick 275fa9e4066Sahrens if (vd == NULL) 276fa9e4066Sahrens vd = rvd; 277fa9e4066Sahrens 278fa9e4066Sahrens /* 279fa9e4066Sahrens * If txg is -1, report the current value of spa->spa_config_txg. 280fa9e4066Sahrens */ 281fa9e4066Sahrens if (txg == -1ULL) 282fa9e4066Sahrens txg = spa->spa_config_txg; 283fa9e4066Sahrens 284ea8dc4b6Seschrock VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); 285fa9e4066Sahrens 286fa9e4066Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, 28799653d4eSeschrock spa_version(spa)) == 0); 288fa9e4066Sahrens VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 289fa9e4066Sahrens spa_name(spa)) == 0); 290fa9e4066Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 291fa9e4066Sahrens spa_state(spa)) == 0); 292fa9e4066Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, 293fa9e4066Sahrens txg) == 0); 294fa9e4066Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, 295fa9e4066Sahrens spa_guid(spa)) == 0); 29695173954Sek (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); 297*17194a52Slling if (hostid != 0) { 298*17194a52Slling VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, 299*17194a52Slling hostid) == 0); 300*17194a52Slling } 30195173954Sek VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, 30295173954Sek utsname.nodename) == 0); 303fa9e4066Sahrens 304fa9e4066Sahrens if (vd != rvd) { 305fa9e4066Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, 306fa9e4066Sahrens vd->vdev_top->vdev_guid) == 0); 307fa9e4066Sahrens VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, 
308fa9e4066Sahrens vd->vdev_guid) == 0); 30999653d4eSeschrock if (vd->vdev_isspare) 31099653d4eSeschrock VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, 31199653d4eSeschrock 1ULL) == 0); 312fa9e4066Sahrens vd = vd->vdev_top; /* label contains top config */ 313fa9e4066Sahrens } 314fa9e4066Sahrens 31599653d4eSeschrock nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE); 316fa9e4066Sahrens VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 317fa9e4066Sahrens nvlist_free(nvroot); 318fa9e4066Sahrens 319fa9e4066Sahrens return (config); 320fa9e4066Sahrens } 3210373e76bSbonwick 3220373e76bSbonwick /* 3230373e76bSbonwick * Update all disk labels, generate a fresh config based on the current 3240373e76bSbonwick * in-core state, and sync the global config cache. 3250373e76bSbonwick */ 3260373e76bSbonwick void 3270373e76bSbonwick spa_config_update(spa_t *spa, int what) 3280373e76bSbonwick { 3290373e76bSbonwick vdev_t *rvd = spa->spa_root_vdev; 3300373e76bSbonwick uint64_t txg; 3310373e76bSbonwick int c; 3320373e76bSbonwick 3330373e76bSbonwick ASSERT(MUTEX_HELD(&spa_namespace_lock)); 3340373e76bSbonwick 3350373e76bSbonwick spa_config_enter(spa, RW_WRITER, FTAG); 3360373e76bSbonwick txg = spa_last_synced_txg(spa) + 1; 3370373e76bSbonwick if (what == SPA_CONFIG_UPDATE_POOL) { 3380373e76bSbonwick vdev_config_dirty(rvd); 3390373e76bSbonwick } else { 3400373e76bSbonwick /* 3410373e76bSbonwick * If we have top-level vdevs that were added but have 3420373e76bSbonwick * not yet been prepared for allocation, do that now. 3430373e76bSbonwick * (It's safe now because the config cache is up to date, 3440373e76bSbonwick * so it will be able to translate the new DVAs.) 3450373e76bSbonwick * See comments in spa_vdev_add() for full details. 
3460373e76bSbonwick */ 3470373e76bSbonwick for (c = 0; c < rvd->vdev_children; c++) { 3480373e76bSbonwick vdev_t *tvd = rvd->vdev_child[c]; 3490373e76bSbonwick if (tvd->vdev_ms_array == 0) { 3500373e76bSbonwick vdev_init(tvd, txg); 3510373e76bSbonwick vdev_config_dirty(tvd); 3520373e76bSbonwick } 3530373e76bSbonwick } 3540373e76bSbonwick } 3550373e76bSbonwick spa_config_exit(spa, FTAG); 3560373e76bSbonwick 3570373e76bSbonwick /* 3580373e76bSbonwick * Wait for the mosconfig to be regenerated and synced. 3590373e76bSbonwick */ 3600373e76bSbonwick txg_wait_synced(spa->spa_dsl_pool, txg); 3610373e76bSbonwick 3620373e76bSbonwick /* 3630373e76bSbonwick * Update the global config cache to reflect the new mosconfig. 3640373e76bSbonwick */ 3650373e76bSbonwick spa_config_sync(); 3660373e76bSbonwick 3670373e76bSbonwick if (what == SPA_CONFIG_UPDATE_POOL) 3680373e76bSbonwick spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); 3690373e76bSbonwick } 370