1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22ea8dc4b6Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/dmu_objset.h> 29fa9e4066Sahrens #include <sys/dsl_dataset.h> 30fa9e4066Sahrens #include <sys/dsl_dir.h> 31*99653d4eSeschrock #include <sys/dsl_prop.h> 32fa9e4066Sahrens #include <sys/dmu_traverse.h> 33fa9e4066Sahrens #include <sys/dmu_tx.h> 34fa9e4066Sahrens #include <sys/arc.h> 35fa9e4066Sahrens #include <sys/zio.h> 36fa9e4066Sahrens #include <sys/zap.h> 37fa9e4066Sahrens #include <sys/unique.h> 38fa9e4066Sahrens #include <sys/zfs_context.h> 39fa9e4066Sahrens 40e1930233Sbonwick static int dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, 41e1930233Sbonwick void *arg, dmu_tx_t *tx); 42e1930233Sbonwick 43fa9e4066Sahrens #define DOS_REF_MAX (1ULL << 62) 44fa9e4066Sahrens 45fa9e4066Sahrens #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 46fa9e4066Sahrens 47fa9e4066Sahrens /* 48fa9e4066Sahrens * We use weighted reference counts to express the various forms of exclusion 49fa9e4066Sahrens * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open 50fa9e4066Sahrens * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 51fa9e4066Sahrens * This makes the exclusion logic simple: the total refcnt for all opens cannot 52fa9e4066Sahrens * exceed DOS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 53fa9e4066Sahrens * weight (DOS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 54fa9e4066Sahrens * just over half of the refcnt space, so there can't be more than one, but it 55fa9e4066Sahrens * can peacefully coexist with any number of STANDARD opens. 56fa9e4066Sahrens */ 57fa9e4066Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 58fa9e4066Sahrens 0, /* DOS_MODE_NONE - invalid */ 59fa9e4066Sahrens 1, /* DOS_MODE_STANDARD - unlimited number */ 60fa9e4066Sahrens (DOS_REF_MAX >> 1) + 1, /* DOS_MODE_PRIMARY - only one of these */ 61fa9e4066Sahrens DOS_REF_MAX /* DOS_MODE_EXCLUSIVE - no other opens */ 62fa9e4066Sahrens }; 63fa9e4066Sahrens 64fa9e4066Sahrens 65fa9e4066Sahrens void 66fa9e4066Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 67fa9e4066Sahrens { 68*99653d4eSeschrock int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 69fa9e4066Sahrens int compressed = BP_GET_PSIZE(bp); 70fa9e4066Sahrens int uncompressed = BP_GET_UCSIZE(bp); 71fa9e4066Sahrens 72fa9e4066Sahrens dprintf_bp(bp, "born, ds=%p\n", ds); 73fa9e4066Sahrens 74fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 75fa9e4066Sahrens /* It could have been compressed away to nothing */ 76fa9e4066Sahrens if (BP_IS_HOLE(bp)) 77fa9e4066Sahrens return; 78fa9e4066Sahrens ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 79fa9e4066Sahrens ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 80fa9e4066Sahrens if (ds == NULL) { 81fa9e4066Sahrens /* 82fa9e4066Sahrens * Account for the meta-objset space in its placeholder 83fa9e4066Sahrens * dsl_dir. 84fa9e4066Sahrens */ 85fa9e4066Sahrens ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 86fa9e4066Sahrens dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 87fa9e4066Sahrens used, compressed, uncompressed, tx); 88fa9e4066Sahrens dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 89fa9e4066Sahrens return; 90fa9e4066Sahrens } 91fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 92fa9e4066Sahrens mutex_enter(&ds->ds_lock); 93fa9e4066Sahrens ds->ds_phys->ds_used_bytes += used; 94fa9e4066Sahrens ds->ds_phys->ds_compressed_bytes += compressed; 95fa9e4066Sahrens ds->ds_phys->ds_uncompressed_bytes += uncompressed; 96fa9e4066Sahrens ds->ds_phys->ds_unique_bytes += used; 97fa9e4066Sahrens mutex_exit(&ds->ds_lock); 98fa9e4066Sahrens dsl_dir_diduse_space(ds->ds_dir, 99fa9e4066Sahrens used, compressed, uncompressed, tx); 100fa9e4066Sahrens } 101fa9e4066Sahrens 102fa9e4066Sahrens void 103fa9e4066Sahrens dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 104fa9e4066Sahrens { 105*99653d4eSeschrock int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 106fa9e4066Sahrens int compressed = BP_GET_PSIZE(bp); 107fa9e4066Sahrens int uncompressed = BP_GET_UCSIZE(bp); 108fa9e4066Sahrens 109fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 110fa9e4066Sahrens if (BP_IS_HOLE(bp)) 111fa9e4066Sahrens return; 112fa9e4066Sahrens 113fa9e4066Sahrens ASSERT(used > 0); 114fa9e4066Sahrens if (ds == NULL) { 115fa9e4066Sahrens /* 116fa9e4066Sahrens * Account for the meta-objset space in its placeholder 117fa9e4066Sahrens * dataset. 118fa9e4066Sahrens */ 119fa9e4066Sahrens /* XXX this can fail, what do we do when it does? */ 120fa9e4066Sahrens (void) arc_free(NULL, tx->tx_pool->dp_spa, 121fa9e4066Sahrens tx->tx_txg, bp, NULL, NULL, ARC_WAIT); 122fa9e4066Sahrens bzero(bp, sizeof (blkptr_t)); 123fa9e4066Sahrens 124fa9e4066Sahrens dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 125fa9e4066Sahrens -used, -compressed, -uncompressed, tx); 126fa9e4066Sahrens dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 127fa9e4066Sahrens return; 128fa9e4066Sahrens } 129fa9e4066Sahrens ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 130fa9e4066Sahrens 131fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 132fa9e4066Sahrens 133fa9e4066Sahrens if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 134fa9e4066Sahrens dprintf_bp(bp, "freeing: %s", ""); 135fa9e4066Sahrens /* XXX check return code? */ 136fa9e4066Sahrens (void) arc_free(NULL, tx->tx_pool->dp_spa, 137fa9e4066Sahrens tx->tx_txg, bp, NULL, NULL, ARC_WAIT); 138fa9e4066Sahrens 139fa9e4066Sahrens mutex_enter(&ds->ds_lock); 140fa9e4066Sahrens /* XXX unique_bytes is not accurate for head datasets */ 141fa9e4066Sahrens /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ 142fa9e4066Sahrens ds->ds_phys->ds_unique_bytes -= used; 143fa9e4066Sahrens mutex_exit(&ds->ds_lock); 144fa9e4066Sahrens dsl_dir_diduse_space(ds->ds_dir, 145fa9e4066Sahrens -used, -compressed, -uncompressed, tx); 146fa9e4066Sahrens } else { 147fa9e4066Sahrens dprintf_bp(bp, "putting on dead list: %s", ""); 148ea8dc4b6Seschrock VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 149fa9e4066Sahrens /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 150fa9e4066Sahrens if (ds->ds_phys->ds_prev_snap_obj != 0) { 151fa9e4066Sahrens ASSERT3U(ds->ds_prev->ds_object, ==, 152fa9e4066Sahrens ds->ds_phys->ds_prev_snap_obj); 153fa9e4066Sahrens ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 154fa9e4066Sahrens if (ds->ds_prev->ds_phys->ds_next_snap_obj == 155*99653d4eSeschrock ds->ds_object && bp->blk_birth > 156fa9e4066Sahrens ds->ds_prev->ds_phys->ds_prev_snap_txg) { 157fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 158fa9e4066Sahrens mutex_enter(&ds->ds_prev->ds_lock); 159fa9e4066Sahrens ds->ds_prev->ds_phys->ds_unique_bytes += 160fa9e4066Sahrens used; 161fa9e4066Sahrens mutex_exit(&ds->ds_prev->ds_lock); 162fa9e4066Sahrens } 163fa9e4066Sahrens } 164fa9e4066Sahrens } 165fa9e4066Sahrens bzero(bp, sizeof (blkptr_t)); 166fa9e4066Sahrens mutex_enter(&ds->ds_lock); 167fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 168fa9e4066Sahrens ds->ds_phys->ds_used_bytes -= used; 169fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 170fa9e4066Sahrens ds->ds_phys->ds_compressed_bytes -= compressed; 171fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 172fa9e4066Sahrens ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 173fa9e4066Sahrens mutex_exit(&ds->ds_lock); 174fa9e4066Sahrens } 175fa9e4066Sahrens 176ea8dc4b6Seschrock uint64_t 177ea8dc4b6Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 178fa9e4066Sahrens { 179ea8dc4b6Seschrock uint64_t txg; 180fa9e4066Sahrens dsl_dir_t *dd; 181ea8dc4b6Seschrock 182fa9e4066Sahrens if (ds == NULL) 183ea8dc4b6Seschrock return (0); 184fa9e4066Sahrens /* 185fa9e4066Sahrens * The snapshot creation could fail, but that would cause an 186fa9e4066Sahrens * incorrect FALSE return, which would only result in an 187fa9e4066Sahrens * overestimation of the amount of space that an operation would 188fa9e4066Sahrens * consume, which is OK. 189fa9e4066Sahrens * 190fa9e4066Sahrens * There's also a small window where we could miss a pending 191fa9e4066Sahrens * snapshot, because we could set the sync task in the quiescing 192fa9e4066Sahrens * phase. So this should only be used as a guess. 193fa9e4066Sahrens */ 194fa9e4066Sahrens dd = ds->ds_dir; 195fa9e4066Sahrens mutex_enter(&dd->dd_lock); 196ea8dc4b6Seschrock if (dd->dd_sync_func == dsl_dataset_snapshot_sync) 197ea8dc4b6Seschrock txg = dd->dd_sync_txg; 198fa9e4066Sahrens else 199ea8dc4b6Seschrock txg = ds->ds_phys->ds_prev_snap_txg; 200fa9e4066Sahrens mutex_exit(&dd->dd_lock); 201ea8dc4b6Seschrock 202ea8dc4b6Seschrock return (txg); 203ea8dc4b6Seschrock } 204ea8dc4b6Seschrock 205ea8dc4b6Seschrock int 206ea8dc4b6Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 207ea8dc4b6Seschrock { 208ea8dc4b6Seschrock return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 209fa9e4066Sahrens } 210fa9e4066Sahrens 211fa9e4066Sahrens /* ARGSUSED */ 212fa9e4066Sahrens static void 213fa9e4066Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv) 214fa9e4066Sahrens { 215fa9e4066Sahrens dsl_dataset_t *ds = dsv; 216fa9e4066Sahrens dsl_pool_t *dp = ds->ds_dir->dd_pool; 217fa9e4066Sahrens 218fa9e4066Sahrens /* open_refcount == DOS_REF_MAX when deleting */ 219fa9e4066Sahrens ASSERT(ds->ds_open_refcount == 0 || 220fa9e4066Sahrens ds->ds_open_refcount == DOS_REF_MAX); 221fa9e4066Sahrens 222fa9e4066Sahrens dprintf_ds(ds, "evicting %s\n", ""); 223fa9e4066Sahrens 224fa9e4066Sahrens unique_remove(ds->ds_phys->ds_fsid_guid); 225fa9e4066Sahrens 226fa9e4066Sahrens if (ds->ds_user_ptr != NULL) 227fa9e4066Sahrens ds->ds_user_evict_func(ds, ds->ds_user_ptr); 228fa9e4066Sahrens 229fa9e4066Sahrens if (ds->ds_prev) { 230fa9e4066Sahrens dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 231fa9e4066Sahrens ds->ds_prev = NULL; 232fa9e4066Sahrens } 233fa9e4066Sahrens 234fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 235fa9e4066Sahrens dsl_dir_close(ds->ds_dir, ds); 236fa9e4066Sahrens 237fa9e4066Sahrens if (list_link_active(&ds->ds_synced_link)) 238fa9e4066Sahrens list_remove(&dp->dp_synced_objsets, ds); 239fa9e4066Sahrens 240fa9e4066Sahrens kmem_free(ds, sizeof (dsl_dataset_t)); 241fa9e4066Sahrens } 242fa9e4066Sahrens 243ea8dc4b6Seschrock static int 244fa9e4066Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds) 245fa9e4066Sahrens { 246fa9e4066Sahrens dsl_dataset_phys_t *headphys; 247fa9e4066Sahrens int err; 248fa9e4066Sahrens dmu_buf_t *headdbuf; 249fa9e4066Sahrens dsl_pool_t *dp = ds->ds_dir->dd_pool; 250fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 251fa9e4066Sahrens 252fa9e4066Sahrens if (ds->ds_snapname[0]) 253ea8dc4b6Seschrock return (0); 254fa9e4066Sahrens if (ds->ds_phys->ds_next_snap_obj == 0) 255ea8dc4b6Seschrock return (0); 256fa9e4066Sahrens 257ea8dc4b6Seschrock err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 258ea8dc4b6Seschrock FTAG, &headdbuf); 259ea8dc4b6Seschrock if (err) 260ea8dc4b6Seschrock return (err); 261fa9e4066Sahrens headphys = headdbuf->db_data; 262fa9e4066Sahrens err = zap_value_search(dp->dp_meta_objset, 263fa9e4066Sahrens headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname); 264ea8dc4b6Seschrock dmu_buf_rele(headdbuf, FTAG); 265ea8dc4b6Seschrock return (err); 266fa9e4066Sahrens } 267fa9e4066Sahrens 268ea8dc4b6Seschrock int 269fa9e4066Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 270ea8dc4b6Seschrock int mode, void *tag, dsl_dataset_t **dsp) 271fa9e4066Sahrens { 272fa9e4066Sahrens uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 273fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 274fa9e4066Sahrens dmu_buf_t *dbuf; 275fa9e4066Sahrens dsl_dataset_t *ds; 276ea8dc4b6Seschrock int err; 277fa9e4066Sahrens 278fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 279fa9e4066Sahrens dsl_pool_sync_context(dp)); 280fa9e4066Sahrens 281ea8dc4b6Seschrock err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 282ea8dc4b6Seschrock if (err) 283ea8dc4b6Seschrock return (err); 284fa9e4066Sahrens ds = dmu_buf_get_user(dbuf); 285fa9e4066Sahrens if (ds == NULL) { 286fa9e4066Sahrens dsl_dataset_t *winner; 287fa9e4066Sahrens 288fa9e4066Sahrens ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 289fa9e4066Sahrens ds->ds_dbuf = dbuf; 290fa9e4066Sahrens ds->ds_object = dsobj; 291fa9e4066Sahrens ds->ds_phys = dbuf->db_data; 292fa9e4066Sahrens 293ea8dc4b6Seschrock err = bplist_open(&ds->ds_deadlist, 294fa9e4066Sahrens mos, ds->ds_phys->ds_deadlist_obj); 295ea8dc4b6Seschrock if (err == 0) { 296ea8dc4b6Seschrock err = dsl_dir_open_obj(dp, 297ea8dc4b6Seschrock ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 298ea8dc4b6Seschrock } 299ea8dc4b6Seschrock if (err) { 300ea8dc4b6Seschrock /* 301ea8dc4b6Seschrock * we don't really need to close the blist if we 302ea8dc4b6Seschrock * just opened it. 303ea8dc4b6Seschrock */ 304ea8dc4b6Seschrock kmem_free(ds, sizeof (dsl_dataset_t)); 305ea8dc4b6Seschrock dmu_buf_rele(dbuf, tag); 306ea8dc4b6Seschrock return (err); 307ea8dc4b6Seschrock } 308fa9e4066Sahrens 309fa9e4066Sahrens if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 310fa9e4066Sahrens ds->ds_snapname[0] = '\0'; 311fa9e4066Sahrens if (ds->ds_phys->ds_prev_snap_obj) { 312ea8dc4b6Seschrock err = dsl_dataset_open_obj(dp, 313fa9e4066Sahrens ds->ds_phys->ds_prev_snap_obj, NULL, 314ea8dc4b6Seschrock DS_MODE_NONE, ds, &ds->ds_prev); 315fa9e4066Sahrens } 316fa9e4066Sahrens } else { 317fa9e4066Sahrens if (snapname) { 318fa9e4066Sahrens #ifdef ZFS_DEBUG 319fa9e4066Sahrens dsl_dataset_phys_t *headphys; 320ea8dc4b6Seschrock dmu_buf_t *headdbuf; 321ea8dc4b6Seschrock err = dmu_bonus_hold(mos, 322ea8dc4b6Seschrock ds->ds_dir->dd_phys->dd_head_dataset_obj, 323ea8dc4b6Seschrock FTAG, &headdbuf); 324ea8dc4b6Seschrock if (err == 0) { 325ea8dc4b6Seschrock headphys = headdbuf->db_data; 326ea8dc4b6Seschrock uint64_t foundobj; 327ea8dc4b6Seschrock err = zap_lookup(dp->dp_meta_objset, 328ea8dc4b6Seschrock headphys->ds_snapnames_zapobj, 329ea8dc4b6Seschrock snapname, sizeof (foundobj), 1, 330ea8dc4b6Seschrock &foundobj); 331ea8dc4b6Seschrock ASSERT3U(foundobj, ==, dsobj); 332ea8dc4b6Seschrock dmu_buf_rele(headdbuf, FTAG); 333ea8dc4b6Seschrock } 334fa9e4066Sahrens #endif 335fa9e4066Sahrens (void) strcat(ds->ds_snapname, snapname); 336fa9e4066Sahrens } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 337ea8dc4b6Seschrock err = dsl_dataset_get_snapname(ds); 338fa9e4066Sahrens } 339fa9e4066Sahrens } 340fa9e4066Sahrens 341ea8dc4b6Seschrock if (err == 0) { 342ea8dc4b6Seschrock winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 343ea8dc4b6Seschrock dsl_dataset_evict); 344ea8dc4b6Seschrock } 345ea8dc4b6Seschrock if (err || winner) { 346fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 347fa9e4066Sahrens if (ds->ds_prev) { 348fa9e4066Sahrens dsl_dataset_close(ds->ds_prev, 349fa9e4066Sahrens DS_MODE_NONE, ds); 350fa9e4066Sahrens } 351fa9e4066Sahrens dsl_dir_close(ds->ds_dir, ds); 352fa9e4066Sahrens kmem_free(ds, sizeof (dsl_dataset_t)); 353ea8dc4b6Seschrock if (err) { 354ea8dc4b6Seschrock dmu_buf_rele(dbuf, tag); 355ea8dc4b6Seschrock return (err); 356ea8dc4b6Seschrock } 357fa9e4066Sahrens ds = winner; 358fa9e4066Sahrens } else { 359fa9e4066Sahrens uint64_t new = 360fa9e4066Sahrens unique_insert(ds->ds_phys->ds_fsid_guid); 361fa9e4066Sahrens if (new != ds->ds_phys->ds_fsid_guid) { 362fa9e4066Sahrens /* XXX it won't necessarily be synced... */ 363fa9e4066Sahrens ds->ds_phys->ds_fsid_guid = new; 364fa9e4066Sahrens } 365fa9e4066Sahrens } 366fa9e4066Sahrens } 367fa9e4066Sahrens ASSERT3P(ds->ds_dbuf, ==, dbuf); 368fa9e4066Sahrens ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 369fa9e4066Sahrens 370fa9e4066Sahrens mutex_enter(&ds->ds_lock); 371fa9e4066Sahrens if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 372*99653d4eSeschrock (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && 373*99653d4eSeschrock !DS_MODE_IS_INCONSISTENT(mode)) || 374fa9e4066Sahrens (ds->ds_open_refcount + weight > DOS_REF_MAX)) { 375fa9e4066Sahrens mutex_exit(&ds->ds_lock); 376fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, tag); 377ea8dc4b6Seschrock return (EBUSY); 378fa9e4066Sahrens } 379fa9e4066Sahrens ds->ds_open_refcount += weight; 380fa9e4066Sahrens mutex_exit(&ds->ds_lock); 381fa9e4066Sahrens 382ea8dc4b6Seschrock *dsp = ds; 383ea8dc4b6Seschrock return (0); 384fa9e4066Sahrens } 385fa9e4066Sahrens 386fa9e4066Sahrens int 387fa9e4066Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 388fa9e4066Sahrens void *tag, dsl_dataset_t **dsp) 389fa9e4066Sahrens { 390fa9e4066Sahrens dsl_dir_t *dd; 391fa9e4066Sahrens dsl_pool_t *dp; 392fa9e4066Sahrens const char *tail; 393fa9e4066Sahrens uint64_t obj; 394fa9e4066Sahrens dsl_dataset_t *ds = NULL; 395fa9e4066Sahrens int err = 0; 396fa9e4066Sahrens 397ea8dc4b6Seschrock err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); 398ea8dc4b6Seschrock if (err) 399ea8dc4b6Seschrock return (err); 400fa9e4066Sahrens 401fa9e4066Sahrens dp = dd->dd_pool; 402fa9e4066Sahrens obj = dd->dd_phys->dd_head_dataset_obj; 403fa9e4066Sahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 404fa9e4066Sahrens if (obj == 0) { 405fa9e4066Sahrens /* A dataset with no associated objset */ 406fa9e4066Sahrens err = ENOENT; 407fa9e4066Sahrens goto out; 408fa9e4066Sahrens } 409fa9e4066Sahrens 410fa9e4066Sahrens if (tail != NULL) { 411fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 412fa9e4066Sahrens 413ea8dc4b6Seschrock err = dsl_dataset_open_obj(dp, obj, NULL, 414ea8dc4b6Seschrock DS_MODE_NONE, tag, &ds); 415ea8dc4b6Seschrock if (err) 416ea8dc4b6Seschrock goto out; 417fa9e4066Sahrens obj = ds->ds_phys->ds_snapnames_zapobj; 418fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, tag); 419fa9e4066Sahrens ds = NULL; 420fa9e4066Sahrens 421fa9e4066Sahrens if (tail[0] != '@') { 422fa9e4066Sahrens err = ENOENT; 423fa9e4066Sahrens goto out; 424fa9e4066Sahrens } 425fa9e4066Sahrens tail++; 426fa9e4066Sahrens 427fa9e4066Sahrens /* Look for a snapshot */ 428fa9e4066Sahrens if (!DS_MODE_IS_READONLY(mode)) { 429fa9e4066Sahrens err = EROFS; 430fa9e4066Sahrens goto out; 431fa9e4066Sahrens } 432fa9e4066Sahrens dprintf("looking for snapshot '%s'\n", tail); 433fa9e4066Sahrens err = zap_lookup(mos, obj, tail, 8, 1, &obj); 434fa9e4066Sahrens if (err) 435fa9e4066Sahrens goto out; 436fa9e4066Sahrens } 437ea8dc4b6Seschrock err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); 438fa9e4066Sahrens 439fa9e4066Sahrens out: 440fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 441fa9e4066Sahrens dsl_dir_close(dd, FTAG); 442fa9e4066Sahrens 443fa9e4066Sahrens ASSERT3U((err == 0), ==, (ds != NULL)); 444fa9e4066Sahrens /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 445fa9e4066Sahrens 446fa9e4066Sahrens *dsp = ds; 447fa9e4066Sahrens return (err); 448fa9e4066Sahrens } 449fa9e4066Sahrens 450fa9e4066Sahrens int 451fa9e4066Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 452fa9e4066Sahrens { 453fa9e4066Sahrens return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 454fa9e4066Sahrens } 455fa9e4066Sahrens 456fa9e4066Sahrens void 457fa9e4066Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name) 458fa9e4066Sahrens { 459fa9e4066Sahrens if (ds == NULL) { 460fa9e4066Sahrens (void) strcpy(name, "mos"); 461fa9e4066Sahrens } else { 462fa9e4066Sahrens dsl_dir_name(ds->ds_dir, name); 463ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_get_snapname(ds)); 464fa9e4066Sahrens if (ds->ds_snapname[0]) { 465fa9e4066Sahrens (void) strcat(name, "@"); 466fa9e4066Sahrens if (!MUTEX_HELD(&ds->ds_lock)) { 467fa9e4066Sahrens /* 468fa9e4066Sahrens * We use a "recursive" mutex so that we 469fa9e4066Sahrens * can call dprintf_ds() with ds_lock held. 470fa9e4066Sahrens */ 471fa9e4066Sahrens mutex_enter(&ds->ds_lock); 472fa9e4066Sahrens (void) strcat(name, ds->ds_snapname); 473fa9e4066Sahrens mutex_exit(&ds->ds_lock); 474fa9e4066Sahrens } else { 475fa9e4066Sahrens (void) strcat(name, ds->ds_snapname); 476fa9e4066Sahrens } 477fa9e4066Sahrens } 478fa9e4066Sahrens } 479fa9e4066Sahrens } 480fa9e4066Sahrens 481fa9e4066Sahrens void 482fa9e4066Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 483fa9e4066Sahrens { 484fa9e4066Sahrens uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 485fa9e4066Sahrens mutex_enter(&ds->ds_lock); 486fa9e4066Sahrens ASSERT3U(ds->ds_open_refcount, >=, weight); 487fa9e4066Sahrens ds->ds_open_refcount -= weight; 488fa9e4066Sahrens dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 489fa9e4066Sahrens mode, ds->ds_open_refcount); 490fa9e4066Sahrens mutex_exit(&ds->ds_lock); 491fa9e4066Sahrens 492ea8dc4b6Seschrock dmu_buf_rele(ds->ds_dbuf, tag); 493fa9e4066Sahrens } 494fa9e4066Sahrens 495fa9e4066Sahrens void 496fa9e4066Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 497fa9e4066Sahrens { 498fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 499fa9e4066Sahrens dmu_buf_t *dbuf; 500fa9e4066Sahrens dsl_dataset_phys_t *dsphys; 501fa9e4066Sahrens dsl_dataset_t *ds; 502fa9e4066Sahrens uint64_t dsobj; 503fa9e4066Sahrens dsl_dir_t *dd; 504fa9e4066Sahrens 505fa9e4066Sahrens dsl_dir_create_root(mos, ddobjp, tx); 506ea8dc4b6Seschrock VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); 507fa9e4066Sahrens 5081649cd4bStabriz dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 5091649cd4bStabriz DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 510ea8dc4b6Seschrock VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 511fa9e4066Sahrens dmu_buf_will_dirty(dbuf, tx); 512fa9e4066Sahrens dsphys = dbuf->db_data; 513fa9e4066Sahrens dsphys->ds_dir_obj = dd->dd_object; 514fa9e4066Sahrens dsphys->ds_fsid_guid = unique_create(); 515ea8dc4b6Seschrock unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 516fa9e4066Sahrens (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 517fa9e4066Sahrens sizeof (dsphys->ds_guid)); 518fa9e4066Sahrens dsphys->ds_snapnames_zapobj = 51987e5029aSahrens zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 520fa9e4066Sahrens dsphys->ds_creation_time = gethrestime_sec(); 521fa9e4066Sahrens dsphys->ds_creation_txg = tx->tx_txg; 522fa9e4066Sahrens dsphys->ds_deadlist_obj = 523fa9e4066Sahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 524ea8dc4b6Seschrock dmu_buf_rele(dbuf, FTAG); 525fa9e4066Sahrens 526fa9e4066Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 527fa9e4066Sahrens dd->dd_phys->dd_head_dataset_obj = dsobj; 528fa9e4066Sahrens dsl_dir_close(dd, FTAG); 529fa9e4066Sahrens 530ea8dc4b6Seschrock VERIFY(0 == 531ea8dc4b6Seschrock dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); 532fa9e4066Sahrens (void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx); 533fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 534fa9e4066Sahrens } 535fa9e4066Sahrens 536fa9e4066Sahrens int 537fa9e4066Sahrens dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname, 538fa9e4066Sahrens const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) 539fa9e4066Sahrens { 540fa9e4066Sahrens int err; 541fa9e4066Sahrens dsl_pool_t *dp = pds->dd_pool; 542fa9e4066Sahrens dmu_buf_t *dbuf; 543fa9e4066Sahrens dsl_dataset_phys_t *dsphys; 544fa9e4066Sahrens uint64_t dsobj; 545fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 546fa9e4066Sahrens dsl_dir_t *dd; 547fa9e4066Sahrens 548fa9e4066Sahrens if (clone_parent != NULL) { 549fa9e4066Sahrens /* 550fa9e4066Sahrens * You can't clone across pools. 551fa9e4066Sahrens */ 552fa9e4066Sahrens if (clone_parent->ds_dir->dd_pool != dp) 553fa9e4066Sahrens return (EXDEV); 554fa9e4066Sahrens 555fa9e4066Sahrens /* 556fa9e4066Sahrens * You can only clone snapshots, not the head datasets. 557fa9e4066Sahrens */ 558fa9e4066Sahrens if (clone_parent->ds_phys->ds_num_children == 0) 559fa9e4066Sahrens return (EINVAL); 560fa9e4066Sahrens } 561fa9e4066Sahrens 562fa9e4066Sahrens ASSERT(lastname[0] != '@'); 563fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 564fa9e4066Sahrens 565fa9e4066Sahrens err = dsl_dir_create_sync(pds, lastname, tx); 566fa9e4066Sahrens if (err) 567fa9e4066Sahrens return (err); 568ea8dc4b6Seschrock VERIFY(0 == dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, &dd, NULL)); 569fa9e4066Sahrens 570fa9e4066Sahrens /* This is the point of no (unsuccessful) return */ 571fa9e4066Sahrens 5721649cd4bStabriz dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 5731649cd4bStabriz DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 574ea8dc4b6Seschrock VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 575fa9e4066Sahrens dmu_buf_will_dirty(dbuf, tx); 576fa9e4066Sahrens dsphys = dbuf->db_data; 577fa9e4066Sahrens dsphys->ds_dir_obj = dd->dd_object; 578fa9e4066Sahrens dsphys->ds_fsid_guid = unique_create(); 579fa9e4066Sahrens unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 580fa9e4066Sahrens (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 581fa9e4066Sahrens sizeof (dsphys->ds_guid)); 582fa9e4066Sahrens dsphys->ds_snapnames_zapobj = 58387e5029aSahrens zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 584fa9e4066Sahrens dsphys->ds_creation_time = gethrestime_sec(); 585fa9e4066Sahrens dsphys->ds_creation_txg = tx->tx_txg; 586fa9e4066Sahrens dsphys->ds_deadlist_obj = 587fa9e4066Sahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 588fa9e4066Sahrens if (clone_parent) { 589fa9e4066Sahrens dsphys->ds_prev_snap_obj = clone_parent->ds_object; 590fa9e4066Sahrens dsphys->ds_prev_snap_txg = 591fa9e4066Sahrens clone_parent->ds_phys->ds_creation_txg; 592fa9e4066Sahrens dsphys->ds_used_bytes = 593fa9e4066Sahrens clone_parent->ds_phys->ds_used_bytes; 594fa9e4066Sahrens dsphys->ds_compressed_bytes = 595fa9e4066Sahrens clone_parent->ds_phys->ds_compressed_bytes; 596fa9e4066Sahrens dsphys->ds_uncompressed_bytes = 597fa9e4066Sahrens clone_parent->ds_phys->ds_uncompressed_bytes; 598fa9e4066Sahrens dsphys->ds_bp = clone_parent->ds_phys->ds_bp; 599fa9e4066Sahrens 600fa9e4066Sahrens dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); 601fa9e4066Sahrens clone_parent->ds_phys->ds_num_children++; 602fa9e4066Sahrens 603fa9e4066Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 604fa9e4066Sahrens dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; 605fa9e4066Sahrens } 606ea8dc4b6Seschrock dmu_buf_rele(dbuf, FTAG); 607fa9e4066Sahrens 608fa9e4066Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 609fa9e4066Sahrens dd->dd_phys->dd_head_dataset_obj = dsobj; 610fa9e4066Sahrens dsl_dir_close(dd, FTAG); 611fa9e4066Sahrens 612fa9e4066Sahrens return (0); 613fa9e4066Sahrens } 614fa9e4066Sahrens 615fa9e4066Sahrens int 616fa9e4066Sahrens dsl_dataset_destroy(const char *name) 617fa9e4066Sahrens { 618fa9e4066Sahrens int err; 619fa9e4066Sahrens dsl_pool_t *dp; 620fa9e4066Sahrens dsl_dir_t *dd; 621fa9e4066Sahrens const char *tail; 622fa9e4066Sahrens 623ea8dc4b6Seschrock err = dsl_dir_open(name, FTAG, &dd, &tail); 624ea8dc4b6Seschrock if (err) 625ea8dc4b6Seschrock return (err); 626fa9e4066Sahrens 627fa9e4066Sahrens dp = dd->dd_pool; 628fa9e4066Sahrens if (tail != NULL) { 629fa9e4066Sahrens if (tail[0] != '@') { 630fa9e4066Sahrens dsl_dir_close(dd, FTAG); 631fa9e4066Sahrens return (ENOENT); 632fa9e4066Sahrens } 633fa9e4066Sahrens tail++; 634fa9e4066Sahrens /* Just blow away the snapshot */ 635fa9e4066Sahrens do { 636fa9e4066Sahrens txg_wait_synced(dp, 0); 637fa9e4066Sahrens err = dsl_dir_sync_task(dd, 638fa9e4066Sahrens dsl_dataset_destroy_sync, (void*)tail, 0); 639fa9e4066Sahrens } while (err == EAGAIN); 640fa9e4066Sahrens dsl_dir_close(dd, FTAG); 641fa9e4066Sahrens } else { 642fa9e4066Sahrens char buf[MAXNAMELEN]; 643fa9e4066Sahrens char *cp; 644e1930233Sbonwick objset_t *os; 645e1930233Sbonwick uint64_t obj; 646fa9e4066Sahrens dsl_dir_t *pds; 647e1930233Sbonwick 648fa9e4066Sahrens if (dd->dd_phys->dd_parent_obj == 0) { 649fa9e4066Sahrens dsl_dir_close(dd, FTAG); 650fa9e4066Sahrens return (EINVAL); 651fa9e4066Sahrens } 652e1930233Sbonwick 653e1930233Sbonwick err = dmu_objset_open(name, DMU_OST_ANY, 654e1930233Sbonwick DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os); 655e1930233Sbonwick if (err) { 656e1930233Sbonwick dsl_dir_close(dd, FTAG); 657e1930233Sbonwick return (err); 658e1930233Sbonwick } 659e1930233Sbonwick 660e1930233Sbonwick /* 661e1930233Sbonwick * Check for errors and mark this ds as inconsistent, in 662e1930233Sbonwick * case we crash while freeing the objects. 663e1930233Sbonwick */ 664e1930233Sbonwick err = dsl_dir_sync_task(os->os->os_dsl_dataset->ds_dir, 665e1930233Sbonwick dsl_dataset_destroy_begin_sync, os->os->os_dsl_dataset, 0); 666e1930233Sbonwick if (err) { 667e1930233Sbonwick dmu_objset_close(os); 668e1930233Sbonwick dsl_dir_close(dd, FTAG); 669e1930233Sbonwick return (err); 670e1930233Sbonwick } 671e1930233Sbonwick 672fa9e4066Sahrens /* 673e1930233Sbonwick * remove the objects in open context, so that we won't 674e1930233Sbonwick * have too much to do in syncing context. 675fa9e4066Sahrens */ 676e1930233Sbonwick for (obj = 0; err == 0; 677e1930233Sbonwick err = dmu_object_next(os, &obj, FALSE)) { 678e1930233Sbonwick dmu_tx_t *tx = dmu_tx_create(os); 679e1930233Sbonwick dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 680e1930233Sbonwick dmu_tx_hold_bonus(tx, obj); 681e1930233Sbonwick err = dmu_tx_assign(tx, TXG_WAIT); 682e1930233Sbonwick if (err) { 683e1930233Sbonwick /* 684e1930233Sbonwick * Perhaps there is not enough disk 685e1930233Sbonwick * space. Just deal with it from 686e1930233Sbonwick * dsl_dataset_destroy_sync(). 687e1930233Sbonwick */ 688e1930233Sbonwick dmu_tx_abort(tx); 689e1930233Sbonwick continue; 690e1930233Sbonwick } 691e1930233Sbonwick VERIFY(0 == dmu_object_free(os, obj, tx)); 692e1930233Sbonwick dmu_tx_commit(tx); 693e1930233Sbonwick } 69431fd60d3Sahrens /* Make sure it's not dirty before we finish destroying it. */ 69531fd60d3Sahrens txg_wait_synced(dd->dd_pool, 0); 69631fd60d3Sahrens 697e1930233Sbonwick dmu_objset_close(os); 698e1930233Sbonwick if (err != ESRCH) { 699e1930233Sbonwick dsl_dir_close(dd, FTAG); 700e1930233Sbonwick return (err); 701e1930233Sbonwick } 702e1930233Sbonwick 703fa9e4066Sahrens /* 704fa9e4066Sahrens * Blow away the dsl_dir + head dataset. 705fa9e4066Sahrens * dsl_dir_destroy_sync() will call 706fa9e4066Sahrens * dsl_dataset_destroy_sync() to destroy the head dataset. 707fa9e4066Sahrens */ 708fa9e4066Sahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 709ea8dc4b6Seschrock err = dsl_dir_open_obj(dd->dd_pool, 710ea8dc4b6Seschrock dd->dd_phys->dd_parent_obj, NULL, FTAG, &pds); 711fa9e4066Sahrens dsl_dir_close(dd, FTAG); 712fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 713ea8dc4b6Seschrock if (err) 714ea8dc4b6Seschrock return (err); 715fa9e4066Sahrens 716fa9e4066Sahrens (void) strcpy(buf, name); 717fa9e4066Sahrens cp = strrchr(buf, '/') + 1; 718fa9e4066Sahrens ASSERT(cp[0] != '\0'); 719fa9e4066Sahrens do { 720fa9e4066Sahrens txg_wait_synced(dp, 0); 721fa9e4066Sahrens err = dsl_dir_sync_task(pds, 722fa9e4066Sahrens dsl_dir_destroy_sync, cp, 0); 723fa9e4066Sahrens } while (err == EAGAIN); 724fa9e4066Sahrens dsl_dir_close(pds, FTAG); 725fa9e4066Sahrens } 726fa9e4066Sahrens 727fa9e4066Sahrens return (err); 728fa9e4066Sahrens } 729fa9e4066Sahrens 730fa9e4066Sahrens int 731fa9e4066Sahrens dsl_dataset_rollback(const char *name) 732fa9e4066Sahrens { 733fa9e4066Sahrens int err; 734fa9e4066Sahrens dsl_dir_t *dd; 735fa9e4066Sahrens const char *tail; 736fa9e4066Sahrens 737ea8dc4b6Seschrock err = dsl_dir_open(name, FTAG, &dd, &tail); 738ea8dc4b6Seschrock if (err) 739ea8dc4b6Seschrock return (err); 740fa9e4066Sahrens 741fa9e4066Sahrens if (tail != NULL) { 742fa9e4066Sahrens dsl_dir_close(dd, FTAG); 743fa9e4066Sahrens return (EINVAL); 744fa9e4066Sahrens } 745fa9e4066Sahrens do { 746fa9e4066Sahrens txg_wait_synced(dd->dd_pool, 0); 747fa9e4066Sahrens err = dsl_dir_sync_task(dd, 748fa9e4066Sahrens dsl_dataset_rollback_sync, NULL, 0); 749fa9e4066Sahrens } while (err == EAGAIN); 750fa9e4066Sahrens dsl_dir_close(dd, FTAG); 751fa9e4066Sahrens 752fa9e4066Sahrens return (err); 753fa9e4066Sahrens } 754fa9e4066Sahrens 755fa9e4066Sahrens void * 756fa9e4066Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 757fa9e4066Sahrens void *p, dsl_dataset_evict_func_t func) 758fa9e4066Sahrens { 759fa9e4066Sahrens void *old; 760fa9e4066Sahrens 761fa9e4066Sahrens mutex_enter(&ds->ds_lock); 762fa9e4066Sahrens old = ds->ds_user_ptr; 763fa9e4066Sahrens if (old == NULL) { 764fa9e4066Sahrens ds->ds_user_ptr = p; 765fa9e4066Sahrens ds->ds_user_evict_func = func; 766fa9e4066Sahrens } 767fa9e4066Sahrens mutex_exit(&ds->ds_lock); 768fa9e4066Sahrens return (old); 769fa9e4066Sahrens } 770fa9e4066Sahrens 771fa9e4066Sahrens void * 772fa9e4066Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 773fa9e4066Sahrens { 774fa9e4066Sahrens return (ds->ds_user_ptr); 775fa9e4066Sahrens } 776fa9e4066Sahrens 777fa9e4066Sahrens 778fa9e4066Sahrens void 779fa9e4066Sahrens dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp) 780fa9e4066Sahrens { 781fa9e4066Sahrens *bp = ds->ds_phys->ds_bp; 782fa9e4066Sahrens } 783fa9e4066Sahrens 784fa9e4066Sahrens void 785fa9e4066Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 786fa9e4066Sahrens { 787fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 788fa9e4066Sahrens /* If it's the meta-objset, set dp_meta_rootbp */ 789fa9e4066Sahrens if (ds == NULL) { 790fa9e4066Sahrens tx->tx_pool->dp_meta_rootbp = *bp; 791fa9e4066Sahrens } else { 792fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 793fa9e4066Sahrens ds->ds_phys->ds_bp = *bp; 794fa9e4066Sahrens } 795fa9e4066Sahrens } 796fa9e4066Sahrens 797fa9e4066Sahrens spa_t * 798fa9e4066Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds) 799fa9e4066Sahrens { 800fa9e4066Sahrens return (ds->ds_dir->dd_pool->dp_spa); 801fa9e4066Sahrens } 802fa9e4066Sahrens 803fa9e4066Sahrens void 804fa9e4066Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 805fa9e4066Sahrens { 806fa9e4066Sahrens dsl_pool_t *dp; 807fa9e4066Sahrens 808fa9e4066Sahrens if (ds == NULL) /* this is the meta-objset */ 809fa9e4066Sahrens return; 810fa9e4066Sahrens 811fa9e4066Sahrens ASSERT(ds->ds_user_ptr != NULL); 812fa9e4066Sahrens ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 813fa9e4066Sahrens 814fa9e4066Sahrens dp = ds->ds_dir->dd_pool; 815fa9e4066Sahrens 816fa9e4066Sahrens if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 817fa9e4066Sahrens /* up the hold count until we can be written out */ 818fa9e4066Sahrens dmu_buf_add_ref(ds->ds_dbuf, ds); 819fa9e4066Sahrens } 820fa9e4066Sahrens } 821fa9e4066Sahrens 822fa9e4066Sahrens struct killarg { 823fa9e4066Sahrens uint64_t *usedp; 824fa9e4066Sahrens uint64_t *compressedp; 825fa9e4066Sahrens uint64_t *uncompressedp; 826fa9e4066Sahrens zio_t *zio; 827fa9e4066Sahrens dmu_tx_t *tx; 828fa9e4066Sahrens }; 829fa9e4066Sahrens 830fa9e4066Sahrens static int 831fa9e4066Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 832fa9e4066Sahrens { 833fa9e4066Sahrens struct killarg *ka = arg; 834fa9e4066Sahrens blkptr_t *bp = &bc->bc_blkptr; 835fa9e4066Sahrens 836fa9e4066Sahrens ASSERT3U(bc->bc_errno, ==, 0); 837fa9e4066Sahrens 838fa9e4066Sahrens /* 839fa9e4066Sahrens * Since this callback is not called concurrently, no lock is 840fa9e4066Sahrens * needed on the accounting values. 841fa9e4066Sahrens */ 842*99653d4eSeschrock *ka->usedp += bp_get_dasize(spa, bp); 843fa9e4066Sahrens *ka->compressedp += BP_GET_PSIZE(bp); 844fa9e4066Sahrens *ka->uncompressedp += BP_GET_UCSIZE(bp); 845fa9e4066Sahrens /* XXX check for EIO? */ 846fa9e4066Sahrens (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 847fa9e4066Sahrens ARC_NOWAIT); 848fa9e4066Sahrens return (0); 849fa9e4066Sahrens } 850fa9e4066Sahrens 851fa9e4066Sahrens /* ARGSUSED */ 852fa9e4066Sahrens int 853fa9e4066Sahrens dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 854fa9e4066Sahrens { 855fa9e4066Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 856fa9e4066Sahrens dsl_dataset_t *ds; 857ea8dc4b6Seschrock int err; 858fa9e4066Sahrens 859fa9e4066Sahrens if (dd->dd_phys->dd_head_dataset_obj == 0) 860fa9e4066Sahrens return (EINVAL); 861ea8dc4b6Seschrock err = dsl_dataset_open_obj(dd->dd_pool, 862ea8dc4b6Seschrock dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &ds); 863ea8dc4b6Seschrock if (err) 864ea8dc4b6Seschrock return (err); 865fa9e4066Sahrens 866fa9e4066Sahrens if (ds->ds_phys->ds_prev_snap_txg == 0) { 867fa9e4066Sahrens /* 868fa9e4066Sahrens * There's no previous snapshot. I suppose we could 869fa9e4066Sahrens * roll it back to being empty (and re-initialize the 870fa9e4066Sahrens * upper (ZPL) layer). But for now there's no way to do 871fa9e4066Sahrens * this via the user interface. 872fa9e4066Sahrens */ 873fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 874fa9e4066Sahrens return (EINVAL); 875fa9e4066Sahrens } 876fa9e4066Sahrens 877fa9e4066Sahrens mutex_enter(&ds->ds_lock); 878fa9e4066Sahrens if (ds->ds_open_refcount > 0) { 879fa9e4066Sahrens mutex_exit(&ds->ds_lock); 880fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 881fa9e4066Sahrens return (EBUSY); 882fa9e4066Sahrens } 883fa9e4066Sahrens 884fa9e4066Sahrens /* 885fa9e4066Sahrens * If we made changes this txg, traverse_dsl_dataset won't find 886fa9e4066Sahrens * them. Try again. 887fa9e4066Sahrens */ 888fa9e4066Sahrens if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) { 889fa9e4066Sahrens mutex_exit(&ds->ds_lock); 890fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 891fa9e4066Sahrens return (EAGAIN); 892fa9e4066Sahrens } 893fa9e4066Sahrens 894fa9e4066Sahrens /* THE POINT OF NO (unsuccessful) RETURN */ 895fa9e4066Sahrens ds->ds_open_refcount = DOS_REF_MAX; 896fa9e4066Sahrens mutex_exit(&ds->ds_lock); 897fa9e4066Sahrens 898fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 899fa9e4066Sahrens 900fa9e4066Sahrens /* Zero out the deadlist. */ 901fa9e4066Sahrens dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); 902fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 903fa9e4066Sahrens bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 904fa9e4066Sahrens ds->ds_phys->ds_deadlist_obj = 905fa9e4066Sahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 906ea8dc4b6Seschrock VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 907ea8dc4b6Seschrock ds->ds_phys->ds_deadlist_obj)); 908fa9e4066Sahrens dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); 909fa9e4066Sahrens 910fa9e4066Sahrens { 911fa9e4066Sahrens /* Free blkptrs that we gave birth to */ 912fa9e4066Sahrens zio_t *zio; 913fa9e4066Sahrens uint64_t used = 0, compressed = 0, uncompressed = 0; 914fa9e4066Sahrens struct killarg ka; 915fa9e4066Sahrens 916fa9e4066Sahrens zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 917fa9e4066Sahrens ZIO_FLAG_MUSTSUCCEED); 918fa9e4066Sahrens ka.usedp = &used; 919fa9e4066Sahrens ka.compressedp = &compressed; 920fa9e4066Sahrens ka.uncompressedp = &uncompressed; 921fa9e4066Sahrens ka.zio = zio; 922fa9e4066Sahrens ka.tx = tx; 923fa9e4066Sahrens (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 924fa9e4066Sahrens ADVANCE_POST, kill_blkptr, &ka); 925fa9e4066Sahrens (void) zio_wait(zio); 926fa9e4066Sahrens 927fa9e4066Sahrens dsl_dir_diduse_space(dd, 928fa9e4066Sahrens -used, -compressed, -uncompressed, tx); 929fa9e4066Sahrens } 930fa9e4066Sahrens 931fa9e4066Sahrens /* Change our contents to that of the prev snapshot (finally!) */ 932fa9e4066Sahrens ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); 933fa9e4066Sahrens ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 934fa9e4066Sahrens ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; 935fa9e4066Sahrens ds->ds_phys->ds_compressed_bytes = 936fa9e4066Sahrens ds->ds_prev->ds_phys->ds_compressed_bytes; 937fa9e4066Sahrens ds->ds_phys->ds_uncompressed_bytes = 938fa9e4066Sahrens ds->ds_prev->ds_phys->ds_uncompressed_bytes; 939*99653d4eSeschrock ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 940fa9e4066Sahrens ds->ds_phys->ds_unique_bytes = 0; 941fa9e4066Sahrens 942fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 943fa9e4066Sahrens ds->ds_prev->ds_phys->ds_unique_bytes = 0; 944fa9e4066Sahrens 945fa9e4066Sahrens dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); 946fa9e4066Sahrens ds->ds_open_refcount = 0; 947fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 948fa9e4066Sahrens 949fa9e4066Sahrens return (0); 950fa9e4066Sahrens } 951fa9e4066Sahrens 952e1930233Sbonwick /* ARGSUSED */ 953e1930233Sbonwick static int 954e1930233Sbonwick dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 955e1930233Sbonwick { 956e1930233Sbonwick dsl_dataset_t *ds = arg; 957e1930233Sbonwick 958e1930233Sbonwick /* 959e1930233Sbonwick * Can't delete a head dataset if there are snapshots of it. 960e1930233Sbonwick * (Except if the only snapshots are from the branch we cloned 961e1930233Sbonwick * from.) 962e1930233Sbonwick */ 963e1930233Sbonwick if (ds->ds_prev != NULL && 964e1930233Sbonwick ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 965e1930233Sbonwick return (EINVAL); 966e1930233Sbonwick 967e1930233Sbonwick /* Mark it as inconsistent on-disk, in case we crash */ 968e1930233Sbonwick dmu_buf_will_dirty(ds->ds_dbuf, tx); 969*99653d4eSeschrock ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 970e1930233Sbonwick 971e1930233Sbonwick return (0); 972e1930233Sbonwick } 973e1930233Sbonwick 974fa9e4066Sahrens int 975fa9e4066Sahrens dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 976fa9e4066Sahrens { 977fa9e4066Sahrens const char *snapname = arg; 978fa9e4066Sahrens uint64_t used = 0, compressed = 0, uncompressed = 0; 979fa9e4066Sahrens blkptr_t bp; 980fa9e4066Sahrens zio_t *zio; 981fa9e4066Sahrens int err; 982fa9e4066Sahrens int after_branch_point = FALSE; 983fa9e4066Sahrens int drop_lock = FALSE; 984fa9e4066Sahrens dsl_pool_t *dp = dd->dd_pool; 985fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 986fa9e4066Sahrens dsl_dataset_t *ds, *ds_prev = NULL; 987fa9e4066Sahrens uint64_t obj; 988fa9e4066Sahrens 989fa9e4066Sahrens if (dd->dd_phys->dd_head_dataset_obj == 0) 990fa9e4066Sahrens return (EINVAL); 991fa9e4066Sahrens 992fa9e4066Sahrens if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) { 993fa9e4066Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 994fa9e4066Sahrens drop_lock = TRUE; 995fa9e4066Sahrens } 996fa9e4066Sahrens 997ea8dc4b6Seschrock err = dsl_dataset_open_obj(dd->dd_pool, 998fa9e4066Sahrens dd->dd_phys->dd_head_dataset_obj, NULL, 999ea8dc4b6Seschrock snapname ? DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG, &ds); 1000fa9e4066Sahrens 1001ea8dc4b6Seschrock if (err == 0 && snapname) { 1002fa9e4066Sahrens err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1003fa9e4066Sahrens snapname, 8, 1, &obj); 1004fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1005ea8dc4b6Seschrock if (err == 0) { 1006ea8dc4b6Seschrock err = dsl_dataset_open_obj(dd->dd_pool, obj, NULL, 1007ea8dc4b6Seschrock DS_MODE_EXCLUSIVE, FTAG, &ds); 1008fa9e4066Sahrens } 1009fa9e4066Sahrens } 1010ea8dc4b6Seschrock if (err) { 1011fa9e4066Sahrens if (drop_lock) 1012fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 1013ea8dc4b6Seschrock return (err); 1014fa9e4066Sahrens } 1015fa9e4066Sahrens 1016fa9e4066Sahrens obj = ds->ds_object; 1017fa9e4066Sahrens 1018fa9e4066Sahrens /* Can't delete a branch point. */ 1019fa9e4066Sahrens if (ds->ds_phys->ds_num_children > 1) { 1020fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1021fa9e4066Sahrens if (drop_lock) 1022fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 1023fa9e4066Sahrens return (EINVAL); 1024fa9e4066Sahrens } 1025fa9e4066Sahrens 1026fa9e4066Sahrens /* 1027fa9e4066Sahrens * Can't delete a head dataset if there are snapshots of it. 1028fa9e4066Sahrens * (Except if the only snapshots are from the branch we cloned 1029fa9e4066Sahrens * from.) 1030fa9e4066Sahrens */ 1031fa9e4066Sahrens if (ds->ds_prev != NULL && 1032fa9e4066Sahrens ds->ds_prev->ds_phys->ds_next_snap_obj == obj) { 1033fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1034fa9e4066Sahrens if (drop_lock) 1035fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 1036fa9e4066Sahrens return (EINVAL); 1037fa9e4066Sahrens } 1038fa9e4066Sahrens 1039fa9e4066Sahrens /* 1040fa9e4066Sahrens * If we made changes this txg, traverse_dsl_dataset won't find 1041fa9e4066Sahrens * them. Try again. 1042fa9e4066Sahrens */ 1043fa9e4066Sahrens if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) { 1044fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 104587e5029aSahrens if (drop_lock) 104687e5029aSahrens rw_exit(&dp->dp_config_rwlock); 1047fa9e4066Sahrens return (EAGAIN); 1048fa9e4066Sahrens } 1049fa9e4066Sahrens 1050fa9e4066Sahrens if (ds->ds_phys->ds_prev_snap_obj != 0) { 1051fa9e4066Sahrens if (ds->ds_prev) { 1052fa9e4066Sahrens ds_prev = ds->ds_prev; 1053fa9e4066Sahrens } else { 1054ea8dc4b6Seschrock err = dsl_dataset_open_obj(dd->dd_pool, 1055fa9e4066Sahrens ds->ds_phys->ds_prev_snap_obj, NULL, 1056ea8dc4b6Seschrock DS_MODE_NONE, FTAG, &ds_prev); 1057ea8dc4b6Seschrock if (err) { 1058ea8dc4b6Seschrock dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1059ea8dc4b6Seschrock if (drop_lock) 1060ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 1061ea8dc4b6Seschrock return (err); 1062ea8dc4b6Seschrock } 1063fa9e4066Sahrens } 1064fa9e4066Sahrens after_branch_point = 1065fa9e4066Sahrens (ds_prev->ds_phys->ds_next_snap_obj != obj); 1066fa9e4066Sahrens 1067fa9e4066Sahrens dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1068fa9e4066Sahrens if (after_branch_point && 1069fa9e4066Sahrens ds->ds_phys->ds_next_snap_obj == 0) { 1070fa9e4066Sahrens /* This clone is toast. */ 1071fa9e4066Sahrens ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1072fa9e4066Sahrens ds_prev->ds_phys->ds_num_children--; 1073fa9e4066Sahrens } else if (!after_branch_point) { 1074fa9e4066Sahrens ds_prev->ds_phys->ds_next_snap_obj = 1075fa9e4066Sahrens ds->ds_phys->ds_next_snap_obj; 1076fa9e4066Sahrens } 1077fa9e4066Sahrens } 1078fa9e4066Sahrens 1079ea8dc4b6Seschrock /* THE POINT OF NO (unsuccessful) RETURN */ 1080ea8dc4b6Seschrock 1081fa9e4066Sahrens ASSERT3P(tx->tx_pool, ==, dd->dd_pool); 1082fa9e4066Sahrens zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1083fa9e4066Sahrens 1084fa9e4066Sahrens if (ds->ds_phys->ds_next_snap_obj != 0) { 1085fa9e4066Sahrens dsl_dataset_t *ds_next; 1086fa9e4066Sahrens uint64_t itor = 0; 1087fa9e4066Sahrens 1088fa9e4066Sahrens spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1089fa9e4066Sahrens 1090ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1091ea8dc4b6Seschrock ds->ds_phys->ds_next_snap_obj, NULL, 1092ea8dc4b6Seschrock DS_MODE_NONE, FTAG, &ds_next)); 1093fa9e4066Sahrens ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1094fa9e4066Sahrens 1095fa9e4066Sahrens dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1096fa9e4066Sahrens ds_next->ds_phys->ds_prev_snap_obj = 1097fa9e4066Sahrens ds->ds_phys->ds_prev_snap_obj; 1098fa9e4066Sahrens ds_next->ds_phys->ds_prev_snap_txg = 1099fa9e4066Sahrens ds->ds_phys->ds_prev_snap_txg; 1100fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1101fa9e4066Sahrens ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1102fa9e4066Sahrens 1103fa9e4066Sahrens /* 1104fa9e4066Sahrens * Transfer to our deadlist (which will become next's 1105fa9e4066Sahrens * new deadlist) any entries from next's current 1106fa9e4066Sahrens * deadlist which were born before prev, and free the 1107fa9e4066Sahrens * other entries. 1108fa9e4066Sahrens * 1109fa9e4066Sahrens * XXX we're doing this long task with the config lock held 1110fa9e4066Sahrens */ 1111fa9e4066Sahrens while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1112fa9e4066Sahrens &bp) == 0) { 1113fa9e4066Sahrens if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1114ea8dc4b6Seschrock VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1115ea8dc4b6Seschrock &bp, tx)); 1116fa9e4066Sahrens if (ds_prev && !after_branch_point && 1117fa9e4066Sahrens bp.blk_birth > 1118fa9e4066Sahrens ds_prev->ds_phys->ds_prev_snap_txg) { 1119fa9e4066Sahrens ds_prev->ds_phys->ds_unique_bytes += 1120*99653d4eSeschrock bp_get_dasize(dp->dp_spa, &bp); 1121fa9e4066Sahrens } 1122fa9e4066Sahrens } else { 1123*99653d4eSeschrock used += bp_get_dasize(dp->dp_spa, &bp); 1124fa9e4066Sahrens compressed += BP_GET_PSIZE(&bp); 1125fa9e4066Sahrens uncompressed += BP_GET_UCSIZE(&bp); 1126fa9e4066Sahrens /* XXX check return value? */ 1127fa9e4066Sahrens (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1128fa9e4066Sahrens &bp, NULL, NULL, ARC_NOWAIT); 1129fa9e4066Sahrens } 1130fa9e4066Sahrens } 1131fa9e4066Sahrens 1132fa9e4066Sahrens /* free next's deadlist */ 1133fa9e4066Sahrens bplist_close(&ds_next->ds_deadlist); 1134fa9e4066Sahrens bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1135fa9e4066Sahrens 1136fa9e4066Sahrens /* set next's deadlist to our deadlist */ 1137fa9e4066Sahrens ds_next->ds_phys->ds_deadlist_obj = 1138fa9e4066Sahrens ds->ds_phys->ds_deadlist_obj; 1139ea8dc4b6Seschrock VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1140ea8dc4b6Seschrock ds_next->ds_phys->ds_deadlist_obj)); 1141fa9e4066Sahrens ds->ds_phys->ds_deadlist_obj = 0; 1142fa9e4066Sahrens 1143fa9e4066Sahrens if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1144fa9e4066Sahrens /* 1145fa9e4066Sahrens * Update next's unique to include blocks which 1146fa9e4066Sahrens * were previously shared by only this snapshot 1147fa9e4066Sahrens * and it. Those blocks will be born after the 1148fa9e4066Sahrens * prev snap and before this snap, and will have 1149fa9e4066Sahrens * died after the next snap and before the one 1150fa9e4066Sahrens * after that (ie. be on the snap after next's 1151fa9e4066Sahrens * deadlist). 1152fa9e4066Sahrens * 1153fa9e4066Sahrens * XXX we're doing this long task with the 1154fa9e4066Sahrens * config lock held 1155fa9e4066Sahrens */ 1156fa9e4066Sahrens dsl_dataset_t *ds_after_next; 1157fa9e4066Sahrens 1158ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1159fa9e4066Sahrens ds_next->ds_phys->ds_next_snap_obj, NULL, 1160ea8dc4b6Seschrock DS_MODE_NONE, FTAG, &ds_after_next)); 1161fa9e4066Sahrens itor = 0; 1162fa9e4066Sahrens while (bplist_iterate(&ds_after_next->ds_deadlist, 1163fa9e4066Sahrens &itor, &bp) == 0) { 1164fa9e4066Sahrens if (bp.blk_birth > 1165fa9e4066Sahrens ds->ds_phys->ds_prev_snap_txg && 1166fa9e4066Sahrens bp.blk_birth <= 1167fa9e4066Sahrens ds->ds_phys->ds_creation_txg) { 1168fa9e4066Sahrens ds_next->ds_phys->ds_unique_bytes += 1169*99653d4eSeschrock bp_get_dasize(dp->dp_spa, &bp); 1170fa9e4066Sahrens } 1171fa9e4066Sahrens } 1172fa9e4066Sahrens 1173fa9e4066Sahrens dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1174fa9e4066Sahrens ASSERT3P(ds_next->ds_prev, ==, NULL); 1175fa9e4066Sahrens } else { 1176fa9e4066Sahrens /* 1177fa9e4066Sahrens * It would be nice to update the head dataset's 1178fa9e4066Sahrens * unique. To do so we would have to traverse 1179fa9e4066Sahrens * it for blocks born after ds_prev, which is 1180fa9e4066Sahrens * pretty expensive just to maintain something 1181fa9e4066Sahrens * for debugging purposes. 1182fa9e4066Sahrens */ 1183fa9e4066Sahrens ASSERT3P(ds_next->ds_prev, ==, ds); 1184fa9e4066Sahrens dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1185fa9e4066Sahrens ds_next); 1186fa9e4066Sahrens if (ds_prev) { 1187ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1188ea8dc4b6Seschrock ds->ds_phys->ds_prev_snap_obj, NULL, 1189ea8dc4b6Seschrock DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1190fa9e4066Sahrens } else { 1191fa9e4066Sahrens ds_next->ds_prev = NULL; 1192fa9e4066Sahrens } 1193fa9e4066Sahrens } 1194fa9e4066Sahrens dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1195fa9e4066Sahrens 1196fa9e4066Sahrens /* 1197fa9e4066Sahrens * NB: unique_bytes is not accurate for head objsets 1198fa9e4066Sahrens * because we don't update it when we delete the most 1199fa9e4066Sahrens * recent snapshot -- see above comment. 1200fa9e4066Sahrens */ 1201fa9e4066Sahrens ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1202fa9e4066Sahrens } else { 1203fa9e4066Sahrens /* 1204fa9e4066Sahrens * There's no next snapshot, so this is a head dataset. 1205fa9e4066Sahrens * Destroy the deadlist. Unless it's a clone, the 1206fa9e4066Sahrens * deadlist should be empty. (If it's a clone, it's 1207fa9e4066Sahrens * safe to ignore the deadlist contents.) 1208fa9e4066Sahrens */ 1209fa9e4066Sahrens struct killarg ka; 1210fa9e4066Sahrens 1211fa9e4066Sahrens ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1212fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 1213fa9e4066Sahrens bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1214fa9e4066Sahrens ds->ds_phys->ds_deadlist_obj = 0; 1215fa9e4066Sahrens 1216fa9e4066Sahrens /* 1217fa9e4066Sahrens * Free everything that we point to (that's born after 1218fa9e4066Sahrens * the previous snapshot, if we are a clone) 1219fa9e4066Sahrens * 1220fa9e4066Sahrens * XXX we're doing this long task with the config lock held 1221fa9e4066Sahrens */ 1222fa9e4066Sahrens ka.usedp = &used; 1223fa9e4066Sahrens ka.compressedp = &compressed; 1224fa9e4066Sahrens ka.uncompressedp = &uncompressed; 1225fa9e4066Sahrens ka.zio = zio; 1226fa9e4066Sahrens ka.tx = tx; 1227fa9e4066Sahrens err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1228fa9e4066Sahrens ADVANCE_POST, kill_blkptr, &ka); 1229fa9e4066Sahrens ASSERT3U(err, ==, 0); 1230fa9e4066Sahrens } 1231fa9e4066Sahrens 1232fa9e4066Sahrens err = zio_wait(zio); 1233fa9e4066Sahrens ASSERT3U(err, ==, 0); 1234fa9e4066Sahrens 1235fa9e4066Sahrens dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx); 1236fa9e4066Sahrens 1237fa9e4066Sahrens if (ds->ds_phys->ds_snapnames_zapobj) { 1238fa9e4066Sahrens err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1239fa9e4066Sahrens ASSERT(err == 0); 1240fa9e4066Sahrens } 1241fa9e4066Sahrens 1242fa9e4066Sahrens if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1243fa9e4066Sahrens /* Erase the link in the dataset */ 1244fa9e4066Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1245fa9e4066Sahrens dd->dd_phys->dd_head_dataset_obj = 0; 1246fa9e4066Sahrens /* 1247fa9e4066Sahrens * dsl_dir_sync_destroy() called us, they'll destroy 1248fa9e4066Sahrens * the dataset. 1249fa9e4066Sahrens */ 1250fa9e4066Sahrens } else { 1251fa9e4066Sahrens /* remove from snapshot namespace */ 1252fa9e4066Sahrens dsl_dataset_t *ds_head; 1253ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1254ea8dc4b6Seschrock dd->dd_phys->dd_head_dataset_obj, NULL, 1255ea8dc4b6Seschrock DS_MODE_NONE, FTAG, &ds_head)); 1256fa9e4066Sahrens #ifdef ZFS_DEBUG 1257fa9e4066Sahrens { 1258fa9e4066Sahrens uint64_t val; 1259fa9e4066Sahrens err = zap_lookup(mos, 1260fa9e4066Sahrens ds_head->ds_phys->ds_snapnames_zapobj, 1261fa9e4066Sahrens snapname, 8, 1, &val); 1262fa9e4066Sahrens ASSERT3U(err, ==, 0); 1263fa9e4066Sahrens ASSERT3U(val, ==, obj); 1264fa9e4066Sahrens } 1265fa9e4066Sahrens #endif 1266fa9e4066Sahrens err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1267fa9e4066Sahrens snapname, tx); 1268fa9e4066Sahrens ASSERT(err == 0); 1269fa9e4066Sahrens dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1270fa9e4066Sahrens } 1271fa9e4066Sahrens 1272fa9e4066Sahrens if (ds_prev && ds->ds_prev != ds_prev) 1273fa9e4066Sahrens dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1274fa9e4066Sahrens 1275fa9e4066Sahrens err = dmu_object_free(mos, obj, tx); 1276fa9e4066Sahrens ASSERT(err == 0); 1277fa9e4066Sahrens 1278fa9e4066Sahrens /* 1279fa9e4066Sahrens * Close the objset with mode NONE, thus leaving it with 1280fa9e4066Sahrens * DOS_REF_MAX set, so that noone can access it. 1281fa9e4066Sahrens */ 1282fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1283fa9e4066Sahrens 1284fa9e4066Sahrens if (drop_lock) 1285fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 1286fa9e4066Sahrens return (0); 1287fa9e4066Sahrens } 1288fa9e4066Sahrens 1289fa9e4066Sahrens int 1290fa9e4066Sahrens dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1291fa9e4066Sahrens { 1292fa9e4066Sahrens const char *snapname = arg; 1293fa9e4066Sahrens dsl_pool_t *dp = dd->dd_pool; 1294fa9e4066Sahrens dmu_buf_t *dbuf; 1295fa9e4066Sahrens dsl_dataset_phys_t *dsphys; 1296fa9e4066Sahrens uint64_t dsobj, value; 1297fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 1298fa9e4066Sahrens dsl_dataset_t *ds; 1299fa9e4066Sahrens int err; 1300fa9e4066Sahrens 1301fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 1302fa9e4066Sahrens 1303fa9e4066Sahrens if (dd->dd_phys->dd_head_dataset_obj == 0) 1304fa9e4066Sahrens return (EINVAL); 1305ea8dc4b6Seschrock err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL, 1306ea8dc4b6Seschrock DS_MODE_NONE, FTAG, &ds); 1307ea8dc4b6Seschrock if (err) 1308ea8dc4b6Seschrock return (err); 1309fa9e4066Sahrens 1310fa9e4066Sahrens err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1311fa9e4066Sahrens snapname, 8, 1, &value); 1312fa9e4066Sahrens if (err == 0) { 1313fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1314fa9e4066Sahrens return (EEXIST); 1315fa9e4066Sahrens } 1316fa9e4066Sahrens ASSERT(err == ENOENT); 1317fa9e4066Sahrens 1318fa9e4066Sahrens /* The point of no (unsuccessful) return */ 1319fa9e4066Sahrens 1320fa9e4066Sahrens dprintf_dd(dd, "taking snapshot %s in txg %llu\n", 1321fa9e4066Sahrens snapname, tx->tx_txg); 1322fa9e4066Sahrens 1323fa9e4066Sahrens spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1324fa9e4066Sahrens 1325fa9e4066Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 1326fa9e4066Sahrens 13271649cd4bStabriz dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 13281649cd4bStabriz DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1329ea8dc4b6Seschrock VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1330fa9e4066Sahrens dmu_buf_will_dirty(dbuf, tx); 1331fa9e4066Sahrens dsphys = dbuf->db_data; 1332fa9e4066Sahrens dsphys->ds_dir_obj = dd->dd_object; 1333fa9e4066Sahrens dsphys->ds_fsid_guid = unique_create(); 1334fa9e4066Sahrens unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 1335fa9e4066Sahrens (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1336fa9e4066Sahrens sizeof (dsphys->ds_guid)); 1337fa9e4066Sahrens dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1338fa9e4066Sahrens dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1339fa9e4066Sahrens dsphys->ds_next_snap_obj = ds->ds_object; 1340fa9e4066Sahrens dsphys->ds_num_children = 1; 1341fa9e4066Sahrens dsphys->ds_creation_time = gethrestime_sec(); 1342fa9e4066Sahrens dsphys->ds_creation_txg = tx->tx_txg; 1343fa9e4066Sahrens dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1344fa9e4066Sahrens dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1345fa9e4066Sahrens dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1346fa9e4066Sahrens dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1347*99653d4eSeschrock dsphys->ds_flags = ds->ds_phys->ds_flags; 1348fa9e4066Sahrens dsphys->ds_bp = ds->ds_phys->ds_bp; 1349ea8dc4b6Seschrock dmu_buf_rele(dbuf, FTAG); 1350fa9e4066Sahrens 1351fa9e4066Sahrens if (ds->ds_phys->ds_prev_snap_obj != 0) { 1352fa9e4066Sahrens dsl_dataset_t *ds_prev; 1353fa9e4066Sahrens 1354ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_obj(dp, 1355ea8dc4b6Seschrock ds->ds_phys->ds_prev_snap_obj, NULL, 1356ea8dc4b6Seschrock DS_MODE_NONE, FTAG, &ds_prev)); 1357fa9e4066Sahrens ASSERT(ds_prev->ds_phys->ds_next_snap_obj == 1358fa9e4066Sahrens ds->ds_object || 1359fa9e4066Sahrens ds_prev->ds_phys->ds_num_children > 1); 1360fa9e4066Sahrens if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1361fa9e4066Sahrens dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1362fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1363fa9e4066Sahrens ds_prev->ds_phys->ds_creation_txg); 1364fa9e4066Sahrens ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1365fa9e4066Sahrens } 1366fa9e4066Sahrens dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1367fa9e4066Sahrens } else { 1368fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0); 1369fa9e4066Sahrens } 1370fa9e4066Sahrens 1371fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 1372fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 1373fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1374fa9e4066Sahrens ds->ds_phys->ds_prev_snap_obj = dsobj; 1375fa9e4066Sahrens ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1376fa9e4066Sahrens ds->ds_phys->ds_unique_bytes = 0; 1377fa9e4066Sahrens ds->ds_phys->ds_deadlist_obj = 1378fa9e4066Sahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1379ea8dc4b6Seschrock VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1380ea8dc4b6Seschrock ds->ds_phys->ds_deadlist_obj)); 1381fa9e4066Sahrens 1382fa9e4066Sahrens dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1383fa9e4066Sahrens err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1384fa9e4066Sahrens snapname, 8, 1, &dsobj, tx); 1385fa9e4066Sahrens ASSERT(err == 0); 1386fa9e4066Sahrens 1387fa9e4066Sahrens if (ds->ds_prev) 1388fa9e4066Sahrens dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1389ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_obj(dp, 1390ea8dc4b6Seschrock ds->ds_phys->ds_prev_snap_obj, snapname, 1391ea8dc4b6Seschrock DS_MODE_NONE, ds, &ds->ds_prev)); 1392fa9e4066Sahrens 1393fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 1394fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1395fa9e4066Sahrens 1396fa9e4066Sahrens return (0); 1397fa9e4066Sahrens } 1398fa9e4066Sahrens 1399fa9e4066Sahrens void 1400fa9e4066Sahrens dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx) 1401fa9e4066Sahrens { 1402fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 1403fa9e4066Sahrens ASSERT(ds->ds_user_ptr != NULL); 1404fa9e4066Sahrens ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1405fa9e4066Sahrens 1406fa9e4066Sahrens dmu_objset_sync(ds->ds_user_ptr, tx); 1407fa9e4066Sahrens dsl_dir_dirty(ds->ds_dir, tx); 1408fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 1409fa9e4066Sahrens 1410ea8dc4b6Seschrock dmu_buf_rele(ds->ds_dbuf, ds); 1411fa9e4066Sahrens } 1412fa9e4066Sahrens 1413fa9e4066Sahrens void 1414fa9e4066Sahrens dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds) 1415fa9e4066Sahrens { 1416fa9e4066Sahrens /* fill in properties crap */ 1417fa9e4066Sahrens dsl_dir_stats(ds->ds_dir, dds); 1418fa9e4066Sahrens 1419fa9e4066Sahrens if (ds->ds_phys->ds_num_children != 0) { 1420fa9e4066Sahrens dds->dds_is_snapshot = TRUE; 1421fa9e4066Sahrens dds->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1422fa9e4066Sahrens } 1423fa9e4066Sahrens 1424*99653d4eSeschrock dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1425fa9e4066Sahrens dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth; 1426fa9e4066Sahrens 1427fa9e4066Sahrens dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill; 1428fa9e4066Sahrens dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used; 1429fa9e4066Sahrens 1430fa9e4066Sahrens /* We override the dataset's creation time... they should be the same */ 1431fa9e4066Sahrens dds->dds_creation_time = ds->ds_phys->ds_creation_time; 1432fa9e4066Sahrens dds->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1433fa9e4066Sahrens dds->dds_space_refd = ds->ds_phys->ds_used_bytes; 1434fa9e4066Sahrens dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid; 1435fa9e4066Sahrens 1436fa9e4066Sahrens if (ds->ds_phys->ds_next_snap_obj) { 1437fa9e4066Sahrens /* 1438fa9e4066Sahrens * This is a snapshot; override the dd's space used with 1439fa9e4066Sahrens * our unique space 1440fa9e4066Sahrens */ 1441fa9e4066Sahrens dds->dds_space_used = ds->ds_phys->ds_unique_bytes; 1442fa9e4066Sahrens dds->dds_compressed_bytes = 1443fa9e4066Sahrens ds->ds_phys->ds_compressed_bytes; 1444fa9e4066Sahrens dds->dds_uncompressed_bytes = 1445fa9e4066Sahrens ds->ds_phys->ds_uncompressed_bytes; 1446fa9e4066Sahrens } 1447fa9e4066Sahrens } 1448fa9e4066Sahrens 1449fa9e4066Sahrens dsl_pool_t * 1450fa9e4066Sahrens dsl_dataset_pool(dsl_dataset_t *ds) 1451fa9e4066Sahrens { 1452fa9e4066Sahrens return (ds->ds_dir->dd_pool); 1453fa9e4066Sahrens } 1454fa9e4066Sahrens 1455fa9e4066Sahrens struct osrenamearg { 1456fa9e4066Sahrens const char *oldname; 1457fa9e4066Sahrens const char *newname; 1458fa9e4066Sahrens }; 1459fa9e4066Sahrens 1460fa9e4066Sahrens static int 1461fa9e4066Sahrens dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1462fa9e4066Sahrens { 1463fa9e4066Sahrens struct osrenamearg *ora = arg; 1464fa9e4066Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 1465fa9e4066Sahrens dsl_dir_t *nds; 1466fa9e4066Sahrens const char *tail; 1467fa9e4066Sahrens int err; 1468fa9e4066Sahrens dsl_dataset_t *snds, *fsds; 1469fa9e4066Sahrens uint64_t val; 1470fa9e4066Sahrens 1471fa9e4066Sahrens err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname, 1472fa9e4066Sahrens DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds); 1473fa9e4066Sahrens if (err) 1474fa9e4066Sahrens return (err); 1475fa9e4066Sahrens 1476fa9e4066Sahrens if (snds->ds_dir != dd) { 1477fa9e4066Sahrens dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1478fa9e4066Sahrens return (EINVAL); 1479fa9e4066Sahrens } 1480fa9e4066Sahrens 1481fa9e4066Sahrens /* better be changing a snapshot */ 1482fa9e4066Sahrens if (snds->ds_phys->ds_next_snap_obj == 0) { 1483fa9e4066Sahrens dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1484fa9e4066Sahrens return (EINVAL); 1485fa9e4066Sahrens } 1486fa9e4066Sahrens 1487fa9e4066Sahrens /* new fs better exist */ 1488ea8dc4b6Seschrock err = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname, 1489ea8dc4b6Seschrock FTAG, &nds, &tail); 1490ea8dc4b6Seschrock if (err) { 1491fa9e4066Sahrens dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1492ea8dc4b6Seschrock return (err); 1493fa9e4066Sahrens } 1494fa9e4066Sahrens 1495fa9e4066Sahrens dsl_dir_close(nds, FTAG); 1496fa9e4066Sahrens 1497fa9e4066Sahrens /* new name better be in same fs */ 1498fa9e4066Sahrens if (nds != dd) { 1499fa9e4066Sahrens dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1500fa9e4066Sahrens return (EINVAL); 1501fa9e4066Sahrens } 1502fa9e4066Sahrens 1503fa9e4066Sahrens /* new name better be a snapshot */ 1504fa9e4066Sahrens if (tail == NULL || tail[0] != '@') { 1505fa9e4066Sahrens dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1506fa9e4066Sahrens return (EINVAL); 1507fa9e4066Sahrens } 1508fa9e4066Sahrens 1509fa9e4066Sahrens tail++; 1510fa9e4066Sahrens 1511ea8dc4b6Seschrock err = dsl_dataset_open_obj(dd->dd_pool, 1512ea8dc4b6Seschrock dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &fsds); 1513ea8dc4b6Seschrock if (err) { 1514ea8dc4b6Seschrock dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1515ea8dc4b6Seschrock return (err); 1516ea8dc4b6Seschrock } 1517fa9e4066Sahrens 1518fa9e4066Sahrens /* new name better not be in use */ 1519fa9e4066Sahrens err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj, 1520fa9e4066Sahrens tail, 8, 1, &val); 1521fa9e4066Sahrens if (err != ENOENT) { 1522fa9e4066Sahrens if (err == 0) 1523fa9e4066Sahrens err = EEXIST; 1524fa9e4066Sahrens dsl_dataset_close(fsds, DS_MODE_NONE, FTAG); 1525fa9e4066Sahrens dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1526fa9e4066Sahrens return (EEXIST); 1527fa9e4066Sahrens } 1528fa9e4066Sahrens 1529fa9e4066Sahrens /* The point of no (unsuccessful) return */ 1530fa9e4066Sahrens 1531fa9e4066Sahrens rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER); 1532ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_get_snapname(snds)); 1533fa9e4066Sahrens err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj, 1534fa9e4066Sahrens snds->ds_snapname, tx); 1535fa9e4066Sahrens ASSERT3U(err, ==, 0); 1536fa9e4066Sahrens mutex_enter(&snds->ds_lock); 1537fa9e4066Sahrens (void) strcpy(snds->ds_snapname, tail); 1538fa9e4066Sahrens mutex_exit(&snds->ds_lock); 1539fa9e4066Sahrens err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj, 1540fa9e4066Sahrens snds->ds_snapname, 8, 1, &snds->ds_object, tx); 1541fa9e4066Sahrens ASSERT3U(err, ==, 0); 1542fa9e4066Sahrens rw_exit(&dd->dd_pool->dp_config_rwlock); 1543fa9e4066Sahrens 1544fa9e4066Sahrens dsl_dataset_close(fsds, DS_MODE_NONE, FTAG); 1545fa9e4066Sahrens dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); 1546fa9e4066Sahrens return (0); 1547fa9e4066Sahrens } 1548fa9e4066Sahrens 1549fa9e4066Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename 1550fa9e4066Sahrens int 1551fa9e4066Sahrens dsl_dataset_rename(const char *osname, const char *newname) 1552fa9e4066Sahrens { 1553fa9e4066Sahrens dsl_dir_t *dd; 1554fa9e4066Sahrens const char *tail; 1555fa9e4066Sahrens struct osrenamearg ora; 1556fa9e4066Sahrens int err; 1557fa9e4066Sahrens 1558ea8dc4b6Seschrock err = dsl_dir_open(osname, FTAG, &dd, &tail); 1559ea8dc4b6Seschrock if (err) 1560ea8dc4b6Seschrock return (err); 1561fa9e4066Sahrens if (tail == NULL) { 1562fa9e4066Sahrens err = dsl_dir_sync_task(dd, 1563fa9e4066Sahrens dsl_dir_rename_sync, (void*)newname, 1<<12); 1564fa9e4066Sahrens dsl_dir_close(dd, FTAG); 1565fa9e4066Sahrens return (err); 1566fa9e4066Sahrens } 1567fa9e4066Sahrens if (tail[0] != '@') { 1568fa9e4066Sahrens /* the name ended in a nonexistant component */ 1569fa9e4066Sahrens dsl_dir_close(dd, FTAG); 1570fa9e4066Sahrens return (ENOENT); 1571fa9e4066Sahrens } 1572fa9e4066Sahrens 1573fa9e4066Sahrens ora.oldname = osname; 1574fa9e4066Sahrens ora.newname = newname; 1575fa9e4066Sahrens 1576fa9e4066Sahrens err = dsl_dir_sync_task(dd, 1577fa9e4066Sahrens dsl_dataset_snapshot_rename_sync, &ora, 1<<12); 1578fa9e4066Sahrens dsl_dir_close(dd, FTAG); 1579fa9e4066Sahrens return (err); 1580fa9e4066Sahrens } 1581*99653d4eSeschrock 1582*99653d4eSeschrock /* ARGSUSED */ 1583*99653d4eSeschrock static int 1584*99653d4eSeschrock dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1585*99653d4eSeschrock { 1586*99653d4eSeschrock dsl_dir_t *pdd = NULL; 1587*99653d4eSeschrock dsl_dataset_t *ds = NULL; 1588*99653d4eSeschrock dsl_dataset_t *hds = NULL; 1589*99653d4eSeschrock dsl_dataset_t *phds = NULL; 1590*99653d4eSeschrock dsl_dataset_t *pivot_ds = NULL; 1591*99653d4eSeschrock dsl_dataset_t *newnext_ds = NULL; 1592*99653d4eSeschrock int err; 1593*99653d4eSeschrock char *name = NULL; 1594*99653d4eSeschrock uint64_t used = 0, comp = 0, uncomp = 0, unique = 0, itor = 0; 1595*99653d4eSeschrock blkptr_t bp; 1596*99653d4eSeschrock 1597*99653d4eSeschrock /* Check that it is a clone */ 1598*99653d4eSeschrock if (dd->dd_phys->dd_clone_parent_obj == 0) 1599*99653d4eSeschrock return (EINVAL); 1600*99653d4eSeschrock 1601*99653d4eSeschrock /* Open everyone */ 1602*99653d4eSeschrock if (err = dsl_dataset_open_obj(dd->dd_pool, 1603*99653d4eSeschrock dd->dd_phys->dd_clone_parent_obj, 1604*99653d4eSeschrock NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) 1605*99653d4eSeschrock goto out; 1606*99653d4eSeschrock pdd = pivot_ds->ds_dir; 1607*99653d4eSeschrock if (err = dsl_dataset_open_obj(dd->dd_pool, 1608*99653d4eSeschrock pdd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &phds)) 1609*99653d4eSeschrock goto out; 1610*99653d4eSeschrock if (err = dsl_dataset_open_obj(dd->dd_pool, 1611*99653d4eSeschrock dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)) 1612*99653d4eSeschrock goto out; 1613*99653d4eSeschrock 1614*99653d4eSeschrock if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1615*99653d4eSeschrock err = EXDEV; 1616*99653d4eSeschrock goto out; 1617*99653d4eSeschrock } 1618*99653d4eSeschrock 1619*99653d4eSeschrock /* find pivot point's new next ds */ 1620*99653d4eSeschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, 1621*99653d4eSeschrock NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 1622*99653d4eSeschrock while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { 1623*99653d4eSeschrock dsl_dataset_t *prev; 1624*99653d4eSeschrock 1625*99653d4eSeschrock if (err = dsl_dataset_open_obj(dd->dd_pool, 1626*99653d4eSeschrock newnext_ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE, 1627*99653d4eSeschrock FTAG, &prev)) 1628*99653d4eSeschrock goto out; 1629*99653d4eSeschrock dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1630*99653d4eSeschrock newnext_ds = prev; 1631*99653d4eSeschrock } 1632*99653d4eSeschrock 1633*99653d4eSeschrock /* compute pivot point's new unique space */ 1634*99653d4eSeschrock while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 1635*99653d4eSeschrock &itor, &bp)) == 0) { 1636*99653d4eSeschrock if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) 1637*99653d4eSeschrock unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 1638*99653d4eSeschrock } 1639*99653d4eSeschrock if (err != ENOENT) 1640*99653d4eSeschrock goto out; 1641*99653d4eSeschrock 1642*99653d4eSeschrock /* need the config lock to ensure that the snapshots are not open */ 1643*99653d4eSeschrock rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER); 1644*99653d4eSeschrock 1645*99653d4eSeschrock /* Walk the snapshots that we are moving */ 1646*99653d4eSeschrock name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1647*99653d4eSeschrock ds = pivot_ds; 1648*99653d4eSeschrock /* CONSTCOND */ 1649*99653d4eSeschrock while (TRUE) { 1650*99653d4eSeschrock uint64_t val, dlused, dlcomp, dluncomp; 1651*99653d4eSeschrock dsl_dataset_t *prev; 1652*99653d4eSeschrock 1653*99653d4eSeschrock /* Check that the snapshot name does not conflict */ 1654*99653d4eSeschrock dsl_dataset_name(ds, name); 1655*99653d4eSeschrock err = zap_lookup(dd->dd_pool->dp_meta_objset, 1656*99653d4eSeschrock hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1657*99653d4eSeschrock 8, 1, &val); 1658*99653d4eSeschrock if (err != ENOENT) { 1659*99653d4eSeschrock if (err == 0) 1660*99653d4eSeschrock err = EEXIST; 1661*99653d4eSeschrock goto out; 1662*99653d4eSeschrock } 1663*99653d4eSeschrock 1664*99653d4eSeschrock /* 1665*99653d4eSeschrock * compute space to transfer. Each snapshot gave birth to: 1666*99653d4eSeschrock * (my used) - (prev's used) + (deadlist's used) 1667*99653d4eSeschrock */ 1668*99653d4eSeschrock used += ds->ds_phys->ds_used_bytes; 1669*99653d4eSeschrock comp += ds->ds_phys->ds_compressed_bytes; 1670*99653d4eSeschrock uncomp += ds->ds_phys->ds_uncompressed_bytes; 1671*99653d4eSeschrock 1672*99653d4eSeschrock /* If we reach the first snapshot, we're done. */ 1673*99653d4eSeschrock if (ds->ds_phys->ds_prev_snap_obj == 0) 1674*99653d4eSeschrock break; 1675*99653d4eSeschrock 1676*99653d4eSeschrock if (err = bplist_space(&ds->ds_deadlist, 1677*99653d4eSeschrock &dlused, &dlcomp, &dluncomp)) 1678*99653d4eSeschrock goto out; 1679*99653d4eSeschrock if (err = dsl_dataset_open_obj(dd->dd_pool, 1680*99653d4eSeschrock ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1681*99653d4eSeschrock FTAG, &prev)) 1682*99653d4eSeschrock goto out; 1683*99653d4eSeschrock used += dlused - prev->ds_phys->ds_used_bytes; 1684*99653d4eSeschrock comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 1685*99653d4eSeschrock uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 1686*99653d4eSeschrock 1687*99653d4eSeschrock /* 1688*99653d4eSeschrock * We could be a clone of a clone. If we reach our 1689*99653d4eSeschrock * parent's branch point, we're done. 1690*99653d4eSeschrock */ 1691*99653d4eSeschrock if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1692*99653d4eSeschrock dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1693*99653d4eSeschrock break; 1694*99653d4eSeschrock } 1695*99653d4eSeschrock if (ds != pivot_ds) 1696*99653d4eSeschrock dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1697*99653d4eSeschrock ds = prev; 1698*99653d4eSeschrock } 1699*99653d4eSeschrock if (ds != pivot_ds) 1700*99653d4eSeschrock dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1701*99653d4eSeschrock ds = NULL; 1702*99653d4eSeschrock 1703*99653d4eSeschrock /* Check that there is enough space here */ 1704*99653d4eSeschrock if (err = dsl_dir_transfer_possible(pdd, dd, used)) 1705*99653d4eSeschrock goto out; 1706*99653d4eSeschrock 1707*99653d4eSeschrock /* The point of no (unsuccessful) return */ 1708*99653d4eSeschrock 1709*99653d4eSeschrock /* move snapshots to this dir */ 1710*99653d4eSeschrock ds = pivot_ds; 1711*99653d4eSeschrock /* CONSTCOND */ 1712*99653d4eSeschrock while (TRUE) { 1713*99653d4eSeschrock dsl_dataset_t *prev; 1714*99653d4eSeschrock 1715*99653d4eSeschrock /* move snap name entry */ 1716*99653d4eSeschrock dsl_dataset_name(ds, name); 1717*99653d4eSeschrock VERIFY(0 == zap_remove(dd->dd_pool->dp_meta_objset, 1718*99653d4eSeschrock phds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, tx)); 1719*99653d4eSeschrock VERIFY(0 == zap_add(dd->dd_pool->dp_meta_objset, 1720*99653d4eSeschrock hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1721*99653d4eSeschrock 8, 1, &ds->ds_object, tx)); 1722*99653d4eSeschrock 1723*99653d4eSeschrock /* change containing dsl_dir */ 1724*99653d4eSeschrock dmu_buf_will_dirty(ds->ds_dbuf, tx); 1725*99653d4eSeschrock ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); 1726*99653d4eSeschrock ds->ds_phys->ds_dir_obj = dd->dd_object; 1727*99653d4eSeschrock ASSERT3P(ds->ds_dir, ==, pdd); 1728*99653d4eSeschrock dsl_dir_close(ds->ds_dir, ds); 1729*99653d4eSeschrock VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, dd->dd_object, 1730*99653d4eSeschrock NULL, ds, &ds->ds_dir)); 1731*99653d4eSeschrock 1732*99653d4eSeschrock ASSERT3U(dsl_prop_numcb(ds), ==, 0); 1733*99653d4eSeschrock 1734*99653d4eSeschrock if (ds->ds_phys->ds_prev_snap_obj == 0) 1735*99653d4eSeschrock break; 1736*99653d4eSeschrock 1737*99653d4eSeschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1738*99653d4eSeschrock ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1739*99653d4eSeschrock FTAG, &prev)); 1740*99653d4eSeschrock 1741*99653d4eSeschrock if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1742*99653d4eSeschrock dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1743*99653d4eSeschrock break; 1744*99653d4eSeschrock } 1745*99653d4eSeschrock if (ds != pivot_ds) 1746*99653d4eSeschrock dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1747*99653d4eSeschrock ds = prev; 1748*99653d4eSeschrock } 1749*99653d4eSeschrock 1750*99653d4eSeschrock /* change pivot point's next snap */ 1751*99653d4eSeschrock dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); 1752*99653d4eSeschrock pivot_ds->ds_phys->ds_next_snap_obj = newnext_ds->ds_object; 1753*99653d4eSeschrock 1754*99653d4eSeschrock /* change clone_parent-age */ 1755*99653d4eSeschrock dmu_buf_will_dirty(dd->dd_dbuf, tx); 1756*99653d4eSeschrock ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); 1757*99653d4eSeschrock dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; 1758*99653d4eSeschrock dmu_buf_will_dirty(pdd->dd_dbuf, tx); 1759*99653d4eSeschrock pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; 1760*99653d4eSeschrock 1761*99653d4eSeschrock /* change space accounting */ 1762*99653d4eSeschrock dsl_dir_diduse_space(pdd, -used, -comp, -uncomp, tx); 1763*99653d4eSeschrock dsl_dir_diduse_space(dd, used, comp, uncomp, tx); 1764*99653d4eSeschrock pivot_ds->ds_phys->ds_unique_bytes = unique; 1765*99653d4eSeschrock 1766*99653d4eSeschrock err = 0; 1767*99653d4eSeschrock 1768*99653d4eSeschrock out: 1769*99653d4eSeschrock if (RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)) 1770*99653d4eSeschrock rw_exit(&dd->dd_pool->dp_config_rwlock); 1771*99653d4eSeschrock if (hds) 1772*99653d4eSeschrock dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1773*99653d4eSeschrock if (phds) 1774*99653d4eSeschrock dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 1775*99653d4eSeschrock if (ds && ds != pivot_ds) 1776*99653d4eSeschrock dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1777*99653d4eSeschrock if (pivot_ds) 1778*99653d4eSeschrock dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1779*99653d4eSeschrock if (newnext_ds) 1780*99653d4eSeschrock dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1781*99653d4eSeschrock if (name) 1782*99653d4eSeschrock kmem_free(name, MAXPATHLEN); 1783*99653d4eSeschrock return (err); 1784*99653d4eSeschrock } 1785*99653d4eSeschrock 1786*99653d4eSeschrock int 1787*99653d4eSeschrock dsl_dataset_promote(const char *name) 1788*99653d4eSeschrock { 1789*99653d4eSeschrock dsl_dataset_t *ds; 1790*99653d4eSeschrock int err; 1791*99653d4eSeschrock dmu_object_info_t doi; 1792*99653d4eSeschrock 1793*99653d4eSeschrock err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 1794*99653d4eSeschrock if (err) 1795*99653d4eSeschrock return (err); 1796*99653d4eSeschrock 1797*99653d4eSeschrock err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 1798*99653d4eSeschrock ds->ds_phys->ds_snapnames_zapobj, &doi); 1799*99653d4eSeschrock if (err) { 1800*99653d4eSeschrock dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1801*99653d4eSeschrock return (err); 1802*99653d4eSeschrock } 1803*99653d4eSeschrock 1804*99653d4eSeschrock /* 1805*99653d4eSeschrock * Add in 128x the snapnames zapobj size, since we will be moving 1806*99653d4eSeschrock * a bunch of snapnames to the promoted ds, and dirtying their 1807*99653d4eSeschrock * bonus buffers. 1808*99653d4eSeschrock */ 1809*99653d4eSeschrock err = dsl_dir_sync_task(ds->ds_dir, dsl_dataset_promote_sync, NULL, 1810*99653d4eSeschrock (1<<20) + (doi.doi_physical_blks << (SPA_MINBLOCKSHIFT + 7))); 1811*99653d4eSeschrock dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1812*99653d4eSeschrock return (err); 1813*99653d4eSeschrock } 1814