1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 229082849eSck * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 
24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #include <sys/dmu_objset.h> 27fa9e4066Sahrens #include <sys/dsl_dataset.h> 28fa9e4066Sahrens #include <sys/dsl_dir.h> 2999653d4eSeschrock #include <sys/dsl_prop.h> 301d452cf5Sahrens #include <sys/dsl_synctask.h> 31fa9e4066Sahrens #include <sys/dmu_traverse.h> 32fa9e4066Sahrens #include <sys/dmu_tx.h> 33fa9e4066Sahrens #include <sys/arc.h> 34fa9e4066Sahrens #include <sys/zio.h> 35fa9e4066Sahrens #include <sys/zap.h> 36fa9e4066Sahrens #include <sys/unique.h> 37fa9e4066Sahrens #include <sys/zfs_context.h> 38cdf5b4caSmmusante #include <sys/zfs_ioctl.h> 39ecd6cf80Smarks #include <sys/spa.h> 40088f3894Sahrens #include <sys/zfs_znode.h> 41ecd6cf80Smarks #include <sys/sunddi.h> 42fa9e4066Sahrens 43745cd3c5Smaybee static char *dsl_reaper = "the grim reaper"; 44745cd3c5Smaybee 451d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 461d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 471d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check; 481d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync; 49a9799022Sck static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 50e1930233Sbonwick 5155434c77Sek #define DS_REF_MAX (1ULL << 62) 52fa9e4066Sahrens 53fa9e4066Sahrens #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 54fa9e4066Sahrens 55745cd3c5Smaybee #define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) 56745cd3c5Smaybee 57fa9e4066Sahrens 58a9799022Sck /* 59a9799022Sck * Figure out how much of this delta should be propogated to the dsl_dir 60a9799022Sck * layer. If there's a refreservation, that space has already been 61a9799022Sck * partially accounted for in our ancestors. 
62a9799022Sck */ 63a9799022Sck static int64_t 64a9799022Sck parent_delta(dsl_dataset_t *ds, int64_t delta) 65a9799022Sck { 66a9799022Sck uint64_t old_bytes, new_bytes; 67a9799022Sck 68a9799022Sck if (ds->ds_reserved == 0) 69a9799022Sck return (delta); 70a9799022Sck 71a9799022Sck old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 72a9799022Sck new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 73a9799022Sck 74a9799022Sck ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 75a9799022Sck return (new_bytes - old_bytes); 76a9799022Sck } 77fa9e4066Sahrens 78fa9e4066Sahrens void 79fa9e4066Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 80fa9e4066Sahrens { 8199653d4eSeschrock int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 82fa9e4066Sahrens int compressed = BP_GET_PSIZE(bp); 83fa9e4066Sahrens int uncompressed = BP_GET_UCSIZE(bp); 84a9799022Sck int64_t delta; 85fa9e4066Sahrens 86fa9e4066Sahrens dprintf_bp(bp, "born, ds=%p\n", ds); 87fa9e4066Sahrens 88fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 89fa9e4066Sahrens /* It could have been compressed away to nothing */ 90fa9e4066Sahrens if (BP_IS_HOLE(bp)) 91fa9e4066Sahrens return; 92fa9e4066Sahrens ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 93fa9e4066Sahrens ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 94fa9e4066Sahrens if (ds == NULL) { 95fa9e4066Sahrens /* 96fa9e4066Sahrens * Account for the meta-objset space in its placeholder 97fa9e4066Sahrens * dsl_dir. 
98fa9e4066Sahrens */ 99fa9e4066Sahrens ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 100*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 101fa9e4066Sahrens used, compressed, uncompressed, tx); 102fa9e4066Sahrens dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 103fa9e4066Sahrens return; 104fa9e4066Sahrens } 105fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 106fa9e4066Sahrens mutex_enter(&ds->ds_lock); 107a9799022Sck delta = parent_delta(ds, used); 108fa9e4066Sahrens ds->ds_phys->ds_used_bytes += used; 109fa9e4066Sahrens ds->ds_phys->ds_compressed_bytes += compressed; 110fa9e4066Sahrens ds->ds_phys->ds_uncompressed_bytes += uncompressed; 111fa9e4066Sahrens ds->ds_phys->ds_unique_bytes += used; 112fa9e4066Sahrens mutex_exit(&ds->ds_lock); 113*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 114*74e7dc98SMatthew Ahrens compressed, uncompressed, tx); 115*74e7dc98SMatthew Ahrens dsl_dir_transfer_space(ds->ds_dir, used - delta, 116*74e7dc98SMatthew Ahrens DD_USED_REFRSRV, DD_USED_HEAD, tx); 117fa9e4066Sahrens } 118fa9e4066Sahrens 119cdb0ab79Smaybee int 120c717a561Smaybee dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 121c717a561Smaybee dmu_tx_t *tx) 122fa9e4066Sahrens { 12399653d4eSeschrock int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 124fa9e4066Sahrens int compressed = BP_GET_PSIZE(bp); 125fa9e4066Sahrens int uncompressed = BP_GET_UCSIZE(bp); 126fa9e4066Sahrens 127fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 128c717a561Smaybee /* No block pointer => nothing to free */ 129fa9e4066Sahrens if (BP_IS_HOLE(bp)) 130cdb0ab79Smaybee return (0); 131fa9e4066Sahrens 132fa9e4066Sahrens ASSERT(used > 0); 133fa9e4066Sahrens if (ds == NULL) { 134c717a561Smaybee int err; 135fa9e4066Sahrens /* 136fa9e4066Sahrens * Account for the meta-objset space in its placeholder 137fa9e4066Sahrens * dataset. 
138fa9e4066Sahrens */ 139088f3894Sahrens err = dsl_free(pio, tx->tx_pool, 140c717a561Smaybee tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 141c717a561Smaybee ASSERT(err == 0); 142fa9e4066Sahrens 143*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 144fa9e4066Sahrens -used, -compressed, -uncompressed, tx); 145fa9e4066Sahrens dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 146cdb0ab79Smaybee return (used); 147fa9e4066Sahrens } 148fa9e4066Sahrens ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 149fa9e4066Sahrens 150*74e7dc98SMatthew Ahrens ASSERT(!dsl_dataset_is_snapshot(ds)); 151fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 152fa9e4066Sahrens 153fa9e4066Sahrens if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 154c717a561Smaybee int err; 155a9799022Sck int64_t delta; 156c717a561Smaybee 157fa9e4066Sahrens dprintf_bp(bp, "freeing: %s", ""); 158088f3894Sahrens err = dsl_free(pio, tx->tx_pool, 159*74e7dc98SMatthew Ahrens tx->tx_txg, bp, NULL, NULL, pio ? 
ARC_NOWAIT : ARC_WAIT); 160c717a561Smaybee ASSERT(err == 0); 161fa9e4066Sahrens 162fa9e4066Sahrens mutex_enter(&ds->ds_lock); 163a9799022Sck ASSERT(ds->ds_phys->ds_unique_bytes >= used || 164a9799022Sck !DS_UNIQUE_IS_ACCURATE(ds)); 165a9799022Sck delta = parent_delta(ds, -used); 166fa9e4066Sahrens ds->ds_phys->ds_unique_bytes -= used; 167fa9e4066Sahrens mutex_exit(&ds->ds_lock); 168*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 169a9799022Sck delta, -compressed, -uncompressed, tx); 170*74e7dc98SMatthew Ahrens dsl_dir_transfer_space(ds->ds_dir, -used - delta, 171*74e7dc98SMatthew Ahrens DD_USED_REFRSRV, DD_USED_HEAD, tx); 172fa9e4066Sahrens } else { 173fa9e4066Sahrens dprintf_bp(bp, "putting on dead list: %s", ""); 174ea8dc4b6Seschrock VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 175a4611edeSahrens ASSERT3U(ds->ds_prev->ds_object, ==, 176a4611edeSahrens ds->ds_phys->ds_prev_snap_obj); 177a4611edeSahrens ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 178fa9e4066Sahrens /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 179a4611edeSahrens if (ds->ds_prev->ds_phys->ds_next_snap_obj == 180a4611edeSahrens ds->ds_object && bp->blk_birth > 181a4611edeSahrens ds->ds_prev->ds_phys->ds_prev_snap_txg) { 182a4611edeSahrens dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 183a4611edeSahrens mutex_enter(&ds->ds_prev->ds_lock); 184a4611edeSahrens ds->ds_prev->ds_phys->ds_unique_bytes += used; 185a4611edeSahrens mutex_exit(&ds->ds_prev->ds_lock); 186fa9e4066Sahrens } 187*74e7dc98SMatthew Ahrens if (bp->blk_birth > ds->ds_origin_txg) { 188*74e7dc98SMatthew Ahrens dsl_dir_transfer_space(ds->ds_dir, used, 189*74e7dc98SMatthew Ahrens DD_USED_HEAD, DD_USED_SNAP, tx); 190*74e7dc98SMatthew Ahrens } 191fa9e4066Sahrens } 192fa9e4066Sahrens mutex_enter(&ds->ds_lock); 193fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 194fa9e4066Sahrens ds->ds_phys->ds_used_bytes -= used; 195fa9e4066Sahrens 
ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 196fa9e4066Sahrens ds->ds_phys->ds_compressed_bytes -= compressed; 197fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 198fa9e4066Sahrens ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 199fa9e4066Sahrens mutex_exit(&ds->ds_lock); 200cdb0ab79Smaybee 201cdb0ab79Smaybee return (used); 202fa9e4066Sahrens } 203fa9e4066Sahrens 204ea8dc4b6Seschrock uint64_t 205ea8dc4b6Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 206fa9e4066Sahrens { 207a2eea2e1Sahrens uint64_t trysnap = 0; 208a2eea2e1Sahrens 209fa9e4066Sahrens if (ds == NULL) 210ea8dc4b6Seschrock return (0); 211fa9e4066Sahrens /* 212fa9e4066Sahrens * The snapshot creation could fail, but that would cause an 213fa9e4066Sahrens * incorrect FALSE return, which would only result in an 214fa9e4066Sahrens * overestimation of the amount of space that an operation would 215fa9e4066Sahrens * consume, which is OK. 216fa9e4066Sahrens * 217fa9e4066Sahrens * There's also a small window where we could miss a pending 218fa9e4066Sahrens * snapshot, because we could set the sync task in the quiescing 219fa9e4066Sahrens * phase. So this should only be used as a guess. 
220fa9e4066Sahrens */ 221a2eea2e1Sahrens if (ds->ds_trysnap_txg > 222a2eea2e1Sahrens spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 223a2eea2e1Sahrens trysnap = ds->ds_trysnap_txg; 224a2eea2e1Sahrens return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 225ea8dc4b6Seschrock } 226ea8dc4b6Seschrock 227ea8dc4b6Seschrock int 228ea8dc4b6Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 229ea8dc4b6Seschrock { 230ea8dc4b6Seschrock return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 231fa9e4066Sahrens } 232fa9e4066Sahrens 233fa9e4066Sahrens /* ARGSUSED */ 234fa9e4066Sahrens static void 235fa9e4066Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv) 236fa9e4066Sahrens { 237fa9e4066Sahrens dsl_dataset_t *ds = dsv; 238fa9e4066Sahrens 239745cd3c5Smaybee ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); 240fa9e4066Sahrens 241fa9e4066Sahrens dprintf_ds(ds, "evicting %s\n", ""); 242fa9e4066Sahrens 24391ebeef5Sahrens unique_remove(ds->ds_fsid_guid); 244fa9e4066Sahrens 245fa9e4066Sahrens if (ds->ds_user_ptr != NULL) 246fa9e4066Sahrens ds->ds_user_evict_func(ds, ds->ds_user_ptr); 247fa9e4066Sahrens 248fa9e4066Sahrens if (ds->ds_prev) { 249745cd3c5Smaybee dsl_dataset_drop_ref(ds->ds_prev, ds); 250fa9e4066Sahrens ds->ds_prev = NULL; 251fa9e4066Sahrens } 252fa9e4066Sahrens 253fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 254745cd3c5Smaybee if (ds->ds_dir) 255745cd3c5Smaybee dsl_dir_close(ds->ds_dir, ds); 256fa9e4066Sahrens 25791ebeef5Sahrens ASSERT(!list_link_active(&ds->ds_synced_link)); 258fa9e4066Sahrens 2595ad82045Snd mutex_destroy(&ds->ds_lock); 26091ebeef5Sahrens mutex_destroy(&ds->ds_opening_lock); 2615ad82045Snd mutex_destroy(&ds->ds_deadlist.bpl_lock); 262745cd3c5Smaybee rw_destroy(&ds->ds_rwlock); 263745cd3c5Smaybee cv_destroy(&ds->ds_exclusive_cv); 2645ad82045Snd 265fa9e4066Sahrens kmem_free(ds, sizeof (dsl_dataset_t)); 266fa9e4066Sahrens } 267fa9e4066Sahrens 268ea8dc4b6Seschrock static int 269fa9e4066Sahrens 
dsl_dataset_get_snapname(dsl_dataset_t *ds) 270fa9e4066Sahrens { 271fa9e4066Sahrens dsl_dataset_phys_t *headphys; 272fa9e4066Sahrens int err; 273fa9e4066Sahrens dmu_buf_t *headdbuf; 274fa9e4066Sahrens dsl_pool_t *dp = ds->ds_dir->dd_pool; 275fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 276fa9e4066Sahrens 277fa9e4066Sahrens if (ds->ds_snapname[0]) 278ea8dc4b6Seschrock return (0); 279fa9e4066Sahrens if (ds->ds_phys->ds_next_snap_obj == 0) 280ea8dc4b6Seschrock return (0); 281fa9e4066Sahrens 282ea8dc4b6Seschrock err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 283ea8dc4b6Seschrock FTAG, &headdbuf); 284ea8dc4b6Seschrock if (err) 285ea8dc4b6Seschrock return (err); 286fa9e4066Sahrens headphys = headdbuf->db_data; 287fa9e4066Sahrens err = zap_value_search(dp->dp_meta_objset, 288e7437265Sahrens headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 289ea8dc4b6Seschrock dmu_buf_rele(headdbuf, FTAG); 290ea8dc4b6Seschrock return (err); 291fa9e4066Sahrens } 292fa9e4066Sahrens 293ab04eb8eStimh static int 294745cd3c5Smaybee dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 295ab04eb8eStimh { 296745cd3c5Smaybee objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 297745cd3c5Smaybee uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 298ab04eb8eStimh matchtype_t mt; 299ab04eb8eStimh int err; 300ab04eb8eStimh 301745cd3c5Smaybee if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 302ab04eb8eStimh mt = MT_FIRST; 303ab04eb8eStimh else 304ab04eb8eStimh mt = MT_EXACT; 305ab04eb8eStimh 306745cd3c5Smaybee err = zap_lookup_norm(mos, snapobj, name, 8, 1, 307ab04eb8eStimh value, mt, NULL, 0, NULL); 308ab04eb8eStimh if (err == ENOTSUP && mt == MT_FIRST) 309745cd3c5Smaybee err = zap_lookup(mos, snapobj, name, 8, 1, value); 310ab04eb8eStimh return (err); 311ab04eb8eStimh } 312ab04eb8eStimh 313ab04eb8eStimh static int 314745cd3c5Smaybee dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) 315ab04eb8eStimh { 
316745cd3c5Smaybee objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 317745cd3c5Smaybee uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 318ab04eb8eStimh matchtype_t mt; 319ab04eb8eStimh int err; 320ab04eb8eStimh 321745cd3c5Smaybee if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 322ab04eb8eStimh mt = MT_FIRST; 323ab04eb8eStimh else 324ab04eb8eStimh mt = MT_EXACT; 325ab04eb8eStimh 326745cd3c5Smaybee err = zap_remove_norm(mos, snapobj, name, mt, tx); 327ab04eb8eStimh if (err == ENOTSUP && mt == MT_FIRST) 328745cd3c5Smaybee err = zap_remove(mos, snapobj, name, tx); 329ab04eb8eStimh return (err); 330ab04eb8eStimh } 331ab04eb8eStimh 332745cd3c5Smaybee static int 333745cd3c5Smaybee dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, 334745cd3c5Smaybee dsl_dataset_t **dsp) 335fa9e4066Sahrens { 336fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 337fa9e4066Sahrens dmu_buf_t *dbuf; 338fa9e4066Sahrens dsl_dataset_t *ds; 339ea8dc4b6Seschrock int err; 340fa9e4066Sahrens 341fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 342fa9e4066Sahrens dsl_pool_sync_context(dp)); 343fa9e4066Sahrens 344ea8dc4b6Seschrock err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 345ea8dc4b6Seschrock if (err) 346ea8dc4b6Seschrock return (err); 347fa9e4066Sahrens ds = dmu_buf_get_user(dbuf); 348fa9e4066Sahrens if (ds == NULL) { 349fa9e4066Sahrens dsl_dataset_t *winner; 350fa9e4066Sahrens 351fa9e4066Sahrens ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 352fa9e4066Sahrens ds->ds_dbuf = dbuf; 353fa9e4066Sahrens ds->ds_object = dsobj; 354fa9e4066Sahrens ds->ds_phys = dbuf->db_data; 355fa9e4066Sahrens 3565ad82045Snd mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 35791ebeef5Sahrens mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 3585ad82045Snd mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 3595ad82045Snd NULL); 360745cd3c5Smaybee rw_init(&ds->ds_rwlock, 0, 0, 0); 361745cd3c5Smaybee cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); 
3625ad82045Snd 363ea8dc4b6Seschrock err = bplist_open(&ds->ds_deadlist, 364fa9e4066Sahrens mos, ds->ds_phys->ds_deadlist_obj); 365ea8dc4b6Seschrock if (err == 0) { 366ea8dc4b6Seschrock err = dsl_dir_open_obj(dp, 367ea8dc4b6Seschrock ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 368ea8dc4b6Seschrock } 369ea8dc4b6Seschrock if (err) { 370ea8dc4b6Seschrock /* 371ea8dc4b6Seschrock * we don't really need to close the blist if we 372ea8dc4b6Seschrock * just opened it. 373ea8dc4b6Seschrock */ 3745ad82045Snd mutex_destroy(&ds->ds_lock); 37591ebeef5Sahrens mutex_destroy(&ds->ds_opening_lock); 3765ad82045Snd mutex_destroy(&ds->ds_deadlist.bpl_lock); 377745cd3c5Smaybee rw_destroy(&ds->ds_rwlock); 378745cd3c5Smaybee cv_destroy(&ds->ds_exclusive_cv); 379ea8dc4b6Seschrock kmem_free(ds, sizeof (dsl_dataset_t)); 380ea8dc4b6Seschrock dmu_buf_rele(dbuf, tag); 381ea8dc4b6Seschrock return (err); 382ea8dc4b6Seschrock } 383fa9e4066Sahrens 384*74e7dc98SMatthew Ahrens if (!dsl_dataset_is_snapshot(ds)) { 385fa9e4066Sahrens ds->ds_snapname[0] = '\0'; 386fa9e4066Sahrens if (ds->ds_phys->ds_prev_snap_obj) { 387745cd3c5Smaybee err = dsl_dataset_get_ref(dp, 388745cd3c5Smaybee ds->ds_phys->ds_prev_snap_obj, 389745cd3c5Smaybee ds, &ds->ds_prev); 390fa9e4066Sahrens } 391*74e7dc98SMatthew Ahrens 392*74e7dc98SMatthew Ahrens if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) { 393*74e7dc98SMatthew Ahrens dsl_dataset_t *origin; 394*74e7dc98SMatthew Ahrens 395*74e7dc98SMatthew Ahrens err = dsl_dataset_hold_obj(dp, 396*74e7dc98SMatthew Ahrens ds->ds_dir->dd_phys->dd_origin_obj, 397*74e7dc98SMatthew Ahrens FTAG, &origin); 398*74e7dc98SMatthew Ahrens if (err == 0) { 399*74e7dc98SMatthew Ahrens ds->ds_origin_txg = 400*74e7dc98SMatthew Ahrens origin->ds_phys->ds_creation_txg; 401*74e7dc98SMatthew Ahrens dsl_dataset_rele(origin, FTAG); 402*74e7dc98SMatthew Ahrens } 403*74e7dc98SMatthew Ahrens } 404745cd3c5Smaybee } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 405745cd3c5Smaybee err = 
dsl_dataset_get_snapname(ds); 406fa9e4066Sahrens } 407fa9e4066Sahrens 408*74e7dc98SMatthew Ahrens if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 40927345066Sck /* 41027345066Sck * In sync context, we're called with either no lock 41127345066Sck * or with the write lock. If we're not syncing, 41227345066Sck * we're always called with the read lock held. 41327345066Sck */ 414cb625fb5Sck boolean_t need_lock = 41527345066Sck !RW_WRITE_HELD(&dp->dp_config_rwlock) && 41627345066Sck dsl_pool_sync_context(dp); 417cb625fb5Sck 418cb625fb5Sck if (need_lock) 419cb625fb5Sck rw_enter(&dp->dp_config_rwlock, RW_READER); 420cb625fb5Sck 421bb0ade09Sahrens err = dsl_prop_get_ds(ds, 422cb625fb5Sck "refreservation", sizeof (uint64_t), 1, 423cb625fb5Sck &ds->ds_reserved, NULL); 424cb625fb5Sck if (err == 0) { 425bb0ade09Sahrens err = dsl_prop_get_ds(ds, 426cb625fb5Sck "refquota", sizeof (uint64_t), 1, 427cb625fb5Sck &ds->ds_quota, NULL); 428cb625fb5Sck } 429cb625fb5Sck 430cb625fb5Sck if (need_lock) 431cb625fb5Sck rw_exit(&dp->dp_config_rwlock); 432cb625fb5Sck } else { 433cb625fb5Sck ds->ds_reserved = ds->ds_quota = 0; 434cb625fb5Sck } 435cb625fb5Sck 436ea8dc4b6Seschrock if (err == 0) { 437ea8dc4b6Seschrock winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 438ea8dc4b6Seschrock dsl_dataset_evict); 439ea8dc4b6Seschrock } 440ea8dc4b6Seschrock if (err || winner) { 441fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 442745cd3c5Smaybee if (ds->ds_prev) 443745cd3c5Smaybee dsl_dataset_drop_ref(ds->ds_prev, ds); 444fa9e4066Sahrens dsl_dir_close(ds->ds_dir, ds); 4455ad82045Snd mutex_destroy(&ds->ds_lock); 44691ebeef5Sahrens mutex_destroy(&ds->ds_opening_lock); 4475ad82045Snd mutex_destroy(&ds->ds_deadlist.bpl_lock); 448745cd3c5Smaybee rw_destroy(&ds->ds_rwlock); 449745cd3c5Smaybee cv_destroy(&ds->ds_exclusive_cv); 450fa9e4066Sahrens kmem_free(ds, sizeof (dsl_dataset_t)); 451ea8dc4b6Seschrock if (err) { 452ea8dc4b6Seschrock dmu_buf_rele(dbuf, tag); 453ea8dc4b6Seschrock return (err); 
454ea8dc4b6Seschrock } 455fa9e4066Sahrens ds = winner; 456fa9e4066Sahrens } else { 45791ebeef5Sahrens ds->ds_fsid_guid = 458fa9e4066Sahrens unique_insert(ds->ds_phys->ds_fsid_guid); 459fa9e4066Sahrens } 460fa9e4066Sahrens } 461fa9e4066Sahrens ASSERT3P(ds->ds_dbuf, ==, dbuf); 462fa9e4066Sahrens ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 463088f3894Sahrens ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 464afc6333aSahrens spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 46584db2a68Sahrens dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 466fa9e4066Sahrens mutex_enter(&ds->ds_lock); 467745cd3c5Smaybee if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { 468fa9e4066Sahrens mutex_exit(&ds->ds_lock); 469745cd3c5Smaybee dmu_buf_rele(ds->ds_dbuf, tag); 470745cd3c5Smaybee return (ENOENT); 471fa9e4066Sahrens } 472fa9e4066Sahrens mutex_exit(&ds->ds_lock); 473ea8dc4b6Seschrock *dsp = ds; 474ea8dc4b6Seschrock return (0); 475fa9e4066Sahrens } 476fa9e4066Sahrens 477745cd3c5Smaybee static int 478745cd3c5Smaybee dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) 479745cd3c5Smaybee { 480745cd3c5Smaybee dsl_pool_t *dp = ds->ds_dir->dd_pool; 481745cd3c5Smaybee 482745cd3c5Smaybee /* 483745cd3c5Smaybee * In syncing context we don't want the rwlock lock: there 484745cd3c5Smaybee * may be an existing writer waiting for sync phase to 485745cd3c5Smaybee * finish. We don't need to worry about such writers, since 486745cd3c5Smaybee * sync phase is single-threaded, so the writer can't be 487745cd3c5Smaybee * doing anything while we are active. 488745cd3c5Smaybee */ 489745cd3c5Smaybee if (dsl_pool_sync_context(dp)) { 490745cd3c5Smaybee ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 491745cd3c5Smaybee return (0); 492745cd3c5Smaybee } 493745cd3c5Smaybee 494745cd3c5Smaybee /* 495745cd3c5Smaybee * Normal users will hold the ds_rwlock as a READER until they 496745cd3c5Smaybee * are finished (i.e., call dsl_dataset_rele()). 
"Owners" will 497745cd3c5Smaybee * drop their READER lock after they set the ds_owner field. 498745cd3c5Smaybee * 499745cd3c5Smaybee * If the dataset is being destroyed, the destroy thread will 500745cd3c5Smaybee * obtain a WRITER lock for exclusive access after it's done its 501745cd3c5Smaybee * open-context work and then change the ds_owner to 502745cd3c5Smaybee * dsl_reaper once destruction is assured. So threads 503745cd3c5Smaybee * may block here temporarily, until the "destructability" of 504745cd3c5Smaybee * the dataset is determined. 505745cd3c5Smaybee */ 506745cd3c5Smaybee ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); 507745cd3c5Smaybee mutex_enter(&ds->ds_lock); 508745cd3c5Smaybee while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { 509745cd3c5Smaybee rw_exit(&dp->dp_config_rwlock); 510745cd3c5Smaybee cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); 511745cd3c5Smaybee if (DSL_DATASET_IS_DESTROYED(ds)) { 512745cd3c5Smaybee mutex_exit(&ds->ds_lock); 513745cd3c5Smaybee dsl_dataset_drop_ref(ds, tag); 514745cd3c5Smaybee rw_enter(&dp->dp_config_rwlock, RW_READER); 515745cd3c5Smaybee return (ENOENT); 516745cd3c5Smaybee } 517745cd3c5Smaybee rw_enter(&dp->dp_config_rwlock, RW_READER); 518745cd3c5Smaybee } 519745cd3c5Smaybee mutex_exit(&ds->ds_lock); 520745cd3c5Smaybee return (0); 521745cd3c5Smaybee } 522745cd3c5Smaybee 523745cd3c5Smaybee int 524745cd3c5Smaybee dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 525745cd3c5Smaybee dsl_dataset_t **dsp) 526745cd3c5Smaybee { 527745cd3c5Smaybee int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); 528745cd3c5Smaybee 529745cd3c5Smaybee if (err) 530745cd3c5Smaybee return (err); 531745cd3c5Smaybee return (dsl_dataset_hold_ref(*dsp, tag)); 532745cd3c5Smaybee } 533745cd3c5Smaybee 534745cd3c5Smaybee int 535745cd3c5Smaybee dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner, 536745cd3c5Smaybee dsl_dataset_t **dsp) 537745cd3c5Smaybee { 538745cd3c5Smaybee int err = dsl_dataset_hold_obj(dp, dsobj, 
owner, dsp); 539745cd3c5Smaybee 540745cd3c5Smaybee ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER); 541745cd3c5Smaybee 542745cd3c5Smaybee if (err) 543745cd3c5Smaybee return (err); 544745cd3c5Smaybee if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) { 545745cd3c5Smaybee dsl_dataset_rele(*dsp, owner); 546745cd3c5Smaybee return (EBUSY); 547745cd3c5Smaybee } 548745cd3c5Smaybee return (0); 549745cd3c5Smaybee } 550745cd3c5Smaybee 551fa9e4066Sahrens int 552745cd3c5Smaybee dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) 553fa9e4066Sahrens { 554fa9e4066Sahrens dsl_dir_t *dd; 555fa9e4066Sahrens dsl_pool_t *dp; 556745cd3c5Smaybee const char *snapname; 557fa9e4066Sahrens uint64_t obj; 558fa9e4066Sahrens int err = 0; 559fa9e4066Sahrens 560745cd3c5Smaybee err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); 561ea8dc4b6Seschrock if (err) 562ea8dc4b6Seschrock return (err); 563fa9e4066Sahrens 564fa9e4066Sahrens dp = dd->dd_pool; 565fa9e4066Sahrens obj = dd->dd_phys->dd_head_dataset_obj; 566fa9e4066Sahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 567745cd3c5Smaybee if (obj) 568745cd3c5Smaybee err = dsl_dataset_get_ref(dp, obj, tag, dsp); 569745cd3c5Smaybee else 570fa9e4066Sahrens err = ENOENT; 571745cd3c5Smaybee if (err) 572fa9e4066Sahrens goto out; 573fa9e4066Sahrens 574745cd3c5Smaybee err = dsl_dataset_hold_ref(*dsp, tag); 575fa9e4066Sahrens 576745cd3c5Smaybee /* we may be looking for a snapshot */ 577745cd3c5Smaybee if (err == 0 && snapname != NULL) { 578745cd3c5Smaybee dsl_dataset_t *ds = NULL; 579fa9e4066Sahrens 580745cd3c5Smaybee if (*snapname++ != '@') { 581745cd3c5Smaybee dsl_dataset_rele(*dsp, tag); 582fa9e4066Sahrens err = ENOENT; 583fa9e4066Sahrens goto out; 584fa9e4066Sahrens } 585fa9e4066Sahrens 586745cd3c5Smaybee dprintf("looking for snapshot '%s'\n", snapname); 587745cd3c5Smaybee err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 588745cd3c5Smaybee if (err == 0) 589745cd3c5Smaybee err = dsl_dataset_get_ref(dp, obj, tag, 
&ds); 590745cd3c5Smaybee dsl_dataset_rele(*dsp, tag); 591745cd3c5Smaybee 592745cd3c5Smaybee ASSERT3U((err == 0), ==, (ds != NULL)); 593745cd3c5Smaybee 594745cd3c5Smaybee if (ds) { 595745cd3c5Smaybee mutex_enter(&ds->ds_lock); 596745cd3c5Smaybee if (ds->ds_snapname[0] == 0) 597745cd3c5Smaybee (void) strlcpy(ds->ds_snapname, snapname, 598745cd3c5Smaybee sizeof (ds->ds_snapname)); 599745cd3c5Smaybee mutex_exit(&ds->ds_lock); 600745cd3c5Smaybee err = dsl_dataset_hold_ref(ds, tag); 601745cd3c5Smaybee *dsp = err ? NULL : ds; 602fa9e4066Sahrens } 603fa9e4066Sahrens } 604fa9e4066Sahrens out: 605fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 606fa9e4066Sahrens dsl_dir_close(dd, FTAG); 607fa9e4066Sahrens return (err); 608fa9e4066Sahrens } 609fa9e4066Sahrens 610fa9e4066Sahrens int 611745cd3c5Smaybee dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp) 612fa9e4066Sahrens { 613745cd3c5Smaybee int err = dsl_dataset_hold(name, owner, dsp); 614745cd3c5Smaybee if (err) 615745cd3c5Smaybee return (err); 616745cd3c5Smaybee if ((*dsp)->ds_phys->ds_num_children > 0 && 617745cd3c5Smaybee !DS_MODE_IS_READONLY(flags)) { 618745cd3c5Smaybee dsl_dataset_rele(*dsp, owner); 619745cd3c5Smaybee return (EROFS); 620745cd3c5Smaybee } 621745cd3c5Smaybee if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) { 622745cd3c5Smaybee dsl_dataset_rele(*dsp, owner); 623745cd3c5Smaybee return (EBUSY); 624745cd3c5Smaybee } 625745cd3c5Smaybee return (0); 626fa9e4066Sahrens } 627fa9e4066Sahrens 628fa9e4066Sahrens void 629fa9e4066Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name) 630fa9e4066Sahrens { 631fa9e4066Sahrens if (ds == NULL) { 632fa9e4066Sahrens (void) strcpy(name, "mos"); 633fa9e4066Sahrens } else { 634fa9e4066Sahrens dsl_dir_name(ds->ds_dir, name); 635ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_get_snapname(ds)); 636fa9e4066Sahrens if (ds->ds_snapname[0]) { 637fa9e4066Sahrens (void) strcat(name, "@"); 638745cd3c5Smaybee /* 639745cd3c5Smaybee * We use a 
"recursive" mutex so that we 640745cd3c5Smaybee * can call dprintf_ds() with ds_lock held. 641745cd3c5Smaybee */ 642fa9e4066Sahrens if (!MUTEX_HELD(&ds->ds_lock)) { 643fa9e4066Sahrens mutex_enter(&ds->ds_lock); 644fa9e4066Sahrens (void) strcat(name, ds->ds_snapname); 645fa9e4066Sahrens mutex_exit(&ds->ds_lock); 646fa9e4066Sahrens } else { 647fa9e4066Sahrens (void) strcat(name, ds->ds_snapname); 648fa9e4066Sahrens } 649fa9e4066Sahrens } 650fa9e4066Sahrens } 651fa9e4066Sahrens } 652fa9e4066Sahrens 653b7661cccSmmusante static int 654b7661cccSmmusante dsl_dataset_namelen(dsl_dataset_t *ds) 655b7661cccSmmusante { 656b7661cccSmmusante int result; 657b7661cccSmmusante 658b7661cccSmmusante if (ds == NULL) { 659b7661cccSmmusante result = 3; /* "mos" */ 660b7661cccSmmusante } else { 661b7661cccSmmusante result = dsl_dir_namelen(ds->ds_dir); 662b7661cccSmmusante VERIFY(0 == dsl_dataset_get_snapname(ds)); 663b7661cccSmmusante if (ds->ds_snapname[0]) { 664b7661cccSmmusante ++result; /* adding one for the @-sign */ 665b7661cccSmmusante if (!MUTEX_HELD(&ds->ds_lock)) { 666b7661cccSmmusante mutex_enter(&ds->ds_lock); 667b7661cccSmmusante result += strlen(ds->ds_snapname); 668b7661cccSmmusante mutex_exit(&ds->ds_lock); 669b7661cccSmmusante } else { 670b7661cccSmmusante result += strlen(ds->ds_snapname); 671b7661cccSmmusante } 672b7661cccSmmusante } 673b7661cccSmmusante } 674b7661cccSmmusante 675b7661cccSmmusante return (result); 676b7661cccSmmusante } 677b7661cccSmmusante 678088f3894Sahrens void 679745cd3c5Smaybee dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) 680fa9e4066Sahrens { 681ea8dc4b6Seschrock dmu_buf_rele(ds->ds_dbuf, tag); 682fa9e4066Sahrens } 683fa9e4066Sahrens 6843cb34c60Sahrens void 685745cd3c5Smaybee dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 6863cb34c60Sahrens { 687745cd3c5Smaybee if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { 688745cd3c5Smaybee rw_exit(&ds->ds_rwlock); 689745cd3c5Smaybee } 690745cd3c5Smaybee dsl_dataset_drop_ref(ds, tag); 
691745cd3c5Smaybee } 692745cd3c5Smaybee 693745cd3c5Smaybee void 694745cd3c5Smaybee dsl_dataset_disown(dsl_dataset_t *ds, void *owner) 695745cd3c5Smaybee { 696745cd3c5Smaybee ASSERT((ds->ds_owner == owner && ds->ds_dbuf) || 697745cd3c5Smaybee (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); 698745cd3c5Smaybee 6993cb34c60Sahrens mutex_enter(&ds->ds_lock); 700745cd3c5Smaybee ds->ds_owner = NULL; 701745cd3c5Smaybee if (RW_WRITE_HELD(&ds->ds_rwlock)) { 702745cd3c5Smaybee rw_exit(&ds->ds_rwlock); 703745cd3c5Smaybee cv_broadcast(&ds->ds_exclusive_cv); 704745cd3c5Smaybee } 7053cb34c60Sahrens mutex_exit(&ds->ds_lock); 706745cd3c5Smaybee if (ds->ds_dbuf) 707745cd3c5Smaybee dsl_dataset_drop_ref(ds, owner); 708745cd3c5Smaybee else 709745cd3c5Smaybee dsl_dataset_evict(ds->ds_dbuf, ds); 7103cb34c60Sahrens } 7113cb34c60Sahrens 7123cb34c60Sahrens boolean_t 713745cd3c5Smaybee dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner) 7143cb34c60Sahrens { 715745cd3c5Smaybee boolean_t gotit = FALSE; 716745cd3c5Smaybee 7173cb34c60Sahrens mutex_enter(&ds->ds_lock); 718745cd3c5Smaybee if (ds->ds_owner == NULL && 719745cd3c5Smaybee (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { 720745cd3c5Smaybee ds->ds_owner = owner; 721745cd3c5Smaybee if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) 722745cd3c5Smaybee rw_exit(&ds->ds_rwlock); 723745cd3c5Smaybee gotit = TRUE; 7243cb34c60Sahrens } 7253cb34c60Sahrens mutex_exit(&ds->ds_lock); 726745cd3c5Smaybee return (gotit); 727745cd3c5Smaybee } 728745cd3c5Smaybee 729745cd3c5Smaybee void 730745cd3c5Smaybee dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) 731745cd3c5Smaybee { 732745cd3c5Smaybee ASSERT3P(owner, ==, ds->ds_owner); 733745cd3c5Smaybee if (!RW_WRITE_HELD(&ds->ds_rwlock)) 734745cd3c5Smaybee rw_enter(&ds->ds_rwlock, RW_WRITER); 7353cb34c60Sahrens } 7363cb34c60Sahrens 7371d452cf5Sahrens uint64_t 738088f3894Sahrens dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 739ab04eb8eStimh uint64_t flags, 
dmu_tx_t *tx) 740fa9e4066Sahrens { 7413cb34c60Sahrens dsl_pool_t *dp = dd->dd_pool; 742fa9e4066Sahrens dmu_buf_t *dbuf; 743fa9e4066Sahrens dsl_dataset_phys_t *dsphys; 7443cb34c60Sahrens uint64_t dsobj; 745fa9e4066Sahrens objset_t *mos = dp->dp_meta_objset; 746fa9e4066Sahrens 747088f3894Sahrens if (origin == NULL) 748088f3894Sahrens origin = dp->dp_origin_snap; 749088f3894Sahrens 7503cb34c60Sahrens ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 7513cb34c60Sahrens ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 752fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 7533cb34c60Sahrens ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 754fa9e4066Sahrens 7551649cd4bStabriz dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 7561649cd4bStabriz DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 757ea8dc4b6Seschrock VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 758fa9e4066Sahrens dmu_buf_will_dirty(dbuf, tx); 759fa9e4066Sahrens dsphys = dbuf->db_data; 760745cd3c5Smaybee bzero(dsphys, sizeof (dsl_dataset_phys_t)); 761fa9e4066Sahrens dsphys->ds_dir_obj = dd->dd_object; 762ab04eb8eStimh dsphys->ds_flags = flags; 763fa9e4066Sahrens dsphys->ds_fsid_guid = unique_create(); 764fa9e4066Sahrens (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 765fa9e4066Sahrens sizeof (dsphys->ds_guid)); 766fa9e4066Sahrens dsphys->ds_snapnames_zapobj = 767ab04eb8eStimh zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 768ab04eb8eStimh DMU_OT_NONE, 0, tx); 769fa9e4066Sahrens dsphys->ds_creation_time = gethrestime_sec(); 770088f3894Sahrens dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg; 771fa9e4066Sahrens dsphys->ds_deadlist_obj = 772fa9e4066Sahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 773a9799022Sck 7743cb34c60Sahrens if (origin) { 7753cb34c60Sahrens dsphys->ds_prev_snap_obj = origin->ds_object; 776fa9e4066Sahrens dsphys->ds_prev_snap_txg = 7773cb34c60Sahrens origin->ds_phys->ds_creation_txg; 778fa9e4066Sahrens dsphys->ds_used_bytes = 7793cb34c60Sahrens origin->ds_phys->ds_used_bytes; 780fa9e4066Sahrens dsphys->ds_compressed_bytes = 7813cb34c60Sahrens origin->ds_phys->ds_compressed_bytes; 782fa9e4066Sahrens dsphys->ds_uncompressed_bytes = 7833cb34c60Sahrens origin->ds_phys->ds_uncompressed_bytes; 7843cb34c60Sahrens dsphys->ds_bp = origin->ds_phys->ds_bp; 785579ae4d5Stimh dsphys->ds_flags |= origin->ds_phys->ds_flags; 786fa9e4066Sahrens 7873cb34c60Sahrens dmu_buf_will_dirty(origin->ds_dbuf, tx); 7883cb34c60Sahrens origin->ds_phys->ds_num_children++; 789fa9e4066Sahrens 790088f3894Sahrens if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 791088f3894Sahrens if (origin->ds_phys->ds_next_clones_obj == 0) { 792088f3894Sahrens origin->ds_phys->ds_next_clones_obj = 793088f3894Sahrens zap_create(mos, 794088f3894Sahrens DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 795088f3894Sahrens } 796088f3894Sahrens VERIFY(0 == zap_add_int(mos, 797088f3894Sahrens origin->ds_phys->ds_next_clones_obj, 798088f3894Sahrens dsobj, tx)); 799088f3894Sahrens } 800088f3894Sahrens 801fa9e4066Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 8023cb34c60Sahrens dd->dd_phys->dd_origin_obj = origin->ds_object; 803fa9e4066Sahrens } 804ab04eb8eStimh 805ab04eb8eStimh if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 806ab04eb8eStimh dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 807ab04eb8eStimh 808ea8dc4b6Seschrock dmu_buf_rele(dbuf, FTAG); 809fa9e4066Sahrens 810fa9e4066Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 811fa9e4066Sahrens dd->dd_phys->dd_head_dataset_obj = dsobj; 8123cb34c60Sahrens 8133cb34c60Sahrens return (dsobj); 8143cb34c60Sahrens } 
8153cb34c60Sahrens 8163cb34c60Sahrens uint64_t 817ab04eb8eStimh dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 818ab04eb8eStimh dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 8193cb34c60Sahrens { 8203cb34c60Sahrens dsl_pool_t *dp = pdd->dd_pool; 8213cb34c60Sahrens uint64_t dsobj, ddobj; 8223cb34c60Sahrens dsl_dir_t *dd; 8233cb34c60Sahrens 8243cb34c60Sahrens ASSERT(lastname[0] != '@'); 8253cb34c60Sahrens 826088f3894Sahrens ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 8273cb34c60Sahrens VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 8283cb34c60Sahrens 829088f3894Sahrens dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); 8303cb34c60Sahrens 8313cb34c60Sahrens dsl_deleg_set_create_perms(dd, tx, cr); 8323cb34c60Sahrens 833fa9e4066Sahrens dsl_dir_close(dd, FTAG); 834fa9e4066Sahrens 8351d452cf5Sahrens return (dsobj); 836fa9e4066Sahrens } 837fa9e4066Sahrens 8381d452cf5Sahrens struct destroyarg { 8391d452cf5Sahrens dsl_sync_task_group_t *dstg; 8401d452cf5Sahrens char *snapname; 8411d452cf5Sahrens char *failed; 8421d452cf5Sahrens }; 8431d452cf5Sahrens 8441d452cf5Sahrens static int 8451d452cf5Sahrens dsl_snapshot_destroy_one(char *name, void *arg) 846fa9e4066Sahrens { 8471d452cf5Sahrens struct destroyarg *da = arg; 8481d452cf5Sahrens dsl_dataset_t *ds; 8491d452cf5Sahrens char *cp; 850fa9e4066Sahrens int err; 851fa9e4066Sahrens 8521d452cf5Sahrens (void) strcat(name, "@"); 8531d452cf5Sahrens (void) strcat(name, da->snapname); 854745cd3c5Smaybee err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT, 855cdf5b4caSmmusante da->dstg, &ds); 8561d452cf5Sahrens cp = strchr(name, '@'); 8571d452cf5Sahrens *cp = '\0'; 858745cd3c5Smaybee if (err == 0) { 859745cd3c5Smaybee dsl_dataset_make_exclusive(ds, da->dstg); 8603baa08fcSek if (ds->ds_user_ptr) { 8613baa08fcSek ds->ds_user_evict_func(ds, ds->ds_user_ptr); 8623baa08fcSek ds->ds_user_ptr = NULL; 8633baa08fcSek } 864745cd3c5Smaybee dsl_sync_task_create(da->dstg, 
dsl_dataset_destroy_check, 865745cd3c5Smaybee dsl_dataset_destroy_sync, ds, da->dstg, 0); 866745cd3c5Smaybee } else if (err == ENOENT) { 867745cd3c5Smaybee err = 0; 868745cd3c5Smaybee } else { 8691d452cf5Sahrens (void) strcpy(da->failed, name); 8701d452cf5Sahrens } 871745cd3c5Smaybee return (err); 8721d452cf5Sahrens } 87331fd60d3Sahrens 8741d452cf5Sahrens /* 8751d452cf5Sahrens * Destroy 'snapname' in all descendants of 'fsname'. 8761d452cf5Sahrens */ 8771d452cf5Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 8781d452cf5Sahrens int 8791d452cf5Sahrens dsl_snapshots_destroy(char *fsname, char *snapname) 8801d452cf5Sahrens { 8811d452cf5Sahrens int err; 8821d452cf5Sahrens struct destroyarg da; 8831d452cf5Sahrens dsl_sync_task_t *dst; 8841d452cf5Sahrens spa_t *spa; 8851d452cf5Sahrens 88640feaa91Sahrens err = spa_open(fsname, &spa, FTAG); 8871d452cf5Sahrens if (err) 8881d452cf5Sahrens return (err); 8891d452cf5Sahrens da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 8901d452cf5Sahrens da.snapname = snapname; 8911d452cf5Sahrens da.failed = fsname; 8921d452cf5Sahrens 8931d452cf5Sahrens err = dmu_objset_find(fsname, 8940b69c2f0Sahrens dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 8951d452cf5Sahrens 8961d452cf5Sahrens if (err == 0) 8971d452cf5Sahrens err = dsl_sync_task_group_wait(da.dstg); 8981d452cf5Sahrens 8991d452cf5Sahrens for (dst = list_head(&da.dstg->dstg_tasks); dst; 9001d452cf5Sahrens dst = list_next(&da.dstg->dstg_tasks, dst)) { 9011d452cf5Sahrens dsl_dataset_t *ds = dst->dst_arg1; 902745cd3c5Smaybee /* 903745cd3c5Smaybee * Return the file system name that triggered the error 904745cd3c5Smaybee */ 9051d452cf5Sahrens if (dst->dst_err) { 9061d452cf5Sahrens dsl_dataset_name(ds, fsname); 90740feaa91Sahrens *strchr(fsname, '@') = '\0'; 908e1930233Sbonwick } 909745cd3c5Smaybee dsl_dataset_disown(ds, da.dstg); 910fa9e4066Sahrens } 911fa9e4066Sahrens 9121d452cf5Sahrens dsl_sync_task_group_destroy(da.dstg); 9131d452cf5Sahrens spa_close(spa, 
FTAG); 914fa9e4066Sahrens return (err); 915fa9e4066Sahrens } 916fa9e4066Sahrens 9173cb34c60Sahrens /* 918745cd3c5Smaybee * ds must be opened as OWNER. On return (whether successful or not), 919745cd3c5Smaybee * ds will be closed and caller can no longer dereference it. 9203cb34c60Sahrens */ 921fa9e4066Sahrens int 9223cb34c60Sahrens dsl_dataset_destroy(dsl_dataset_t *ds, void *tag) 923fa9e4066Sahrens { 924fa9e4066Sahrens int err; 9251d452cf5Sahrens dsl_sync_task_group_t *dstg; 9261d452cf5Sahrens objset_t *os; 927fa9e4066Sahrens dsl_dir_t *dd; 9281d452cf5Sahrens uint64_t obj; 9291d452cf5Sahrens 9303cb34c60Sahrens if (dsl_dataset_is_snapshot(ds)) { 9311d452cf5Sahrens /* Destroying a snapshot is simpler */ 932745cd3c5Smaybee dsl_dataset_make_exclusive(ds, tag); 9333baa08fcSek 9343baa08fcSek if (ds->ds_user_ptr) { 9353baa08fcSek ds->ds_user_evict_func(ds, ds->ds_user_ptr); 9363baa08fcSek ds->ds_user_ptr = NULL; 9373baa08fcSek } 9381d452cf5Sahrens err = dsl_sync_task_do(ds->ds_dir->dd_pool, 9391d452cf5Sahrens dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 9403cb34c60Sahrens ds, tag, 0); 9413cb34c60Sahrens goto out; 9421d452cf5Sahrens } 943fa9e4066Sahrens 9441d452cf5Sahrens dd = ds->ds_dir; 945fa9e4066Sahrens 9461d452cf5Sahrens /* 9471d452cf5Sahrens * Check for errors and mark this ds as inconsistent, in 9481d452cf5Sahrens * case we crash while freeing the objects. 9491d452cf5Sahrens */ 9501d452cf5Sahrens err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 9511d452cf5Sahrens dsl_dataset_destroy_begin_sync, ds, NULL, 0); 9523cb34c60Sahrens if (err) 9533cb34c60Sahrens goto out; 9543cb34c60Sahrens 9553cb34c60Sahrens err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); 9563cb34c60Sahrens if (err) 9573cb34c60Sahrens goto out; 958fa9e4066Sahrens 9591d452cf5Sahrens /* 9601d452cf5Sahrens * remove the objects in open context, so that we won't 9611d452cf5Sahrens * have too much to do in syncing context. 
9621d452cf5Sahrens */ 9636754306eSahrens for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 9646754306eSahrens ds->ds_phys->ds_prev_snap_txg)) { 965cdb0ab79Smaybee /* 966cdb0ab79Smaybee * Ignore errors, if there is not enough disk space 967cdb0ab79Smaybee * we will deal with it in dsl_dataset_destroy_sync(). 968cdb0ab79Smaybee */ 969cdb0ab79Smaybee (void) dmu_free_object(os, obj); 9701d452cf5Sahrens } 9711d452cf5Sahrens 9721d452cf5Sahrens dmu_objset_close(os); 9731d452cf5Sahrens if (err != ESRCH) 9743cb34c60Sahrens goto out; 9751d452cf5Sahrens 97668038c2cSmaybee rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 97768038c2cSmaybee err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 97868038c2cSmaybee rw_exit(&dd->dd_pool->dp_config_rwlock); 97968038c2cSmaybee 98068038c2cSmaybee if (err) 98168038c2cSmaybee goto out; 98268038c2cSmaybee 9833cb34c60Sahrens if (ds->ds_user_ptr) { 984745cd3c5Smaybee /* 985745cd3c5Smaybee * We need to sync out all in-flight IO before we try 986745cd3c5Smaybee * to evict (the dataset evict func is trying to clear 987745cd3c5Smaybee * the cached entries for this dataset in the ARC). 988745cd3c5Smaybee */ 989745cd3c5Smaybee txg_wait_synced(dd->dd_pool, 0); 9901d452cf5Sahrens } 9911d452cf5Sahrens 9921d452cf5Sahrens /* 9931d452cf5Sahrens * Blow away the dsl_dir + head dataset. 
9941d452cf5Sahrens */ 995745cd3c5Smaybee dsl_dataset_make_exclusive(ds, tag); 99668038c2cSmaybee if (ds->ds_user_ptr) { 99768038c2cSmaybee ds->ds_user_evict_func(ds, ds->ds_user_ptr); 99868038c2cSmaybee ds->ds_user_ptr = NULL; 99968038c2cSmaybee } 10001d452cf5Sahrens dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 10011d452cf5Sahrens dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 10023cb34c60Sahrens dsl_dataset_destroy_sync, ds, tag, 0); 10031d452cf5Sahrens dsl_sync_task_create(dstg, dsl_dir_destroy_check, 10041d452cf5Sahrens dsl_dir_destroy_sync, dd, FTAG, 0); 10051d452cf5Sahrens err = dsl_sync_task_group_wait(dstg); 10061d452cf5Sahrens dsl_sync_task_group_destroy(dstg); 1007745cd3c5Smaybee /* if it is successful, dsl_dir_destroy_sync will close the dd */ 10083cb34c60Sahrens if (err) 10091d452cf5Sahrens dsl_dir_close(dd, FTAG); 10103cb34c60Sahrens out: 1011745cd3c5Smaybee dsl_dataset_disown(ds, tag); 1012fa9e4066Sahrens return (err); 1013fa9e4066Sahrens } 1014fa9e4066Sahrens 10151d452cf5Sahrens int 10163cb34c60Sahrens dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost) 10171d452cf5Sahrens { 10181c8564a7SMark Maybee int err; 10191c8564a7SMark Maybee 1020745cd3c5Smaybee ASSERT(ds->ds_owner); 10213cb34c60Sahrens 10221c8564a7SMark Maybee dsl_dataset_make_exclusive(ds, ds->ds_owner); 10231c8564a7SMark Maybee err = dsl_sync_task_do(ds->ds_dir->dd_pool, 10241d452cf5Sahrens dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 10251c8564a7SMark Maybee ds, &ost, 0); 10261c8564a7SMark Maybee /* drop exclusive access */ 10271c8564a7SMark Maybee mutex_enter(&ds->ds_lock); 10281c8564a7SMark Maybee rw_exit(&ds->ds_rwlock); 10291c8564a7SMark Maybee cv_broadcast(&ds->ds_exclusive_cv); 10301c8564a7SMark Maybee mutex_exit(&ds->ds_lock); 10311c8564a7SMark Maybee return (err); 10321d452cf5Sahrens } 10331d452cf5Sahrens 1034fa9e4066Sahrens void * 1035fa9e4066Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 1036fa9e4066Sahrens void *p, 
dsl_dataset_evict_func_t func) 1037fa9e4066Sahrens { 1038fa9e4066Sahrens void *old; 1039fa9e4066Sahrens 1040fa9e4066Sahrens mutex_enter(&ds->ds_lock); 1041fa9e4066Sahrens old = ds->ds_user_ptr; 1042fa9e4066Sahrens if (old == NULL) { 1043fa9e4066Sahrens ds->ds_user_ptr = p; 1044fa9e4066Sahrens ds->ds_user_evict_func = func; 1045fa9e4066Sahrens } 1046fa9e4066Sahrens mutex_exit(&ds->ds_lock); 1047fa9e4066Sahrens return (old); 1048fa9e4066Sahrens } 1049fa9e4066Sahrens 1050fa9e4066Sahrens void * 1051fa9e4066Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 1052fa9e4066Sahrens { 1053fa9e4066Sahrens return (ds->ds_user_ptr); 1054fa9e4066Sahrens } 1055fa9e4066Sahrens 1056fa9e4066Sahrens 1057c717a561Smaybee blkptr_t * 1058c717a561Smaybee dsl_dataset_get_blkptr(dsl_dataset_t *ds) 1059fa9e4066Sahrens { 1060c717a561Smaybee return (&ds->ds_phys->ds_bp); 1061fa9e4066Sahrens } 1062fa9e4066Sahrens 1063fa9e4066Sahrens void 1064fa9e4066Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 1065fa9e4066Sahrens { 1066fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 1067fa9e4066Sahrens /* If it's the meta-objset, set dp_meta_rootbp */ 1068fa9e4066Sahrens if (ds == NULL) { 1069fa9e4066Sahrens tx->tx_pool->dp_meta_rootbp = *bp; 1070fa9e4066Sahrens } else { 1071fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 1072fa9e4066Sahrens ds->ds_phys->ds_bp = *bp; 1073fa9e4066Sahrens } 1074fa9e4066Sahrens } 1075fa9e4066Sahrens 1076fa9e4066Sahrens spa_t * 1077fa9e4066Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds) 1078fa9e4066Sahrens { 1079fa9e4066Sahrens return (ds->ds_dir->dd_pool->dp_spa); 1080fa9e4066Sahrens } 1081fa9e4066Sahrens 1082fa9e4066Sahrens void 1083fa9e4066Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 1084fa9e4066Sahrens { 1085fa9e4066Sahrens dsl_pool_t *dp; 1086fa9e4066Sahrens 1087fa9e4066Sahrens if (ds == NULL) /* this is the meta-objset */ 1088fa9e4066Sahrens return; 1089fa9e4066Sahrens 1090fa9e4066Sahrens ASSERT(ds->ds_user_ptr != NULL); 
1091a2eea2e1Sahrens 1092a2eea2e1Sahrens if (ds->ds_phys->ds_next_snap_obj != 0) 1093a2eea2e1Sahrens panic("dirtying snapshot!"); 1094fa9e4066Sahrens 1095fa9e4066Sahrens dp = ds->ds_dir->dd_pool; 1096fa9e4066Sahrens 1097fa9e4066Sahrens if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 1098fa9e4066Sahrens /* up the hold count until we can be written out */ 1099fa9e4066Sahrens dmu_buf_add_ref(ds->ds_dbuf, ds); 1100fa9e4066Sahrens } 1101fa9e4066Sahrens } 1102fa9e4066Sahrens 1103a9799022Sck /* 1104a9799022Sck * The unique space in the head dataset can be calculated by subtracting 1105a9799022Sck * the space used in the most recent snapshot, that is still being used 1106a9799022Sck * in this file system, from the space currently in use. To figure out 1107a9799022Sck * the space in the most recent snapshot still in use, we need to take 1108a9799022Sck * the total space used in the snapshot and subtract out the space that 1109a9799022Sck * has been freed up since the snapshot was taken. 
1110a9799022Sck */ 1111a9799022Sck static void 1112a9799022Sck dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1113a9799022Sck { 1114a9799022Sck uint64_t mrs_used; 1115a9799022Sck uint64_t dlused, dlcomp, dluncomp; 1116a9799022Sck 1117a9799022Sck ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj); 1118a9799022Sck 1119a9799022Sck if (ds->ds_phys->ds_prev_snap_obj != 0) 1120a9799022Sck mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; 1121a9799022Sck else 1122a9799022Sck mrs_used = 0; 1123a9799022Sck 1124a9799022Sck VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp, 1125a9799022Sck &dluncomp)); 1126a9799022Sck 1127a9799022Sck ASSERT3U(dlused, <=, mrs_used); 1128a9799022Sck ds->ds_phys->ds_unique_bytes = 1129a9799022Sck ds->ds_phys->ds_used_bytes - (mrs_used - dlused); 1130a9799022Sck 1131a9799022Sck if (!DS_UNIQUE_IS_ACCURATE(ds) && 1132a9799022Sck spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1133a9799022Sck SPA_VERSION_UNIQUE_ACCURATE) 1134a9799022Sck ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1135a9799022Sck } 1136a9799022Sck 1137a9799022Sck static uint64_t 1138a9799022Sck dsl_dataset_unique(dsl_dataset_t *ds) 1139a9799022Sck { 1140a9799022Sck if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds)) 1141a9799022Sck dsl_dataset_recalc_head_uniq(ds); 1142a9799022Sck 1143a9799022Sck return (ds->ds_phys->ds_unique_bytes); 1144a9799022Sck } 1145a9799022Sck 1146fa9e4066Sahrens struct killarg { 1147*74e7dc98SMatthew Ahrens dsl_dataset_t *ds; 1148fa9e4066Sahrens zio_t *zio; 1149fa9e4066Sahrens dmu_tx_t *tx; 1150fa9e4066Sahrens }; 1151fa9e4066Sahrens 1152*74e7dc98SMatthew Ahrens /* ARGSUSED */ 1153fa9e4066Sahrens static int 1154fa9e4066Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 1155fa9e4066Sahrens { 1156fa9e4066Sahrens struct killarg *ka = arg; 1157fa9e4066Sahrens blkptr_t *bp = &bc->bc_blkptr; 1158fa9e4066Sahrens 1159fa9e4066Sahrens ASSERT3U(bc->bc_errno, ==, 0); 1160fa9e4066Sahrens 1161*74e7dc98SMatthew Ahrens 
ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); 1162*74e7dc98SMatthew Ahrens (void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx); 1163*74e7dc98SMatthew Ahrens 1164fa9e4066Sahrens return (0); 1165fa9e4066Sahrens } 1166fa9e4066Sahrens 1167fa9e4066Sahrens /* ARGSUSED */ 11681d452cf5Sahrens static int 11691d452cf5Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) 1170fa9e4066Sahrens { 11711d452cf5Sahrens dsl_dataset_t *ds = arg1; 11723cb34c60Sahrens dmu_objset_type_t *ost = arg2; 1173fa9e4066Sahrens 11741d452cf5Sahrens /* 11753cb34c60Sahrens * We can only roll back to emptyness if it is a ZPL objset. 11761d452cf5Sahrens */ 11773cb34c60Sahrens if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0) 1178fa9e4066Sahrens return (EINVAL); 1179fa9e4066Sahrens 11801d452cf5Sahrens /* 11811d452cf5Sahrens * This must not be a snapshot. 11821d452cf5Sahrens */ 11831d452cf5Sahrens if (ds->ds_phys->ds_next_snap_obj != 0) 1184fa9e4066Sahrens return (EINVAL); 1185fa9e4066Sahrens 1186fa9e4066Sahrens /* 1187fa9e4066Sahrens * If we made changes this txg, traverse_dsl_dataset won't find 1188fa9e4066Sahrens * them. Try again. 1189fa9e4066Sahrens */ 11901d452cf5Sahrens if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1191fa9e4066Sahrens return (EAGAIN); 1192fa9e4066Sahrens 11931d452cf5Sahrens return (0); 11941d452cf5Sahrens } 11951d452cf5Sahrens 11961d452cf5Sahrens /* ARGSUSED */ 11971d452cf5Sahrens static void 1198ecd6cf80Smarks dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 11991d452cf5Sahrens { 12001d452cf5Sahrens dsl_dataset_t *ds = arg1; 12013cb34c60Sahrens dmu_objset_type_t *ost = arg2; 12021d452cf5Sahrens objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1203fa9e4066Sahrens 1204fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 1205fa9e4066Sahrens 120686ccc033Sperrin /* 120786ccc033Sperrin * Before the roll back destroy the zil. 
120886ccc033Sperrin */ 120986ccc033Sperrin if (ds->ds_user_ptr != NULL) { 121086ccc033Sperrin zil_rollback_destroy( 121186ccc033Sperrin ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx); 12123cb34c60Sahrens 12133cb34c60Sahrens /* 12143cb34c60Sahrens * We need to make sure that the objset_impl_t is reopened after 12153cb34c60Sahrens * we do the rollback, otherwise it will have the wrong 12163cb34c60Sahrens * objset_phys_t. Normally this would happen when this 1217745cd3c5Smaybee * dataset-open is closed, thus causing the 12183cb34c60Sahrens * dataset to be immediately evicted. But when doing "zfs recv 12193cb34c60Sahrens * -F", we reopen the objset before that, so that there is no 12203cb34c60Sahrens * window where the dataset is closed and inconsistent. 12213cb34c60Sahrens */ 12223cb34c60Sahrens ds->ds_user_evict_func(ds, ds->ds_user_ptr); 12233cb34c60Sahrens ds->ds_user_ptr = NULL; 122486ccc033Sperrin } 12253a8a1de4Sperrin 1226*74e7dc98SMatthew Ahrens /* Transfer space that was freed since last snap back to the head. */ 1227*74e7dc98SMatthew Ahrens { 1228*74e7dc98SMatthew Ahrens uint64_t used; 1229*74e7dc98SMatthew Ahrens 1230*74e7dc98SMatthew Ahrens VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist, 1231*74e7dc98SMatthew Ahrens ds->ds_origin_txg, UINT64_MAX, &used)); 1232*74e7dc98SMatthew Ahrens dsl_dir_transfer_space(ds->ds_dir, used, 1233*74e7dc98SMatthew Ahrens DD_USED_SNAP, DD_USED_HEAD, tx); 1234*74e7dc98SMatthew Ahrens } 1235*74e7dc98SMatthew Ahrens 1236fa9e4066Sahrens /* Zero out the deadlist. 
*/ 1237fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 1238fa9e4066Sahrens bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1239fa9e4066Sahrens ds->ds_phys->ds_deadlist_obj = 1240fa9e4066Sahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1241ea8dc4b6Seschrock VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1242ea8dc4b6Seschrock ds->ds_phys->ds_deadlist_obj)); 1243fa9e4066Sahrens 1244fa9e4066Sahrens { 1245fa9e4066Sahrens /* Free blkptrs that we gave birth to */ 1246fa9e4066Sahrens zio_t *zio; 1247fa9e4066Sahrens struct killarg ka; 1248fa9e4066Sahrens 1249fa9e4066Sahrens zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 1250fa9e4066Sahrens ZIO_FLAG_MUSTSUCCEED); 1251*74e7dc98SMatthew Ahrens ka.ds = ds; 1252fa9e4066Sahrens ka.zio = zio; 1253fa9e4066Sahrens ka.tx = tx; 1254fa9e4066Sahrens (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1255fa9e4066Sahrens ADVANCE_POST, kill_blkptr, &ka); 1256fa9e4066Sahrens (void) zio_wait(zio); 1257fa9e4066Sahrens } 1258fa9e4066Sahrens 1259*74e7dc98SMatthew Ahrens ASSERT(!(ds->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) || 1260*74e7dc98SMatthew Ahrens ds->ds_phys->ds_unique_bytes == 0); 1261*74e7dc98SMatthew Ahrens 1262088f3894Sahrens if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) { 12633cb34c60Sahrens /* Change our contents to that of the prev snapshot */ 1264*74e7dc98SMatthew Ahrens 12653cb34c60Sahrens ASSERT3U(ds->ds_prev->ds_object, ==, 12663cb34c60Sahrens ds->ds_phys->ds_prev_snap_obj); 1267*74e7dc98SMatthew Ahrens ASSERT3U(ds->ds_phys->ds_used_bytes, <=, 1268*74e7dc98SMatthew Ahrens ds->ds_prev->ds_phys->ds_used_bytes); 1269*74e7dc98SMatthew Ahrens 12703cb34c60Sahrens ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 12713cb34c60Sahrens ds->ds_phys->ds_used_bytes = 12723cb34c60Sahrens ds->ds_prev->ds_phys->ds_used_bytes; 12733cb34c60Sahrens ds->ds_phys->ds_compressed_bytes = 12743cb34c60Sahrens ds->ds_prev->ds_phys->ds_compressed_bytes; 12753cb34c60Sahrens 
ds->ds_phys->ds_uncompressed_bytes = 12763cb34c60Sahrens ds->ds_prev->ds_phys->ds_uncompressed_bytes; 12773cb34c60Sahrens ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 1278fa9e4066Sahrens 12793cb34c60Sahrens if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 12803cb34c60Sahrens dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 12813cb34c60Sahrens ds->ds_prev->ds_phys->ds_unique_bytes = 0; 12823cb34c60Sahrens } 12833cb34c60Sahrens } else { 1284088f3894Sahrens objset_impl_t *osi; 1285088f3894Sahrens 1286*74e7dc98SMatthew Ahrens ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0); 1287*74e7dc98SMatthew Ahrens ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0); 1288*74e7dc98SMatthew Ahrens ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0); 1289*74e7dc98SMatthew Ahrens 12903cb34c60Sahrens bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t)); 12913cb34c60Sahrens ds->ds_phys->ds_flags = 0; 12923cb34c60Sahrens ds->ds_phys->ds_unique_bytes = 0; 1293*74e7dc98SMatthew Ahrens if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1294*74e7dc98SMatthew Ahrens SPA_VERSION_UNIQUE_ACCURATE) 1295*74e7dc98SMatthew Ahrens ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1296*74e7dc98SMatthew Ahrens 1297088f3894Sahrens osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds, 12983cb34c60Sahrens &ds->ds_phys->ds_bp, *ost, tx); 1299088f3894Sahrens #ifdef _KERNEL 1300088f3894Sahrens zfs_create_fs(&osi->os, kcred, NULL, tx); 1301088f3894Sahrens #endif 130285edac42Sahrens } 1303ecd6cf80Smarks 1304ecd6cf80Smarks spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, 1305ecd6cf80Smarks tx, cr, "dataset = %llu", ds->ds_object); 1306fa9e4066Sahrens } 1307fa9e4066Sahrens 1308e1930233Sbonwick /* ARGSUSED */ 1309e1930233Sbonwick static int 13101d452cf5Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1311e1930233Sbonwick { 13121d452cf5Sahrens dsl_dataset_t *ds = arg1; 13133cb34c60Sahrens objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 
13143cb34c60Sahrens uint64_t count; 13153cb34c60Sahrens int err; 1316e1930233Sbonwick 1317e1930233Sbonwick /* 1318e1930233Sbonwick * Can't delete a head dataset if there are snapshots of it. 1319e1930233Sbonwick * (Except if the only snapshots are from the branch we cloned 1320e1930233Sbonwick * from.) 1321e1930233Sbonwick */ 1322e1930233Sbonwick if (ds->ds_prev != NULL && 1323e1930233Sbonwick ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1324e1930233Sbonwick return (EINVAL); 1325e1930233Sbonwick 13263cb34c60Sahrens /* 13273cb34c60Sahrens * This is really a dsl_dir thing, but check it here so that 13283cb34c60Sahrens * we'll be less likely to leave this dataset inconsistent & 13293cb34c60Sahrens * nearly destroyed. 13303cb34c60Sahrens */ 13313cb34c60Sahrens err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 13323cb34c60Sahrens if (err) 13333cb34c60Sahrens return (err); 13343cb34c60Sahrens if (count != 0) 13353cb34c60Sahrens return (EEXIST); 13363cb34c60Sahrens 1337e1930233Sbonwick return (0); 1338e1930233Sbonwick } 1339e1930233Sbonwick 13401d452cf5Sahrens /* ARGSUSED */ 13411d452cf5Sahrens static void 1342ecd6cf80Smarks dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1343fa9e4066Sahrens { 13441d452cf5Sahrens dsl_dataset_t *ds = arg1; 1345ecd6cf80Smarks dsl_pool_t *dp = ds->ds_dir->dd_pool; 1346fa9e4066Sahrens 13471d452cf5Sahrens /* Mark it as inconsistent on-disk, in case we crash */ 13481d452cf5Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 13491d452cf5Sahrens ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1350ecd6cf80Smarks 1351ecd6cf80Smarks spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1352ecd6cf80Smarks cr, "dataset = %llu", ds->ds_object); 13531d452cf5Sahrens } 1354fa9e4066Sahrens 13551d452cf5Sahrens /* ARGSUSED */ 13563cb34c60Sahrens int 13571d452cf5Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 13581d452cf5Sahrens { 13591d452cf5Sahrens dsl_dataset_t *ds 
= arg1; 1360fa9e4066Sahrens 1361745cd3c5Smaybee /* we have an owner hold, so noone else can destroy us */ 1362745cd3c5Smaybee ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 1363745cd3c5Smaybee 1364fa9e4066Sahrens /* Can't delete a branch point. */ 13651d452cf5Sahrens if (ds->ds_phys->ds_num_children > 1) 13661d452cf5Sahrens return (EEXIST); 1367fa9e4066Sahrens 1368fa9e4066Sahrens /* 1369fa9e4066Sahrens * Can't delete a head dataset if there are snapshots of it. 1370fa9e4066Sahrens * (Except if the only snapshots are from the branch we cloned 1371fa9e4066Sahrens * from.) 1372fa9e4066Sahrens */ 1373fa9e4066Sahrens if (ds->ds_prev != NULL && 13741d452cf5Sahrens ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1375fa9e4066Sahrens return (EINVAL); 1376fa9e4066Sahrens 1377fa9e4066Sahrens /* 1378fa9e4066Sahrens * If we made changes this txg, traverse_dsl_dataset won't find 1379fa9e4066Sahrens * them. Try again. 1380fa9e4066Sahrens */ 13811d452cf5Sahrens if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1382fa9e4066Sahrens return (EAGAIN); 13831d452cf5Sahrens 13841d452cf5Sahrens /* XXX we should do some i/o error checking... 
/*
 * Sync-context half of destroying dataset 'arg1' (the signature has the
 * shape of a dsl_syncfunc_t).
 *
 * In order, this function:
 *   - hands ownership to dsl_reaper and wakes ds_exclusive_cv waiters;
 *   - drops any refreservation;
 *   - unlinks the dataset from its previous snapshot (or origin, when it
 *     sits after a branch point);
 *   - if there is a next snapshot, merges deadlists into it and fixes up
 *     unique/snapused accounting; otherwise (head dataset) destroys the
 *     deadlist and frees the blocks born after the previous snapshot;
 *   - removes the dataset from the dir or from the snapshot namespace;
 *   - frees the auxiliary MOS objects, drains remaining dbuf references
 *     and frees the dataset object itself.
 *
 * arg1 - the dsl_dataset_t to destroy; must be owned (asserted).
 * tag  - hold tag handed to dsl_dataset_drain_refs() to release ds_dbuf.
 * cr   - credentials for the reservation change and the history log.
 * tx   - transaction; dp_config_rwlock must be write-held (asserted).
 */
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(ds->ds_owner);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	/* signal any waiters that this dataset is going away */
	mutex_enter(&ds->ds_lock);
	ds->ds_owner = dsl_reaper;
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_pool_ds_destroyed(ds, tx);

	/* remember the object number; ds_dir/ds_dbuf are torn down below */
	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		/* reuse the cached prev if present, else take a temporary hold */
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		}
		/*
		 * prev's "next" is not us, so we hang off prev as a branch
		 * (clone) rather than as its linear successor.
		 */
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			/*
			 * Replace our entry in prev's next_clones ZAP with
			 * our next snapshot, if we have one.
			 */
			VERIFY(0 == zap_remove_int(mos,
			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY(0 == zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			/* splice ourselves out of the linear snapshot chain */
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	/* parent zio for the frees issued below; waited on after both arms */
	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;
		int64_t used = 0, compressed = 0, uncompressed = 0;

		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		/* sampled before any changes; used for the refrsrv delta below */
		old_unique = dsl_dataset_unique(ds_next);

		/* next now follows our prev directly */
		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				/*
				 * Blocks born after prev's own prev are
				 * added to prev's unique bytes.
				 */
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				/* freed here; totals are charged to snapused */
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) dsl_free(zio, dp, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);

		/* change snapused */
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -compressed, -uncompressed, tx);

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		bplist_close(&ds->ds_deadlist);
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;
			uint64_t space;

			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj,
			    FTAG, &ds_after_next));

			VERIFY(0 ==
			    bplist_space_birthrange(&ds_after_next->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_creation_txg, &space));
			ds_next->ds_phys->ds_unique_bytes += space;

			dsl_dataset_rele(ds_after_next, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			/*
			 * next has no next snapshot of its own; repoint its
			 * cached ds_prev from us to our prev (if any).
			 */
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
			ds_next->ds_prev = NULL;
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds_next, &ds_next->ds_prev));
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir,
				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
			}
		}
		dsl_dataset_rele(ds_next, FTAG);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * NB: this should be very quick, because we already
		 * freed all the objects in open context.
		 */
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(spa_version(dp->dp_spa) < SPA_VERSION_UNIQUE_ACCURATE ||
		    ds->ds_phys->ds_unique_bytes == 0);
	}

	/* wait for every free issued under the root zio */
	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dir */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			/* debug builds: the name must currently map to us */
			uint64_t val;

			err = dsl_dataset_snap_lookup(ds_head,
			    ds->ds_snapname, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_rele(ds_head, FTAG);
	}

	/* only release prev if we took the temporary hold above */
	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		/* 'count' is only consumed by the ASSERT (debug builds) */
		uint64_t count;
		ASSERT(0 == zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
		VERIFY(0 == dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	dsl_dir_close(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	/* drain_refs clears ds_dbuf/ds_phys, hence the saved 'obj' below */
	dsl_dataset_drain_refs(ds, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));
}
1721a9799022Sck */ 1722a9799022Sck asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1723a9799022Sck if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE)) 1724a9799022Sck return (ENOSPC); 1725a9799022Sck 1726a9799022Sck /* 1727a9799022Sck * Propogate any reserved space for this snapshot to other 1728a9799022Sck * snapshot checks in this sync group. 1729a9799022Sck */ 1730a9799022Sck if (asize > 0) 1731a9799022Sck dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1732a9799022Sck 1733a9799022Sck return (0); 1734a9799022Sck } 1735a9799022Sck 17361d452cf5Sahrens /* ARGSUSED */ 1737fa9e4066Sahrens int 17381d452cf5Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1739fa9e4066Sahrens { 17403cb34c60Sahrens dsl_dataset_t *ds = arg1; 17411d452cf5Sahrens const char *snapname = arg2; 1742fa9e4066Sahrens int err; 17431d452cf5Sahrens uint64_t value; 1744fa9e4066Sahrens 17451d452cf5Sahrens /* 17461d452cf5Sahrens * We don't allow multiple snapshots of the same txg. If there 17471d452cf5Sahrens * is already one, try again. 17481d452cf5Sahrens */ 17491d452cf5Sahrens if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 17501d452cf5Sahrens return (EAGAIN); 1751fa9e4066Sahrens 17521d452cf5Sahrens /* 17531d452cf5Sahrens * Check for conflicting name snapshot name. 17541d452cf5Sahrens */ 1755745cd3c5Smaybee err = dsl_dataset_snap_lookup(ds, snapname, &value); 17561d452cf5Sahrens if (err == 0) 1757fa9e4066Sahrens return (EEXIST); 17581d452cf5Sahrens if (err != ENOENT) 17591d452cf5Sahrens return (err); 1760fa9e4066Sahrens 1761b7661cccSmmusante /* 1762b7661cccSmmusante * Check that the dataset's name is not too long. 
Name consists 1763b7661cccSmmusante * of the dataset's length + 1 for the @-sign + snapshot name's length 1764b7661cccSmmusante */ 1765b7661cccSmmusante if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1766b7661cccSmmusante return (ENAMETOOLONG); 1767b7661cccSmmusante 1768a9799022Sck err = dsl_dataset_snapshot_reserve_space(ds, tx); 1769a9799022Sck if (err) 1770a9799022Sck return (err); 1771a9799022Sck 17721d452cf5Sahrens ds->ds_trysnap_txg = tx->tx_txg; 17731d452cf5Sahrens return (0); 17741d452cf5Sahrens } 1775fa9e4066Sahrens 17761d452cf5Sahrens void 1777ecd6cf80Smarks dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 17781d452cf5Sahrens { 17793cb34c60Sahrens dsl_dataset_t *ds = arg1; 17801d452cf5Sahrens const char *snapname = arg2; 17811d452cf5Sahrens dsl_pool_t *dp = ds->ds_dir->dd_pool; 17821d452cf5Sahrens dmu_buf_t *dbuf; 17831d452cf5Sahrens dsl_dataset_phys_t *dsphys; 1784088f3894Sahrens uint64_t dsobj, crtxg; 17851d452cf5Sahrens objset_t *mos = dp->dp_meta_objset; 17861d452cf5Sahrens int err; 1787fa9e4066Sahrens 17881d452cf5Sahrens ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1789fa9e4066Sahrens 1790088f3894Sahrens /* 1791088f3894Sahrens * The origin's ds_creation_txg has to be < TXG_INITIAL 1792088f3894Sahrens */ 1793088f3894Sahrens if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1794088f3894Sahrens crtxg = 1; 1795088f3894Sahrens else 1796088f3894Sahrens crtxg = tx->tx_txg; 1797088f3894Sahrens 17981649cd4bStabriz dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 17991649cd4bStabriz DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1800ea8dc4b6Seschrock VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1801fa9e4066Sahrens dmu_buf_will_dirty(dbuf, tx); 1802fa9e4066Sahrens dsphys = dbuf->db_data; 1803745cd3c5Smaybee bzero(dsphys, sizeof (dsl_dataset_phys_t)); 18041d452cf5Sahrens dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1805fa9e4066Sahrens dsphys->ds_fsid_guid = unique_create(); 1806fa9e4066Sahrens (void) 
random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1807fa9e4066Sahrens sizeof (dsphys->ds_guid)); 1808fa9e4066Sahrens dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1809fa9e4066Sahrens dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1810fa9e4066Sahrens dsphys->ds_next_snap_obj = ds->ds_object; 1811fa9e4066Sahrens dsphys->ds_num_children = 1; 1812fa9e4066Sahrens dsphys->ds_creation_time = gethrestime_sec(); 1813088f3894Sahrens dsphys->ds_creation_txg = crtxg; 1814fa9e4066Sahrens dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1815fa9e4066Sahrens dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1816fa9e4066Sahrens dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1817fa9e4066Sahrens dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 181899653d4eSeschrock dsphys->ds_flags = ds->ds_phys->ds_flags; 1819fa9e4066Sahrens dsphys->ds_bp = ds->ds_phys->ds_bp; 1820ea8dc4b6Seschrock dmu_buf_rele(dbuf, FTAG); 1821fa9e4066Sahrens 18221d452cf5Sahrens ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 18231d452cf5Sahrens if (ds->ds_prev) { 1824088f3894Sahrens uint64_t next_clones_obj = 1825088f3894Sahrens ds->ds_prev->ds_phys->ds_next_clones_obj; 18261d452cf5Sahrens ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1827fa9e4066Sahrens ds->ds_object || 18281d452cf5Sahrens ds->ds_prev->ds_phys->ds_num_children > 1); 18291d452cf5Sahrens if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 18301d452cf5Sahrens dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1831fa9e4066Sahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 18321d452cf5Sahrens ds->ds_prev->ds_phys->ds_creation_txg); 18331d452cf5Sahrens ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1834088f3894Sahrens } else if (next_clones_obj != 0) { 1835088f3894Sahrens VERIFY3U(0, ==, zap_remove_int(mos, 1836088f3894Sahrens next_clones_obj, dsphys->ds_next_snap_obj, tx)); 1837088f3894Sahrens VERIFY3U(0, ==, zap_add_int(mos, 1838088f3894Sahrens 
next_clones_obj, dsobj, tx)); 1839fa9e4066Sahrens } 1840fa9e4066Sahrens } 1841fa9e4066Sahrens 1842a9799022Sck /* 1843a9799022Sck * If we have a reference-reservation on this dataset, we will 1844a9799022Sck * need to increase the amount of refreservation being charged 1845a9799022Sck * since our unique space is going to zero. 1846a9799022Sck */ 1847a9799022Sck if (ds->ds_reserved) { 1848a9799022Sck int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1849*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1850*74e7dc98SMatthew Ahrens add, 0, 0, tx); 1851a9799022Sck } 1852a9799022Sck 1853fa9e4066Sahrens bplist_close(&ds->ds_deadlist); 1854fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 1855a4611edeSahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 1856fa9e4066Sahrens ds->ds_phys->ds_prev_snap_obj = dsobj; 1857088f3894Sahrens ds->ds_phys->ds_prev_snap_txg = crtxg; 1858fa9e4066Sahrens ds->ds_phys->ds_unique_bytes = 0; 1859a9799022Sck if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1860a9799022Sck ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1861fa9e4066Sahrens ds->ds_phys->ds_deadlist_obj = 1862fa9e4066Sahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1863ea8dc4b6Seschrock VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1864ea8dc4b6Seschrock ds->ds_phys->ds_deadlist_obj)); 1865fa9e4066Sahrens 1866fa9e4066Sahrens dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1867fa9e4066Sahrens err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1868fa9e4066Sahrens snapname, 8, 1, &dsobj, tx); 1869fa9e4066Sahrens ASSERT(err == 0); 1870fa9e4066Sahrens 1871fa9e4066Sahrens if (ds->ds_prev) 1872745cd3c5Smaybee dsl_dataset_drop_ref(ds->ds_prev, ds); 1873745cd3c5Smaybee VERIFY(0 == dsl_dataset_get_ref(dp, 1874745cd3c5Smaybee ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 1875ecd6cf80Smarks 1876088f3894Sahrens dsl_pool_ds_snapshotted(ds, tx); 1877088f3894Sahrens 1878ecd6cf80Smarks 
spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 187940feaa91Sahrens "dataset = %llu", dsobj); 1880fa9e4066Sahrens } 1881fa9e4066Sahrens 1882fa9e4066Sahrens void 1883c717a561Smaybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1884fa9e4066Sahrens { 1885fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 1886fa9e4066Sahrens ASSERT(ds->ds_user_ptr != NULL); 1887fa9e4066Sahrens ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1888fa9e4066Sahrens 188991ebeef5Sahrens /* 189091ebeef5Sahrens * in case we had to change ds_fsid_guid when we opened it, 189191ebeef5Sahrens * sync it out now. 189291ebeef5Sahrens */ 189391ebeef5Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 189491ebeef5Sahrens ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 189591ebeef5Sahrens 1896fa9e4066Sahrens dsl_dir_dirty(ds->ds_dir, tx); 1897c717a561Smaybee dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1898fa9e4066Sahrens } 1899fa9e4066Sahrens 1900fa9e4066Sahrens void 1901a2eea2e1Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1902fa9e4066Sahrens { 1903a9799022Sck uint64_t refd, avail, uobjs, aobjs; 1904a9799022Sck 1905a2eea2e1Sahrens dsl_dir_stats(ds->ds_dir, nv); 1906fa9e4066Sahrens 1907a9799022Sck dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1908a9799022Sck dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1909a9799022Sck dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1910a9799022Sck 1911a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1912a2eea2e1Sahrens ds->ds_phys->ds_creation_time); 1913a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1914a2eea2e1Sahrens ds->ds_phys->ds_creation_txg); 1915a9799022Sck dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1916a9799022Sck ds->ds_quota); 1917a9799022Sck dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1918a9799022Sck ds->ds_reserved); 1919c5904d13Seschrock dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1920c5904d13Seschrock ds->ds_phys->ds_guid); 
1921fa9e4066Sahrens 1922fa9e4066Sahrens if (ds->ds_phys->ds_next_snap_obj) { 1923fa9e4066Sahrens /* 1924fa9e4066Sahrens * This is a snapshot; override the dd's space used with 1925a2eea2e1Sahrens * our unique space and compression ratio. 1926fa9e4066Sahrens */ 1927a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1928a2eea2e1Sahrens ds->ds_phys->ds_unique_bytes); 1929a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1930a2eea2e1Sahrens ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1931a2eea2e1Sahrens (ds->ds_phys->ds_uncompressed_bytes * 100 / 1932a2eea2e1Sahrens ds->ds_phys->ds_compressed_bytes)); 1933fa9e4066Sahrens } 1934fa9e4066Sahrens } 1935fa9e4066Sahrens 1936a2eea2e1Sahrens void 1937a2eea2e1Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1938a2eea2e1Sahrens { 1939a2eea2e1Sahrens stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1940a2eea2e1Sahrens stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 19413cb34c60Sahrens stat->dds_guid = ds->ds_phys->ds_guid; 1942a2eea2e1Sahrens if (ds->ds_phys->ds_next_snap_obj) { 1943a2eea2e1Sahrens stat->dds_is_snapshot = B_TRUE; 1944a2eea2e1Sahrens stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1945a2eea2e1Sahrens } 1946a2eea2e1Sahrens 1947a2eea2e1Sahrens /* clone origin is really a dsl_dir thing... 
*/ 19484ccbb6e7Sahrens rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1949088f3894Sahrens if (dsl_dir_is_clone(ds->ds_dir)) { 1950a2eea2e1Sahrens dsl_dataset_t *ods; 1951a2eea2e1Sahrens 1952745cd3c5Smaybee VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, 1953745cd3c5Smaybee ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 19543cb34c60Sahrens dsl_dataset_name(ods, stat->dds_origin); 1955745cd3c5Smaybee dsl_dataset_drop_ref(ods, FTAG); 1956a2eea2e1Sahrens } 19574ccbb6e7Sahrens rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1958a2eea2e1Sahrens } 1959a2eea2e1Sahrens 1960a2eea2e1Sahrens uint64_t 1961a2eea2e1Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1962a2eea2e1Sahrens { 196391ebeef5Sahrens return (ds->ds_fsid_guid); 1964a2eea2e1Sahrens } 1965a2eea2e1Sahrens 1966a2eea2e1Sahrens void 1967a2eea2e1Sahrens dsl_dataset_space(dsl_dataset_t *ds, 1968a2eea2e1Sahrens uint64_t *refdbytesp, uint64_t *availbytesp, 1969a2eea2e1Sahrens uint64_t *usedobjsp, uint64_t *availobjsp) 1970fa9e4066Sahrens { 1971a2eea2e1Sahrens *refdbytesp = ds->ds_phys->ds_used_bytes; 1972a2eea2e1Sahrens *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1973a9799022Sck if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 1974a9799022Sck *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 1975a9799022Sck if (ds->ds_quota != 0) { 1976a9799022Sck /* 1977a9799022Sck * Adjust available bytes according to refquota 1978a9799022Sck */ 1979a9799022Sck if (*refdbytesp < ds->ds_quota) 1980a9799022Sck *availbytesp = MIN(*availbytesp, 1981a9799022Sck ds->ds_quota - *refdbytesp); 1982a9799022Sck else 1983a9799022Sck *availbytesp = 0; 1984a9799022Sck } 1985a2eea2e1Sahrens *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1986a2eea2e1Sahrens *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1987fa9e4066Sahrens } 1988fa9e4066Sahrens 1989f18faf3fSek boolean_t 1990f18faf3fSek dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 1991f18faf3fSek { 1992f18faf3fSek dsl_pool_t *dp = 
ds->ds_dir->dd_pool; 1993f18faf3fSek 1994f18faf3fSek ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 1995f18faf3fSek dsl_pool_sync_context(dp)); 1996f18faf3fSek if (ds->ds_prev == NULL) 1997f18faf3fSek return (B_FALSE); 1998f18faf3fSek if (ds->ds_phys->ds_bp.blk_birth > 1999f18faf3fSek ds->ds_prev->ds_phys->ds_creation_txg) 2000f18faf3fSek return (B_TRUE); 2001f18faf3fSek return (B_FALSE); 2002f18faf3fSek } 2003f18faf3fSek 20041d452cf5Sahrens /* ARGSUSED */ 2005fa9e4066Sahrens static int 20061d452cf5Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 2007fa9e4066Sahrens { 20081d452cf5Sahrens dsl_dataset_t *ds = arg1; 20091d452cf5Sahrens char *newsnapname = arg2; 20101d452cf5Sahrens dsl_dir_t *dd = ds->ds_dir; 20111d452cf5Sahrens dsl_dataset_t *hds; 2012fa9e4066Sahrens uint64_t val; 20131d452cf5Sahrens int err; 2014fa9e4066Sahrens 2015745cd3c5Smaybee err = dsl_dataset_hold_obj(dd->dd_pool, 2016745cd3c5Smaybee dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 2017fa9e4066Sahrens if (err) 2018fa9e4066Sahrens return (err); 2019fa9e4066Sahrens 20201d452cf5Sahrens /* new name better not be in use */ 2021745cd3c5Smaybee err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 2022745cd3c5Smaybee dsl_dataset_rele(hds, FTAG); 20231d452cf5Sahrens 20241d452cf5Sahrens if (err == 0) 20251d452cf5Sahrens err = EEXIST; 20261d452cf5Sahrens else if (err == ENOENT) 20271d452cf5Sahrens err = 0; 2028cdf5b4caSmmusante 2029cdf5b4caSmmusante /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2030cdf5b4caSmmusante if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 2031cdf5b4caSmmusante err = ENAMETOOLONG; 2032cdf5b4caSmmusante 20331d452cf5Sahrens return (err); 20341d452cf5Sahrens } 2035fa9e4066Sahrens 20361d452cf5Sahrens static void 2037ecd6cf80Smarks dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 2038ecd6cf80Smarks cred_t *cr, dmu_tx_t *tx) 20391d452cf5Sahrens { 20401d452cf5Sahrens dsl_dataset_t *ds = arg1; 
/*
 * State shared between dsl_recursive_rename() and its per-filesystem
 * callback dsl_snapshot_rename_one().
 */
struct renamesnaparg {
	/* task group collecting one rename task per snapshot; also hold tag */
	dsl_sync_task_group_t *dstg;
	/* name reported back to the caller on failure ("" when unset) */
	char failed[MAXPATHLEN];
	/* snapshot name (text after the '@') being renamed from */
	char *oldsnap;
	/* snapshot name (text after the '@') being renamed to */
	char *newsnap;
};
/*
 * Rename the snapshot named by 'oldname' ("fs@old") to the snapshot name in
 * 'newname' ("fs@new") on the filesystem and on every filesystem visited
 * by dmu_objset_find(..., DS_FIND_CHILDREN).
 *
 * One rename task per snapshot is queued by dsl_snapshot_rename_one() into
 * a single sync task group, and the group is waited on only if the walk
 * succeeded.  On failure 'oldname' is overwritten with ra->failed (the
 * name recorded for the failure; it may be empty).  Returns 0 or an errno.
 *
 * NOTE(review): both names must contain '@'; the strchr() results are
 * dereferenced unchecked.  The caller visible in this file verifies
 * newname before calling; oldname is presumably covered by its check of
 * the dsl_dir_open() tail -- confirm for any new caller.
 */
static int
dsl_recursive_rename(char *oldname, const char *newname)
{
	int err;
	struct renamesnaparg *ra;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp, *fsname = spa_strdup(oldname);
	/* length of the untruncated copy; needed to free fsname below */
	int len = strlen(oldname);

	/* truncate the snapshot name to get the fsname */
	cp = strchr(fsname, '@');
	*cp = '\0';

	err = spa_open(fsname, &spa, FTAG);
	if (err) {
		kmem_free(fsname, len + 1);
		return (err);
	}
	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));

	/* point at the text following the '@' in each name */
	ra->oldsnap = strchr(oldname, '@') + 1;
	ra->newsnap = strchr(newname, '@') + 1;
	*ra->failed = '\0';

	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
	    DS_FIND_CHILDREN);
	kmem_free(fsname, len + 1);

	if (err == 0) {
		err = dsl_sync_task_group_wait(ra->dstg);
	}

	/*
	 * Walk every queued task, even when the walk or the wait failed:
	 * each task's dataset was held with ra->dstg as the tag by the
	 * callback and must be released here.
	 */
	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			/*
			 * Record "<dir>@<newsnap>" for the failed task; a
			 * later failed task overwrites an earlier one.
			 * NOTE(review): strcat() is unbounded; this relies
			 * on the result fitting in MAXPATHLEN -- confirm.
			 */
			dsl_dir_name(ds->ds_dir, ra->failed);
			(void) strcat(ra->failed, "@");
			(void) strcat(ra->failed, ra->newsnap);
		}
		dsl_dataset_rele(ds, ra->dstg);
	}

	/*
	 * NOTE(review): assumes the caller's oldname buffer can hold
	 * ra->failed (up to MAXPATHLEN bytes) -- confirm against callers.
	 */
	if (err)
		(void) strcpy(oldname, ra->failed);

	dsl_sync_task_group_destroy(ra->dstg);
	kmem_free(ra, sizeof (struct renamesnaparg));
	spa_close(spa, FTAG);
	return (err);
}
21773a5a36beSmmusante 21783a5a36beSmmusante if (strlen(oldname) + delta >= MAXNAMELEN) 21793a5a36beSmmusante return (ENAMETOOLONG); 21803a5a36beSmmusante 21813a5a36beSmmusante return (0); 21823a5a36beSmmusante } 21833a5a36beSmmusante 2184fa9e4066Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename 2185fa9e4066Sahrens int 2186745cd3c5Smaybee dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) 2187fa9e4066Sahrens { 2188fa9e4066Sahrens dsl_dir_t *dd; 21891d452cf5Sahrens dsl_dataset_t *ds; 2190fa9e4066Sahrens const char *tail; 2191fa9e4066Sahrens int err; 2192fa9e4066Sahrens 21931d452cf5Sahrens err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2194ea8dc4b6Seschrock if (err) 2195ea8dc4b6Seschrock return (err); 2196fa9e4066Sahrens if (tail == NULL) { 21973a5a36beSmmusante int delta = strlen(newname) - strlen(oldname); 21983a5a36beSmmusante 2199088f3894Sahrens /* if we're growing, validate child name lengths */ 22003a5a36beSmmusante if (delta > 0) 22013a5a36beSmmusante err = dmu_objset_find(oldname, dsl_valid_rename, 22023a5a36beSmmusante &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 22033a5a36beSmmusante 22043a5a36beSmmusante if (!err) 22053a5a36beSmmusante err = dsl_dir_rename(dd, newname); 2206fa9e4066Sahrens dsl_dir_close(dd, FTAG); 2207fa9e4066Sahrens return (err); 2208fa9e4066Sahrens } 2209fa9e4066Sahrens if (tail[0] != '@') { 2210fa9e4066Sahrens /* the name ended in a nonexistant component */ 2211fa9e4066Sahrens dsl_dir_close(dd, FTAG); 2212fa9e4066Sahrens return (ENOENT); 2213fa9e4066Sahrens } 2214fa9e4066Sahrens 2215fa9e4066Sahrens dsl_dir_close(dd, FTAG); 22161d452cf5Sahrens 22171d452cf5Sahrens /* new name must be snapshot in same filesystem */ 22181d452cf5Sahrens tail = strchr(newname, '@'); 22191d452cf5Sahrens if (tail == NULL) 22201d452cf5Sahrens return (EINVAL); 22211d452cf5Sahrens tail++; 22221d452cf5Sahrens if (strncmp(oldname, newname, tail - newname) != 0) 22231d452cf5Sahrens return (EXDEV); 22241d452cf5Sahrens 
2225cdf5b4caSmmusante if (recursive) { 2226cdf5b4caSmmusante err = dsl_recursive_rename(oldname, newname); 2227cdf5b4caSmmusante } else { 2228745cd3c5Smaybee err = dsl_dataset_hold(oldname, FTAG, &ds); 2229cdf5b4caSmmusante if (err) 2230cdf5b4caSmmusante return (err); 22311d452cf5Sahrens 2232cdf5b4caSmmusante err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2233cdf5b4caSmmusante dsl_dataset_snapshot_rename_check, 2234cdf5b4caSmmusante dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 22351d452cf5Sahrens 2236745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 2237cdf5b4caSmmusante } 22381d452cf5Sahrens 2239fa9e4066Sahrens return (err); 2240fa9e4066Sahrens } 224199653d4eSeschrock 2242088f3894Sahrens struct promotenode { 2243745cd3c5Smaybee list_node_t link; 2244745cd3c5Smaybee dsl_dataset_t *ds; 2245745cd3c5Smaybee }; 2246745cd3c5Smaybee 22471d452cf5Sahrens struct promotearg { 2248*74e7dc98SMatthew Ahrens list_t shared_snaps, origin_snaps, clone_snaps; 2249*74e7dc98SMatthew Ahrens dsl_dataset_t *origin_origin, *origin_head; 2250*74e7dc98SMatthew Ahrens uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 22511d452cf5Sahrens }; 22521d452cf5Sahrens 2253*74e7dc98SMatthew Ahrens static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2254*74e7dc98SMatthew Ahrens 2255ecd6cf80Smarks /* ARGSUSED */ 225699653d4eSeschrock static int 22571d452cf5Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 225899653d4eSeschrock { 22591d452cf5Sahrens dsl_dataset_t *hds = arg1; 22601d452cf5Sahrens struct promotearg *pa = arg2; 2261*74e7dc98SMatthew Ahrens struct promotenode *snap = list_head(&pa->shared_snaps); 2262745cd3c5Smaybee dsl_dataset_t *origin_ds = snap->ds; 2263745cd3c5Smaybee int err; 22641d452cf5Sahrens 2265088f3894Sahrens /* Check that it is a real clone */ 2266088f3894Sahrens if (!dsl_dir_is_clone(hds->ds_dir)) 226799653d4eSeschrock return (EINVAL); 226899653d4eSeschrock 22691d452cf5Sahrens /* Since this is so expensive, don't do 
the preliminary check */ 22701d452cf5Sahrens if (!dmu_tx_is_syncing(tx)) 22711d452cf5Sahrens return (0); 22721d452cf5Sahrens 2273745cd3c5Smaybee if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 2274745cd3c5Smaybee return (EXDEV); 227599653d4eSeschrock 22763cb34c60Sahrens /* compute origin's new unique space */ 2277*74e7dc98SMatthew Ahrens snap = list_tail(&pa->clone_snaps); 2278*74e7dc98SMatthew Ahrens ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2279*74e7dc98SMatthew Ahrens err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2280*74e7dc98SMatthew Ahrens origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique); 2281*74e7dc98SMatthew Ahrens if (err) 2282745cd3c5Smaybee return (err); 228399653d4eSeschrock 2284745cd3c5Smaybee /* 2285745cd3c5Smaybee * Walk the snapshots that we are moving 2286745cd3c5Smaybee * 2287*74e7dc98SMatthew Ahrens * Compute space to transfer. Consider the incremental changes 2288*74e7dc98SMatthew Ahrens * to used for each snapshot: 2289*74e7dc98SMatthew Ahrens * (my used) = (prev's used) + (blocks born) - (blocks killed) 2290*74e7dc98SMatthew Ahrens * So each snapshot gave birth to: 2291*74e7dc98SMatthew Ahrens * (blocks born) = (my used) - (prev's used) + (blocks killed) 2292745cd3c5Smaybee * So a sequence would look like: 2293*74e7dc98SMatthew Ahrens * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2294745cd3c5Smaybee * Which simplifies to: 2295*74e7dc98SMatthew Ahrens * uN + kN + kN-1 + ... + k1 + k0 2296745cd3c5Smaybee * Note however, if we stop before we reach the ORIGIN we get: 2297*74e7dc98SMatthew Ahrens * uN + kN + kN-1 + ... 
+ kM - uM-1 2298745cd3c5Smaybee */ 2299745cd3c5Smaybee pa->used = origin_ds->ds_phys->ds_used_bytes; 2300745cd3c5Smaybee pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2301745cd3c5Smaybee pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2302*74e7dc98SMatthew Ahrens for (snap = list_head(&pa->shared_snaps); snap; 2303*74e7dc98SMatthew Ahrens snap = list_next(&pa->shared_snaps, snap)) { 230499653d4eSeschrock uint64_t val, dlused, dlcomp, dluncomp; 2305745cd3c5Smaybee dsl_dataset_t *ds = snap->ds; 230699653d4eSeschrock 230799653d4eSeschrock /* Check that the snapshot name does not conflict */ 2308*74e7dc98SMatthew Ahrens VERIFY(0 == dsl_dataset_get_snapname(ds)); 2309745cd3c5Smaybee err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2310745cd3c5Smaybee if (err == 0) 2311*74e7dc98SMatthew Ahrens return (EEXIST); 2312745cd3c5Smaybee if (err != ENOENT) 2313*74e7dc98SMatthew Ahrens return (err); 231499653d4eSeschrock 2315745cd3c5Smaybee /* The very first snapshot does not have a deadlist */ 2316*74e7dc98SMatthew Ahrens if (ds->ds_phys->ds_prev_snap_obj == 0) 2317*74e7dc98SMatthew Ahrens continue; 2318*74e7dc98SMatthew Ahrens 2319*74e7dc98SMatthew Ahrens if (err = bplist_space(&ds->ds_deadlist, 2320*74e7dc98SMatthew Ahrens &dlused, &dlcomp, &dluncomp)) 2321*74e7dc98SMatthew Ahrens return (err); 2322*74e7dc98SMatthew Ahrens pa->used += dlused; 2323*74e7dc98SMatthew Ahrens pa->comp += dlcomp; 2324*74e7dc98SMatthew Ahrens pa->uncomp += dluncomp; 2325*74e7dc98SMatthew Ahrens } 2326745cd3c5Smaybee 2327745cd3c5Smaybee /* 2328745cd3c5Smaybee * If we are a clone of a clone then we never reached ORIGIN, 2329745cd3c5Smaybee * so we need to subtract out the clone origin's used space. 
2330745cd3c5Smaybee */ 2331*74e7dc98SMatthew Ahrens if (pa->origin_origin) { 2332*74e7dc98SMatthew Ahrens pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; 2333*74e7dc98SMatthew Ahrens pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 2334*74e7dc98SMatthew Ahrens pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 233599653d4eSeschrock } 233699653d4eSeschrock 233799653d4eSeschrock /* Check that there is enough space here */ 2338*74e7dc98SMatthew Ahrens err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2339*74e7dc98SMatthew Ahrens pa->used); 2340*74e7dc98SMatthew Ahrens if (err) 2341*74e7dc98SMatthew Ahrens return (err); 2342*74e7dc98SMatthew Ahrens 2343*74e7dc98SMatthew Ahrens /* 2344*74e7dc98SMatthew Ahrens * Compute the amounts of space that will be used by snapshots 2345*74e7dc98SMatthew Ahrens * after the promotion (for both origin and clone). For each, 2346*74e7dc98SMatthew Ahrens * it is the amount of space that will be on all of their 2347*74e7dc98SMatthew Ahrens * deadlists (that was not born before their new origin). 2348*74e7dc98SMatthew Ahrens */ 2349*74e7dc98SMatthew Ahrens if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2350*74e7dc98SMatthew Ahrens uint64_t space; 2351*74e7dc98SMatthew Ahrens 2352*74e7dc98SMatthew Ahrens /* 2353*74e7dc98SMatthew Ahrens * Note, typically this will not be a clone of a clone, 2354*74e7dc98SMatthew Ahrens * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so 2355*74e7dc98SMatthew Ahrens * these snaplist_space() -> bplist_space_birthrange() 2356*74e7dc98SMatthew Ahrens * calls will be fast because they do not have to 2357*74e7dc98SMatthew Ahrens * iterate over all bps. 
2358*74e7dc98SMatthew Ahrens */ 2359*74e7dc98SMatthew Ahrens snap = list_head(&pa->origin_snaps); 2360*74e7dc98SMatthew Ahrens err = snaplist_space(&pa->shared_snaps, 2361*74e7dc98SMatthew Ahrens snap->ds->ds_origin_txg, &pa->cloneusedsnap); 2362*74e7dc98SMatthew Ahrens if (err) 2363*74e7dc98SMatthew Ahrens return (err); 2364*74e7dc98SMatthew Ahrens 2365*74e7dc98SMatthew Ahrens err = snaplist_space(&pa->clone_snaps, 2366*74e7dc98SMatthew Ahrens snap->ds->ds_origin_txg, &space); 2367*74e7dc98SMatthew Ahrens if (err) 2368*74e7dc98SMatthew Ahrens return (err); 2369*74e7dc98SMatthew Ahrens pa->cloneusedsnap += space; 2370*74e7dc98SMatthew Ahrens } 2371*74e7dc98SMatthew Ahrens if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2372*74e7dc98SMatthew Ahrens err = snaplist_space(&pa->origin_snaps, 2373*74e7dc98SMatthew Ahrens origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); 2374*74e7dc98SMatthew Ahrens if (err) 2375*74e7dc98SMatthew Ahrens return (err); 2376745cd3c5Smaybee } 23771d452cf5Sahrens 2378*74e7dc98SMatthew Ahrens return (0); 23791d452cf5Sahrens } 238099653d4eSeschrock 23811d452cf5Sahrens static void 2382ecd6cf80Smarks dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 23831d452cf5Sahrens { 23841d452cf5Sahrens dsl_dataset_t *hds = arg1; 23851d452cf5Sahrens struct promotearg *pa = arg2; 2386*74e7dc98SMatthew Ahrens struct promotenode *snap = list_head(&pa->shared_snaps); 2387745cd3c5Smaybee dsl_dataset_t *origin_ds = snap->ds; 2388*74e7dc98SMatthew Ahrens dsl_dataset_t *origin_head; 23891d452cf5Sahrens dsl_dir_t *dd = hds->ds_dir; 23901d452cf5Sahrens dsl_pool_t *dp = hds->ds_dir->dd_pool; 23913cb34c60Sahrens dsl_dir_t *odd = NULL; 2392088f3894Sahrens uint64_t oldnext_obj; 2393*74e7dc98SMatthew Ahrens int64_t delta; 23941d452cf5Sahrens 23951d452cf5Sahrens ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 23961d452cf5Sahrens 2397*74e7dc98SMatthew Ahrens snap = list_head(&pa->origin_snaps); 
2398*74e7dc98SMatthew Ahrens origin_head = snap->ds; 2399*74e7dc98SMatthew Ahrens 24000b69c2f0Sahrens /* 24013cb34c60Sahrens * We need to explicitly open odd, since origin_ds's dd will be 24020b69c2f0Sahrens * changing. 24030b69c2f0Sahrens */ 24043cb34c60Sahrens VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 24053cb34c60Sahrens NULL, FTAG, &odd)); 240699653d4eSeschrock 2407745cd3c5Smaybee /* change origin's next snap */ 2408745cd3c5Smaybee dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2409088f3894Sahrens oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2410*74e7dc98SMatthew Ahrens snap = list_tail(&pa->clone_snaps); 2411*74e7dc98SMatthew Ahrens ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2412*74e7dc98SMatthew Ahrens origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2413745cd3c5Smaybee 2414088f3894Sahrens /* change the origin's next clone */ 2415088f3894Sahrens if (origin_ds->ds_phys->ds_next_clones_obj) { 2416088f3894Sahrens VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 2417088f3894Sahrens origin_ds->ds_phys->ds_next_clones_obj, 2418*74e7dc98SMatthew Ahrens origin_ds->ds_phys->ds_next_snap_obj, tx)); 2419088f3894Sahrens VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2420088f3894Sahrens origin_ds->ds_phys->ds_next_clones_obj, 2421088f3894Sahrens oldnext_obj, tx)); 2422088f3894Sahrens } 2423088f3894Sahrens 2424745cd3c5Smaybee /* change origin */ 2425745cd3c5Smaybee dmu_buf_will_dirty(dd->dd_dbuf, tx); 2426745cd3c5Smaybee ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2427745cd3c5Smaybee dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2428*74e7dc98SMatthew Ahrens hds->ds_origin_txg = origin_head->ds_origin_txg; 2429745cd3c5Smaybee dmu_buf_will_dirty(odd->dd_dbuf, tx); 2430745cd3c5Smaybee odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2431*74e7dc98SMatthew Ahrens origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg; 2432745cd3c5Smaybee 243399653d4eSeschrock /* move 
snapshots to this dir */ 2434*74e7dc98SMatthew Ahrens for (snap = list_head(&pa->shared_snaps); snap; 2435*74e7dc98SMatthew Ahrens snap = list_next(&pa->shared_snaps, snap)) { 2436745cd3c5Smaybee dsl_dataset_t *ds = snap->ds; 243799653d4eSeschrock 24383baa08fcSek /* unregister props as dsl_dir is changing */ 24393baa08fcSek if (ds->ds_user_ptr) { 24403baa08fcSek ds->ds_user_evict_func(ds, ds->ds_user_ptr); 24413baa08fcSek ds->ds_user_ptr = NULL; 24423baa08fcSek } 244399653d4eSeschrock /* move snap name entry */ 2444*74e7dc98SMatthew Ahrens VERIFY(0 == dsl_dataset_get_snapname(ds)); 2445*74e7dc98SMatthew Ahrens VERIFY(0 == dsl_dataset_snap_remove(origin_head, 2446745cd3c5Smaybee ds->ds_snapname, tx)); 24471d452cf5Sahrens VERIFY(0 == zap_add(dp->dp_meta_objset, 244899653d4eSeschrock hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 244999653d4eSeschrock 8, 1, &ds->ds_object, tx)); 245099653d4eSeschrock /* change containing dsl_dir */ 245199653d4eSeschrock dmu_buf_will_dirty(ds->ds_dbuf, tx); 24523cb34c60Sahrens ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 245399653d4eSeschrock ds->ds_phys->ds_dir_obj = dd->dd_object; 24543cb34c60Sahrens ASSERT3P(ds->ds_dir, ==, odd); 245599653d4eSeschrock dsl_dir_close(ds->ds_dir, ds); 24561d452cf5Sahrens VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 245799653d4eSeschrock NULL, ds, &ds->ds_dir)); 245899653d4eSeschrock 245999653d4eSeschrock ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2460*74e7dc98SMatthew Ahrens } 2461*74e7dc98SMatthew Ahrens 2462*74e7dc98SMatthew Ahrens /* 2463*74e7dc98SMatthew Ahrens * Change space accounting. 2464*74e7dc98SMatthew Ahrens * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2465*74e7dc98SMatthew Ahrens * both be valid, or both be 0 (resulting in delta == 0). This 2466*74e7dc98SMatthew Ahrens * is true for each of {clone,origin} independently. 
2467*74e7dc98SMatthew Ahrens */ 2468*74e7dc98SMatthew Ahrens 2469*74e7dc98SMatthew Ahrens delta = pa->cloneusedsnap - 2470*74e7dc98SMatthew Ahrens dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2471*74e7dc98SMatthew Ahrens ASSERT3S(delta, >=, 0); 2472*74e7dc98SMatthew Ahrens ASSERT3U(pa->used, >=, delta); 2473*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2474*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(dd, DD_USED_HEAD, 2475*74e7dc98SMatthew Ahrens pa->used - delta, pa->comp, pa->uncomp, tx); 2476*74e7dc98SMatthew Ahrens 2477*74e7dc98SMatthew Ahrens delta = pa->originusedsnap - 2478*74e7dc98SMatthew Ahrens odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2479*74e7dc98SMatthew Ahrens ASSERT3S(delta, <=, 0); 2480*74e7dc98SMatthew Ahrens ASSERT3U(pa->used, >=, -delta); 2481*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2482*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(odd, DD_USED_HEAD, 2483*74e7dc98SMatthew Ahrens -pa->used - delta, -pa->comp, -pa->uncomp, tx); 248499653d4eSeschrock 24853cb34c60Sahrens origin_ds->ds_phys->ds_unique_bytes = pa->unique; 248699653d4eSeschrock 2487ecd6cf80Smarks /* log history record */ 2488ecd6cf80Smarks spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2489745cd3c5Smaybee cr, "dataset = %llu", hds->ds_object); 2490ecd6cf80Smarks 24913cb34c60Sahrens dsl_dir_close(odd, FTAG); 249299653d4eSeschrock } 249399653d4eSeschrock 2494*74e7dc98SMatthew Ahrens static char *snaplist_tag = "snaplist"; 2495*74e7dc98SMatthew Ahrens /* 2496*74e7dc98SMatthew Ahrens * Make a list of dsl_dataset_t's for the snapshots between first_obj 2497*74e7dc98SMatthew Ahrens * (exclusive) and last_obj (inclusive). The list will be in reverse 2498*74e7dc98SMatthew Ahrens * order (last_obj will be the list_head()). If first_obj == 0, do all 2499*74e7dc98SMatthew Ahrens * snapshots back to this dataset's origin. 
2500*74e7dc98SMatthew Ahrens */ 2501*74e7dc98SMatthew Ahrens static int 2502*74e7dc98SMatthew Ahrens snaplist_make(dsl_pool_t *dp, boolean_t own, 2503*74e7dc98SMatthew Ahrens uint64_t first_obj, uint64_t last_obj, list_t *l) 2504*74e7dc98SMatthew Ahrens { 2505*74e7dc98SMatthew Ahrens uint64_t obj = last_obj; 2506*74e7dc98SMatthew Ahrens 2507*74e7dc98SMatthew Ahrens ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2508*74e7dc98SMatthew Ahrens 2509*74e7dc98SMatthew Ahrens list_create(l, sizeof (struct promotenode), 2510*74e7dc98SMatthew Ahrens offsetof(struct promotenode, link)); 2511*74e7dc98SMatthew Ahrens 2512*74e7dc98SMatthew Ahrens while (obj != first_obj) { 2513*74e7dc98SMatthew Ahrens dsl_dataset_t *ds; 2514*74e7dc98SMatthew Ahrens struct promotenode *snap; 2515*74e7dc98SMatthew Ahrens int err; 2516*74e7dc98SMatthew Ahrens 2517*74e7dc98SMatthew Ahrens if (own) { 2518*74e7dc98SMatthew Ahrens err = dsl_dataset_own_obj(dp, obj, 2519*74e7dc98SMatthew Ahrens 0, snaplist_tag, &ds); 2520*74e7dc98SMatthew Ahrens if (err == 0) 2521*74e7dc98SMatthew Ahrens dsl_dataset_make_exclusive(ds, snaplist_tag); 2522*74e7dc98SMatthew Ahrens } else { 2523*74e7dc98SMatthew Ahrens err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2524*74e7dc98SMatthew Ahrens } 2525*74e7dc98SMatthew Ahrens if (err == ENOENT) { 2526*74e7dc98SMatthew Ahrens /* lost race with snapshot destroy */ 2527*74e7dc98SMatthew Ahrens struct promotenode *last = list_tail(l); 2528*74e7dc98SMatthew Ahrens ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2529*74e7dc98SMatthew Ahrens obj = last->ds->ds_phys->ds_prev_snap_obj; 2530*74e7dc98SMatthew Ahrens continue; 2531*74e7dc98SMatthew Ahrens } else if (err) { 2532*74e7dc98SMatthew Ahrens return (err); 2533*74e7dc98SMatthew Ahrens } 2534*74e7dc98SMatthew Ahrens 2535*74e7dc98SMatthew Ahrens if (first_obj == 0) 2536*74e7dc98SMatthew Ahrens first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2537*74e7dc98SMatthew Ahrens 2538*74e7dc98SMatthew Ahrens snap = 
kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2539*74e7dc98SMatthew Ahrens snap->ds = ds; 2540*74e7dc98SMatthew Ahrens list_insert_tail(l, snap); 2541*74e7dc98SMatthew Ahrens obj = ds->ds_phys->ds_prev_snap_obj; 2542*74e7dc98SMatthew Ahrens } 2543*74e7dc98SMatthew Ahrens 2544*74e7dc98SMatthew Ahrens return (0); 2545*74e7dc98SMatthew Ahrens } 2546*74e7dc98SMatthew Ahrens 2547*74e7dc98SMatthew Ahrens static int 2548*74e7dc98SMatthew Ahrens snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2549*74e7dc98SMatthew Ahrens { 2550*74e7dc98SMatthew Ahrens struct promotenode *snap; 2551*74e7dc98SMatthew Ahrens 2552*74e7dc98SMatthew Ahrens *spacep = 0; 2553*74e7dc98SMatthew Ahrens for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2554*74e7dc98SMatthew Ahrens uint64_t used; 2555*74e7dc98SMatthew Ahrens int err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2556*74e7dc98SMatthew Ahrens mintxg, UINT64_MAX, &used); 2557*74e7dc98SMatthew Ahrens if (err) 2558*74e7dc98SMatthew Ahrens return (err); 2559*74e7dc98SMatthew Ahrens *spacep += used; 2560*74e7dc98SMatthew Ahrens } 2561*74e7dc98SMatthew Ahrens return (0); 2562*74e7dc98SMatthew Ahrens } 2563*74e7dc98SMatthew Ahrens 2564*74e7dc98SMatthew Ahrens static void 2565*74e7dc98SMatthew Ahrens snaplist_destroy(list_t *l, boolean_t own) 2566*74e7dc98SMatthew Ahrens { 2567*74e7dc98SMatthew Ahrens struct promotenode *snap; 2568*74e7dc98SMatthew Ahrens 2569*74e7dc98SMatthew Ahrens if (!list_link_active(&l->list_head)) 2570*74e7dc98SMatthew Ahrens return; 2571*74e7dc98SMatthew Ahrens 2572*74e7dc98SMatthew Ahrens while ((snap = list_tail(l)) != NULL) { 2573*74e7dc98SMatthew Ahrens list_remove(l, snap); 2574*74e7dc98SMatthew Ahrens if (own) 2575*74e7dc98SMatthew Ahrens dsl_dataset_disown(snap->ds, snaplist_tag); 2576*74e7dc98SMatthew Ahrens else 2577*74e7dc98SMatthew Ahrens dsl_dataset_rele(snap->ds, snaplist_tag); 2578*74e7dc98SMatthew Ahrens kmem_free(snap, sizeof (struct promotenode)); 2579*74e7dc98SMatthew 
Ahrens } 2580*74e7dc98SMatthew Ahrens list_destroy(l); 2581*74e7dc98SMatthew Ahrens } 2582*74e7dc98SMatthew Ahrens 2583*74e7dc98SMatthew Ahrens /* 2584*74e7dc98SMatthew Ahrens * Promote a clone. Nomenclature note: 2585*74e7dc98SMatthew Ahrens * "clone" or "cds": the original clone which is being promoted 2586*74e7dc98SMatthew Ahrens * "origin" or "ods": the snapshot which is originally clone's origin 2587*74e7dc98SMatthew Ahrens * "origin head" or "ohds": the dataset which is the head 2588*74e7dc98SMatthew Ahrens * (filesystem/volume) for the origin 2589*74e7dc98SMatthew Ahrens * "origin origin": the origin of the origin's filesystem (typically 2590*74e7dc98SMatthew Ahrens * NULL, indicating that the clone is not a clone of a clone). 2591*74e7dc98SMatthew Ahrens */ 259299653d4eSeschrock int 259399653d4eSeschrock dsl_dataset_promote(const char *name) 259499653d4eSeschrock { 259599653d4eSeschrock dsl_dataset_t *ds; 2596745cd3c5Smaybee dsl_dir_t *dd; 2597745cd3c5Smaybee dsl_pool_t *dp; 259899653d4eSeschrock dmu_object_info_t doi; 2599*74e7dc98SMatthew Ahrens struct promotearg pa = { 0 }; 2600088f3894Sahrens struct promotenode *snap; 2601745cd3c5Smaybee int err; 260299653d4eSeschrock 2603745cd3c5Smaybee err = dsl_dataset_hold(name, FTAG, &ds); 260499653d4eSeschrock if (err) 260599653d4eSeschrock return (err); 2606745cd3c5Smaybee dd = ds->ds_dir; 2607745cd3c5Smaybee dp = dd->dd_pool; 260899653d4eSeschrock 2609745cd3c5Smaybee err = dmu_object_info(dp->dp_meta_objset, 261099653d4eSeschrock ds->ds_phys->ds_snapnames_zapobj, &doi); 261199653d4eSeschrock if (err) { 2612745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 261399653d4eSeschrock return (err); 261499653d4eSeschrock } 261599653d4eSeschrock 2616*74e7dc98SMatthew Ahrens if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 2617*74e7dc98SMatthew Ahrens dsl_dataset_rele(ds, FTAG); 2618*74e7dc98SMatthew Ahrens return (EINVAL); 2619*74e7dc98SMatthew Ahrens } 2620*74e7dc98SMatthew Ahrens 2621745cd3c5Smaybee /* 
2622745cd3c5Smaybee * We are going to inherit all the snapshots taken before our 2623745cd3c5Smaybee * origin (i.e., our new origin will be our parent's origin). 2624745cd3c5Smaybee * Take ownership of them so that we can rename them into our 2625745cd3c5Smaybee * namespace. 2626745cd3c5Smaybee */ 2627745cd3c5Smaybee rw_enter(&dp->dp_config_rwlock, RW_READER); 2628088f3894Sahrens 2629*74e7dc98SMatthew Ahrens err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 2630*74e7dc98SMatthew Ahrens &pa.shared_snaps); 2631*74e7dc98SMatthew Ahrens if (err != 0) 2632*74e7dc98SMatthew Ahrens goto out; 2633088f3894Sahrens 2634*74e7dc98SMatthew Ahrens err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 2635*74e7dc98SMatthew Ahrens if (err != 0) 2636*74e7dc98SMatthew Ahrens goto out; 2637088f3894Sahrens 2638*74e7dc98SMatthew Ahrens snap = list_head(&pa.shared_snaps); 2639*74e7dc98SMatthew Ahrens ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2640*74e7dc98SMatthew Ahrens err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 2641*74e7dc98SMatthew Ahrens snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 2642*74e7dc98SMatthew Ahrens if (err != 0) 2643*74e7dc98SMatthew Ahrens goto out; 2644088f3894Sahrens 2645*74e7dc98SMatthew Ahrens if (dsl_dir_is_clone(snap->ds->ds_dir)) { 2646*74e7dc98SMatthew Ahrens err = dsl_dataset_own_obj(dp, 2647*74e7dc98SMatthew Ahrens snap->ds->ds_dir->dd_phys->dd_origin_obj, 2648*74e7dc98SMatthew Ahrens 0, FTAG, &pa.origin_origin); 2649*74e7dc98SMatthew Ahrens if (err != 0) 2650*74e7dc98SMatthew Ahrens goto out; 2651*74e7dc98SMatthew Ahrens } 2652745cd3c5Smaybee 2653*74e7dc98SMatthew Ahrens out: 2654*74e7dc98SMatthew Ahrens rw_exit(&dp->dp_config_rwlock); 2655745cd3c5Smaybee 265699653d4eSeschrock /* 265799653d4eSeschrock * Add in 128x the snapnames zapobj size, since we will be moving 265899653d4eSeschrock * a bunch of snapnames to the promoted ds, and dirtying their 265999653d4eSeschrock * 
bonus buffers. 266099653d4eSeschrock */ 2661*74e7dc98SMatthew Ahrens if (err == 0) { 2662*74e7dc98SMatthew Ahrens err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 2663*74e7dc98SMatthew Ahrens dsl_dataset_promote_sync, ds, &pa, 2664*74e7dc98SMatthew Ahrens 2 + 2 * doi.doi_physical_blks); 2665745cd3c5Smaybee } 2666*74e7dc98SMatthew Ahrens 2667*74e7dc98SMatthew Ahrens snaplist_destroy(&pa.shared_snaps, B_TRUE); 2668*74e7dc98SMatthew Ahrens snaplist_destroy(&pa.clone_snaps, B_FALSE); 2669*74e7dc98SMatthew Ahrens snaplist_destroy(&pa.origin_snaps, B_FALSE); 2670*74e7dc98SMatthew Ahrens if (pa.origin_origin) 2671*74e7dc98SMatthew Ahrens dsl_dataset_disown(pa.origin_origin, FTAG); 2672745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 267399653d4eSeschrock return (err); 267499653d4eSeschrock } 2675b1b8ab34Slling 26763cb34c60Sahrens struct cloneswaparg { 26773cb34c60Sahrens dsl_dataset_t *cds; /* clone dataset */ 26783cb34c60Sahrens dsl_dataset_t *ohds; /* origin's head dataset */ 26793cb34c60Sahrens boolean_t force; 2680a9b821a0Sck int64_t unused_refres_delta; /* change in unconsumed refreservation */ 26813cb34c60Sahrens }; 2682f18faf3fSek 2683f18faf3fSek /* ARGSUSED */ 2684f18faf3fSek static int 2685f18faf3fSek dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2686f18faf3fSek { 26873cb34c60Sahrens struct cloneswaparg *csa = arg1; 2688f18faf3fSek 26893cb34c60Sahrens /* they should both be heads */ 26903cb34c60Sahrens if (dsl_dataset_is_snapshot(csa->cds) || 26913cb34c60Sahrens dsl_dataset_is_snapshot(csa->ohds)) 2692f18faf3fSek return (EINVAL); 2693f18faf3fSek 26943cb34c60Sahrens /* the branch point should be just before them */ 26953cb34c60Sahrens if (csa->cds->ds_prev != csa->ohds->ds_prev) 2696f18faf3fSek return (EINVAL); 2697f18faf3fSek 26983cb34c60Sahrens /* cds should be the clone */ 26993cb34c60Sahrens if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != 27003cb34c60Sahrens csa->ohds->ds_object) 27013cb34c60Sahrens return (EINVAL); 2702f18faf3fSek 
27033cb34c60Sahrens /* the clone should be a child of the origin */ 27043cb34c60Sahrens if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 27053cb34c60Sahrens return (EINVAL); 2706f18faf3fSek 27073cb34c60Sahrens /* ohds shouldn't be modified unless 'force' */ 27083cb34c60Sahrens if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 27093cb34c60Sahrens return (ETXTBSY); 2710a9b821a0Sck 2711a9b821a0Sck /* adjust amount of any unconsumed refreservation */ 2712a9b821a0Sck csa->unused_refres_delta = 2713a9b821a0Sck (int64_t)MIN(csa->ohds->ds_reserved, 2714a9b821a0Sck csa->ohds->ds_phys->ds_unique_bytes) - 2715a9b821a0Sck (int64_t)MIN(csa->ohds->ds_reserved, 2716a9b821a0Sck csa->cds->ds_phys->ds_unique_bytes); 2717a9b821a0Sck 2718a9b821a0Sck if (csa->unused_refres_delta > 0 && 2719a9b821a0Sck csa->unused_refres_delta > 2720a9b821a0Sck dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2721a9b821a0Sck return (ENOSPC); 2722a9b821a0Sck 27233cb34c60Sahrens return (0); 2724f18faf3fSek } 2725f18faf3fSek 2726f18faf3fSek /* ARGSUSED */ 2727f18faf3fSek static void 2728f18faf3fSek dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2729f18faf3fSek { 27303cb34c60Sahrens struct cloneswaparg *csa = arg1; 27313cb34c60Sahrens dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 2732f18faf3fSek 2733a9b821a0Sck ASSERT(csa->cds->ds_reserved == 0); 2734a9b821a0Sck ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2735a9b821a0Sck 27363cb34c60Sahrens dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 27373cb34c60Sahrens dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 27383cb34c60Sahrens dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); 2739f18faf3fSek 27403cb34c60Sahrens if (csa->cds->ds_user_ptr != NULL) { 27413cb34c60Sahrens csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); 27423cb34c60Sahrens csa->cds->ds_user_ptr = NULL; 27433cb34c60Sahrens } 2744f18faf3fSek 27453cb34c60Sahrens if (csa->ohds->ds_user_ptr != NULL) { 27463cb34c60Sahrens 
csa->ohds->ds_user_evict_func(csa->ohds, 27473cb34c60Sahrens csa->ohds->ds_user_ptr); 27483cb34c60Sahrens csa->ohds->ds_user_ptr = NULL; 27493cb34c60Sahrens } 2750f18faf3fSek 2751f18faf3fSek /* reset origin's unique bytes */ 2752*74e7dc98SMatthew Ahrens VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2753*74e7dc98SMatthew Ahrens csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2754*74e7dc98SMatthew Ahrens &csa->cds->ds_prev->ds_phys->ds_unique_bytes)); 2755f18faf3fSek 2756f18faf3fSek /* swap blkptrs */ 2757f18faf3fSek { 2758f18faf3fSek blkptr_t tmp; 27593cb34c60Sahrens tmp = csa->ohds->ds_phys->ds_bp; 27603cb34c60Sahrens csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 27613cb34c60Sahrens csa->cds->ds_phys->ds_bp = tmp; 2762f18faf3fSek } 2763f18faf3fSek 2764f18faf3fSek /* set dd_*_bytes */ 2765f18faf3fSek { 2766f18faf3fSek int64_t dused, dcomp, duncomp; 2767f18faf3fSek uint64_t cdl_used, cdl_comp, cdl_uncomp; 2768f18faf3fSek uint64_t odl_used, odl_comp, odl_uncomp; 2769f18faf3fSek 2770*74e7dc98SMatthew Ahrens ASSERT3U(csa->cds->ds_dir->dd_phys-> 2771*74e7dc98SMatthew Ahrens dd_used_breakdown[DD_USED_SNAP], ==, 0); 2772*74e7dc98SMatthew Ahrens 27733cb34c60Sahrens VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2774f18faf3fSek &cdl_comp, &cdl_uncomp)); 27753cb34c60Sahrens VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2776f18faf3fSek &odl_comp, &odl_uncomp)); 2777*74e7dc98SMatthew Ahrens 27783cb34c60Sahrens dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 27793cb34c60Sahrens (csa->ohds->ds_phys->ds_used_bytes + odl_used); 27803cb34c60Sahrens dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 27813cb34c60Sahrens (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 27823cb34c60Sahrens duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 27833cb34c60Sahrens cdl_uncomp - 27843cb34c60Sahrens (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 27853cb34c60Sahrens 2786*74e7dc98SMatthew Ahrens 
dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, 27873cb34c60Sahrens dused, dcomp, duncomp, tx); 2788*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, 27893cb34c60Sahrens -dused, -dcomp, -duncomp, tx); 2790*74e7dc98SMatthew Ahrens 2791*74e7dc98SMatthew Ahrens /* 2792*74e7dc98SMatthew Ahrens * The difference in the space used by snapshots is the 2793*74e7dc98SMatthew Ahrens * difference in snapshot space due to the head's 2794*74e7dc98SMatthew Ahrens * deadlist (since that's the only thing that's 2795*74e7dc98SMatthew Ahrens * changing that affects the snapused). 2796*74e7dc98SMatthew Ahrens */ 2797*74e7dc98SMatthew Ahrens VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2798*74e7dc98SMatthew Ahrens csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used)); 2799*74e7dc98SMatthew Ahrens VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist, 2800*74e7dc98SMatthew Ahrens csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used)); 2801*74e7dc98SMatthew Ahrens dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, 2802*74e7dc98SMatthew Ahrens DD_USED_HEAD, DD_USED_SNAP, tx); 28033cb34c60Sahrens } 28043cb34c60Sahrens 28053cb34c60Sahrens #define SWITCH64(x, y) \ 28063cb34c60Sahrens { \ 28073cb34c60Sahrens uint64_t __tmp = (x); \ 28083cb34c60Sahrens (x) = (y); \ 28093cb34c60Sahrens (y) = __tmp; \ 2810f18faf3fSek } 2811f18faf3fSek 2812f18faf3fSek /* swap ds_*_bytes */ 28133cb34c60Sahrens SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 28143cb34c60Sahrens csa->cds->ds_phys->ds_used_bytes); 28153cb34c60Sahrens SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 28163cb34c60Sahrens csa->cds->ds_phys->ds_compressed_bytes); 28173cb34c60Sahrens SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 28183cb34c60Sahrens csa->cds->ds_phys->ds_uncompressed_bytes); 2819a9b821a0Sck SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2820a9b821a0Sck csa->cds->ds_phys->ds_unique_bytes); 2821a9b821a0Sck 2822a9b821a0Sck /* apply any parent delta for change in 
unconsumed refreservation */ 2823*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, 2824*74e7dc98SMatthew Ahrens csa->unused_refres_delta, 0, 0, tx); 2825f18faf3fSek 2826f18faf3fSek /* swap deadlists */ 28273cb34c60Sahrens bplist_close(&csa->cds->ds_deadlist); 28283cb34c60Sahrens bplist_close(&csa->ohds->ds_deadlist); 28293cb34c60Sahrens SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 28303cb34c60Sahrens csa->cds->ds_phys->ds_deadlist_obj); 28313cb34c60Sahrens VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 28323cb34c60Sahrens csa->cds->ds_phys->ds_deadlist_obj)); 28333cb34c60Sahrens VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 28343cb34c60Sahrens csa->ohds->ds_phys->ds_deadlist_obj)); 2835f18faf3fSek } 2836f18faf3fSek 2837f18faf3fSek /* 2838745cd3c5Smaybee * Swap 'clone' with its origin head file system. Used at the end 2839745cd3c5Smaybee * of "online recv" to swizzle the file system to the new version. 2840f18faf3fSek */ 2841f18faf3fSek int 28423cb34c60Sahrens dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 28433cb34c60Sahrens boolean_t force) 2844f18faf3fSek { 28453cb34c60Sahrens struct cloneswaparg csa; 2846745cd3c5Smaybee int error; 2847f18faf3fSek 2848745cd3c5Smaybee ASSERT(clone->ds_owner); 2849745cd3c5Smaybee ASSERT(origin_head->ds_owner); 2850745cd3c5Smaybee retry: 2851745cd3c5Smaybee /* Need exclusive access for the swap */ 2852745cd3c5Smaybee rw_enter(&clone->ds_rwlock, RW_WRITER); 2853745cd3c5Smaybee if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { 2854745cd3c5Smaybee rw_exit(&clone->ds_rwlock); 2855745cd3c5Smaybee rw_enter(&origin_head->ds_rwlock, RW_WRITER); 2856745cd3c5Smaybee if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { 2857745cd3c5Smaybee rw_exit(&origin_head->ds_rwlock); 2858745cd3c5Smaybee goto retry; 2859745cd3c5Smaybee } 2860745cd3c5Smaybee } 28613cb34c60Sahrens csa.cds = clone; 28623cb34c60Sahrens csa.ohds = origin_head; 
28633cb34c60Sahrens csa.force = force; 2864745cd3c5Smaybee error = dsl_sync_task_do(clone->ds_dir->dd_pool, 2865f18faf3fSek dsl_dataset_clone_swap_check, 2866745cd3c5Smaybee dsl_dataset_clone_swap_sync, &csa, NULL, 9); 2867745cd3c5Smaybee return (error); 2868f18faf3fSek } 2869f18faf3fSek 2870b1b8ab34Slling /* 2871b1b8ab34Slling * Given a pool name and a dataset object number in that pool, 2872b1b8ab34Slling * return the name of that dataset. 2873b1b8ab34Slling */ 2874b1b8ab34Slling int 2875b1b8ab34Slling dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2876b1b8ab34Slling { 2877b1b8ab34Slling spa_t *spa; 2878b1b8ab34Slling dsl_pool_t *dp; 2879745cd3c5Smaybee dsl_dataset_t *ds; 2880b1b8ab34Slling int error; 2881b1b8ab34Slling 2882b1b8ab34Slling if ((error = spa_open(pname, &spa, FTAG)) != 0) 2883b1b8ab34Slling return (error); 2884b1b8ab34Slling dp = spa_get_dsl(spa); 2885b1b8ab34Slling rw_enter(&dp->dp_config_rwlock, RW_READER); 2886745cd3c5Smaybee if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 2887745cd3c5Smaybee dsl_dataset_name(ds, buf); 2888745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 2889b1b8ab34Slling } 2890b1b8ab34Slling rw_exit(&dp->dp_config_rwlock); 2891b1b8ab34Slling spa_close(spa, FTAG); 2892b1b8ab34Slling 2893745cd3c5Smaybee return (error); 2894b1b8ab34Slling } 2895a9799022Sck 2896a9799022Sck int 2897a9799022Sck dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2898745cd3c5Smaybee uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2899a9799022Sck { 2900a9799022Sck int error = 0; 2901a9799022Sck 2902a9799022Sck ASSERT3S(asize, >, 0); 2903a9799022Sck 29049082849eSck /* 29059082849eSck * *ref_rsrv is the portion of asize that will come from any 29069082849eSck * unconsumed refreservation space. 29079082849eSck */ 29089082849eSck *ref_rsrv = 0; 29099082849eSck 2910a9799022Sck mutex_enter(&ds->ds_lock); 2911a9799022Sck /* 2912a9799022Sck * Make a space adjustment for reserved bytes. 
2913a9799022Sck */ 2914a9799022Sck if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2915a9799022Sck ASSERT3U(*used, >=, 2916a9799022Sck ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2917a9799022Sck *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 29189082849eSck *ref_rsrv = 29199082849eSck asize - MIN(asize, parent_delta(ds, asize + inflight)); 2920a9799022Sck } 2921a9799022Sck 2922a9799022Sck if (!check_quota || ds->ds_quota == 0) { 2923a9799022Sck mutex_exit(&ds->ds_lock); 2924a9799022Sck return (0); 2925a9799022Sck } 2926a9799022Sck /* 2927a9799022Sck * If they are requesting more space, and our current estimate 2928a9799022Sck * is over quota, they get to try again unless the actual 2929a9799022Sck * on-disk is over quota and there are no pending changes (which 2930a9799022Sck * may free up space for us). 2931a9799022Sck */ 2932a9799022Sck if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 2933a9799022Sck if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 2934a9799022Sck error = ERESTART; 2935a9799022Sck else 2936a9799022Sck error = EDQUOT; 2937a9799022Sck } 2938a9799022Sck mutex_exit(&ds->ds_lock); 2939a9799022Sck 2940a9799022Sck return (error); 2941a9799022Sck } 2942a9799022Sck 2943a9799022Sck /* ARGSUSED */ 2944a9799022Sck static int 2945a9799022Sck dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 2946a9799022Sck { 2947a9799022Sck dsl_dataset_t *ds = arg1; 2948a9799022Sck uint64_t *quotap = arg2; 2949a9799022Sck uint64_t new_quota = *quotap; 2950a9799022Sck 2951a9799022Sck if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 2952a9799022Sck return (ENOTSUP); 2953a9799022Sck 2954a9799022Sck if (new_quota == 0) 2955a9799022Sck return (0); 2956a9799022Sck 2957a9799022Sck if (new_quota < ds->ds_phys->ds_used_bytes || 2958a9799022Sck new_quota < ds->ds_reserved) 2959a9799022Sck return (ENOSPC); 2960a9799022Sck 2961a9799022Sck return (0); 2962a9799022Sck } 2963a9799022Sck 2964a9799022Sck /* 
ARGSUSED */ 2965a9799022Sck void 2966a9799022Sck dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2967a9799022Sck { 2968a9799022Sck dsl_dataset_t *ds = arg1; 2969a9799022Sck uint64_t *quotap = arg2; 2970a9799022Sck uint64_t new_quota = *quotap; 2971a9799022Sck 2972a9799022Sck dmu_buf_will_dirty(ds->ds_dbuf, tx); 2973a9799022Sck 2974a9799022Sck ds->ds_quota = new_quota; 2975a9799022Sck 2976a9799022Sck dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 2977a9799022Sck 2978a9799022Sck spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 2979a9799022Sck tx, cr, "%lld dataset = %llu ", 2980745cd3c5Smaybee (longlong_t)new_quota, ds->ds_object); 2981a9799022Sck } 2982a9799022Sck 2983a9799022Sck int 2984a9799022Sck dsl_dataset_set_quota(const char *dsname, uint64_t quota) 2985a9799022Sck { 2986a9799022Sck dsl_dataset_t *ds; 2987a9799022Sck int err; 2988a9799022Sck 2989745cd3c5Smaybee err = dsl_dataset_hold(dsname, FTAG, &ds); 2990a9799022Sck if (err) 2991a9799022Sck return (err); 2992a9799022Sck 2993a9b821a0Sck if (quota != ds->ds_quota) { 2994a9b821a0Sck /* 2995a9b821a0Sck * If someone removes a file, then tries to set the quota, we 2996a9b821a0Sck * want to make sure the file freeing takes effect. 
2997a9b821a0Sck */ 2998a9b821a0Sck txg_wait_open(ds->ds_dir->dd_pool, 0); 2999a9799022Sck 3000a9b821a0Sck err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3001a9b821a0Sck dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 3002a9b821a0Sck ds, "a, 0); 3003a9b821a0Sck } 3004745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 3005a9799022Sck return (err); 3006a9799022Sck } 3007a9799022Sck 3008a9799022Sck static int 3009a9799022Sck dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 3010a9799022Sck { 3011a9799022Sck dsl_dataset_t *ds = arg1; 3012a9799022Sck uint64_t *reservationp = arg2; 3013a9799022Sck uint64_t new_reservation = *reservationp; 3014a9799022Sck int64_t delta; 3015a9799022Sck uint64_t unique; 3016a9799022Sck 3017a9799022Sck if (new_reservation > INT64_MAX) 3018a9799022Sck return (EOVERFLOW); 3019a9799022Sck 3020a9799022Sck if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 3021a9799022Sck SPA_VERSION_REFRESERVATION) 3022a9799022Sck return (ENOTSUP); 3023a9799022Sck 3024a9799022Sck if (dsl_dataset_is_snapshot(ds)) 3025a9799022Sck return (EINVAL); 3026a9799022Sck 3027a9799022Sck /* 3028a9799022Sck * If we are doing the preliminary check in open context, the 3029a9799022Sck * space estimates may be inaccurate. 
3030a9799022Sck */ 3031a9799022Sck if (!dmu_tx_is_syncing(tx)) 3032a9799022Sck return (0); 3033a9799022Sck 3034a9799022Sck mutex_enter(&ds->ds_lock); 3035a9799022Sck unique = dsl_dataset_unique(ds); 3036a9799022Sck delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved); 3037a9799022Sck mutex_exit(&ds->ds_lock); 3038a9799022Sck 3039a9799022Sck if (delta > 0 && 3040a9799022Sck delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 3041a9799022Sck return (ENOSPC); 3042a9799022Sck if (delta > 0 && ds->ds_quota > 0 && 3043a9799022Sck new_reservation > ds->ds_quota) 3044a9799022Sck return (ENOSPC); 3045a9799022Sck 3046a9799022Sck return (0); 3047a9799022Sck } 3048a9799022Sck 3049a9799022Sck /* ARGSUSED */ 3050a9799022Sck static void 3051a9799022Sck dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 3052a9799022Sck dmu_tx_t *tx) 3053a9799022Sck { 3054a9799022Sck dsl_dataset_t *ds = arg1; 3055a9799022Sck uint64_t *reservationp = arg2; 3056a9799022Sck uint64_t new_reservation = *reservationp; 3057a9799022Sck uint64_t unique; 3058a9799022Sck int64_t delta; 3059a9799022Sck 3060a9799022Sck dmu_buf_will_dirty(ds->ds_dbuf, tx); 3061a9799022Sck 3062a9799022Sck mutex_enter(&ds->ds_lock); 3063a9799022Sck unique = dsl_dataset_unique(ds); 3064a9799022Sck delta = MAX(0, (int64_t)(new_reservation - unique)) - 3065a9799022Sck MAX(0, (int64_t)(ds->ds_reserved - unique)); 3066a9799022Sck ds->ds_reserved = new_reservation; 3067a9799022Sck mutex_exit(&ds->ds_lock); 3068a9799022Sck 3069a9799022Sck dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation", 3070a9799022Sck new_reservation, cr, tx); 3071a9799022Sck 3072*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3073a9799022Sck 3074a9799022Sck spa_history_internal_log(LOG_DS_REFRESERV, 3075a9799022Sck ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 3076a9799022Sck (longlong_t)new_reservation, 3077a9799022Sck ds->ds_dir->dd_phys->dd_head_dataset_obj); 
3078a9799022Sck } 3079a9799022Sck 3080a9799022Sck int 3081a9799022Sck dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 3082a9799022Sck { 3083a9799022Sck dsl_dataset_t *ds; 3084a9799022Sck int err; 3085a9799022Sck 3086745cd3c5Smaybee err = dsl_dataset_hold(dsname, FTAG, &ds); 3087a9799022Sck if (err) 3088a9799022Sck return (err); 3089a9799022Sck 3090a9799022Sck err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3091a9799022Sck dsl_dataset_set_reservation_check, 3092a9799022Sck dsl_dataset_set_reservation_sync, ds, &reservation, 0); 3093745cd3c5Smaybee dsl_dataset_rele(ds, FTAG); 3094a9799022Sck return (err); 3095a9799022Sck } 3096