1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22fb5dd802SLin Ling * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #include <sys/dsl_pool.h> 27fa9e4066Sahrens #include <sys/dsl_dataset.h> 28fa9e4066Sahrens #include <sys/dsl_dir.h> 291d452cf5Sahrens #include <sys/dsl_synctask.h> 30fa9e4066Sahrens #include <sys/dmu_tx.h> 31fa9e4066Sahrens #include <sys/dmu_objset.h> 32fa9e4066Sahrens #include <sys/arc.h> 33fa9e4066Sahrens #include <sys/zap.h> 34c717a561Smaybee #include <sys/zio.h> 35fa9e4066Sahrens #include <sys/zfs_context.h> 36fa9e4066Sahrens #include <sys/fs/zfs.h> 37088f3894Sahrens #include <sys/zfs_znode.h> 38088f3894Sahrens #include <sys/spa_impl.h> 39fa9e4066Sahrens 401ab7f2deSmaybee int zfs_no_write_throttle = 0; 4105715f94SMark Maybee int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ 42bf0ec83aSLin Ling int zfs_txg_synctime_ms = 5000; /* target millisecs to sync a txg */ 4305715f94SMark Maybee 4405715f94SMark Maybee uint64_t zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */ 4505715f94SMark Maybee uint64_t zfs_write_limit_max = 0; /* max data payload per txg */ 4605715f94SMark Maybee uint64_t zfs_write_limit_inflated = 0; 471ab7f2deSmaybee uint64_t zfs_write_limit_override = 0; 481ab7f2deSmaybee 4905715f94SMark Maybee kmutex_t zfs_write_limit_lock; 5005715f94SMark Maybee 5105715f94SMark Maybee static pgcnt_t old_physmem = 0; 52088f3894Sahrens 53ea8dc4b6Seschrock static int 54088f3894Sahrens dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) 55fa9e4066Sahrens { 56fa9e4066Sahrens uint64_t obj; 57fa9e4066Sahrens int err; 58fa9e4066Sahrens 59fa9e4066Sahrens err = zap_lookup(dp->dp_meta_objset, 60fa9e4066Sahrens dp->dp_root_dir->dd_phys->dd_child_dir_zapobj, 61088f3894Sahrens name, sizeof (obj), 1, &obj); 62ea8dc4b6Seschrock if (err) 63ea8dc4b6Seschrock return (err); 64fa9e4066Sahrens 65088f3894Sahrens return (dsl_dir_open_obj(dp, obj, name, dp, ddp)); 66fa9e4066Sahrens } 67fa9e4066Sahrens 68fa9e4066Sahrens static dsl_pool_t * 69fa9e4066Sahrens dsl_pool_open_impl(spa_t *spa, uint64_t txg) 70fa9e4066Sahrens { 71fa9e4066Sahrens dsl_pool_t *dp; 72fa9e4066Sahrens blkptr_t *bp = spa_get_rootblkptr(spa); 73fa9e4066Sahrens 74fa9e4066Sahrens dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); 75fa9e4066Sahrens dp->dp_spa = spa; 76fa9e4066Sahrens dp->dp_meta_rootbp = *bp; 775ad82045Snd rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL); 781ab7f2deSmaybee dp->dp_write_limit = zfs_write_limit_min; 79fa9e4066Sahrens txg_init(dp, txg); 80fa9e4066Sahrens 81fa9e4066Sahrens txg_list_create(&dp->dp_dirty_datasets, 82fa9e4066Sahrens offsetof(dsl_dataset_t, ds_dirty_link)); 83fa9e4066Sahrens txg_list_create(&dp->dp_dirty_dirs, 84fa9e4066Sahrens offsetof(dsl_dir_t, dd_dirty_link)); 851d452cf5Sahrens txg_list_create(&dp->dp_sync_tasks, 861d452cf5Sahrens offsetof(dsl_sync_task_group_t, dstg_node)); 873cb34c60Sahrens list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), 88fa9e4066Sahrens offsetof(dsl_dataset_t, ds_synced_link)); 89fa9e4066Sahrens 901ab7f2deSmaybee mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); 91088f3894Sahrens mutex_init(&dp->dp_scrub_cancel_lock, NULL, MUTEX_DEFAULT, NULL); 921ab7f2deSmaybee 939d3574bfSNeil Perrin dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri, 949d3574bfSNeil Perrin 1, 4, 0); 959d3574bfSNeil Perrin 96fa9e4066Sahrens return (dp); 97fa9e4066Sahrens } 98fa9e4066Sahrens 99ea8dc4b6Seschrock int 100ea8dc4b6Seschrock dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) 101fa9e4066Sahrens { 102fa9e4066Sahrens int err; 103fa9e4066Sahrens dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); 104088f3894Sahrens dsl_dir_t *dd; 105088f3894Sahrens dsl_dataset_t *ds; 106fa9e4066Sahrens 107088f3894Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 108503ad85cSMatthew Ahrens err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, 109503ad85cSMatthew Ahrens &dp->dp_meta_objset); 110ea8dc4b6Seschrock if (err) 111ea8dc4b6Seschrock goto out; 112ea8dc4b6Seschrock 113fa9e4066Sahrens err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 114fa9e4066Sahrens DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, 115fa9e4066Sahrens &dp->dp_root_dir_obj); 116ea8dc4b6Seschrock if (err) 117ea8dc4b6Seschrock goto out; 118ea8dc4b6Seschrock 119ea8dc4b6Seschrock err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, 120ea8dc4b6Seschrock NULL, dp, &dp->dp_root_dir); 121ea8dc4b6Seschrock if (err) 122ea8dc4b6Seschrock goto out; 123fa9e4066Sahrens 124088f3894Sahrens err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); 125ea8dc4b6Seschrock if (err) 126ea8dc4b6Seschrock goto out; 127ea8dc4b6Seschrock 128088f3894Sahrens if (spa_version(spa) >= SPA_VERSION_ORIGIN) { 129088f3894Sahrens err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); 130088f3894Sahrens if (err) 131088f3894Sahrens goto out; 132088f3894Sahrens err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj, 133088f3894Sahrens FTAG, &ds); 1348f63aa46SLin Ling if (err == 0) { 1358f63aa46SLin Ling err = dsl_dataset_hold_obj(dp, 1368f63aa46SLin Ling ds->ds_phys->ds_prev_snap_obj, dp, 1378f63aa46SLin Ling &dp->dp_origin_snap); 1388f63aa46SLin Ling dsl_dataset_rele(ds, FTAG); 1398f63aa46SLin Ling } 1408f63aa46SLin Ling dsl_dir_close(dd, dp); 141088f3894Sahrens if (err) 142088f3894Sahrens goto out; 143088f3894Sahrens } 144088f3894Sahrens 145ca45db41SChris Kirby err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 146ca45db41SChris Kirby DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, 147ca45db41SChris Kirby &dp->dp_tmp_userrefs_obj); 148ca45db41SChris Kirby if (err == ENOENT) 149ca45db41SChris Kirby err = 0; 150ca45db41SChris Kirby if (err) 151ca45db41SChris Kirby goto out; 152ca45db41SChris Kirby 153088f3894Sahrens /* get scrub status */ 154088f3894Sahrens err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 155088f3894Sahrens DMU_POOL_SCRUB_FUNC, sizeof (uint32_t), 1, 156088f3894Sahrens &dp->dp_scrub_func); 157088f3894Sahrens if (err == 0) { 158088f3894Sahrens err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 159088f3894Sahrens DMU_POOL_SCRUB_QUEUE, sizeof (uint64_t), 1, 160088f3894Sahrens &dp->dp_scrub_queue_obj); 161088f3894Sahrens if (err) 162088f3894Sahrens goto out; 163088f3894Sahrens err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 164088f3894Sahrens DMU_POOL_SCRUB_MIN_TXG, sizeof (uint64_t), 1, 165088f3894Sahrens &dp->dp_scrub_min_txg); 166088f3894Sahrens if (err) 167088f3894Sahrens goto out; 168088f3894Sahrens err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 169088f3894Sahrens DMU_POOL_SCRUB_MAX_TXG, sizeof (uint64_t), 1, 170088f3894Sahrens &dp->dp_scrub_max_txg); 171088f3894Sahrens if (err) 172088f3894Sahrens goto out; 173088f3894Sahrens err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 174bbfd46c4SJeff Bonwick DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t), 175bbfd46c4SJeff Bonwick sizeof (dp->dp_scrub_bookmark) / sizeof (uint64_t), 176088f3894Sahrens &dp->dp_scrub_bookmark); 177088f3894Sahrens if (err) 178088f3894Sahrens goto out; 179088f3894Sahrens err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 180bbfd46c4SJeff Bonwick DMU_POOL_SCRUB_DDT_BOOKMARK, sizeof (uint64_t), 181bbfd46c4SJeff Bonwick sizeof (dp->dp_scrub_ddt_bookmark) / sizeof (uint64_t), 182bbfd46c4SJeff Bonwick &dp->dp_scrub_ddt_bookmark); 183bbfd46c4SJeff Bonwick if (err && err != ENOENT) 184bbfd46c4SJeff Bonwick goto out; 185bbfd46c4SJeff Bonwick err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 186bbfd46c4SJeff Bonwick DMU_POOL_SCRUB_DDT_CLASS_MAX, sizeof (uint64_t), 1, 187bbfd46c4SJeff Bonwick &dp->dp_scrub_ddt_class_max); 188bbfd46c4SJeff Bonwick if (err && err != ENOENT) 189bbfd46c4SJeff Bonwick goto out; 190bbfd46c4SJeff Bonwick err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 191088f3894Sahrens DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1, 192088f3894Sahrens &spa->spa_scrub_errors); 193088f3894Sahrens if (err) 194088f3894Sahrens goto out; 195088f3894Sahrens if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) { 196088f3894Sahrens /* 197088f3894Sahrens * A new-type scrub was in progress on an old 198088f3894Sahrens * pool. Restart from the beginning, since the 199088f3894Sahrens * old software may have changed the pool in the 200088f3894Sahrens * meantime. 201088f3894Sahrens */ 202088f3894Sahrens dsl_pool_scrub_restart(dp); 203088f3894Sahrens } 204088f3894Sahrens } else { 205088f3894Sahrens /* 206088f3894Sahrens * It's OK if there is no scrub in progress (and if 207088f3894Sahrens * there was an I/O error, ignore it). 208088f3894Sahrens */ 209088f3894Sahrens err = 0; 210088f3894Sahrens } 211088f3894Sahrens 212ea8dc4b6Seschrock out: 213fa9e4066Sahrens rw_exit(&dp->dp_config_rwlock); 214ea8dc4b6Seschrock if (err) 215ea8dc4b6Seschrock dsl_pool_close(dp); 216ea8dc4b6Seschrock else 217ea8dc4b6Seschrock *dpp = dp; 218fa9e4066Sahrens 219ea8dc4b6Seschrock return (err); 220fa9e4066Sahrens } 221fa9e4066Sahrens 222fa9e4066Sahrens void 223fa9e4066Sahrens dsl_pool_close(dsl_pool_t *dp) 224fa9e4066Sahrens { 225088f3894Sahrens /* drop our references from dsl_pool_open() */ 226088f3894Sahrens 227088f3894Sahrens /* 228088f3894Sahrens * Since we held the origin_snap from "syncing" context (which 229088f3894Sahrens * includes pool-opening context), it actually only got a "ref" 230088f3894Sahrens * and not a hold, so just drop that here. 231088f3894Sahrens */ 232088f3894Sahrens if (dp->dp_origin_snap) 233088f3894Sahrens dsl_dataset_drop_ref(dp->dp_origin_snap, dp); 234ea8dc4b6Seschrock if (dp->dp_mos_dir) 235ea8dc4b6Seschrock dsl_dir_close(dp->dp_mos_dir, dp); 236ea8dc4b6Seschrock if (dp->dp_root_dir) 237ea8dc4b6Seschrock dsl_dir_close(dp->dp_root_dir, dp); 238fa9e4066Sahrens 239fa9e4066Sahrens /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ 240ea8dc4b6Seschrock if (dp->dp_meta_objset) 241503ad85cSMatthew Ahrens dmu_objset_evict(dp->dp_meta_objset); 242fa9e4066Sahrens 243fa9e4066Sahrens txg_list_destroy(&dp->dp_dirty_datasets); 24454a91118SChris Kirby txg_list_destroy(&dp->dp_sync_tasks); 245fa9e4066Sahrens txg_list_destroy(&dp->dp_dirty_dirs); 2463cb34c60Sahrens list_destroy(&dp->dp_synced_datasets); 247fa9e4066Sahrens 248874395d5Smaybee arc_flush(dp->dp_spa); 249fa9e4066Sahrens txg_fini(dp); 2505ad82045Snd rw_destroy(&dp->dp_config_rwlock); 2511ab7f2deSmaybee mutex_destroy(&dp->dp_lock); 252088f3894Sahrens mutex_destroy(&dp->dp_scrub_cancel_lock); 2539d3574bfSNeil Perrin taskq_destroy(dp->dp_vnrele_taskq); 25488b7b0f2SMatthew Ahrens if (dp->dp_blkstats) 25588b7b0f2SMatthew Ahrens kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); 256fa9e4066Sahrens kmem_free(dp, sizeof (dsl_pool_t)); 257fa9e4066Sahrens } 258fa9e4066Sahrens 259fa9e4066Sahrens dsl_pool_t * 2600a48a24eStimh dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) 261fa9e4066Sahrens { 262fa9e4066Sahrens int err; 263fa9e4066Sahrens dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); 264fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); 265503ad85cSMatthew Ahrens objset_t *os; 266088f3894Sahrens dsl_dataset_t *ds; 267088f3894Sahrens uint64_t dsobj; 268088f3894Sahrens 269088f3894Sahrens /* create and open the MOS (meta-objset) */ 270503ad85cSMatthew Ahrens dp->dp_meta_objset = dmu_objset_create_impl(spa, 271503ad85cSMatthew Ahrens NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); 272fa9e4066Sahrens 273fa9e4066Sahrens /* create the pool directory */ 274fa9e4066Sahrens err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 275fa9e4066Sahrens DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); 276fa9e4066Sahrens ASSERT3U(err, ==, 0); 277fa9e4066Sahrens 278fa9e4066Sahrens /* create and open the root dir */ 279088f3894Sahrens dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); 280ea8dc4b6Seschrock VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj, 281ea8dc4b6Seschrock NULL, dp, &dp->dp_root_dir)); 282fa9e4066Sahrens 283fa9e4066Sahrens /* create and open the meta-objset dir */ 284088f3894Sahrens (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); 285088f3894Sahrens VERIFY(0 == dsl_pool_open_special_dir(dp, 286088f3894Sahrens MOS_DIR_NAME, &dp->dp_mos_dir)); 287088f3894Sahrens 288088f3894Sahrens if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) 289088f3894Sahrens dsl_pool_create_origin(dp, tx); 290088f3894Sahrens 291088f3894Sahrens /* create the root dataset */ 292088f3894Sahrens dsobj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); 293088f3894Sahrens 294088f3894Sahrens /* create the root objset */ 295088f3894Sahrens VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 296503ad85cSMatthew Ahrens os = dmu_objset_create_impl(dp->dp_spa, ds, 297088f3894Sahrens dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); 298088f3894Sahrens #ifdef _KERNEL 299503ad85cSMatthew Ahrens zfs_create_fs(os, kcred, zplprops, tx); 300088f3894Sahrens #endif 301088f3894Sahrens dsl_dataset_rele(ds, FTAG); 302fa9e4066Sahrens 303fa9e4066Sahrens dmu_tx_commit(tx); 304fa9e4066Sahrens 305fa9e4066Sahrens return (dp); 306fa9e4066Sahrens } 307fa9e4066Sahrens 308fa9e4066Sahrens void 309fa9e4066Sahrens dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) 310fa9e4066Sahrens { 311c717a561Smaybee zio_t *zio; 312fa9e4066Sahrens dmu_tx_t *tx; 313c717a561Smaybee dsl_dir_t *dd; 314c717a561Smaybee dsl_dataset_t *ds; 315c717a561Smaybee dsl_sync_task_group_t *dstg; 316503ad85cSMatthew Ahrens objset_t *mos = dp->dp_meta_objset; 31705715f94SMark Maybee hrtime_t start, write_time; 31805715f94SMark Maybee uint64_t data_written; 319c717a561Smaybee int err; 320fa9e4066Sahrens 321fa9e4066Sahrens tx = dmu_tx_create_assigned(dp, txg); 322fa9e4066Sahrens 32305715f94SMark Maybee dp->dp_read_overhead = 0; 3240fd90d51SMark Maybee start = gethrtime(); 32514843421SMatthew Ahrens 326c717a561Smaybee zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 327c717a561Smaybee while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { 32814843421SMatthew Ahrens /* 32914843421SMatthew Ahrens * We must not sync any non-MOS datasets twice, because 33014843421SMatthew Ahrens * we may have taken a snapshot of them. However, we 33114843421SMatthew Ahrens * may sync newly-created datasets on pass 2. 33214843421SMatthew Ahrens */ 33314843421SMatthew Ahrens ASSERT(!list_link_active(&ds->ds_synced_link)); 33414843421SMatthew Ahrens list_insert_tail(&dp->dp_synced_datasets, ds); 335c717a561Smaybee dsl_dataset_sync(ds, zio, tx); 336c717a561Smaybee } 33705715f94SMark Maybee DTRACE_PROBE(pool_sync__1setup); 338c717a561Smaybee err = zio_wait(zio); 33914843421SMatthew Ahrens 34005715f94SMark Maybee write_time = gethrtime() - start; 341c717a561Smaybee ASSERT(err == 0); 34205715f94SMark Maybee DTRACE_PROBE(pool_sync__2rootzio); 343c717a561Smaybee 34414843421SMatthew Ahrens for (ds = list_head(&dp->dp_synced_datasets); ds; 34514843421SMatthew Ahrens ds = list_next(&dp->dp_synced_datasets, ds)) 346*0a586ceaSMark Shellenbaum dmu_objset_do_userquota_updates(ds->ds_objset, tx); 34714843421SMatthew Ahrens 34814843421SMatthew Ahrens /* 34914843421SMatthew Ahrens * Sync the datasets again to push out the changes due to 35014843421SMatthew Ahrens * userquota updates. This must be done before we process the 35114843421SMatthew Ahrens * sync tasks, because that could cause a snapshot of a dataset 35214843421SMatthew Ahrens * whose ds_bp will be rewritten when we do this 2nd sync. 35314843421SMatthew Ahrens */ 35414843421SMatthew Ahrens zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 35514843421SMatthew Ahrens while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { 35614843421SMatthew Ahrens ASSERT(list_link_active(&ds->ds_synced_link)); 35714843421SMatthew Ahrens dmu_buf_rele(ds->ds_dbuf, ds); 35814843421SMatthew Ahrens dsl_dataset_sync(ds, zio, tx); 35914843421SMatthew Ahrens } 36014843421SMatthew Ahrens err = zio_wait(zio); 36114843421SMatthew Ahrens 362b24ab676SJeff Bonwick /* 363b24ab676SJeff Bonwick * If anything was added to a deadlist during a zio done callback, 364b24ab676SJeff Bonwick * it had to be put on the deferred queue. Enqueue it for real now. 365b24ab676SJeff Bonwick */ 366b24ab676SJeff Bonwick for (ds = list_head(&dp->dp_synced_datasets); ds; 367b24ab676SJeff Bonwick ds = list_next(&dp->dp_synced_datasets, ds)) 368b24ab676SJeff Bonwick bplist_sync(&ds->ds_deadlist, 369b24ab676SJeff Bonwick bplist_enqueue_cb, &ds->ds_deadlist, tx); 370b24ab676SJeff Bonwick 37114843421SMatthew Ahrens while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) { 37214843421SMatthew Ahrens /* 37314843421SMatthew Ahrens * No more sync tasks should have been added while we 37414843421SMatthew Ahrens * were syncing. 37514843421SMatthew Ahrens */ 37614843421SMatthew Ahrens ASSERT(spa_sync_pass(dp->dp_spa) == 1); 377c717a561Smaybee dsl_sync_task_group_sync(dstg, tx); 37814843421SMatthew Ahrens } 37905715f94SMark Maybee DTRACE_PROBE(pool_sync__3task); 38005715f94SMark Maybee 38105715f94SMark Maybee start = gethrtime(); 382c717a561Smaybee while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) 383c717a561Smaybee dsl_dir_sync(dd, tx); 38405715f94SMark Maybee write_time += gethrtime() - start; 385fa9e4066Sahrens 386b16da2e2SGeorge Wilson if (spa_sync_pass(dp->dp_spa) == 1) { 387b16da2e2SGeorge Wilson dp->dp_scrub_prefetch_zio_root = zio_root(dp->dp_spa, NULL, 388b16da2e2SGeorge Wilson NULL, ZIO_FLAG_CANFAIL); 389088f3894Sahrens dsl_pool_scrub_sync(dp, tx); 390b16da2e2SGeorge Wilson (void) zio_wait(dp->dp_scrub_prefetch_zio_root); 391b16da2e2SGeorge Wilson } 392088f3894Sahrens 39305715f94SMark Maybee start = gethrtime(); 394503ad85cSMatthew Ahrens if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || 395503ad85cSMatthew Ahrens list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { 396c717a561Smaybee zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 397503ad85cSMatthew Ahrens dmu_objset_sync(mos, zio, tx); 398c717a561Smaybee err = zio_wait(zio); 399c717a561Smaybee ASSERT(err == 0); 400fa9e4066Sahrens dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); 401fa9e4066Sahrens spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); 402fa9e4066Sahrens } 40305715f94SMark Maybee write_time += gethrtime() - start; 40405715f94SMark Maybee DTRACE_PROBE2(pool_sync__4io, hrtime_t, write_time, 40505715f94SMark Maybee hrtime_t, dp->dp_read_overhead); 40605715f94SMark Maybee write_time -= dp->dp_read_overhead; 407fa9e4066Sahrens 408fa9e4066Sahrens dmu_tx_commit(tx); 40905715f94SMark Maybee 41005715f94SMark Maybee data_written = dp->dp_space_towrite[txg & TXG_MASK]; 41105715f94SMark Maybee dp->dp_space_towrite[txg & TXG_MASK] = 0; 41205715f94SMark Maybee ASSERT(dp->dp_tempreserved[txg & TXG_MASK] == 0); 41305715f94SMark Maybee 41405715f94SMark Maybee /* 41505715f94SMark Maybee * If the write limit max has not been explicitly set, set it 41605715f94SMark Maybee * to a fraction of available physical memory (default 1/8th). 41705715f94SMark Maybee * Note that we must inflate the limit because the spa 41805715f94SMark Maybee * inflates write sizes to account for data replication. 41905715f94SMark Maybee * Check this each sync phase to catch changing memory size. 42005715f94SMark Maybee */ 42105715f94SMark Maybee if (physmem != old_physmem && zfs_write_limit_shift) { 42205715f94SMark Maybee mutex_enter(&zfs_write_limit_lock); 42305715f94SMark Maybee old_physmem = physmem; 42405715f94SMark Maybee zfs_write_limit_max = ptob(physmem) >> zfs_write_limit_shift; 42505715f94SMark Maybee zfs_write_limit_inflated = MAX(zfs_write_limit_min, 42605715f94SMark Maybee spa_get_asize(dp->dp_spa, zfs_write_limit_max)); 42705715f94SMark Maybee mutex_exit(&zfs_write_limit_lock); 42805715f94SMark Maybee } 42905715f94SMark Maybee 43005715f94SMark Maybee /* 43105715f94SMark Maybee * Attempt to keep the sync time consistent by adjusting the 43205715f94SMark Maybee * amount of write traffic allowed into each transaction group. 43305715f94SMark Maybee * Weight the throughput calculation towards the current value: 43405715f94SMark Maybee * thru = 3/4 old_thru + 1/4 new_thru 435fb5dd802SLin Ling * 436fb5dd802SLin Ling * Note: write_time is in nanosecs, so write_time/MICROSEC 437fb5dd802SLin Ling * yields millisecs 43805715f94SMark Maybee */ 43905715f94SMark Maybee ASSERT(zfs_write_limit_min > 0); 440fb5dd802SLin Ling if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) { 441fb5dd802SLin Ling uint64_t throughput = data_written / (write_time / MICROSEC); 442fb5dd802SLin Ling 44305715f94SMark Maybee if (dp->dp_throughput) 44405715f94SMark Maybee dp->dp_throughput = throughput / 4 + 44505715f94SMark Maybee 3 * dp->dp_throughput / 4; 44605715f94SMark Maybee else 44705715f94SMark Maybee dp->dp_throughput = throughput; 44805715f94SMark Maybee dp->dp_write_limit = MIN(zfs_write_limit_inflated, 44905715f94SMark Maybee MAX(zfs_write_limit_min, 450fb5dd802SLin Ling dp->dp_throughput * zfs_txg_synctime_ms)); 45105715f94SMark Maybee } 452fa9e4066Sahrens } 453fa9e4066Sahrens 454fa9e4066Sahrens void 455b24ab676SJeff Bonwick dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) 456fa9e4066Sahrens { 457fa9e4066Sahrens dsl_dataset_t *ds; 458b24ab676SJeff Bonwick objset_t *os; 459fa9e4066Sahrens 4603cb34c60Sahrens while (ds = list_head(&dp->dp_synced_datasets)) { 4613cb34c60Sahrens list_remove(&dp->dp_synced_datasets, ds); 462b24ab676SJeff Bonwick os = ds->ds_objset; 463b24ab676SJeff Bonwick zil_clean(os->os_zil); 464b24ab676SJeff Bonwick ASSERT(!dmu_objset_is_dirty(os, txg)); 465af2c4821Smaybee dmu_buf_rele(ds->ds_dbuf, ds); 466fa9e4066Sahrens } 467b24ab676SJeff Bonwick ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); 468fa9e4066Sahrens } 469fa9e4066Sahrens 470c717a561Smaybee /* 471c717a561Smaybee * TRUE if the current thread is the tx_sync_thread or if we 472c717a561Smaybee * are being called from SPA context during pool initialization. 473c717a561Smaybee */ 474fa9e4066Sahrens int 475fa9e4066Sahrens dsl_pool_sync_context(dsl_pool_t *dp) 476fa9e4066Sahrens { 477fa9e4066Sahrens return (curthread == dp->dp_tx.tx_sync_thread || 478c717a561Smaybee spa_get_dsl(dp->dp_spa) == NULL); 479fa9e4066Sahrens } 480fa9e4066Sahrens 481fa9e4066Sahrens uint64_t 482fa9e4066Sahrens dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) 483fa9e4066Sahrens { 484fa9e4066Sahrens uint64_t space, resv; 485fa9e4066Sahrens 486fa9e4066Sahrens /* 48744cd46caSbillm * Reserve about 1.6% (1/64), or at least 32MB, for allocation 488fa9e4066Sahrens * efficiency. 489fa9e4066Sahrens * XXX The intent log is not accounted for, so it must fit 490fa9e4066Sahrens * within this slop. 491fa9e4066Sahrens * 492fa9e4066Sahrens * If we're trying to assess whether it's OK to do a free, 493fa9e4066Sahrens * cut the reservation in half to allow forward progress 494fa9e4066Sahrens * (e.g. make it possible to rm(1) files from a full pool). 495fa9e4066Sahrens */ 496485bbbf5SGeorge Wilson space = spa_get_dspace(dp->dp_spa); 49744cd46caSbillm resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1); 498fa9e4066Sahrens if (netfree) 499fa9e4066Sahrens resv >>= 1; 500fa9e4066Sahrens 501fa9e4066Sahrens return (space - resv); 502fa9e4066Sahrens } 5031ab7f2deSmaybee 5041ab7f2deSmaybee int 5051ab7f2deSmaybee dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx) 5061ab7f2deSmaybee { 5071ab7f2deSmaybee uint64_t reserved = 0; 5081ab7f2deSmaybee uint64_t write_limit = (zfs_write_limit_override ? 5091ab7f2deSmaybee zfs_write_limit_override : dp->dp_write_limit); 5101ab7f2deSmaybee 5111ab7f2deSmaybee if (zfs_no_write_throttle) { 512c5904d13Seschrock atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], 513c5904d13Seschrock space); 5141ab7f2deSmaybee return (0); 5151ab7f2deSmaybee } 5161ab7f2deSmaybee 5171ab7f2deSmaybee /* 5181ab7f2deSmaybee * Check to see if we have exceeded the maximum allowed IO for 5191ab7f2deSmaybee * this transaction group. We can do this without locks since 5201ab7f2deSmaybee * a little slop here is ok. Note that we do the reserved check 5211ab7f2deSmaybee * with only half the requested reserve: this is because the 5221ab7f2deSmaybee * reserve requests are worst-case, and we really don't want to 5231ab7f2deSmaybee * throttle based off of worst-case estimates. 5241ab7f2deSmaybee */ 5251ab7f2deSmaybee if (write_limit > 0) { 5261ab7f2deSmaybee reserved = dp->dp_space_towrite[tx->tx_txg & TXG_MASK] 5271ab7f2deSmaybee + dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2; 5281ab7f2deSmaybee 5291ab7f2deSmaybee if (reserved && reserved > write_limit) 5301ab7f2deSmaybee return (ERESTART); 5311ab7f2deSmaybee } 5321ab7f2deSmaybee 5331ab7f2deSmaybee atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space); 5341ab7f2deSmaybee 5351ab7f2deSmaybee /* 5361ab7f2deSmaybee * If this transaction group is over 7/8ths capacity, delay 5371ab7f2deSmaybee * the caller 1 clock tick. This will slow down the "fill" 5381ab7f2deSmaybee * rate until the sync process can catch up with us. 5391ab7f2deSmaybee */ 540e8397a2bSgw if (reserved && reserved > (write_limit - (write_limit >> 3))) 5411ab7f2deSmaybee txg_delay(dp, tx->tx_txg, 1); 5421ab7f2deSmaybee 5431ab7f2deSmaybee return (0); 5441ab7f2deSmaybee } 5451ab7f2deSmaybee 5461ab7f2deSmaybee void 5471ab7f2deSmaybee dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) 5481ab7f2deSmaybee { 5491ab7f2deSmaybee ASSERT(dp->dp_tempreserved[tx->tx_txg & TXG_MASK] >= space); 5501ab7f2deSmaybee atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], -space); 5511ab7f2deSmaybee } 5521ab7f2deSmaybee 5531ab7f2deSmaybee void 5541ab7f2deSmaybee dsl_pool_memory_pressure(dsl_pool_t *dp) 5551ab7f2deSmaybee { 5561ab7f2deSmaybee uint64_t space_inuse = 0; 5571ab7f2deSmaybee int i; 5581ab7f2deSmaybee 5591ab7f2deSmaybee if (dp->dp_write_limit == zfs_write_limit_min) 5601ab7f2deSmaybee return; 5611ab7f2deSmaybee 5621ab7f2deSmaybee for (i = 0; i < TXG_SIZE; i++) { 5631ab7f2deSmaybee space_inuse += dp->dp_space_towrite[i]; 5641ab7f2deSmaybee space_inuse += dp->dp_tempreserved[i]; 5651ab7f2deSmaybee } 5661ab7f2deSmaybee dp->dp_write_limit = MAX(zfs_write_limit_min, 5671ab7f2deSmaybee MIN(dp->dp_write_limit, space_inuse / 4)); 5681ab7f2deSmaybee } 5691ab7f2deSmaybee 5701ab7f2deSmaybee void 5711ab7f2deSmaybee dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) 5721ab7f2deSmaybee { 5731ab7f2deSmaybee if (space > 0) { 5741ab7f2deSmaybee mutex_enter(&dp->dp_lock); 5751ab7f2deSmaybee dp->dp_space_towrite[tx->tx_txg & TXG_MASK] += space; 5761ab7f2deSmaybee mutex_exit(&dp->dp_lock); 5771ab7f2deSmaybee } 5781ab7f2deSmaybee } 579088f3894Sahrens 580088f3894Sahrens /* ARGSUSED */ 581088f3894Sahrens static int 582088f3894Sahrens upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) 583088f3894Sahrens { 584088f3894Sahrens dmu_tx_t *tx = arg; 585088f3894Sahrens dsl_dataset_t *ds, *prev = NULL; 586088f3894Sahrens int err; 587088f3894Sahrens dsl_pool_t *dp = spa_get_dsl(spa); 588088f3894Sahrens 589088f3894Sahrens err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); 590088f3894Sahrens if (err) 591088f3894Sahrens return (err); 592088f3894Sahrens 593088f3894Sahrens while (ds->ds_phys->ds_prev_snap_obj != 0) { 594088f3894Sahrens err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, 595088f3894Sahrens FTAG, &prev); 596088f3894Sahrens if (err) { 597088f3894Sahrens dsl_dataset_rele(ds, FTAG); 598088f3894Sahrens return (err); 599088f3894Sahrens } 600088f3894Sahrens 601088f3894Sahrens if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) 602088f3894Sahrens break; 603088f3894Sahrens dsl_dataset_rele(ds, FTAG); 604088f3894Sahrens ds = prev; 605088f3894Sahrens prev = NULL; 606088f3894Sahrens } 607088f3894Sahrens 608088f3894Sahrens if (prev == NULL) { 609088f3894Sahrens prev = dp->dp_origin_snap; 610088f3894Sahrens 611088f3894Sahrens /* 612088f3894Sahrens * The $ORIGIN can't have any data, or the accounting 613088f3894Sahrens * will be wrong. 614088f3894Sahrens */ 615088f3894Sahrens ASSERT(prev->ds_phys->ds_bp.blk_birth == 0); 616088f3894Sahrens 617088f3894Sahrens /* The origin doesn't get attached to itself */ 618088f3894Sahrens if (ds->ds_object == prev->ds_object) { 619088f3894Sahrens dsl_dataset_rele(ds, FTAG); 620088f3894Sahrens return (0); 621088f3894Sahrens } 622088f3894Sahrens 623088f3894Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 624088f3894Sahrens ds->ds_phys->ds_prev_snap_obj = prev->ds_object; 625088f3894Sahrens ds->ds_phys->ds_prev_snap_txg = prev->ds_phys->ds_creation_txg; 626088f3894Sahrens 627088f3894Sahrens dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 628088f3894Sahrens ds->ds_dir->dd_phys->dd_origin_obj = prev->ds_object; 629088f3894Sahrens 630088f3894Sahrens dmu_buf_will_dirty(prev->ds_dbuf, tx); 631088f3894Sahrens prev->ds_phys->ds_num_children++; 632088f3894Sahrens 633088f3894Sahrens if (ds->ds_phys->ds_next_snap_obj == 0) { 634088f3894Sahrens ASSERT(ds->ds_prev == NULL); 635088f3894Sahrens VERIFY(0 == dsl_dataset_hold_obj(dp, 636088f3894Sahrens ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 637088f3894Sahrens } 638088f3894Sahrens } 639088f3894Sahrens 640088f3894Sahrens ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object); 641088f3894Sahrens ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object); 642088f3894Sahrens 643088f3894Sahrens if (prev->ds_phys->ds_next_clones_obj == 0) { 644c33e334fSMatthew Ahrens dmu_buf_will_dirty(prev->ds_dbuf, tx); 645088f3894Sahrens prev->ds_phys->ds_next_clones_obj = 646088f3894Sahrens zap_create(dp->dp_meta_objset, 647088f3894Sahrens DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 648088f3894Sahrens } 649088f3894Sahrens VERIFY(0 == zap_add_int(dp->dp_meta_objset, 650088f3894Sahrens prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx)); 651088f3894Sahrens 652088f3894Sahrens dsl_dataset_rele(ds, FTAG); 653088f3894Sahrens if (prev != dp->dp_origin_snap) 654088f3894Sahrens dsl_dataset_rele(prev, FTAG); 655088f3894Sahrens return (0); 656088f3894Sahrens } 657088f3894Sahrens 658088f3894Sahrens void 659088f3894Sahrens dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) 660088f3894Sahrens { 661088f3894Sahrens ASSERT(dmu_tx_is_syncing(tx)); 662088f3894Sahrens ASSERT(dp->dp_origin_snap != NULL); 663088f3894Sahrens 664c33e334fSMatthew Ahrens VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb, 665c33e334fSMatthew Ahrens tx, DS_FIND_CHILDREN)); 666088f3894Sahrens } 667088f3894Sahrens 668088f3894Sahrens void 669088f3894Sahrens dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) 670088f3894Sahrens { 671088f3894Sahrens uint64_t dsobj; 672088f3894Sahrens dsl_dataset_t *ds; 673088f3894Sahrens 674088f3894Sahrens ASSERT(dmu_tx_is_syncing(tx)); 675088f3894Sahrens ASSERT(dp->dp_origin_snap == NULL); 676088f3894Sahrens 677088f3894Sahrens /* create the origin dir, ds, & snap-ds */ 678088f3894Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 679088f3894Sahrens dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, 680088f3894Sahrens NULL, 0, kcred, tx); 681088f3894Sahrens VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 682088f3894Sahrens dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, kcred, tx); 683088f3894Sahrens VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, 684088f3894Sahrens dp, &dp->dp_origin_snap)); 685088f3894Sahrens dsl_dataset_rele(ds, FTAG); 686088f3894Sahrens rw_exit(&dp->dp_config_rwlock); 687088f3894Sahrens } 6889d3574bfSNeil Perrin 6899d3574bfSNeil Perrin taskq_t * 6909d3574bfSNeil Perrin dsl_pool_vnrele_taskq(dsl_pool_t *dp) 6919d3574bfSNeil Perrin { 6929d3574bfSNeil Perrin return (dp->dp_vnrele_taskq); 6939d3574bfSNeil Perrin } 694ca45db41SChris Kirby 695ca45db41SChris Kirby /* 696ca45db41SChris Kirby * Walk through the pool-wide zap object of temporary snapshot user holds 697ca45db41SChris Kirby * and release them. 698ca45db41SChris Kirby */ 699ca45db41SChris Kirby void 700ca45db41SChris Kirby dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) 701ca45db41SChris Kirby { 702ca45db41SChris Kirby zap_attribute_t za; 703ca45db41SChris Kirby zap_cursor_t zc; 704ca45db41SChris Kirby objset_t *mos = dp->dp_meta_objset; 705ca45db41SChris Kirby uint64_t zapobj = dp->dp_tmp_userrefs_obj; 706ca45db41SChris Kirby 707ca45db41SChris Kirby if (zapobj == 0) 708ca45db41SChris Kirby return; 709ca45db41SChris Kirby ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 710ca45db41SChris Kirby 711ca45db41SChris Kirby for (zap_cursor_init(&zc, mos, zapobj); 712ca45db41SChris Kirby zap_cursor_retrieve(&zc, &za) == 0; 713ca45db41SChris Kirby zap_cursor_advance(&zc)) { 714ca45db41SChris Kirby char *htag; 715ca45db41SChris Kirby uint64_t dsobj; 716ca45db41SChris Kirby 717ca45db41SChris Kirby htag = strchr(za.za_name, '-'); 718ca45db41SChris Kirby *htag = '\0'; 719ca45db41SChris Kirby ++htag; 720ca45db41SChris Kirby dsobj = strtonum(za.za_name, NULL); 721ca45db41SChris Kirby (void) dsl_dataset_user_release_tmp(dp, dsobj, htag); 722ca45db41SChris Kirby } 723ca45db41SChris Kirby zap_cursor_fini(&zc); 724ca45db41SChris Kirby } 725ca45db41SChris Kirby 726ca45db41SChris Kirby /* 727ca45db41SChris Kirby * Create the pool-wide zap object for storing temporary snapshot holds. 728ca45db41SChris Kirby */ 729ca45db41SChris Kirby void 730ca45db41SChris Kirby dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) 731ca45db41SChris Kirby { 732ca45db41SChris Kirby objset_t *mos = dp->dp_meta_objset; 733ca45db41SChris Kirby 734ca45db41SChris Kirby ASSERT(dp->dp_tmp_userrefs_obj == 0); 735ca45db41SChris Kirby ASSERT(dmu_tx_is_syncing(tx)); 736ca45db41SChris Kirby 737ca45db41SChris Kirby dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS, 738ca45db41SChris Kirby DMU_OT_NONE, 0, tx); 739ca45db41SChris Kirby 740ca45db41SChris Kirby VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, 741ca45db41SChris Kirby sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0); 742ca45db41SChris Kirby } 743ca45db41SChris Kirby 744ca45db41SChris Kirby static int 745ca45db41SChris Kirby dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, 74615508ac0SChris Kirby const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding) 747ca45db41SChris Kirby { 748ca45db41SChris Kirby objset_t *mos = dp->dp_meta_objset; 749ca45db41SChris Kirby uint64_t zapobj = dp->dp_tmp_userrefs_obj; 750ca45db41SChris Kirby char *name; 751ca45db41SChris Kirby int error; 752ca45db41SChris Kirby 753ca45db41SChris Kirby ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 754ca45db41SChris Kirby ASSERT(dmu_tx_is_syncing(tx)); 755ca45db41SChris Kirby 756ca45db41SChris Kirby /* 757ca45db41SChris Kirby * If the pool was created prior to SPA_VERSION_USERREFS, the 758ca45db41SChris Kirby * zap object for temporary holds might not exist yet. 759ca45db41SChris Kirby */ 760ca45db41SChris Kirby if (zapobj == 0) { 761ca45db41SChris Kirby if (holding) { 762ca45db41SChris Kirby dsl_pool_user_hold_create_obj(dp, tx); 763ca45db41SChris Kirby zapobj = dp->dp_tmp_userrefs_obj; 764ca45db41SChris Kirby } else { 765ca45db41SChris Kirby return (ENOENT); 766ca45db41SChris Kirby } 767ca45db41SChris Kirby } 768ca45db41SChris Kirby 769ca45db41SChris Kirby name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); 770ca45db41SChris Kirby if (holding) 77115508ac0SChris Kirby error = zap_add(mos, zapobj, name, 8, 1, now, tx); 772ca45db41SChris Kirby else 773ca45db41SChris Kirby error = zap_remove(mos, zapobj, name, tx); 774ca45db41SChris Kirby strfree(name); 775ca45db41SChris Kirby 776ca45db41SChris Kirby return (error); 777ca45db41SChris Kirby } 778ca45db41SChris Kirby 779ca45db41SChris Kirby /* 780ca45db41SChris Kirby * Add a temporary hold for the given dataset object and tag. 781ca45db41SChris Kirby */ 782ca45db41SChris Kirby int 783ca45db41SChris Kirby dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, 78415508ac0SChris Kirby uint64_t *now, dmu_tx_t *tx) 785ca45db41SChris Kirby { 78615508ac0SChris Kirby return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); 787ca45db41SChris Kirby } 788ca45db41SChris Kirby 789ca45db41SChris Kirby /* 790ca45db41SChris Kirby * Release a temporary hold for the given dataset object and tag. 791ca45db41SChris Kirby */ 792ca45db41SChris Kirby int 793ca45db41SChris Kirby dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, 794ca45db41SChris Kirby dmu_tx_t *tx) 795ca45db41SChris Kirby { 796ca45db41SChris Kirby return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL, 797ca45db41SChris Kirby tx, B_FALSE)); 798ca45db41SChris Kirby } 799