/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright 2011 Martin Matuska
 * Copyright (c) 2013 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/txg_impl.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_scan.h>
#include <sys/callb.h>

/*
 * ZFS Transaction Groups
 * ----------------------
 *
 * ZFS transaction groups are, as the name implies, groups of transactions
 * that act on persistent state. ZFS asserts consistency at the granularity of
 * these transaction groups. Each successive transaction group (txg) is
 * assigned a 64-bit consecutive identifier. There are three active
 * transaction group states: open, quiescing, or syncing. At any given time,
 * there may be an active txg associated with each state; each active txg may
 * either be processing, or blocked waiting to enter the next state. There may
 * be up to three active txgs, and there is always a txg in the open state
 * (though it may be blocked waiting to enter the quiescing state). In broad
 * strokes, transactions — operations that change in-memory structures — are
 * accepted into the txg in the open state, and are completed while the txg is
 * in the open or quiescing states. The accumulated changes are written to
 * disk in the syncing state.
 *
 * Open
 *
 * When a new txg becomes active, it first enters the open state. New
 * transactions — updates to in-memory structures — are assigned to the
 * currently open txg. There is always a txg in the open state so that ZFS can
 * accept new changes (though the txg may refuse new changes if it has hit
 * some limit). ZFS advances the open txg to the next state for a variety of
 * reasons such as it hitting a time or size threshold, or the execution of an
 * administrative action that must be completed in the syncing state.
 *
 * Quiescing
 *
 * After a txg exits the open state, it enters the quiescing state. The
 * quiescing state is intended to provide a buffer between accepting new
 * transactions in the open state and writing them out to stable storage in
 * the syncing state. While quiescing, transactions can continue their
 * operation without delaying either of the other states. Typically, a txg is
 * in the quiescing state very briefly since the operations are bounded by
 * software latencies rather than, say, slower I/O latencies. After all
 * transactions complete, the txg is ready to enter the next state.
 *
 * Syncing
 *
 * In the syncing state, the in-memory state built up during the open and (to
 * a lesser degree) the quiescing states is written to stable storage. The
 * process of writing out modified data can, in turn modify more data. For
 * example when we write new blocks, we need to allocate space for them; those
 * allocations modify metadata (space maps)... which themselves must be
 * written to stable storage. During the sync state, ZFS iterates, writing out
 * data until it converges and all in-memory changes have been written out.
 * The first such pass is the largest as it encompasses all the modified user
 * data (as opposed to filesystem metadata). Subsequent passes typically have
 * far less data to write as they consist exclusively of filesystem metadata.
 *
 * To ensure convergence, after a certain number of passes ZFS begins
 * overwriting locations on stable storage that had been allocated earlier in
 * the syncing state (and subsequently freed). ZFS usually allocates new
 * blocks to optimize for large, continuous, writes. For the syncing state to
 * converge however it must complete a pass where no new blocks are allocated
 * since each allocation requires a modification of persistent metadata.
 * Further, to hasten convergence, after a prescribed number of passes, ZFS
 * also defers frees, and stops compressing.
 *
 * In addition to writing out user data, we must also execute synctasks during
 * the syncing context. A synctask is the mechanism by which some
 * administrative activities work such as creating and destroying snapshots or
 * datasets. Note that when a synctask is initiated it enters the open txg,
 * and ZFS then pushes that txg as quickly as possible to completion of the
 * syncing state in order to reduce the latency of the administrative
 * activity. To complete the syncing state, ZFS writes out a new uberblock,
Leventhal * the root of the tree of blocks that comprise all state stored on the ZFS 104adbbcffaSAdam H. Leventhal * pool. Finally, if there is a quiesced txg waiting, we signal that it can 105adbbcffaSAdam H. Leventhal * now transition to the syncing state. 106fa9e4066Sahrens */ 107fa9e4066Sahrens 108fa9e4066Sahrens static void txg_sync_thread(dsl_pool_t *dp); 109fa9e4066Sahrens static void txg_quiesce_thread(dsl_pool_t *dp); 110fa9e4066Sahrens 11144ecc532SGeorge Wilson int zfs_txg_timeout = 5; /* max seconds worth of delta per txg */ 112fa9e4066Sahrens 113fa9e4066Sahrens /* 114fa9e4066Sahrens * Prepare the txg subsystem. 115fa9e4066Sahrens */ 116fa9e4066Sahrens void 117fa9e4066Sahrens txg_init(dsl_pool_t *dp, uint64_t txg) 118fa9e4066Sahrens { 119fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 1205ad82045Snd int c; 121fa9e4066Sahrens bzero(tx, sizeof (tx_state_t)); 122fa9e4066Sahrens 123fa9e4066Sahrens tx->tx_cpu = kmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP); 124fa9e4066Sahrens 1258f38d419Sek for (c = 0; c < max_ncpus; c++) { 1268f38d419Sek int i; 1278f38d419Sek 1285ad82045Snd mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL); 1298f38d419Sek for (i = 0; i < TXG_SIZE; i++) { 1308f38d419Sek cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT, 1318f38d419Sek NULL); 132d20e665cSRicardo M. Correia list_create(&tx->tx_cpu[c].tc_callbacks[i], 133d20e665cSRicardo M. Correia sizeof (dmu_tx_callback_t), 134d20e665cSRicardo M. Correia offsetof(dmu_tx_callback_t, dcb_node)); 1358f38d419Sek } 1368f38d419Sek } 1375ad82045Snd 1385ad82045Snd mutex_init(&tx->tx_sync_lock, NULL, MUTEX_DEFAULT, NULL); 139fa9e4066Sahrens 140b5e70f97SRicardo M. Correia cv_init(&tx->tx_sync_more_cv, NULL, CV_DEFAULT, NULL); 141b5e70f97SRicardo M. Correia cv_init(&tx->tx_sync_done_cv, NULL, CV_DEFAULT, NULL); 142b5e70f97SRicardo M. Correia cv_init(&tx->tx_quiesce_more_cv, NULL, CV_DEFAULT, NULL); 143b5e70f97SRicardo M. 
Correia cv_init(&tx->tx_quiesce_done_cv, NULL, CV_DEFAULT, NULL); 144b5e70f97SRicardo M. Correia cv_init(&tx->tx_exit_cv, NULL, CV_DEFAULT, NULL); 145b5e70f97SRicardo M. Correia 146fa9e4066Sahrens tx->tx_open_txg = txg; 147fa9e4066Sahrens } 148fa9e4066Sahrens 149fa9e4066Sahrens /* 150fa9e4066Sahrens * Close down the txg subsystem. 151fa9e4066Sahrens */ 152fa9e4066Sahrens void 153fa9e4066Sahrens txg_fini(dsl_pool_t *dp) 154fa9e4066Sahrens { 155fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 1565ad82045Snd int c; 157fa9e4066Sahrens 158fa9e4066Sahrens ASSERT(tx->tx_threads == 0); 159fa9e4066Sahrens 1605ad82045Snd mutex_destroy(&tx->tx_sync_lock); 1615ad82045Snd 162b5e70f97SRicardo M. Correia cv_destroy(&tx->tx_sync_more_cv); 163b5e70f97SRicardo M. Correia cv_destroy(&tx->tx_sync_done_cv); 164b5e70f97SRicardo M. Correia cv_destroy(&tx->tx_quiesce_more_cv); 165b5e70f97SRicardo M. Correia cv_destroy(&tx->tx_quiesce_done_cv); 166b5e70f97SRicardo M. Correia cv_destroy(&tx->tx_exit_cv); 167b5e70f97SRicardo M. Correia 1688f38d419Sek for (c = 0; c < max_ncpus; c++) { 1698f38d419Sek int i; 1708f38d419Sek 1715ad82045Snd mutex_destroy(&tx->tx_cpu[c].tc_lock); 172d20e665cSRicardo M. Correia for (i = 0; i < TXG_SIZE; i++) { 1738f38d419Sek cv_destroy(&tx->tx_cpu[c].tc_cv[i]); 174d20e665cSRicardo M. Correia list_destroy(&tx->tx_cpu[c].tc_callbacks[i]); 175d20e665cSRicardo M. Correia } 1768f38d419Sek } 177fa9e4066Sahrens 178d20e665cSRicardo M. Correia if (tx->tx_commit_cb_taskq != NULL) 179d20e665cSRicardo M. Correia taskq_destroy(tx->tx_commit_cb_taskq); 180d20e665cSRicardo M. Correia 181fa9e4066Sahrens kmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t)); 182fa9e4066Sahrens 183fa9e4066Sahrens bzero(tx, sizeof (tx_state_t)); 184fa9e4066Sahrens } 185fa9e4066Sahrens 186fa9e4066Sahrens /* 187fa9e4066Sahrens * Start syncing transaction groups. 
188fa9e4066Sahrens */ 189fa9e4066Sahrens void 190fa9e4066Sahrens txg_sync_start(dsl_pool_t *dp) 191fa9e4066Sahrens { 192fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 193fa9e4066Sahrens 194fa9e4066Sahrens mutex_enter(&tx->tx_sync_lock); 195fa9e4066Sahrens 196fa9e4066Sahrens dprintf("pool %p\n", dp); 197fa9e4066Sahrens 198fa9e4066Sahrens ASSERT(tx->tx_threads == 0); 199fa9e4066Sahrens 2001ab7f2deSmaybee tx->tx_threads = 2; 201fa9e4066Sahrens 202fa9e4066Sahrens tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread, 203fa9e4066Sahrens dp, 0, &p0, TS_RUN, minclsyspri); 204fa9e4066Sahrens 205088f3894Sahrens /* 206088f3894Sahrens * The sync thread can need a larger-than-default stack size on 207088f3894Sahrens * 32-bit x86. This is due in part to nested pools and 208088f3894Sahrens * scrub_visitbp() recursion. 209088f3894Sahrens */ 2103f9d6ad7SLin Ling tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread, 211fa9e4066Sahrens dp, 0, &p0, TS_RUN, minclsyspri); 212fa9e4066Sahrens 213fa9e4066Sahrens mutex_exit(&tx->tx_sync_lock); 214fa9e4066Sahrens } 215fa9e4066Sahrens 216fa9e4066Sahrens static void 217fa9e4066Sahrens txg_thread_enter(tx_state_t *tx, callb_cpr_t *cpr) 218fa9e4066Sahrens { 219fa9e4066Sahrens CALLB_CPR_INIT(cpr, &tx->tx_sync_lock, callb_generic_cpr, FTAG); 220fa9e4066Sahrens mutex_enter(&tx->tx_sync_lock); 221fa9e4066Sahrens } 222fa9e4066Sahrens 223fa9e4066Sahrens static void 224fa9e4066Sahrens txg_thread_exit(tx_state_t *tx, callb_cpr_t *cpr, kthread_t **tpp) 225fa9e4066Sahrens { 226fa9e4066Sahrens ASSERT(*tpp != NULL); 227fa9e4066Sahrens *tpp = NULL; 228fa9e4066Sahrens tx->tx_threads--; 229fa9e4066Sahrens cv_broadcast(&tx->tx_exit_cv); 230fa9e4066Sahrens CALLB_CPR_EXIT(cpr); /* drops &tx->tx_sync_lock */ 231fa9e4066Sahrens thread_exit(); 232fa9e4066Sahrens } 233fa9e4066Sahrens 234fa9e4066Sahrens static void 2351ab7f2deSmaybee txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, uint64_t time) 236fa9e4066Sahrens { 
237fa9e4066Sahrens CALLB_CPR_SAFE_BEGIN(cpr); 238fa9e4066Sahrens 2391ab7f2deSmaybee if (time) 240d3d50737SRafael Vanoni (void) cv_timedwait(cv, &tx->tx_sync_lock, 241d3d50737SRafael Vanoni ddi_get_lbolt() + time); 242fa9e4066Sahrens else 243fa9e4066Sahrens cv_wait(cv, &tx->tx_sync_lock); 244fa9e4066Sahrens 245fa9e4066Sahrens CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock); 246fa9e4066Sahrens } 247fa9e4066Sahrens 248fa9e4066Sahrens /* 249fa9e4066Sahrens * Stop syncing transaction groups. 250fa9e4066Sahrens */ 251fa9e4066Sahrens void 252fa9e4066Sahrens txg_sync_stop(dsl_pool_t *dp) 253fa9e4066Sahrens { 254fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 255fa9e4066Sahrens 256fa9e4066Sahrens dprintf("pool %p\n", dp); 257fa9e4066Sahrens /* 258fa9e4066Sahrens * Finish off any work in progress. 259fa9e4066Sahrens */ 2601ab7f2deSmaybee ASSERT(tx->tx_threads == 2); 261468c413aSTim Haley 262468c413aSTim Haley /* 263468c413aSTim Haley * We need to ensure that we've vacated the deferred space_maps. 264468c413aSTim Haley */ 265468c413aSTim Haley txg_wait_synced(dp, tx->tx_open_txg + TXG_DEFER_SIZE); 266fa9e4066Sahrens 267fa9e4066Sahrens /* 2681ab7f2deSmaybee * Wake all sync threads and wait for them to die. 
269fa9e4066Sahrens */ 270fa9e4066Sahrens mutex_enter(&tx->tx_sync_lock); 271fa9e4066Sahrens 2721ab7f2deSmaybee ASSERT(tx->tx_threads == 2); 273fa9e4066Sahrens 274fa9e4066Sahrens tx->tx_exiting = 1; 275fa9e4066Sahrens 276fa9e4066Sahrens cv_broadcast(&tx->tx_quiesce_more_cv); 277fa9e4066Sahrens cv_broadcast(&tx->tx_quiesce_done_cv); 278fa9e4066Sahrens cv_broadcast(&tx->tx_sync_more_cv); 279fa9e4066Sahrens 280fa9e4066Sahrens while (tx->tx_threads != 0) 281fa9e4066Sahrens cv_wait(&tx->tx_exit_cv, &tx->tx_sync_lock); 282fa9e4066Sahrens 283fa9e4066Sahrens tx->tx_exiting = 0; 284fa9e4066Sahrens 285fa9e4066Sahrens mutex_exit(&tx->tx_sync_lock); 286fa9e4066Sahrens } 287fa9e4066Sahrens 288fa9e4066Sahrens uint64_t 289fa9e4066Sahrens txg_hold_open(dsl_pool_t *dp, txg_handle_t *th) 290fa9e4066Sahrens { 291fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 292fa9e4066Sahrens tx_cpu_t *tc = &tx->tx_cpu[CPU_SEQID]; 293fa9e4066Sahrens uint64_t txg; 294fa9e4066Sahrens 295fa9e4066Sahrens mutex_enter(&tc->tc_lock); 296fa9e4066Sahrens 297fa9e4066Sahrens txg = tx->tx_open_txg; 298fa9e4066Sahrens tc->tc_count[txg & TXG_MASK]++; 299fa9e4066Sahrens 300fa9e4066Sahrens th->th_cpu = tc; 301fa9e4066Sahrens th->th_txg = txg; 302fa9e4066Sahrens 303fa9e4066Sahrens return (txg); 304fa9e4066Sahrens } 305fa9e4066Sahrens 306fa9e4066Sahrens void 307fa9e4066Sahrens txg_rele_to_quiesce(txg_handle_t *th) 308fa9e4066Sahrens { 309fa9e4066Sahrens tx_cpu_t *tc = th->th_cpu; 310fa9e4066Sahrens 311fa9e4066Sahrens mutex_exit(&tc->tc_lock); 312fa9e4066Sahrens } 313fa9e4066Sahrens 314d20e665cSRicardo M. Correia void 315d20e665cSRicardo M. Correia txg_register_callbacks(txg_handle_t *th, list_t *tx_callbacks) 316d20e665cSRicardo M. Correia { 317d20e665cSRicardo M. Correia tx_cpu_t *tc = th->th_cpu; 318d20e665cSRicardo M. Correia int g = th->th_txg & TXG_MASK; 319d20e665cSRicardo M. Correia 320d20e665cSRicardo M. Correia mutex_enter(&tc->tc_lock); 321d20e665cSRicardo M. 
Correia list_move_tail(&tc->tc_callbacks[g], tx_callbacks); 322d20e665cSRicardo M. Correia mutex_exit(&tc->tc_lock); 323d20e665cSRicardo M. Correia } 324d20e665cSRicardo M. Correia 325fa9e4066Sahrens void 326fa9e4066Sahrens txg_rele_to_sync(txg_handle_t *th) 327fa9e4066Sahrens { 328fa9e4066Sahrens tx_cpu_t *tc = th->th_cpu; 329fa9e4066Sahrens int g = th->th_txg & TXG_MASK; 330fa9e4066Sahrens 331fa9e4066Sahrens mutex_enter(&tc->tc_lock); 332fa9e4066Sahrens ASSERT(tc->tc_count[g] != 0); 333fa9e4066Sahrens if (--tc->tc_count[g] == 0) 334fa9e4066Sahrens cv_broadcast(&tc->tc_cv[g]); 335fa9e4066Sahrens mutex_exit(&tc->tc_lock); 336fa9e4066Sahrens 337fa9e4066Sahrens th->th_cpu = NULL; /* defensive */ 338fa9e4066Sahrens } 339fa9e4066Sahrens 340fa9e4066Sahrens static void 341fa9e4066Sahrens txg_quiesce(dsl_pool_t *dp, uint64_t txg) 342fa9e4066Sahrens { 343fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 344fa9e4066Sahrens int g = txg & TXG_MASK; 345fa9e4066Sahrens int c; 346fa9e4066Sahrens 347fa9e4066Sahrens /* 348fa9e4066Sahrens * Grab all tx_cpu locks so nobody else can get into this txg. 349fa9e4066Sahrens */ 350fa9e4066Sahrens for (c = 0; c < max_ncpus; c++) 351fa9e4066Sahrens mutex_enter(&tx->tx_cpu[c].tc_lock); 352fa9e4066Sahrens 353fa9e4066Sahrens ASSERT(txg == tx->tx_open_txg); 354fa9e4066Sahrens tx->tx_open_txg++; 355fa9e4066Sahrens 356fa9e4066Sahrens /* 357fa9e4066Sahrens * Now that we've incremented tx_open_txg, we can let threads 358fa9e4066Sahrens * enter the next transaction group. 359fa9e4066Sahrens */ 360fa9e4066Sahrens for (c = 0; c < max_ncpus; c++) 361fa9e4066Sahrens mutex_exit(&tx->tx_cpu[c].tc_lock); 362fa9e4066Sahrens 363fa9e4066Sahrens /* 364fa9e4066Sahrens * Quiesce the transaction group by waiting for everyone to txg_exit(). 
365fa9e4066Sahrens */ 366fa9e4066Sahrens for (c = 0; c < max_ncpus; c++) { 367fa9e4066Sahrens tx_cpu_t *tc = &tx->tx_cpu[c]; 368fa9e4066Sahrens mutex_enter(&tc->tc_lock); 369fa9e4066Sahrens while (tc->tc_count[g] != 0) 370fa9e4066Sahrens cv_wait(&tc->tc_cv[g], &tc->tc_lock); 371fa9e4066Sahrens mutex_exit(&tc->tc_lock); 372fa9e4066Sahrens } 373fa9e4066Sahrens } 374fa9e4066Sahrens 375d20e665cSRicardo M. Correia static void 376d20e665cSRicardo M. Correia txg_do_callbacks(list_t *cb_list) 377d20e665cSRicardo M. Correia { 378d20e665cSRicardo M. Correia dmu_tx_do_callbacks(cb_list, 0); 379d20e665cSRicardo M. Correia 380d20e665cSRicardo M. Correia list_destroy(cb_list); 381d20e665cSRicardo M. Correia 382d20e665cSRicardo M. Correia kmem_free(cb_list, sizeof (list_t)); 383d20e665cSRicardo M. Correia } 384d20e665cSRicardo M. Correia 385d20e665cSRicardo M. Correia /* 386d20e665cSRicardo M. Correia * Dispatch the commit callbacks registered on this txg to worker threads. 387d20e665cSRicardo M. Correia */ 388d20e665cSRicardo M. Correia static void 389d20e665cSRicardo M. Correia txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg) 390d20e665cSRicardo M. Correia { 391d20e665cSRicardo M. Correia int c; 392d20e665cSRicardo M. Correia tx_state_t *tx = &dp->dp_tx; 393d20e665cSRicardo M. Correia list_t *cb_list; 394d20e665cSRicardo M. Correia 395d20e665cSRicardo M. Correia for (c = 0; c < max_ncpus; c++) { 396d20e665cSRicardo M. Correia tx_cpu_t *tc = &tx->tx_cpu[c]; 397d20e665cSRicardo M. Correia /* No need to lock tx_cpu_t at this point */ 398d20e665cSRicardo M. Correia 399d20e665cSRicardo M. Correia int g = txg & TXG_MASK; 400d20e665cSRicardo M. Correia 401d20e665cSRicardo M. Correia if (list_is_empty(&tc->tc_callbacks[g])) 402d20e665cSRicardo M. Correia continue; 403d20e665cSRicardo M. Correia 404d20e665cSRicardo M. Correia if (tx->tx_commit_cb_taskq == NULL) { 405d20e665cSRicardo M. Correia /* 406d20e665cSRicardo M. Correia * Commit callback taskq hasn't been created yet. 
407d20e665cSRicardo M. Correia */ 408d20e665cSRicardo M. Correia tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb", 409d20e665cSRicardo M. Correia max_ncpus, minclsyspri, max_ncpus, max_ncpus * 2, 410d20e665cSRicardo M. Correia TASKQ_PREPOPULATE); 411d20e665cSRicardo M. Correia } 412d20e665cSRicardo M. Correia 413d20e665cSRicardo M. Correia cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 414d20e665cSRicardo M. Correia list_create(cb_list, sizeof (dmu_tx_callback_t), 415d20e665cSRicardo M. Correia offsetof(dmu_tx_callback_t, dcb_node)); 416d20e665cSRicardo M. Correia 417d20e665cSRicardo M. Correia list_move_tail(&tc->tc_callbacks[g], cb_list); 418d20e665cSRicardo M. Correia 419d20e665cSRicardo M. Correia (void) taskq_dispatch(tx->tx_commit_cb_taskq, (task_func_t *) 420d20e665cSRicardo M. Correia txg_do_callbacks, cb_list, TQ_SLEEP); 421d20e665cSRicardo M. Correia } 422d20e665cSRicardo M. Correia } 423d20e665cSRicardo M. Correia 424fa9e4066Sahrens static void 425fa9e4066Sahrens txg_sync_thread(dsl_pool_t *dp) 426fa9e4066Sahrens { 427b16da2e2SGeorge Wilson spa_t *spa = dp->dp_spa; 428fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 429fa9e4066Sahrens callb_cpr_t cpr; 43005715f94SMark Maybee uint64_t start, delta; 431fa9e4066Sahrens 432fa9e4066Sahrens txg_thread_enter(tx, &cpr); 433fa9e4066Sahrens 4341ab7f2deSmaybee start = delta = 0; 435fa9e4066Sahrens for (;;) { 43605715f94SMark Maybee uint64_t timer, timeout = zfs_txg_timeout * hz; 43705715f94SMark Maybee uint64_t txg; 438fa9e4066Sahrens 439fa9e4066Sahrens /* 4403f9d6ad7SLin Ling * We sync when we're scanning, there's someone waiting 44188b7b0f2SMatthew Ahrens * on us, or the quiesce thread has handed off a txg to 44288b7b0f2SMatthew Ahrens * us, or we have reached our timeout. 443fa9e4066Sahrens */ 4441ab7f2deSmaybee timer = (delta >= timeout ? 
0 : timeout - delta); 445cde58dbcSMatthew Ahrens while (!dsl_scan_active(dp->dp_scan) && 44688b7b0f2SMatthew Ahrens !tx->tx_exiting && timer > 0 && 447fa9e4066Sahrens tx->tx_synced_txg >= tx->tx_sync_txg_waiting && 448fa9e4066Sahrens tx->tx_quiesced_txg == 0) { 449fa9e4066Sahrens dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n", 450fa9e4066Sahrens tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); 4511ab7f2deSmaybee txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer); 452d3d50737SRafael Vanoni delta = ddi_get_lbolt() - start; 4531ab7f2deSmaybee timer = (delta > timeout ? 0 : timeout - delta); 454fa9e4066Sahrens } 455fa9e4066Sahrens 456fa9e4066Sahrens /* 457fa9e4066Sahrens * Wait until the quiesce thread hands off a txg to us, 458fa9e4066Sahrens * prompting it to do so if necessary. 459fa9e4066Sahrens */ 460fa9e4066Sahrens while (!tx->tx_exiting && tx->tx_quiesced_txg == 0) { 461fa9e4066Sahrens if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1) 462fa9e4066Sahrens tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1; 463fa9e4066Sahrens cv_broadcast(&tx->tx_quiesce_more_cv); 464fa9e4066Sahrens txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0); 465fa9e4066Sahrens } 466fa9e4066Sahrens 467fa9e4066Sahrens if (tx->tx_exiting) 468fa9e4066Sahrens txg_thread_exit(tx, &cpr, &tx->tx_sync_thread); 469fa9e4066Sahrens 470fa9e4066Sahrens /* 471fa9e4066Sahrens * Consume the quiesced txg which has been handed off to 472fa9e4066Sahrens * us. This may cause the quiescing thread to now be 473fa9e4066Sahrens * able to quiesce another txg, so we must signal it. 
474fa9e4066Sahrens */ 475fa9e4066Sahrens txg = tx->tx_quiesced_txg; 476fa9e4066Sahrens tx->tx_quiesced_txg = 0; 477fa9e4066Sahrens tx->tx_syncing_txg = txg; 478fa9e4066Sahrens cv_broadcast(&tx->tx_quiesce_more_cv); 479fa9e4066Sahrens 480fa9e4066Sahrens dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", 4818f38d419Sek txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); 482fa9e4066Sahrens mutex_exit(&tx->tx_sync_lock); 48305715f94SMark Maybee 484d3d50737SRafael Vanoni start = ddi_get_lbolt(); 485b16da2e2SGeorge Wilson spa_sync(spa, txg); 486d3d50737SRafael Vanoni delta = ddi_get_lbolt() - start; 4871ab7f2deSmaybee 488fa9e4066Sahrens mutex_enter(&tx->tx_sync_lock); 489fa9e4066Sahrens tx->tx_synced_txg = txg; 490fa9e4066Sahrens tx->tx_syncing_txg = 0; 491fa9e4066Sahrens cv_broadcast(&tx->tx_sync_done_cv); 492d20e665cSRicardo M. Correia 493d20e665cSRicardo M. Correia /* 494d20e665cSRicardo M. Correia * Dispatch commit callbacks to worker threads. 495d20e665cSRicardo M. Correia */ 496d20e665cSRicardo M. Correia txg_dispatch_callbacks(dp, txg); 497fa9e4066Sahrens } 498fa9e4066Sahrens } 499fa9e4066Sahrens 500fa9e4066Sahrens static void 501fa9e4066Sahrens txg_quiesce_thread(dsl_pool_t *dp) 502fa9e4066Sahrens { 503fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 504fa9e4066Sahrens callb_cpr_t cpr; 505fa9e4066Sahrens 506fa9e4066Sahrens txg_thread_enter(tx, &cpr); 507fa9e4066Sahrens 508fa9e4066Sahrens for (;;) { 509fa9e4066Sahrens uint64_t txg; 510fa9e4066Sahrens 511fa9e4066Sahrens /* 512fa9e4066Sahrens * We quiesce when there's someone waiting on us. 513fa9e4066Sahrens * However, we can only have one txg in "quiescing" or 514fa9e4066Sahrens * "quiesced, waiting to sync" state. So we wait until 515fa9e4066Sahrens * the "quiesced, waiting to sync" txg has been consumed 516fa9e4066Sahrens * by the sync thread. 
517fa9e4066Sahrens */ 518fa9e4066Sahrens while (!tx->tx_exiting && 519fa9e4066Sahrens (tx->tx_open_txg >= tx->tx_quiesce_txg_waiting || 520fa9e4066Sahrens tx->tx_quiesced_txg != 0)) 521fa9e4066Sahrens txg_thread_wait(tx, &cpr, &tx->tx_quiesce_more_cv, 0); 522fa9e4066Sahrens 523fa9e4066Sahrens if (tx->tx_exiting) 524fa9e4066Sahrens txg_thread_exit(tx, &cpr, &tx->tx_quiesce_thread); 525fa9e4066Sahrens 526fa9e4066Sahrens txg = tx->tx_open_txg; 527fa9e4066Sahrens dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", 528fa9e4066Sahrens txg, tx->tx_quiesce_txg_waiting, 529fa9e4066Sahrens tx->tx_sync_txg_waiting); 530fa9e4066Sahrens mutex_exit(&tx->tx_sync_lock); 531fa9e4066Sahrens txg_quiesce(dp, txg); 532fa9e4066Sahrens mutex_enter(&tx->tx_sync_lock); 533fa9e4066Sahrens 534fa9e4066Sahrens /* 535fa9e4066Sahrens * Hand this txg off to the sync thread. 536fa9e4066Sahrens */ 537fa9e4066Sahrens dprintf("quiesce done, handing off txg %llu\n", txg); 538fa9e4066Sahrens tx->tx_quiesced_txg = txg; 539fa9e4066Sahrens cv_broadcast(&tx->tx_sync_more_cv); 540fa9e4066Sahrens cv_broadcast(&tx->tx_quiesce_done_cv); 541fa9e4066Sahrens } 542fa9e4066Sahrens } 543fa9e4066Sahrens 5441ab7f2deSmaybee /* 5451ab7f2deSmaybee * Delay this thread by 'ticks' if we are still in the open transaction 5461ab7f2deSmaybee * group and there is already a waiting txg quiesing or quiesced. Abort 5471ab7f2deSmaybee * the delay if this txg stalls or enters the quiesing state. 
5481ab7f2deSmaybee */ 5491ab7f2deSmaybee void 5501ab7f2deSmaybee txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) 5511ab7f2deSmaybee { 5521ab7f2deSmaybee tx_state_t *tx = &dp->dp_tx; 55361bb40edSMartin Matuska clock_t timeout = ddi_get_lbolt() + ticks; 5541ab7f2deSmaybee 5551ab7f2deSmaybee /* don't delay if this txg could transition to quiesing immediately */ 5561ab7f2deSmaybee if (tx->tx_open_txg > txg || 5571ab7f2deSmaybee tx->tx_syncing_txg == txg-1 || tx->tx_synced_txg == txg-1) 5581ab7f2deSmaybee return; 5591ab7f2deSmaybee 5601ab7f2deSmaybee mutex_enter(&tx->tx_sync_lock); 5611ab7f2deSmaybee if (tx->tx_open_txg > txg || tx->tx_synced_txg == txg-1) { 5621ab7f2deSmaybee mutex_exit(&tx->tx_sync_lock); 5631ab7f2deSmaybee return; 5641ab7f2deSmaybee } 5651ab7f2deSmaybee 566d3d50737SRafael Vanoni while (ddi_get_lbolt() < timeout && 5671ab7f2deSmaybee tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) 5681ab7f2deSmaybee (void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock, 5691ab7f2deSmaybee timeout); 5701ab7f2deSmaybee 5711ab7f2deSmaybee mutex_exit(&tx->tx_sync_lock); 5721ab7f2deSmaybee } 5731ab7f2deSmaybee 574fa9e4066Sahrens void 575fa9e4066Sahrens txg_wait_synced(dsl_pool_t *dp, uint64_t txg) 576fa9e4066Sahrens { 577fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 578fa9e4066Sahrens 579*3b2aab18SMatthew Ahrens ASSERT(!dsl_pool_config_held(dp)); 580*3b2aab18SMatthew Ahrens 581fa9e4066Sahrens mutex_enter(&tx->tx_sync_lock); 5821ab7f2deSmaybee ASSERT(tx->tx_threads == 2); 583fa9e4066Sahrens if (txg == 0) 584b24ab676SJeff Bonwick txg = tx->tx_open_txg + TXG_DEFER_SIZE; 585fa9e4066Sahrens if (tx->tx_sync_txg_waiting < txg) 586fa9e4066Sahrens tx->tx_sync_txg_waiting = txg; 587fa9e4066Sahrens dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", 588fa9e4066Sahrens txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); 589fa9e4066Sahrens while (tx->tx_synced_txg < txg) { 590fa9e4066Sahrens dprintf("broadcasting sync more " 591fa9e4066Sahrens "tx_synced=%llu 
waiting=%llu dp=%p\n", 592fa9e4066Sahrens tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); 593fa9e4066Sahrens cv_broadcast(&tx->tx_sync_more_cv); 594fa9e4066Sahrens cv_wait(&tx->tx_sync_done_cv, &tx->tx_sync_lock); 595fa9e4066Sahrens } 596fa9e4066Sahrens mutex_exit(&tx->tx_sync_lock); 597fa9e4066Sahrens } 598fa9e4066Sahrens 599fa9e4066Sahrens void 600fa9e4066Sahrens txg_wait_open(dsl_pool_t *dp, uint64_t txg) 601fa9e4066Sahrens { 602fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 603fa9e4066Sahrens 604*3b2aab18SMatthew Ahrens ASSERT(!dsl_pool_config_held(dp)); 605*3b2aab18SMatthew Ahrens 606fa9e4066Sahrens mutex_enter(&tx->tx_sync_lock); 6071ab7f2deSmaybee ASSERT(tx->tx_threads == 2); 608fa9e4066Sahrens if (txg == 0) 609fa9e4066Sahrens txg = tx->tx_open_txg + 1; 610fa9e4066Sahrens if (tx->tx_quiesce_txg_waiting < txg) 611fa9e4066Sahrens tx->tx_quiesce_txg_waiting = txg; 612fa9e4066Sahrens dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", 613fa9e4066Sahrens txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); 614fa9e4066Sahrens while (tx->tx_open_txg < txg) { 615fa9e4066Sahrens cv_broadcast(&tx->tx_quiesce_more_cv); 616fa9e4066Sahrens cv_wait(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock); 617fa9e4066Sahrens } 618fa9e4066Sahrens mutex_exit(&tx->tx_sync_lock); 619fa9e4066Sahrens } 620fa9e4066Sahrens 621088f3894Sahrens boolean_t 622fa9e4066Sahrens txg_stalled(dsl_pool_t *dp) 623fa9e4066Sahrens { 624fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 625fa9e4066Sahrens return (tx->tx_quiesce_txg_waiting > tx->tx_open_txg); 626fa9e4066Sahrens } 627fa9e4066Sahrens 628088f3894Sahrens boolean_t 629088f3894Sahrens txg_sync_waiting(dsl_pool_t *dp) 630088f3894Sahrens { 631088f3894Sahrens tx_state_t *tx = &dp->dp_tx; 632088f3894Sahrens 633088f3894Sahrens return (tx->tx_syncing_txg <= tx->tx_sync_txg_waiting || 634088f3894Sahrens tx->tx_quiesced_txg != 0); 635088f3894Sahrens } 636088f3894Sahrens 637fa9e4066Sahrens /* 638fa9e4066Sahrens * Per-txg object lists. 
639fa9e4066Sahrens */ 640fa9e4066Sahrens void 641fa9e4066Sahrens txg_list_create(txg_list_t *tl, size_t offset) 642fa9e4066Sahrens { 643fa9e4066Sahrens int t; 644fa9e4066Sahrens 645fa9e4066Sahrens mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL); 646fa9e4066Sahrens 647fa9e4066Sahrens tl->tl_offset = offset; 648fa9e4066Sahrens 649fa9e4066Sahrens for (t = 0; t < TXG_SIZE; t++) 650fa9e4066Sahrens tl->tl_head[t] = NULL; 651fa9e4066Sahrens } 652fa9e4066Sahrens 653fa9e4066Sahrens void 654fa9e4066Sahrens txg_list_destroy(txg_list_t *tl) 655fa9e4066Sahrens { 656fa9e4066Sahrens int t; 657fa9e4066Sahrens 658fa9e4066Sahrens for (t = 0; t < TXG_SIZE; t++) 659fa9e4066Sahrens ASSERT(txg_list_empty(tl, t)); 660fa9e4066Sahrens 661fa9e4066Sahrens mutex_destroy(&tl->tl_lock); 662fa9e4066Sahrens } 663fa9e4066Sahrens 664ce636f8bSMatthew Ahrens boolean_t 665fa9e4066Sahrens txg_list_empty(txg_list_t *tl, uint64_t txg) 666fa9e4066Sahrens { 667fa9e4066Sahrens return (tl->tl_head[txg & TXG_MASK] == NULL); 668fa9e4066Sahrens } 669fa9e4066Sahrens 670fa9e4066Sahrens /* 671*3b2aab18SMatthew Ahrens * Add an entry to the list (unless it's already on the list). 672*3b2aab18SMatthew Ahrens * Returns B_TRUE if it was actually added. 
673fa9e4066Sahrens */ 674*3b2aab18SMatthew Ahrens boolean_t 675fa9e4066Sahrens txg_list_add(txg_list_t *tl, void *p, uint64_t txg) 676fa9e4066Sahrens { 677fa9e4066Sahrens int t = txg & TXG_MASK; 678fa9e4066Sahrens txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); 679*3b2aab18SMatthew Ahrens boolean_t add; 680fa9e4066Sahrens 681fa9e4066Sahrens mutex_enter(&tl->tl_lock); 682*3b2aab18SMatthew Ahrens add = (tn->tn_member[t] == 0); 683*3b2aab18SMatthew Ahrens if (add) { 684fa9e4066Sahrens tn->tn_member[t] = 1; 685fa9e4066Sahrens tn->tn_next[t] = tl->tl_head[t]; 686fa9e4066Sahrens tl->tl_head[t] = tn; 687fa9e4066Sahrens } 688fa9e4066Sahrens mutex_exit(&tl->tl_lock); 689fa9e4066Sahrens 690*3b2aab18SMatthew Ahrens return (add); 691fa9e4066Sahrens } 692fa9e4066Sahrens 693495807d7SMatthew Ahrens /* 694*3b2aab18SMatthew Ahrens * Add an entry to the end of the list, unless it's already on the list. 695*3b2aab18SMatthew Ahrens * (walks list to find end) 696*3b2aab18SMatthew Ahrens * Returns B_TRUE if it was actually added. 
697495807d7SMatthew Ahrens */ 698*3b2aab18SMatthew Ahrens boolean_t 699495807d7SMatthew Ahrens txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg) 700495807d7SMatthew Ahrens { 701495807d7SMatthew Ahrens int t = txg & TXG_MASK; 702495807d7SMatthew Ahrens txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); 703*3b2aab18SMatthew Ahrens boolean_t add; 704495807d7SMatthew Ahrens 705495807d7SMatthew Ahrens mutex_enter(&tl->tl_lock); 706*3b2aab18SMatthew Ahrens add = (tn->tn_member[t] == 0); 707*3b2aab18SMatthew Ahrens if (add) { 708495807d7SMatthew Ahrens txg_node_t **tp; 709495807d7SMatthew Ahrens 710495807d7SMatthew Ahrens for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t]) 711495807d7SMatthew Ahrens continue; 712495807d7SMatthew Ahrens 713495807d7SMatthew Ahrens tn->tn_member[t] = 1; 714495807d7SMatthew Ahrens tn->tn_next[t] = NULL; 715495807d7SMatthew Ahrens *tp = tn; 716495807d7SMatthew Ahrens } 717495807d7SMatthew Ahrens mutex_exit(&tl->tl_lock); 718495807d7SMatthew Ahrens 719*3b2aab18SMatthew Ahrens return (add); 720495807d7SMatthew Ahrens } 721495807d7SMatthew Ahrens 722fa9e4066Sahrens /* 723fa9e4066Sahrens * Remove the head of the list and return it. 
724fa9e4066Sahrens */ 725fa9e4066Sahrens void * 726fa9e4066Sahrens txg_list_remove(txg_list_t *tl, uint64_t txg) 727fa9e4066Sahrens { 728fa9e4066Sahrens int t = txg & TXG_MASK; 729fa9e4066Sahrens txg_node_t *tn; 730fa9e4066Sahrens void *p = NULL; 731fa9e4066Sahrens 732fa9e4066Sahrens mutex_enter(&tl->tl_lock); 733fa9e4066Sahrens if ((tn = tl->tl_head[t]) != NULL) { 734fa9e4066Sahrens p = (char *)tn - tl->tl_offset; 735fa9e4066Sahrens tl->tl_head[t] = tn->tn_next[t]; 736fa9e4066Sahrens tn->tn_next[t] = NULL; 737fa9e4066Sahrens tn->tn_member[t] = 0; 738fa9e4066Sahrens } 739fa9e4066Sahrens mutex_exit(&tl->tl_lock); 740fa9e4066Sahrens 741fa9e4066Sahrens return (p); 742fa9e4066Sahrens } 743fa9e4066Sahrens 744fa9e4066Sahrens /* 745fa9e4066Sahrens * Remove a specific item from the list and return it. 746fa9e4066Sahrens */ 747fa9e4066Sahrens void * 748fa9e4066Sahrens txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg) 749fa9e4066Sahrens { 750fa9e4066Sahrens int t = txg & TXG_MASK; 751fa9e4066Sahrens txg_node_t *tn, **tp; 752fa9e4066Sahrens 753fa9e4066Sahrens mutex_enter(&tl->tl_lock); 754fa9e4066Sahrens 755fa9e4066Sahrens for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) { 756fa9e4066Sahrens if ((char *)tn - tl->tl_offset == p) { 757fa9e4066Sahrens *tp = tn->tn_next[t]; 758fa9e4066Sahrens tn->tn_next[t] = NULL; 759fa9e4066Sahrens tn->tn_member[t] = 0; 760fa9e4066Sahrens mutex_exit(&tl->tl_lock); 761fa9e4066Sahrens return (p); 762fa9e4066Sahrens } 763fa9e4066Sahrens } 764fa9e4066Sahrens 765fa9e4066Sahrens mutex_exit(&tl->tl_lock); 766fa9e4066Sahrens 767fa9e4066Sahrens return (NULL); 768fa9e4066Sahrens } 769fa9e4066Sahrens 770*3b2aab18SMatthew Ahrens boolean_t 771fa9e4066Sahrens txg_list_member(txg_list_t *tl, void *p, uint64_t txg) 772fa9e4066Sahrens { 773fa9e4066Sahrens int t = txg & TXG_MASK; 774fa9e4066Sahrens txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); 775fa9e4066Sahrens 776*3b2aab18SMatthew Ahrens return (tn->tn_member[t] != 
0); 777fa9e4066Sahrens } 778fa9e4066Sahrens 779fa9e4066Sahrens /* 780fa9e4066Sahrens * Walk a txg list -- only safe if you know it's not changing. 781fa9e4066Sahrens */ 782fa9e4066Sahrens void * 783fa9e4066Sahrens txg_list_head(txg_list_t *tl, uint64_t txg) 784fa9e4066Sahrens { 785fa9e4066Sahrens int t = txg & TXG_MASK; 786fa9e4066Sahrens txg_node_t *tn = tl->tl_head[t]; 787fa9e4066Sahrens 788fa9e4066Sahrens return (tn == NULL ? NULL : (char *)tn - tl->tl_offset); 789fa9e4066Sahrens } 790fa9e4066Sahrens 791fa9e4066Sahrens void * 792fa9e4066Sahrens txg_list_next(txg_list_t *tl, void *p, uint64_t txg) 793fa9e4066Sahrens { 794fa9e4066Sahrens int t = txg & TXG_MASK; 795fa9e4066Sahrens txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); 796fa9e4066Sahrens 797fa9e4066Sahrens tn = tn->tn_next[t]; 798fa9e4066Sahrens 799fa9e4066Sahrens return (tn == NULL ? NULL : (char *)tn - tl->tl_offset); 800fa9e4066Sahrens } 801