xref: /illumos-gate/usr/src/uts/common/fs/zfs/dmu_tx.c (revision fa9e4066f08beec538e775443c5be79dd423fcab)
1*fa9e4066Sahrens /*
2*fa9e4066Sahrens  * CDDL HEADER START
3*fa9e4066Sahrens  *
4*fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5*fa9e4066Sahrens  * Common Development and Distribution License, Version 1.0 only
6*fa9e4066Sahrens  * (the "License").  You may not use this file except in compliance
7*fa9e4066Sahrens  * with the License.
8*fa9e4066Sahrens  *
9*fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
11*fa9e4066Sahrens  * See the License for the specific language governing permissions
12*fa9e4066Sahrens  * and limitations under the License.
13*fa9e4066Sahrens  *
14*fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
15*fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
17*fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
18*fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
19*fa9e4066Sahrens  *
20*fa9e4066Sahrens  * CDDL HEADER END
21*fa9e4066Sahrens  */
22*fa9e4066Sahrens /*
23*fa9e4066Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*fa9e4066Sahrens  * Use is subject to license terms.
25*fa9e4066Sahrens  */
26*fa9e4066Sahrens 
27*fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*fa9e4066Sahrens 
29*fa9e4066Sahrens #include <sys/dmu.h>
30*fa9e4066Sahrens #include <sys/dmu_impl.h>
31*fa9e4066Sahrens #include <sys/dbuf.h>
32*fa9e4066Sahrens #include <sys/dmu_tx.h>
33*fa9e4066Sahrens #include <sys/dmu_objset.h>
34*fa9e4066Sahrens #include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */
35*fa9e4066Sahrens #include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */
36*fa9e4066Sahrens #include <sys/dsl_pool.h>
37*fa9e4066Sahrens #include <sys/zap_impl.h>	/* for ZAP_BLOCK_SHIFT */
38*fa9e4066Sahrens #include <sys/spa.h>
39*fa9e4066Sahrens #include <sys/zfs_context.h>
40*fa9e4066Sahrens 
#ifdef ZFS_DEBUG
/*
 * Debug tunable.  NOTE(review): not referenced in this file chunk;
 * presumably enables dmu_tx debug-buffer tracking elsewhere -- confirm
 * against its consumers before changing the default.
 */
int dmu_use_tx_debug_bufs = 1;
#endif
44*fa9e4066Sahrens 
45*fa9e4066Sahrens dmu_tx_t *
46*fa9e4066Sahrens dmu_tx_create_ds(dsl_dir_t *dd)
47*fa9e4066Sahrens {
48*fa9e4066Sahrens 	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
49*fa9e4066Sahrens 	tx->tx_dir = dd;
50*fa9e4066Sahrens 	if (dd)
51*fa9e4066Sahrens 		tx->tx_pool = dd->dd_pool;
52*fa9e4066Sahrens 	list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
53*fa9e4066Sahrens 	    offsetof(dmu_tx_hold_t, dth_node));
54*fa9e4066Sahrens 	refcount_create(&tx->tx_space_written);
55*fa9e4066Sahrens 	refcount_create(&tx->tx_space_freed);
56*fa9e4066Sahrens 	return (tx);
57*fa9e4066Sahrens }
58*fa9e4066Sahrens 
59*fa9e4066Sahrens dmu_tx_t *
60*fa9e4066Sahrens dmu_tx_create(objset_t *os)
61*fa9e4066Sahrens {
62*fa9e4066Sahrens 	dmu_tx_t *tx = dmu_tx_create_ds(os->os->os_dsl_dataset->ds_dir);
63*fa9e4066Sahrens 	tx->tx_objset = os;
64*fa9e4066Sahrens 	return (tx);
65*fa9e4066Sahrens }
66*fa9e4066Sahrens 
67*fa9e4066Sahrens dmu_tx_t *
68*fa9e4066Sahrens dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg)
69*fa9e4066Sahrens {
70*fa9e4066Sahrens 	dmu_tx_t *tx = dmu_tx_create_ds(NULL);
71*fa9e4066Sahrens 
72*fa9e4066Sahrens 	ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
73*fa9e4066Sahrens 	tx->tx_pool = dp;
74*fa9e4066Sahrens 	tx->tx_txg = txg;
75*fa9e4066Sahrens 	tx->tx_anyobj = TRUE;
76*fa9e4066Sahrens 
77*fa9e4066Sahrens 	return (tx);
78*fa9e4066Sahrens }
79*fa9e4066Sahrens 
80*fa9e4066Sahrens int
81*fa9e4066Sahrens dmu_tx_is_syncing(dmu_tx_t *tx)
82*fa9e4066Sahrens {
83*fa9e4066Sahrens 	return (tx->tx_anyobj);
84*fa9e4066Sahrens }
85*fa9e4066Sahrens 
86*fa9e4066Sahrens int
87*fa9e4066Sahrens dmu_tx_private_ok(dmu_tx_t *tx)
88*fa9e4066Sahrens {
89*fa9e4066Sahrens 	return (tx->tx_anyobj || tx->tx_privateobj);
90*fa9e4066Sahrens }
91*fa9e4066Sahrens 
/*
 * Record a hold on <os, object> in the transaction.  If the tx is
 * already assigned to a txg (tx_txg != 0), also mark the dnode as
 * assigned to this tx/txg and take a tx hold on it.  The hold record
 * carries the callback (func, arg1, arg2) that dmu_tx_try_assign()
 * later uses to estimate the space this operation will touch.
 */
static void
dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
    enum dmu_tx_hold_type type, dmu_tx_hold_func_t func,
    uint64_t arg1, uint64_t arg2)
{
	dmu_tx_hold_t *dth;
	dnode_t *dn = NULL;

	if (object != DMU_NEW_OBJECT) {
		/*
		 * NOTE(review): dnode_hold()'s return value is used
		 * unchecked -- presumably it cannot fail here; confirm
		 * against dnode_hold()'s contract.
		 */
		dn = dnode_hold(os->os, object, tx);

		if (tx->tx_txg != 0) {
			mutex_enter(&dn->dn_mtx);
			/*
			 * dn->dn_assigned_txg == tx->tx_txg doesn't pose a
			 * problem, but there's no way for it to happen (for
			 * now, at least).
			 */
			ASSERT(dn->dn_assigned_txg == 0);
			ASSERT(dn->dn_assigned_tx == NULL);
			dn->dn_assigned_txg = tx->tx_txg;
			dn->dn_assigned_tx = tx;
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
	}

	dth = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
	dth->dth_dnode = dn;
	dth->dth_type = type;
	dth->dth_func = func;
	dth->dth_arg1 = arg1;
	dth->dth_arg2 = arg2;
	/*
	 * XXX Investigate using a different data structure to keep
	 * track of dnodes in a tx.  Maybe array, since there will
	 * generally not be many entries?
	 */
	list_insert_tail(&tx->tx_holds, dth);
}
132*fa9e4066Sahrens 
133*fa9e4066Sahrens void
134*fa9e4066Sahrens dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
135*fa9e4066Sahrens {
136*fa9e4066Sahrens 	/*
137*fa9e4066Sahrens 	 * If we're syncing, they can manipulate any object anyhow, and
138*fa9e4066Sahrens 	 * the hold on the dnode_t can cause problems.
139*fa9e4066Sahrens 	 */
140*fa9e4066Sahrens 	if (!dmu_tx_is_syncing(tx)) {
141*fa9e4066Sahrens 		dmu_tx_hold_object_impl(tx, os, object, THT_NEWOBJECT,
142*fa9e4066Sahrens 		    NULL, 0, 0);
143*fa9e4066Sahrens 	}
144*fa9e4066Sahrens }
145*fa9e4066Sahrens 
/* ARGSUSED */
/*
 * Charge tx_space_towrite for writing [off, off+len) to dn, using
 * worst-case data/indirect block sizes whenever the dnode does not
 * pin them down (dn may be NULL for a not-yet-created object).
 */
static void
dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
{
	uint64_t start, end, space;
	int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;

	if (len == 0)
		return;

	/* Worst-case bounds; refined below when dn constrains them. */
	min_bs = SPA_MINBLOCKSHIFT;
	max_bs = SPA_MAXBLOCKSHIFT;
	min_ibs = DN_MIN_INDBLKSHIFT;
	max_ibs = DN_MAX_INDBLKSHIFT;

	/*
	 * If there's more than one block, the blocksize can't change,
	 * so we can make a more precise estimate.  Alternatively,
	 * if the dnode's ibs is larger than max_ibs, always use that.
	 * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
	 * the code will still work correctly on existing pools.
	 */
	if (dn && (dn->dn_maxblkid != 0 || dn->dn_indblkshift > max_ibs)) {
		min_ibs = max_ibs = dn->dn_indblkshift;
		if (dn->dn_datablkshift != 0)
			min_bs = max_bs = dn->dn_datablkshift;
	}

	/*
	 * 'end' is the last thing we will access, not one past.
	 * This way we won't overflow when accessing the last byte.
	 */
	start = P2ALIGN(off, 1ULL << max_bs);
	end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
	space = end - start + 1;	/* data bytes, worst-case aligned */

	/* Convert byte offsets to block indices (smallest block size). */
	start >>= min_bs;
	end >>= min_bs;

	epbs = min_ibs - SPA_BLKPTRSHIFT;	/* blkptrs per indirect */

	/*
	 * The object contains at most 2^(64 - min_bs) blocks,
	 * and each indirect level maps 2^epbs.
	 */
	for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
		start >>= epbs;
		end >>= epbs;
		/*
		 * If we increase the number of levels of indirection,
		 * we'll need new blkid=0 indirect blocks.  If start == 0,
		 * we're already accounting for those blocks; and if end == 0,
		 * we can't increase the number of levels beyond that.
		 */
		if (start != 0 && end != 0)
			space += 1ULL << max_ibs;
		space += (end - start + 1) << max_ibs;
	}

	ASSERT(space < 2 * DMU_MAX_ACCESS);

	tx->tx_space_towrite += space;
}
209*fa9e4066Sahrens 
/*
 * Account for dirtying the dnode itself: charge a write of its entry
 * in the meta-dnode.  If the dnode's current on-disk block is freeable
 * (per dsl_dataset_block_freeable()), reclassify that charge from
 * tx_space_towrite to tx_space_tooverwrite.
 */
static void
dmu_tx_count_dnode(dmu_tx_t *tx, dnode_t *dn)
{
	dnode_t *mdn = tx->tx_objset->os->os_meta_dnode;
	/* For a not-yet-allocated dnode, use the largest valid object #. */
	uint64_t object = dn ? dn->dn_object : DN_MAX_OBJECT - 1;
	uint64_t pre_write_space;

	ASSERT(object < DN_MAX_OBJECT);
	pre_write_space = tx->tx_space_towrite;
	dmu_tx_count_write(tx, mdn, object << DNODE_SHIFT, 1 << DNODE_SHIFT);
	if (dn && dn->dn_dbuf->db_blkptr &&
	    dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
	    dn->dn_dbuf->db_blkptr->blk_birth, tx)) {
		/* Move only the delta added by the call above. */
		tx->tx_space_tooverwrite +=
			tx->tx_space_towrite - pre_write_space;
		tx->tx_space_towrite = pre_write_space;
	}
}
228*fa9e4066Sahrens 
/* ARGSUSED */
/*
 * Hold callback for THT_WRITE (also used by dmu_tx_hold_bonus()):
 * charge the data write plus the dnode update.
 */
static void
dmu_tx_hold_write_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
{
	dmu_tx_count_write(tx, dn, off, len);
	dmu_tx_count_dnode(tx, dn);
}
236*fa9e4066Sahrens 
/*
 * Reserve space for writing 'len' bytes at 'off' in 'object'.
 * Must be called before the tx is assigned (tx_txg == 0); len must be
 * in (0, DMU_MAX_ACCESS) and off+len must not wrap around UINT64_MAX.
 */
void
dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
{
	ASSERT(tx->tx_txg == 0);
	ASSERT(len > 0 && len < DMU_MAX_ACCESS);
	ASSERT(UINT64_MAX - off >= len - 1);

	dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_WRITE,
	    dmu_tx_hold_write_impl, off, len);
}
247*fa9e4066Sahrens 
/*
 * Estimate tx_space_tofree for freeing [off, off+len) from dn by
 * walking the existing block pointers and summing the allocated size
 * of the blocks that are actually freeable (not pinned by snapshots).
 * This is only an estimate; blocks past dn_maxblkid or beyond the
 * scan cap are ignored.
 */
static void
dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
{
	uint64_t blkid, nblks;
	uint64_t space = 0;
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;

	ASSERT(dn->dn_assigned_tx == tx || dn->dn_assigned_tx == NULL);

	/* Unknown (variable) blocksize: can't translate offsets to blkids. */
	if (dn->dn_datablkshift == 0)
		return;
	/*
	 * not that the dnode can change, since it isn't dirty, but
	 * dbuf_hold_impl() wants us to have the struct_rwlock.
	 * also need it to protect dn_maxblkid.
	 */
	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	blkid = off >> dn->dn_datablkshift;
	nblks = (off + len) >> dn->dn_datablkshift;

	if (blkid >= dn->dn_maxblkid)
		goto out;
	if (blkid + nblks > dn->dn_maxblkid)
		nblks = dn->dn_maxblkid - blkid;

	/* don't bother beyond the first 128K (131,072) blocks */
	nblks = MIN(nblks, 128*1024);

	if (dn->dn_phys->dn_nlevels == 1) {
		/* All blkptrs live in the dnode itself; scan them directly. */
		int i;
		for (i = 0; i < nblks; i++) {
			blkptr_t *bp = dn->dn_phys->dn_blkptr;
			ASSERT3U(blkid + i, <, dn->dn_phys->dn_nblkptr);
			bp += blkid + i;
			if (dsl_dataset_block_freeable(ds, bp->blk_birth, tx)) {
				dprintf_bp(bp, "can free old%s", "");
				space += BP_GET_ASIZE(bp);
			}
		}
		goto out;
	}

	/* Walk one level-1 indirect block's worth of blkptrs at a time. */
	while (nblks) {
		dmu_buf_impl_t *dbuf;
		int err, epbs, blkoff, tochk;

		epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
		blkoff = P2PHASE(blkid, 1<<epbs);
		tochk = MIN((1<<epbs) - blkoff, nblks);

		err = dbuf_hold_impl(dn, 1, blkid >> epbs, TRUE, FTAG, &dbuf);
		if (err == 0) {
			int i;
			blkptr_t *bp;

			dbuf_read_havestruct(dbuf);

			bp = dbuf->db.db_data;
			bp += blkoff;

			for (i = 0; i < tochk; i++) {
				if (dsl_dataset_block_freeable(ds,
				    bp[i].blk_birth, tx)) {
					dprintf_bp(&bp[i],
					    "can free old%s", "");
					space += BP_GET_ASIZE(&bp[i]);
				}
			}
			dbuf_remove_ref(dbuf, FTAG);
		} else {
			/* the indirect block is sparse */
			ASSERT(err == ENOENT);
		}

		blkid += tochk;
		nblks -= tochk;
	}
out:
	rw_exit(&dn->dn_struct_rwlock);

	tx->tx_space_tofree += space;
}
330*fa9e4066Sahrens 
/*
 * Hold callback for THT_FREE: freeing may rewrite the partial first
 * and last blocks of the range, dirties the dnode, and frees the
 * blocks wholly contained in the range.
 */
static void
dmu_tx_hold_free_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
{
	int dirty;

	/* first block: a mid-block start means rewriting that block */
	if (off != 0 /* || dn->dn_maxblkid == 0 */)
		dmu_tx_count_write(tx, dn, off, 1);
	/* last block: same, unless we free through the end of the object */
	if (len != DMU_OBJECT_END)
		dmu_tx_count_write(tx, dn, off+len, 1);

	dmu_tx_count_dnode(tx, dn);

	/* Nothing allocated past dn_maxblkid, so nothing more to free. */
	if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
		return;
	if (len == DMU_OBJECT_END)
		len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off;

	/* XXX locking */
	dirty = dn->dn_dirtyblksz[0] | dn->dn_dirtyblksz[1] |
	    dn->dn_dirtyblksz[2] | dn->dn_dirtyblksz[3];
	/*
	 * Only estimate freeable space when no txg has dirty data for this
	 * dnode (otherwise the on-disk blkptrs may be stale).
	 * NOTE(review): the dn_assigned_tx != NULL requirement is not
	 * obvious from this file -- confirm intent before relying on it.
	 */
	if (dn->dn_assigned_tx != NULL && !dirty)
		dmu_tx_count_free(tx, dn, off, len);
}
356*fa9e4066Sahrens 
/*
 * Reserve space for freeing [off, off+len) in 'object'.
 * len == DMU_OBJECT_END means "through the end of the object".
 * Must be called before the tx is assigned (tx_txg == 0).
 */
void
dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
{
	ASSERT(tx->tx_txg == 0);

	dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_FREE,
	    dmu_tx_hold_free_impl, off, len);
}
365*fa9e4066Sahrens 
/* ARGSUSED */
/*
 * Hold callback for THT_ZAP: estimate the space touched by 'nops'
 * name-adding operations and 'cops' change/remove operations on the
 * ZAP object 'dn' (NULL for a ZAP object yet to be created).
 */
static void
dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t nops, uint64_t cops)
{
	uint64_t nblocks;
	int epbs;

	dmu_tx_count_dnode(tx, dn);

	if (dn == NULL) {
		/*
		 * Assuming that nops+cops is not super huge, we will be
		 * able to fit a new object's entries into one leaf
		 * block.  So there will be at most 2 blocks total,
		 * including the header block.
		 */
		dmu_tx_count_write(tx, dn, 0, 2 << ZAP_BLOCK_SHIFT);
		return;
	}

	/* The object must really be a ZAP. */
	ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);

	if (dn->dn_maxblkid == 0 && nops == 0) {
		/*
		 * If there is only one block  (i.e. this is a micro-zap)
		 * and we are only doing updates, the accounting is simple.
		 */
		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
		    dn->dn_phys->dn_blkptr[0].blk_birth, tx))
			tx->tx_space_tooverwrite += dn->dn_datablksz;
		else
			tx->tx_space_towrite += dn->dn_datablksz;
		return;
	}

	/*
	 * 3 blocks overwritten per op: target leaf, ptrtbl block, header block
	 * 3 new blocks written per op: new split leaf, 2 grown ptrtbl blocks
	 */
	dmu_tx_count_write(tx, dn, dn->dn_maxblkid * dn->dn_datablksz,
	    (nops * 6ULL + cops * 3ULL) << ZAP_BLOCK_SHIFT);

	/*
	 * If the modified blocks are scattered to the four winds,
	 * we'll have to modify an indirect twig for each.
	 */
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
		tx->tx_space_towrite +=
		    ((nops + cops) * 3ULL) << dn->dn_indblkshift;
}
417*fa9e4066Sahrens 
418*fa9e4066Sahrens void
419*fa9e4066Sahrens dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int ops)
420*fa9e4066Sahrens {
421*fa9e4066Sahrens 	ASSERT(tx->tx_txg == 0);
422*fa9e4066Sahrens 
423*fa9e4066Sahrens 	dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_ZAP,
424*fa9e4066Sahrens 	    dmu_tx_hold_zap_impl, (ops > 0?ops:0), (ops < 0?-ops:0));
425*fa9e4066Sahrens }
426*fa9e4066Sahrens 
/*
 * Reserve space for dirtying 'object''s bonus buffer (and the dnode).
 * Reuses the write callback with off == len == 0, so only the dnode
 * update is charged.  Must be called before assignment (tx_txg == 0).
 */
void
dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
{
	ASSERT(tx->tx_txg == 0);

	dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_BONUS,
	    dmu_tx_hold_write_impl, 0, 0);
}
435*fa9e4066Sahrens 
436*fa9e4066Sahrens 
/* ARGSUSED */
/*
 * Hold callback for THT_SPACE: charge a flat byte count against the
 * transaction's write reservation.
 */
static void
dmu_tx_hold_space_impl(dmu_tx_t *tx, dnode_t *dn,
    uint64_t space, uint64_t unused)
{
	tx->tx_space_towrite += space;
}
444*fa9e4066Sahrens 
/*
 * Reserve 'space' bytes of raw write space in the transaction, not
 * tied to any particular object.  Must be called before assignment.
 */
void
dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
	ASSERT(tx->tx_txg == 0);

	dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT, THT_SPACE,
	    dmu_tx_hold_space_impl, space, 0);
}
453*fa9e4066Sahrens 
454*fa9e4066Sahrens int
455*fa9e4066Sahrens dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
456*fa9e4066Sahrens {
457*fa9e4066Sahrens 	dmu_tx_hold_t *dth;
458*fa9e4066Sahrens 	int holds = 0;
459*fa9e4066Sahrens 
460*fa9e4066Sahrens 	/*
461*fa9e4066Sahrens 	 * By asserting that the tx is assigned, we're counting the
462*fa9e4066Sahrens 	 * number of dn_tx_holds, which is the same as the number of
463*fa9e4066Sahrens 	 * dn_holds.  Otherwise, we'd be counting dn_holds, but
464*fa9e4066Sahrens 	 * dn_tx_holds could be 0.
465*fa9e4066Sahrens 	 */
466*fa9e4066Sahrens 	ASSERT(tx->tx_txg != 0);
467*fa9e4066Sahrens 
468*fa9e4066Sahrens 	/* if (tx->tx_anyobj == TRUE) */
469*fa9e4066Sahrens 		/* return (0); */
470*fa9e4066Sahrens 
471*fa9e4066Sahrens 	for (dth = list_head(&tx->tx_holds); dth;
472*fa9e4066Sahrens 	    dth = list_next(&tx->tx_holds, dth)) {
473*fa9e4066Sahrens 		if (dth->dth_dnode && dth->dth_dnode->dn_object == object)
474*fa9e4066Sahrens 			holds++;
475*fa9e4066Sahrens 	}
476*fa9e4066Sahrens 
477*fa9e4066Sahrens 	return (holds);
478*fa9e4066Sahrens }
479*fa9e4066Sahrens 
/*
 * Debug-only verifier: panic if 'db' is being dirtied but no hold in
 * 'tx' covers its object and offset.  Compiled away without ZFS_DEBUG.
 */
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
#ifdef ZFS_DEBUG
	dmu_tx_hold_t *dth;
	int match_object = FALSE, match_offset = FALSE;
	dnode_t *dn = db->db_dnode;

	ASSERT(tx->tx_txg != 0);
	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset->os);
	ASSERT3U(dn->dn_object, ==, db->db.db_object);

	/* Syncing txs may dirty anything. */
	if (tx->tx_anyobj)
		return;

	/* XXX No checking on the meta dnode for now */
	if (db->db.db_object & DMU_PRIVATE_OBJECT)
		return;

	for (dth = list_head(&tx->tx_holds); dth;
	    dth = list_next(&tx->tx_holds, dth)) {
		ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
		if (dth->dth_dnode == dn && dth->dth_type != THT_NEWOBJECT)
			match_object = TRUE;
		if (dth->dth_dnode == NULL || dth->dth_dnode == dn) {
			/*
			 * Translate the hold's byte range (dth_arg1, dth_arg2)
			 * into block ids at this dbuf's indirection level.
			 */
			int datablkshift = dn->dn_datablkshift ?
			    dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
			int shift = datablkshift + epbs * db->db_level;
			uint64_t beginblk = shift >= 64 ? 0 :
			    (dth->dth_arg1 >> shift);
			uint64_t endblk = shift >= 64 ? 0 :
			    ((dth->dth_arg1 + dth->dth_arg2 - 1) >> shift);
			uint64_t blkid = db->db_blkid;

			/* XXX dth_arg2 better not be zero... */

			dprintf("found dth type %x beginblk=%llx endblk=%llx\n",
			    dth->dth_type, beginblk, endblk);

			switch (dth->dth_type) {
			case THT_WRITE:
				if (blkid >= beginblk && blkid <= endblk)
					match_offset = TRUE;
				/*
				 * We will let this hold work for the bonus
				 * buffer so that we don't need to hold it
				 * when creating a new object.
				 */
				if (blkid == DB_BONUS_BLKID)
					match_offset = TRUE;
				/*
				 * They might have to increase nlevels,
				 * thus dirtying the new TLIBs.  Or they
				 * might have to change the block size,
				 * thus dirtying the new lvl=0 blk=0.
				 */
				if (blkid == 0)
					match_offset = TRUE;
				break;
			case THT_FREE:
				if (blkid == beginblk &&
				    (dth->dth_arg1 != 0 ||
				    dn->dn_maxblkid == 0))
					match_offset = TRUE;
				if (blkid == endblk &&
				    dth->dth_arg2 != DMU_OBJECT_END)
					match_offset = TRUE;
				break;
			case THT_BONUS:
				if (blkid == DB_BONUS_BLKID)
					match_offset = TRUE;
				break;
			case THT_ZAP:
				match_offset = TRUE;
				break;
			case THT_NEWOBJECT:
				match_object = TRUE;
				break;
			default:
				ASSERT(!"bad dth_type");
			}
		}
		if (match_object && match_offset)
			return;
	}
	/* No hold covered this dirty -- caller violated the tx protocol. */
	panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n",
	    (u_longlong_t)db->db.db_object, db->db_level,
	    (u_longlong_t)db->db_blkid);
#endif
}
571*fa9e4066Sahrens 
/*
 * Attempt to assign 'tx' to the currently open txg: take per-dnode
 * assignments, run each hold's space-estimation callback, and reserve
 * the worst-case space with the dsl_dir.  On failure (ERESTART or a
 * reservation error) the caller must undo via dmu_tx_unassign(),
 * passing back *last_dth, the last hold processed.
 */
static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how, dmu_tx_hold_t **last_dth)
{
	dmu_tx_hold_t *dth;
	uint64_t lsize, asize, fsize;

	*last_dth = NULL;

	tx->tx_space_towrite = 0;
	tx->tx_space_tofree = 0;
	tx->tx_space_tooverwrite = 0;
	tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);

	/* A specific-txg request can only succeed if that txg is open. */
	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
		return (ERESTART);

	for (dth = list_head(&tx->tx_holds); dth;
	    *last_dth = dth, dth = list_next(&tx->tx_holds, dth)) {
		dnode_t *dn = dth->dth_dnode;
		if (dn != NULL) {
			mutex_enter(&dn->dn_mtx);
			/*
			 * The dnode is still assigned to the previous txg;
			 * wait for those holds to drain (or bail if the
			 * caller can't block).
			 */
			while (dn->dn_assigned_txg == tx->tx_txg - 1) {
				if (txg_how != TXG_WAIT) {
					mutex_exit(&dn->dn_mtx);
					return (ERESTART);
				}
				cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
			}
			if (dn->dn_assigned_txg == 0) {
				ASSERT(dn->dn_assigned_tx == NULL);
				dn->dn_assigned_txg = tx->tx_txg;
				dn->dn_assigned_tx = tx;
			} else {
				ASSERT(dn->dn_assigned_txg == tx->tx_txg);
				/* Multiple txs in this txg: disown the dnode. */
				if (dn->dn_assigned_tx != tx)
					dn->dn_assigned_tx = NULL;
			}
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
		if (dth->dth_func)
			dth->dth_func(tx, dn, dth->dth_arg1, dth->dth_arg2);
	}

	/*
	 * Convert logical size to worst-case allocated size.
	 */
	fsize = spa_get_asize(tx->tx_pool->dp_spa, tx->tx_space_tooverwrite) +
	    tx->tx_space_tofree;
	lsize = tx->tx_space_towrite + tx->tx_space_tooverwrite;
	asize = spa_get_asize(tx->tx_pool->dp_spa, lsize);
	tx->tx_space_towrite = asize;

	if (tx->tx_dir && asize != 0) {
		int err = dsl_dir_tempreserve_space(tx->tx_dir,
		    lsize, asize, fsize, &tx->tx_tempreserve_cookie, tx);
		if (err)
			return (err);
	}

	return (0);
}
634*fa9e4066Sahrens 
/*
 * Undo a failed dmu_tx_try_assign(): walk the holds backwards from
 * 'last_dth' (the last one processed), dropping each dnode's tx hold
 * and releasing the txg hold.  Returns the txg that was abandoned so
 * the caller can wait for the next one.
 */
static uint64_t
dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
{
	uint64_t txg = tx->tx_txg;
	dmu_tx_hold_t *dth;

	ASSERT(txg != 0);

	txg_rele_to_quiesce(&tx->tx_txgh);

	for (dth = last_dth; dth; dth = list_prev(&tx->tx_holds, dth)) {
		dnode_t *dn = dth->dth_dnode;

		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, txg);

		/* Last tx hold: clear the assignment and wake any waiters. */
		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			dn->dn_assigned_tx = NULL;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
	}

	txg_rele_to_sync(&tx->tx_txgh);

	tx->tx_txg = 0;
	return (txg);
}
666*fa9e4066Sahrens 
/*
 * Assign tx to a transaction group.  txg_how can be one of:
 *
 * (1)	TXG_WAIT.  If the current open txg is full, waits until there's
 *	a new one.  This should be used when you're not holding locks.
 *	It will only fail if we're truly out of space (or over quota).
 *
 * (2)	TXG_NOWAIT.  If we can't assign into the current open txg without
 *	blocking, returns immediately with ERESTART.  This should be used
 *	whenever you're holding locks.  On an ERESTART error, the caller
 *	should drop locks, do a txg_wait_open(dp, 0), and try again.
 *
 * (3)	A specific txg.  Use this if you need to ensure that multiple
 *	transactions all sync in the same txg.  Like TXG_NOWAIT, it
 *	returns ERESTART if it can't assign you into the requested txg.
 */
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	dmu_tx_hold_t *last_dth;
	int err;

	/* Must not already be assigned, and never from syncing context. */
	ASSERT(tx->tx_txg == 0);
	ASSERT(txg_how != 0);
	ASSERT(!dsl_pool_sync_context(tx->tx_pool));
	ASSERT3U(tx->tx_space_towrite, ==, 0);
	ASSERT3U(tx->tx_space_tofree, ==, 0);

	while ((err = dmu_tx_try_assign(tx, txg_how, &last_dth)) != 0) {
		/* Roll back the partial assignment before retrying/failing. */
		uint64_t txg = dmu_tx_unassign(tx, last_dth);

		if (err != ERESTART || txg_how != TXG_WAIT)
			return (err);

		txg_wait_open(tx->tx_pool, txg + 1);
	}

	txg_rele_to_quiesce(&tx->tx_txgh);

	return (0);
}
708*fa9e4066Sahrens 
709*fa9e4066Sahrens void
710*fa9e4066Sahrens dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
711*fa9e4066Sahrens {
712*fa9e4066Sahrens 	if (tx->tx_dir == NULL || delta == 0)
713*fa9e4066Sahrens 		return;
714*fa9e4066Sahrens 
715*fa9e4066Sahrens 	if (delta > 0) {
716*fa9e4066Sahrens 		ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=,
717*fa9e4066Sahrens 		    tx->tx_space_towrite);
718*fa9e4066Sahrens 		(void) refcount_add_many(&tx->tx_space_written, delta, NULL);
719*fa9e4066Sahrens 	} else {
720*fa9e4066Sahrens 		(void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
721*fa9e4066Sahrens 	}
722*fa9e4066Sahrens }
723*fa9e4066Sahrens 
/*
 * Commit an assigned transaction: drop all dnode holds, return any
 * temporary space reservation, release the txg hold (unless this is an
 * "anyobj" tx), and free the tx itself.  The tx pointer is invalid on
 * return.
 */
void
dmu_tx_commit(dmu_tx_t *tx)
{
	dmu_tx_hold_t *dth;

	/* Only an assigned tx (txg != 0) may be committed. */
	ASSERT(tx->tx_txg != 0);

	/*
	 * Tear down every hold record, releasing the dnode reference
	 * each one took when the hold was created.
	 */
	while (dth = list_head(&tx->tx_holds)) {
		dnode_t *dn = dth->dth_dnode;

		list_remove(&tx->tx_holds, dth);
		kmem_free(dth, sizeof (dmu_tx_hold_t));
		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		/*
		 * If this was the last tx holding the dnode in this txg,
		 * clear the assignment and wake any threads waiting for
		 * the dnode's tx holds to drain.
		 */
		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			dn->dn_assigned_tx = NULL;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
		dnode_rele(dn, tx);
	}

	/* Return the space temporarily reserved when the tx was assigned. */
	if (tx->tx_dir && tx->tx_space_towrite > 0) {
		dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
	}

	/* "anyobj" transactions never took a txg hold, so none to release. */
	if (tx->tx_anyobj == FALSE)
		txg_rele_to_sync(&tx->tx_txgh);
	dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
	    tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
	    tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
	/* Drain the accounting refcounts to zero so they can be destroyed. */
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#ifdef ZFS_DEBUG
	if (tx->tx_debug_buf)
		kmem_free(tx->tx_debug_buf, 4096);
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}
769*fa9e4066Sahrens 
770*fa9e4066Sahrens void
771*fa9e4066Sahrens dmu_tx_abort(dmu_tx_t *tx)
772*fa9e4066Sahrens {
773*fa9e4066Sahrens 	dmu_tx_hold_t *dth;
774*fa9e4066Sahrens 
775*fa9e4066Sahrens 	ASSERT(tx->tx_txg == 0);
776*fa9e4066Sahrens 
777*fa9e4066Sahrens 	while (dth = list_head(&tx->tx_holds)) {
778*fa9e4066Sahrens 		dnode_t *dn = dth->dth_dnode;
779*fa9e4066Sahrens 
780*fa9e4066Sahrens 		list_remove(&tx->tx_holds, dth);
781*fa9e4066Sahrens 		kmem_free(dth, sizeof (dmu_tx_hold_t));
782*fa9e4066Sahrens 		if (dn != NULL)
783*fa9e4066Sahrens 			dnode_rele(dn, tx);
784*fa9e4066Sahrens 	}
785*fa9e4066Sahrens 	refcount_destroy_many(&tx->tx_space_written,
786*fa9e4066Sahrens 	    refcount_count(&tx->tx_space_written));
787*fa9e4066Sahrens 	refcount_destroy_many(&tx->tx_space_freed,
788*fa9e4066Sahrens 	    refcount_count(&tx->tx_space_freed));
789*fa9e4066Sahrens #ifdef ZFS_DEBUG
790*fa9e4066Sahrens 	if (tx->tx_debug_buf)
791*fa9e4066Sahrens 		kmem_free(tx->tx_debug_buf, 4096);
792*fa9e4066Sahrens #endif
793*fa9e4066Sahrens 	kmem_free(tx, sizeof (dmu_tx_t));
794*fa9e4066Sahrens }
795*fa9e4066Sahrens 
796*fa9e4066Sahrens uint64_t
797*fa9e4066Sahrens dmu_tx_get_txg(dmu_tx_t *tx)
798*fa9e4066Sahrens {
799*fa9e4066Sahrens 	ASSERT(tx->tx_txg != 0);
800*fa9e4066Sahrens 	return (tx->tx_txg);
801*fa9e4066Sahrens }
802