1fa9e406ahrens/*
2fa9e406ahrens * CDDL HEADER START
3fa9e406ahrens *
4fa9e406ahrens * The contents of this file are subject to the terms of the
5f65e61cahrens * Common Development and Distribution License (the "License").
6f65e61cahrens * You may not use this file except in compliance with the License.
7fa9e406ahrens *
8fa9e406ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e406ahrens * or http://www.opensolaris.org/os/licensing.
10fa9e406ahrens * See the License for the specific language governing permissions
11fa9e406ahrens * and limitations under the License.
12fa9e406ahrens *
13fa9e406ahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e406ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e406ahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e406ahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e406ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e406ahrens *
19fa9e406ahrens * CDDL HEADER END
20fa9e406ahrens */
21fa9e406ahrens/*
2206e0070Mark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
234d7988dPaul Dagnelie * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
24bc9014eJustin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
25c3d26abMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
26f06dce2Andrew Stormont * Copyright 2017 RackTop Systems.
27fa9e406ahrens */
28fa9e406ahrens
29fa9e406ahrens#include <sys/zfs_context.h>
30fa9e406ahrens#include <sys/dbuf.h>
31fa9e406ahrens#include <sys/dnode.h>
32fa9e406ahrens#include <sys/dmu.h>
33fa9e406ahrens#include <sys/dmu_impl.h>
34fa9e406ahrens#include <sys/dmu_tx.h>
35fa9e406ahrens#include <sys/dmu_objset.h>
36fa9e406ahrens#include <sys/dsl_dir.h>
37fa9e406ahrens#include <sys/dsl_dataset.h>
38fa9e406ahrens#include <sys/spa.h>
39fa9e406ahrens#include <sys/zio.h>
40fa9e406ahrens#include <sys/dmu_zfetch.h>
41bf16b11Matthew Ahrens#include <sys/range_tree.h>
42f67950bNasf-Fan#include <sys/zfs_project.h>
43fa9e406ahrens
4454811daToomas Soomednode_stats_t dnode_stats = {
4554811daToomas Soome	{ "dnode_hold_dbuf_hold",		KSTAT_DATA_UINT64 },
4654811daToomas Soome	{ "dnode_hold_dbuf_read",		KSTAT_DATA_UINT64 },
4754811daToomas Soome	{ "dnode_hold_alloc_hits",		KSTAT_DATA_UINT64 },
4854811daToomas Soome	{ "dnode_hold_alloc_misses",		KSTAT_DATA_UINT64 },
4954811daToomas Soome	{ "dnode_hold_alloc_interior",		KSTAT_DATA_UINT64 },
5054811daToomas Soome	{ "dnode_hold_alloc_lock_retry",	KSTAT_DATA_UINT64 },
5154811daToomas Soome	{ "dnode_hold_alloc_lock_misses",	KSTAT_DATA_UINT64 },
5254811daToomas Soome	{ "dnode_hold_alloc_type_none",		KSTAT_DATA_UINT64 },
5354811daToomas Soome	{ "dnode_hold_free_hits",		KSTAT_DATA_UINT64 },
5454811daToomas Soome	{ "dnode_hold_free_misses",		KSTAT_DATA_UINT64 },
5554811daToomas Soome	{ "dnode_hold_free_lock_misses",	KSTAT_DATA_UINT64 },
5654811daToomas Soome	{ "dnode_hold_free_lock_retry",		KSTAT_DATA_UINT64 },
5754811daToomas Soome	{ "dnode_hold_free_overflow",		KSTAT_DATA_UINT64 },
5854811daToomas Soome	{ "dnode_hold_free_refcount",		KSTAT_DATA_UINT64 },
5954811daToomas Soome	{ "dnode_free_interior_lock_retry",	KSTAT_DATA_UINT64 },
6054811daToomas Soome	{ "dnode_allocate",			KSTAT_DATA_UINT64 },
6154811daToomas Soome	{ "dnode_reallocate",			KSTAT_DATA_UINT64 },
6254811daToomas Soome	{ "dnode_buf_evict",			KSTAT_DATA_UINT64 },
6354811daToomas Soome	{ "dnode_alloc_next_chunk",		KSTAT_DATA_UINT64 },
6454811daToomas Soome	{ "dnode_alloc_race",			KSTAT_DATA_UINT64 },
6554811daToomas Soome	{ "dnode_alloc_next_block",		KSTAT_DATA_UINT64 },
6654811daToomas Soome	{ "dnode_move_invalid",			KSTAT_DATA_UINT64 },
6754811daToomas Soome	{ "dnode_move_recheck1",		KSTAT_DATA_UINT64 },
6854811daToomas Soome	{ "dnode_move_recheck2",		KSTAT_DATA_UINT64 },
6954811daToomas Soome	{ "dnode_move_special",			KSTAT_DATA_UINT64 },
7054811daToomas Soome	{ "dnode_move_handle",			KSTAT_DATA_UINT64 },
7154811daToomas Soome	{ "dnode_move_rwlock",			KSTAT_DATA_UINT64 },
7254811daToomas Soome	{ "dnode_move_active",			KSTAT_DATA_UINT64 },
7354811daToomas Soome};
7454811daToomas Soome
7554811daToomas Soomestatic kstat_t *dnode_ksp;
76fa9e406ahrensstatic kmem_cache_t *dnode_cache;
77fa9e406ahrens
78fa9e406ahrensstatic dnode_phys_t dnode_phys_zero;
79fa9e406ahrens
80fa9e406ahrensint zfs_default_bs = SPA_MINBLOCKSHIFT;
81fa9e406ahrensint zfs_default_ibs = DN_MAX_INDBLKSHIFT;
82fa9e406ahrens
83f06dce2Andrew Stormont#ifdef	_KERNEL
84744947dTom Ericksonstatic kmem_cbrc_t dnode_move(void *, void *, size_t, void *);
85f06dce2Andrew Stormont#endif	/* _KERNEL */
86744947dTom Erickson
870f6d88aAlex Reecestatic int
880f6d88aAlex Reecedbuf_compare(const void *x1, const void *x2)
890f6d88aAlex Reece{
900f6d88aAlex Reece	const dmu_buf_impl_t *d1 = x1;
910f6d88aAlex Reece	const dmu_buf_impl_t *d2 = x2;
920f6d88aAlex Reece
934d7988dPaul Dagnelie	int cmp = TREE_CMP(d1->db_level, d2->db_level);
94c4ab0d3Gvozden Neskovic	if (likely(cmp))
95c4ab0d3Gvozden Neskovic		return (cmp);
960f6d88aAlex Reece
974d7988dPaul Dagnelie	cmp = TREE_CMP(d1->db_blkid, d2->db_blkid);
98c4ab0d3Gvozden Neskovic	if (likely(cmp))
99c4ab0d3Gvozden Neskovic		return (cmp);
1000f6d88aAlex Reece
101a846f19Alex Reece	if (d1->db_state == DB_SEARCH) {
102a846f19Alex Reece		ASSERT3S(d2->db_state, !=, DB_SEARCH);
1030f6d88aAlex Reece		return (-1);
104a846f19Alex Reece	} else if (d2->db_state == DB_SEARCH) {
105a846f19Alex Reece		ASSERT3S(d1->db_state, !=, DB_SEARCH);
10686bb58aAlex Reece		return (1);
10786bb58aAlex Reece	}
10886bb58aAlex Reece
1094d7988dPaul Dagnelie	return (TREE_PCMP(d1, d2));
1100f6d88aAlex Reece}
1110f6d88aAlex Reece
112fa9e406ahrens/* ARGSUSED */
113fa9e406ahrensstatic int
114fa9e406ahrensdnode_cons(void *arg, void *unused, int kmflag)
115fa9e406ahrens{
116fa9e406ahrens	dnode_t *dn = arg;
117744947dTom Erickson	int i;
118fa9e406ahrens
119fa9e406ahrens	rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL);
120fa9e406ahrens	mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL);
121fa9e406ahrens	mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
122b5e70f9Ricardo M. Correia	cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
123b5e70f9Ricardo M. Correia
1243b2aab1Matthew Ahrens	/*
1253b2aab1Matthew Ahrens	 * Every dbuf has a reference, and dropping a tracked reference is
1263b2aab1Matthew Ahrens	 * O(number of references), so don't track dn_holds.
1273b2aab1Matthew Ahrens	 */
128e914aceTim Schumacher	zfs_refcount_create_untracked(&dn->dn_holds);
129e914aceTim Schumacher	zfs_refcount_create(&dn->dn_tx_holds);
130744947dTom Erickson	list_link_init(&dn->dn_link);
131744947dTom Erickson
132744947dTom Erickson	bzero(&dn->dn_next_nblkptr[0], sizeof (dn->dn_next_nblkptr));
133744947dTom Erickson	bzero(&dn->dn_next_nlevels[0], sizeof (dn->dn_next_nlevels));
134744947dTom Erickson	bzero(&dn->dn_next_indblkshift[0], sizeof (dn->dn_next_indblkshift));
135744947dTom Erickson	bzero(&dn->dn_next_bonustype[0], sizeof (dn->dn_next_bonustype));
136744947dTom Erickson	bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk));
137744947dTom Erickson	bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen));
138744947dTom Erickson	bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz));
139eb63303Tom Caputi	bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid));
140fa9e406ahrens
141fa9e406ahrens	for (i = 0; i < TXG_SIZE; i++) {
142aa02ea0Tom Caputi		multilist_link_init(&dn->dn_dirty_link[i]);
143bf16b11Matthew Ahrens		dn->dn_free_ranges[i] = NULL;
144c717a56maybee		list_create(&dn->dn_dirty_records[i],
145c717a56maybee		    sizeof (dbuf_dirty_record_t),
146c717a56maybee		    offsetof(dbuf_dirty_record_t, dr_dirty_node));
147fa9e406ahrens	}
148fa9e406ahrens
149744947dTom Erickson	dn->dn_allocated_txg = 0;
150744947dTom Erickson	dn->dn_free_txg = 0;
151744947dTom Erickson	dn->dn_assigned_txg = 0;
152aa02ea0Tom Caputi	dn->dn_dirty_txg = 0;
153744947dTom Erickson	dn->dn_dirtyctx = 0;
154744947dTom Erickson	dn->dn_dirtyctx_firstset = NULL;
155744947dTom Erickson	dn->dn_bonus = NULL;
156744947dTom Erickson	dn->dn_have_spill = B_FALSE;
157744947dTom Erickson	dn->dn_zio = NULL;
158744947dTom Erickson	dn->dn_oldused = 0;
159744947dTom Erickson	dn->dn_oldflags = 0;
160744947dTom Erickson	dn->dn_olduid = 0;
161744947dTom Erickson	dn->dn_oldgid = 0;
162f67950bNasf-Fan	dn->dn_oldprojid = ZFS_DEFAULT_PROJID;
163744947dTom Erickson	dn->dn_newuid = 0;
164744947dTom Erickson	dn->dn_newgid = 0;
165f67950bNasf-Fan	dn->dn_newprojid = ZFS_DEFAULT_PROJID;
166744947dTom Erickson	dn->dn_id_flags = 0;
167744947dTom Erickson
168744947dTom Erickson	dn->dn_dbufs_count = 0;
1690f6d88aAlex Reece	avl_create(&dn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
170fa9e406ahrens	    offsetof(dmu_buf_impl_t, db_link));
171fa9e406ahrens
172744947dTom Erickson	dn->dn_moved = 0;
173fa9e406ahrens	return (0);
174fa9e406ahrens}
175fa9e406ahrens
176fa9e406ahrens/* ARGSUSED */
177fa9e406ahrensstatic void
178fa9e406ahrensdnode_dest(void *arg, void *unused)
179fa9e406ahrens{
180fa9e406ahrens	int i;
181fa9e406ahrens	dnode_t *dn = arg;
182fa9e406ahrens
183fa9e406ahrens	rw_destroy(&dn->dn_struct_rwlock);
184fa9e406ahrens	mutex_destroy(&dn->dn_mtx);
185fa9e406ahrens	mutex_destroy(&dn->dn_dbufs_mtx);
186b5e70f9Ricardo M. Correia	cv_destroy(&dn->dn_notxholds);
187e914aceTim Schumacher	zfs_refcount_destroy(&dn->dn_holds);
188e914aceTim Schumacher	zfs_refcount_destroy(&dn->dn_tx_holds);
189744947dTom Erickson	ASSERT(!list_link_active(&dn->dn_link));
190fa9e406ahrens
191fa9e406ahrens	for (i = 0; i < TXG_SIZE; i++) {
192aa02ea0Tom Caputi		ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
193bf16b11Matthew Ahrens		ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
194c717a56maybee		list_destroy(&dn->dn_dirty_records[i]);
195fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_nblkptr[i]);
196fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_nlevels[i]);
197fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_indblkshift[i]);
198fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_bonustype[i]);
199fb09f5aMadhav Suresh		ASSERT0(dn->dn_rm_spillblk[i]);
200fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_bonuslen[i]);
201fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_blksz[i]);
202eb63303Tom Caputi		ASSERT0(dn->dn_next_maxblkid[i]);
203fa9e406ahrens	}
204fa9e406ahrens
205fb09f5aMadhav Suresh	ASSERT0(dn->dn_allocated_txg);
206fb09f5aMadhav Suresh	ASSERT0(dn->dn_free_txg);
207fb09f5aMadhav Suresh	ASSERT0(dn->dn_assigned_txg);
208aa02ea0Tom Caputi	ASSERT0(dn->dn_dirty_txg);
209fb09f5aMadhav Suresh	ASSERT0(dn->dn_dirtyctx);
210744947dTom Erickson	ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL);
211744947dTom Erickson	ASSERT3P(dn->dn_bonus, ==, NULL);
212744947dTom Erickson	ASSERT(!dn->dn_have_spill);
213744947dTom Erickson	ASSERT3P(dn->dn_zio, ==, NULL);
214fb09f5aMadhav Suresh	ASSERT0(dn->dn_oldused);
215fb09f5aMadhav Suresh	ASSERT0(dn->dn_oldflags);
216fb09f5aMadhav Suresh	ASSERT0(dn->dn_olduid);
217fb09f5aMadhav Suresh	ASSERT0(dn->dn_oldgid);
218f67950bNasf-Fan	ASSERT0(dn->dn_oldprojid);
219fb09f5aMadhav Suresh	ASSERT0(dn->dn_newuid);
220fb09f5aMadhav Suresh	ASSERT0(dn->dn_newgid);
221f67950bNasf-Fan	ASSERT0(dn->dn_newprojid);
222fb09f5aMadhav Suresh	ASSERT0(dn->dn_id_flags);
223fb09f5aMadhav Suresh
224fb09f5aMadhav Suresh	ASSERT0(dn->dn_dbufs_count);
2250f6d88aAlex Reece	avl_destroy(&dn->dn_dbufs);
226fa9e406ahrens}
227fa9e406ahrens
228fa9e406ahrensvoid
229fa9e406ahrensdnode_init(void)
230fa9e406ahrens{
231744947dTom Erickson	ASSERT(dnode_cache == NULL);
232fa9e406ahrens	dnode_cache = kmem_cache_create("dnode_t",
233fa9e406ahrens	    sizeof (dnode_t),
234fa9e406ahrens	    0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0);
235f06dce2Andrew Stormont#ifdef	_KERNEL
236744947dTom Erickson	kmem_cache_set_move(dnode_cache, dnode_move);
23754811daToomas Soome
23854811daToomas Soome	dnode_ksp = kstat_create("zfs", 0, "dnodestats", "misc",
23954811daToomas Soome	    KSTAT_TYPE_NAMED, sizeof (dnode_stats) / sizeof (kstat_named_t),
24054811daToomas Soome	    KSTAT_FLAG_VIRTUAL);
24154811daToomas Soome	if (dnode_ksp != NULL) {
24254811daToomas Soome		dnode_ksp->ks_data = &dnode_stats;
24354811daToomas Soome		kstat_install(dnode_ksp);
24454811daToomas Soome	}
245f06dce2Andrew Stormont#endif	/* _KERNEL */
246fa9e406ahrens}
247fa9e406ahrens
248fa9e406ahrensvoid
249fa9e406ahrensdnode_fini(void)
250fa9e406ahrens{
25154811daToomas Soome	if (dnode_ksp != NULL) {
25254811daToomas Soome		kstat_delete(dnode_ksp);
25354811daToomas Soome		dnode_ksp = NULL;
25454811daToomas Soome	}
25554811daToomas Soome
256fa9e406ahrens	kmem_cache_destroy(dnode_cache);
257744947dTom Erickson	dnode_cache = NULL;
258fa9e406ahrens}
259fa9e406ahrens
260fa9e406ahrens
#ifdef ZFS_DEBUG
/*
 * Debug-only consistency check of a dnode's in-core state.
 *
 * The pointer and on-disk type checks at the top always run.  The remaining
 * checks run only when ZFS_DEBUG_DNODE_VERIFY is set in zfs_flags; for those,
 * dn_struct_rwlock is taken as reader (and dropped again before returning)
 * unless RW_WRITE_HELD() reports that it is already held as writer.
 */
void
dnode_verify(dnode_t *dn)
{
	int drop_struct_lock = FALSE;

	ASSERT(dn->dn_phys);
	ASSERT(dn->dn_objset);
	ASSERT(dn->dn_handle->dnh_dnode == dn);

	ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));

	if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY))
		return;

	if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		drop_struct_lock = TRUE;
	}
	if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) {
		int i;
		int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);

		/*
		 * Only the upper bound is checked: an unsigned ">= 0"
		 * comparison is always true and verifies nothing.
		 */
		ASSERT3U(dn->dn_indblkshift, <=, SPA_MAXBLOCKSHIFT);
		if (dn->dn_datablkshift) {
			ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT);
			ASSERT3U(dn->dn_datablkshift, <=, SPA_MAXBLOCKSHIFT);
			ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
		}
		ASSERT3U(dn->dn_nlevels, <=, 30);
		ASSERT(DMU_OT_IS_VALID(dn->dn_type));
		ASSERT3U(dn->dn_nblkptr, >=, 1);
		ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
		ASSERT3U(dn->dn_bonuslen, <=, max_bonuslen);
		ASSERT3U(dn->dn_datablksz, ==,
		    dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
		/* The shift is non-zero exactly for power-of-two sizes. */
		ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0);
		/* Extra block pointers and the bonus share the same space. */
		ASSERT3U((dn->dn_nblkptr - 1) * sizeof (blkptr_t) +
		    dn->dn_bonuslen, <=, max_bonuslen);
		for (i = 0; i < TXG_SIZE; i++) {
			ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels);
		}
	}
	if (dn->dn_phys->dn_type != DMU_OT_NONE)
		ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels);
	ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL);
	if (dn->dn_dbuf != NULL) {
		/* dn_phys must point at this object's slot in its dbuf. */
		ASSERT3P(dn->dn_phys, ==,
		    (dnode_phys_t *)dn->dn_dbuf->db.db_data +
		    (dn->dn_object % (dn->dn_dbuf->db.db_size >> DNODE_SHIFT)));
	}
	if (drop_struct_lock)
		rw_exit(&dn->dn_struct_rwlock);
}
#endif
316fa9e406ahrens
317fa9e406ahrensvoid
318fa9e406ahrensdnode_byteswap(dnode_phys_t *dnp)
319fa9e406ahrens{
320fa9e406ahrens	uint64_t *buf64 = (void*)&dnp->dn_blkptr;
321fa9e406ahrens	int i;
322fa9e406ahrens
323fa9e406ahrens	if (dnp->dn_type == DMU_OT_NONE) {
324fa9e406ahrens		bzero(dnp, sizeof (dnode_phys_t));
325fa9e406ahrens		return;
326fa9e406ahrens	}
327fa9e406ahrens
328fa9e406ahrens	dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec);
329fa9e406ahrens	dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen);
33054811daToomas Soome	dnp->dn_extra_slots = BSWAP_8(dnp->dn_extra_slots);
331fa9e406ahrens	dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid);
33299653d4eschrock	dnp->dn_used = BSWAP_64(dnp->dn_used);
333fa9e406ahrens
334fa9e406ahrens	/*
335fa9e406ahrens	 * dn_nblkptr is only one byte, so it's OK to read it in either
336fa9e406ahrens	 * byte order.  We can't read dn_bouslen.
337fa9e406ahrens	 */
338fa9e406ahrens	ASSERT(dnp->dn_indblkshift <= SPA_MAXBLOCKSHIFT);
339fa9e406ahrens	ASSERT(dnp->dn_nblkptr <= DN_MAX_NBLKPTR);
340fa9e406ahrens	for (i = 0; i < dnp->dn_nblkptr * sizeof (blkptr_t)/8; i++)
341fa9e406ahrens		buf64[i] = BSWAP_64(buf64[i]);
342fa9e406ahrens
343fa9e406ahrens	/*
344fa9e406ahrens	 * OK to check dn_bonuslen for zero, because it won't matter if
345fa9e406ahrens	 * we have the wrong byte order.  This is necessary because the
346fa9e406ahrens	 * dnode dnode is smaller than a regular dnode.
347fa9e406ahrens	 */
348fa9e406ahrens	if (dnp->dn_bonuslen != 0) {
349fa9e406ahrens		/*
350fa9e406ahrens		 * Note that the bonus length calculated here may be
351fa9e406ahrens		 * longer than the actual bonus buffer.  This is because
352fa9e406ahrens		 * we always put the bonus buffer after the last block
353fa9e406ahrens		 * pointer (instead of packing it against the end of the
354fa9e406ahrens		 * dnode buffer).
355fa9e406ahrens		 */
356fa9e406ahrens		int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
35754811daToomas Soome		int slots = dnp->dn_extra_slots + 1;
35854811daToomas Soome		size_t len = DN_SLOTS_TO_BONUSLEN(slots) - off;
359ad135b5Christopher Siden		ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
360ad135b5Christopher Siden		dmu_object_byteswap_t byteswap =
361ad135b5Christopher Siden		    DMU_OT_BYTESWAP(dnp->dn_bonustype);
362ad135b5Christopher Siden		dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len);
363fa9e406ahrens	}
3640a586ceMark Shellenbaum
3650a586ceMark Shellenbaum	/* Swap SPILL block if we have one */
3660a586ceMark Shellenbaum	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
36754811daToomas Soome		byteswap_uint64_array(DN_SPILL_BLKPTR(dnp), sizeof (blkptr_t));
3680a586ceMark Shellenbaum
369fa9e406ahrens}
370fa9e406ahrens
371fa9e406ahrensvoid
372fa9e406ahrensdnode_buf_byteswap(void *vbuf, size_t size)
373fa9e406ahrens{
37454811daToomas Soome	int i = 0;
375fa9e406ahrens
376fa9e406ahrens	ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
377fa9e406ahrens	ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0);
378fa9e406ahrens
37954811daToomas Soome	while (i < size) {
38054811daToomas Soome		dnode_phys_t *dnp = (void *)(((char *)vbuf) + i);
38154811daToomas Soome		dnode_byteswap(dnp);
38254811daToomas Soome
38354811daToomas Soome		i += DNODE_MIN_SIZE;
38454811daToomas Soome		if (dnp->dn_type != DMU_OT_NONE)
38554811daToomas Soome			i += dnp->dn_extra_slots * DNODE_MIN_SIZE;
386fa9e406ahrens	}
387fa9e406ahrens}
388fa9e406ahrens
3891934e92maybeevoid
3901934e92maybeednode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
3911934e92maybee{
392e914aceTim Schumacher	ASSERT3U(zfs_refcount_count(&dn->dn_holds), >=, 1);
3931934e92maybee
3941934e92maybee	dnode_setdirty(dn, tx);
3951934e92maybee	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
39654811daToomas Soome	ASSERT3U(newsize, <=, DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
3971934e92maybee	    (dn->dn_nblkptr-1) * sizeof (blkptr_t));
3981934e92maybee	dn->dn_bonuslen = newsize;
3991934e92maybee	if (newsize == 0)
4001934e92maybee		dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN;
4011934e92maybee	else
4021934e92maybee		dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
4031934e92maybee	rw_exit(&dn->dn_struct_rwlock);
4041934e92maybee}
4051934e92maybee
4060a586ceMark Shellenbaumvoid
4070a586ceMark Shellenbaumdnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx)
4080a586ceMark Shellenbaum{
409e914aceTim Schumacher	ASSERT3U(zfs_refcount_count(&dn->dn_holds), >=, 1);
4100a586ceMark Shellenbaum	dnode_setdirty(dn, tx);
4110a586ceMark Shellenbaum	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
4120a586ceMark Shellenbaum	dn->dn_bonustype = newtype;
4130a586ceMark Shellenbaum	dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
4140a586ceMark Shellenbaum	rw_exit(&dn->dn_struct_rwlock);
4150a586ceMark Shellenbaum}
4160a586ceMark Shellenbaum
4170a586ceMark Shellenbaumvoid
4180a586ceMark Shellenbaumdnode_rm_spill(dnode_t *dn, dmu_tx_t *tx)
4190a586ceMark Shellenbaum{
420e914aceTim Schumacher	ASSERT3U(zfs_refcount_count(&dn->dn_holds), >=, 1);
42106e0070Mark Shellenbaum	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
4220a586ceMark Shellenbaum	dnode_setdirty(dn, tx);
4230a586ceMark Shellenbaum	dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK;
4240a586ceMark Shellenbaum	dn->dn_have_spill = B_FALSE;
4250a586ceMark Shellenbaum}
4260a586ceMark Shellenbaum
427fa9e406ahrensstatic void
428fa9e406ahrensdnode_setdblksz(dnode_t *dn, int size)
429fa9e406ahrens{
430fb09f5aMadhav Suresh	ASSERT0(P2PHASE(size, SPA_MINBLOCKSIZE));
431fa9e406ahrens	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
432fa9e406ahrens	ASSERT3U(size, >=, SPA_MINBLOCKSIZE);
433fa9e406ahrens	ASSERT3U(size >> SPA_MINBLOCKSHIFT, <,
434fa9e406ahrens	    1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8));
435fa9e406ahrens	dn->dn_datablksz = size;
436fa9e406ahrens	dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT;
437bf16b11Matthew Ahrens	dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0;
438fa9e406ahrens}
439fa9e406ahrens
440fa9e406ahrensstatic dnode_t *
441503ad85Matthew Ahrensdnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
442744947dTom Erickson    uint64_t object, dnode_handle_t *dnh)
443fa9e406ahrens{
444bc9014eJustin Gibbs	dnode_t *dn;
445fa9e406ahrens
446bc9014eJustin Gibbs	dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
447f06dce2Andrew Stormont#ifdef _KERNEL
448744947dTom Erickson	ASSERT(!POINTER_IS_VALID(dn->dn_objset));
449f06dce2Andrew Stormont#endif /* _KERNEL */
450744947dTom Erickson	dn->dn_moved = 0;
451744947dTom Erickson
452744947dTom Erickson	/*
453744947dTom Erickson	 * Defer setting dn_objset until the dnode is ready to be a candidate
454744947dTom Erickson	 * for the dnode_move() callback.
455744947dTom Erickson	 */
456fa9e406ahrens	dn->dn_object = object;
457fa9e406ahrens	dn->dn_dbuf = db;
458744947dTom Erickson	dn->dn_handle = dnh;
459fa9e406ahrens	dn->dn_phys = dnp;
460fa9e406ahrens
461744947dTom Erickson	if (dnp->dn_datablkszsec) {
462fa9e406ahrens		dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
463744947dTom Erickson	} else {
464744947dTom Erickson		dn->dn_datablksz = 0;
465744947dTom Erickson		dn->dn_datablkszsec = 0;
466744947dTom Erickson		dn->dn_datablkshift = 0;
467744947dTom Erickson	}
468fa9e406ahrens	dn->dn_indblkshift = dnp->dn_indblkshift;
469fa9e406ahrens	dn->dn_nlevels = dnp->dn_nlevels;
470fa9e406ahrens	dn->dn_type = dnp->dn_type;
471fa9e406ahrens	dn->dn_nblkptr = dnp->dn_nblkptr;
472fa9e406ahrens	dn->dn_checksum = dnp->dn_checksum;
473fa9e406ahrens	dn->dn_compress = dnp->dn_compress;
474fa9e406ahrens	dn->dn_bonustype = dnp->dn_bonustype;
475fa9e406ahrens	dn->dn_bonuslen = dnp->dn_bonuslen;
47654811daToomas Soome	dn->dn_num_slots = dnp->dn_extra_slots + 1;
477fa9e406ahrens	dn->dn_maxblkid = dnp->dn_maxblkid;
4780a586ceMark Shellenbaum	dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
47906e0070Mark Shellenbaum	dn->dn_id_flags = 0;
480fa9e406ahrens
481fa9e406ahrens	dmu_zfetch_init(&dn->dn_zfetch, dn);
482fa9e406ahrens
483ad135b5Christopher Siden	ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
48454811daToomas Soome	ASSERT(zrl_is_locked(&dnh->dnh_zrlock));
48554811daToomas Soome	ASSERT(!DN_SLOT_IS_PTR(dnh->dnh_dnode));
486744947dTom Erickson
487fa9e406ahrens	mutex_enter(&os->os_lock);
488bc9014eJustin Gibbs
489bc9014eJustin Gibbs	/*
490bc9014eJustin Gibbs	 * Exclude special dnodes from os_dnodes so an empty os_dnodes
491bc9014eJustin Gibbs	 * signifies that the special dnodes have no references from
492bc9014eJustin Gibbs	 * their children (the entries in os_dnodes).  This allows
493bc9014eJustin Gibbs	 * dnode_destroy() to easily determine if the last child has
494bc9014eJustin Gibbs	 * been removed and then complete eviction of the objset.
495bc9014eJustin Gibbs	 */
496bc9014eJustin Gibbs	if (!DMU_OBJECT_IS_SPECIAL(object))
497bc9014eJustin Gibbs		list_insert_head(&os->os_dnodes, dn);
498744947dTom Erickson	membar_producer();
499bc9014eJustin Gibbs
500744947dTom Erickson	/*
501bc9014eJustin Gibbs	 * Everything else must be valid before assigning dn_objset
502bc9014eJustin Gibbs	 * makes the dnode eligible for dnode_move().
503744947dTom Erickson	 */
504744947dTom Erickson	dn->dn_objset = os;
505bc9014eJustin Gibbs
506bc9014eJustin Gibbs	dnh->dnh_dnode = dn;
507fa9e406ahrens	mutex_exit(&os->os_lock);
508fa9e406ahrens
5095a98e54Brendan Gregg - Sun Microsystems	arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
51054811daToomas Soome
511fa9e406ahrens	return (dn);
512fa9e406ahrens}
513fa9e406ahrens
514744947dTom Erickson/*
515744947dTom Erickson * Caller must be holding the dnode handle, which is released upon return.
516744947dTom Erickson */
517fa9e406ahrensstatic void
518fa9e406ahrensdnode_destroy(dnode_t *dn)
519fa9e406ahrens{
520503ad85Matthew Ahrens	objset_t *os = dn->dn_objset;
521bc9014eJustin Gibbs	boolean_t complete_os_eviction = B_FALSE;
522fa9e406ahrens
5230a586ceMark Shellenbaum	ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
524a2eea2eahrens
525fa9e406ahrens	mutex_enter(&os->os_lock);
526744947dTom Erickson	POINTER_INVALIDATE(&dn->dn_objset);
527bc9014eJustin Gibbs	if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
528bc9014eJustin Gibbs		list_remove(&os->os_dnodes, dn);
529bc9014eJustin Gibbs		complete_os_eviction =
530bc9014eJustin Gibbs		    list_is_empty(&os->os_dnodes) &&
531bc9014eJustin Gibbs		    list_link_active(&os->os_evicting_node);
532bc9014eJustin Gibbs	}
533fa9e406ahrens	mutex_exit(&os->os_lock);
534fa9e406ahrens
535744947dTom Erickson	/* the dnode can no longer move, so we can release the handle */
53654811daToomas Soome	if (!zrl_is_locked(&dn->dn_handle->dnh_zrlock))
53754811daToomas Soome		zrl_remove(&dn->dn_handle->dnh_zrlock);
538744947dTom Erickson
539744947dTom Erickson	dn->dn_allocated_txg = 0;
540744947dTom Erickson	dn->dn_free_txg = 0;
541744947dTom Erickson	dn->dn_assigned_txg = 0;
542aa02ea0Tom Caputi	dn->dn_dirty_txg = 0;
543744947dTom Erickson
544744947dTom Erickson	dn->dn_dirtyctx = 0;
545744947dTom Erickson	if (dn->dn_dirtyctx_firstset != NULL) {
546fa9e406ahrens		kmem_free(dn->dn_dirtyctx_firstset, 1);
547fa9e406ahrens		dn->dn_dirtyctx_firstset = NULL;
548fa9e406ahrens	}
549744947dTom Erickson	if (dn->dn_bonus != NULL) {
550ea8dc4beschrock		mutex_enter(&dn->dn_bonus->db_mtx);
551dcbf3bdGeorge Wilson		dbuf_destroy(dn->dn_bonus);
552ea8dc4beschrock		dn->dn_bonus = NULL;
553ea8dc4beschrock	}
554744947dTom Erickson	dn->dn_zio = NULL;
555744947dTom Erickson
556744947dTom Erickson	dn->dn_have_spill = B_FALSE;
557744947dTom Erickson	dn->dn_oldused = 0;
558744947dTom Erickson	dn->dn_oldflags = 0;
559744947dTom Erickson	dn->dn_olduid = 0;
560744947dTom Erickson	dn->dn_oldgid = 0;
561f67950bNasf-Fan	dn->dn_oldprojid = ZFS_DEFAULT_PROJID;
562744947dTom Erickson	dn->dn_newuid = 0;
563744947dTom Erickson	dn->dn_newgid = 0;
564f67950bNasf-Fan	dn->dn_newprojid = ZFS_DEFAULT_PROJID;
565744947dTom Erickson	dn->dn_id_flags = 0;
566744947dTom Erickson
567cf6106cMatthew Ahrens	dmu_zfetch_fini(&dn->dn_zfetch);
568fa9e406ahrens	kmem_cache_free(dnode_cache, dn);
5695a98e54Brendan Gregg - Sun Microsystems	arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
570bc9014eJustin Gibbs
571bc9014eJustin Gibbs	if (complete_os_eviction)
572bc9014eJustin Gibbs		dmu_objset_evict_done(os);
573fa9e406ahrens}
574fa9e406ahrens
575fa9e406ahrensvoid
576fa9e406ahrensdnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
57754811daToomas Soome    dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
578fa9e406ahrens{
579fa9e406ahrens	int i;
580fa9e406ahrens
58154811daToomas Soome	ASSERT3U(dn_slots, >, 0);
58254811daToomas Soome	ASSERT3U(dn_slots << DNODE_SHIFT, <=,
58354811daToomas Soome	    spa_maxdnodesize(dmu_objset_spa(dn->dn_objset)));
584b515258Matthew Ahrens	ASSERT3U(blocksize, <=,
585b515258Matthew Ahrens	    spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
586fa9e406ahrens	if (blocksize == 0)
587fa9e406ahrens		blocksize = 1 << zfs_default_bs;
5883b83abdahrens	else
5893b83abdahrens		blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE);
590fa9e406ahrens
591fa9e406ahrens	if (ibs == 0)
592fa9e406ahrens		ibs = zfs_default_ibs;
593fa9e406ahrens
594fa9e406ahrens	ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
595fa9e406ahrens
59654811daToomas Soome	dprintf("os=%p obj=%" PRIu64 " txg=%" PRIu64
59754811daToomas Soome	    " blocksize=%d ibs=%d dn_slots=%d\n",
59854811daToomas Soome	    dn->dn_objset, dn->dn_object, tx->tx_txg, blocksize, ibs, dn_slots);
59954811daToomas Soome	DNODE_STAT_BUMP(dnode_allocate);
600fa9e406ahrens
601fa9e406ahrens	ASSERT(dn->dn_type == DMU_OT_NONE);
602fa9e406ahrens	ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
603fa9e406ahrens	ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE);
604fa9e406ahrens	ASSERT(ot != DMU_OT_NONE);
605ad135b5Christopher Siden	ASSERT(DMU_OT_IS_VALID(ot));
606fa9e406ahrens	ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
6070a586ceMark Shellenbaum	    (bonustype == DMU_OT_SA && bonuslen == 0) ||
608fa9e406ahrens	    (bonustype != DMU_OT_NONE && bonuslen != 0));
609ad135b5Christopher Siden	ASSERT(DMU_OT_IS_VALID(bonustype));
61054811daToomas Soome	ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
611fa9e406ahrens	ASSERT(dn->dn_type == DMU_OT_NONE);
612fb09f5aMadhav Suresh	ASSERT0(dn->dn_maxblkid);
613fb09f5aMadhav Suresh	ASSERT0(dn->dn_allocated_txg);
614aa02ea0Tom Caputi	ASSERT0(dn->dn_dirty_txg);
615fb09f5aMadhav Suresh	ASSERT0(dn->dn_assigned_txg);
616e914aceTim Schumacher	ASSERT(zfs_refcount_is_zero(&dn->dn_tx_holds));
617e914aceTim Schumacher	ASSERT3U(zfs_refcount_count(&dn->dn_holds), <=, 1);
6180f6d88aAlex Reece	ASSERT(avl_is_empty(&dn->dn_dbufs));
619fa9e406ahrens
620fa9e406ahrens	for (i = 0; i < TXG_SIZE; i++) {
621fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_nblkptr[i]);
622fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_nlevels[i]);
623fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_indblkshift[i]);
624fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_bonuslen[i]);
625fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_bonustype[i]);
626fb09f5aMadhav Suresh		ASSERT0(dn->dn_rm_spillblk[i]);
627fb09f5aMadhav Suresh		ASSERT0(dn->dn_next_blksz[i]);
628eb63303Tom Caputi		ASSERT0(dn->dn_next_maxblkid[i]);
629aa02ea0Tom Caputi		ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
630c717a56maybee		ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
631bf16b11Matthew Ahrens		ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
632fa9e406ahrens	}
633fa9e406ahrens
634fa9e406ahrens	dn->dn_type = ot;
635fa9e406ahrens	dnode_setdblksz(dn, blocksize);
636fa9e406ahrens	dn->dn_indblkshift = ibs;
637fa9e406ahrens	dn->dn_nlevels = 1;
63854811daToomas Soome	dn->dn_num_slots = dn_slots;
6390a586ceMark Shellenbaum	if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
6400a586ceMark Shellenbaum		dn->dn_nblkptr = 1;
64154811daToomas Soome	else {
64254811daToomas Soome		dn->dn_nblkptr = MIN(DN_MAX_NBLKPTR,
64354811daToomas Soome		    1 + ((DN_SLOTS_TO_BONUSLEN(dn_slots) - bonuslen) >>
64454811daToomas Soome		    SPA_BLKPTRSHIFT));
64554811daToomas Soome	}
64654811daToomas Soome
647fa9e406ahrens	dn->dn_bonustype = bonustype;
648fa9e406ahrens	dn->dn_bonuslen = bonuslen;
649fa9e406ahrens	dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
650fa9e406ahrens	dn->dn_compress = ZIO_COMPRESS_INHERIT;
651fa9e406ahrens	dn->dn_dirtyctx = 0;
652fa9e406ahrens
653fa9e406ahrens	dn->dn_free_txg = 0;
654fa9e406ahrens	if (dn->dn_dirtyctx_firstset) {
655fa9e406ahrens		kmem_free(dn->dn_dirtyctx_firstset, 1);
656fa9e406ahrens		dn->dn_dirtyctx_firstset = NULL;
657fa9e406ahrens	}
658fa9e406ahrens
659fa9e406ahrens	dn->dn_allocated_txg = tx->tx_txg;
6600a586ceMark Shellenbaum	dn->dn_id_flags = 0;
661f676ed3ahrens
662fa9e406ahrens	dnode_setdirty(dn, tx);
663f676ed3ahrens	dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
6641934e92maybee	dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
6650a586ceMark Shellenbaum	dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
666f676ed3ahrens	dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz;
667fa9e406ahrens}
668fa9e406ahrens
669fa9e406ahrensvoid
670fa9e406ahrensdnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
671eb63303Tom Caputi    dmu_object_type_t bonustype, int bonuslen, int dn_slots,
672eb63303Tom Caputi    boolean_t keep_spill, dmu_tx_t *tx)
673fa9e406ahrens{
6742bf405aMark Maybee	int nblkptr;
675c543ec0ahrens
676fa9e406ahrens	ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
677b515258Matthew Ahrens	ASSERT3U(blocksize, <=,
678b515258Matthew Ahrens	    spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
679fb09f5aMadhav Suresh	ASSERT0(blocksize % SPA_MINBLOCKSIZE);
680ea8dc4beschrock	ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
681fa9e406ahrens	ASSERT(tx->tx_txg != 0);
682fa9e406ahrens	ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
68306e0070Mark Shellenbaum	    (bonustype != DMU_OT_NONE && bonuslen != 0) ||
68406e0070Mark Shellenbaum	    (bonustype == DMU_OT_SA && bonuslen == 0));
685ad135b5Christopher Siden	ASSERT(DMU_OT_IS_VALID(bonustype));
68654811daToomas Soome	ASSERT3U(bonuslen, <=,
68754811daToomas Soome	    DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
688946342aFabian Grünbichler	ASSERT3U(bonuslen, <=, DN_BONUS_SIZE(dn_slots << DNODE_SHIFT));
68954811daToomas Soome
69054811daToomas Soome	dnode_free_interior_slots(dn);
69154811daToomas Soome	DNODE_STAT_BUMP(dnode_reallocate);
692c543ec0ahrens
693ea8dc4beschrock	/* clean up any unreferenced dbufs */
6941934e92maybee	dnode_evict_dbufs(dn);
695da03de9Mark Maybee
69628d97a7Mark Shellenbaum	dn->dn_id_flags = 0;
69728d97a7Mark Shellenbaum
698fa9e406ahrens	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
699fa9e406ahrens	dnode_setdirty(dn, tx);
7002bf405aMark Maybee	if (dn->dn_datablksz != blocksize) {
7012bf405aMark Maybee		/* change blocksize */
7022bf405aMark Maybee		ASSERT(dn->dn_maxblkid == 0 &&
7032bf405aMark Maybee		    (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
7042bf405aMark Maybee		    dnode_block_freed(dn, 0)));
7052bf405aMark Maybee		dnode_setdblksz(dn, blocksize);
7062bf405aMark Maybee		dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
7072bf405aMark Maybee	}
7082bf405aMark Maybee	if (dn->dn_bonuslen != bonuslen)
7092bf405aMark Maybee		dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
71006e0070Mark Shellenbaum
71106e0070Mark Shellenbaum	if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
71206e0070Mark Shellenbaum		nblkptr = 1;
71306e0070Mark Shellenbaum	else
71454811daToomas Soome		nblkptr = MIN(DN_MAX_NBLKPTR,
71554811daToomas Soome		    1 + ((DN_SLOTS_TO_BONUSLEN(dn_slots) - bonuslen) >>
71654811daToomas Soome		    SPA_BLKPTRSHIFT));
7170a586ceMark Shellenbaum	if (dn->dn_bonustype != bonustype)
7180a586ceMark Shellenbaum		dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
719da03de9Mark Maybee	if (dn->dn_nblkptr != nblkptr)
720da03de9Mark Maybee		dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
721eb63303Tom Caputi	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) {
72206e0070Mark Shellenbaum		dbuf_rm_spill(dn, tx);
72306e0070Mark Shellenbaum		dnode_rm_spill(dn, tx);
7240a586ceMark Shellenbaum	}
725fa9e406ahrens	rw_exit(&dn->dn_struct_rwlock);
726fa9e406ahrens
727fa9e406ahrens	/* change type */
728fa9e406ahrens	dn->dn_type = ot;
729fa9e406ahrens
730fa9e406ahrens	/* change bonus size and type */
731fa9e406ahrens	mutex_enter(&dn->dn_mtx);
732fa9e406ahrens	dn->dn_bonustype = bonustype;
733fa9e406ahrens	dn->dn_bonuslen = bonuslen;
73454811daToomas Soome	dn->dn_num_slots = dn_slots;
735da03de9Mark Maybee	dn->dn_nblkptr = nblkptr;
736fa9e406ahrens	dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
737fa9e406ahrens	dn->dn_compress = ZIO_COMPRESS_INHERIT;
738fa9e406ahrens	ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
739fa9e406ahrens
740da03de9Mark Maybee	/* fix up the bonus db_size */
741da03de9Mark Maybee	if (dn->dn_bonus) {
7421934e92maybee		dn->dn_bonus->db.db_size =
74354811daToomas Soome		    DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
74454811daToomas Soome		    (dn->dn_nblkptr - 1) * sizeof (blkptr_t);
7451934e92maybee		ASSERT(dn->dn_bonuslen <= dn->dn_bonus->db.db_size);
7461934e92maybee	}
747432f72fahrens
748fa9e406ahrens	dn->dn_allocated_txg = tx->tx_txg;
749fa9e406ahrens	mutex_exit(&dn->dn_mtx);
750fa9e406ahrens}
751fa9e406ahrens
752f06dce2Andrew Stormont#ifdef	_KERNEL
753744947dTom Ericksonstatic void
754744947dTom Ericksondnode_move_impl(dnode_t *odn, dnode_t *ndn)
755744947dTom Erickson{
756744947dTom Erickson	int i;
757744947dTom Erickson
758744947dTom Erickson	ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock));
759744947dTom Erickson	ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx));
760744947dTom Erickson	ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx));
761744947dTom Erickson	ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock));
762744947dTom Erickson
763744947dTom Erickson	/* Copy fields. */
764744947dTom Erickson	ndn->dn_objset = odn->dn_objset;
765744947dTom Erickson	ndn->dn_object = odn->dn_object;
766744947dTom Erickson	ndn->dn_dbuf = odn->dn_dbuf;
767744947dTom Erickson	ndn->dn_handle = odn->dn_handle;
768744947dTom Erickson	ndn->dn_phys = odn->dn_phys;
769744947dTom Erickson	ndn->dn_type = odn->dn_type;
770744947dTom Erickson	ndn->dn_bonuslen = odn->dn_bonuslen;
771744947dTom Erickson	ndn->dn_bonustype = odn->dn_bonustype;
772744947dTom Erickson	ndn->dn_nblkptr = odn->dn_nblkptr;
773744947dTom Erickson	ndn->dn_checksum = odn->dn_checksum;
774744947dTom Erickson	ndn->dn_compress = odn->dn_compress;
775744947dTom Erickson	ndn->dn_nlevels = odn->dn_nlevels;
776744947dTom Erickson	ndn->dn_indblkshift = odn->dn_indblkshift;
777744947dTom Erickson	ndn->dn_datablkshift = odn->dn_datablkshift;
778744947dTom Erickson	ndn->dn_datablkszsec = odn->dn_datablkszsec;
779744947dTom Erickson	ndn->dn_datablksz = odn->dn_datablksz;
780744947dTom Erickson	ndn->dn_maxblkid = odn->dn_maxblkid;
78154811daToomas Soome	ndn->dn_num_slots = odn->dn_num_slots;
782c7fbe46Matthew Ahrens	bcopy(&odn->dn_next_type[0], &ndn->dn_next_type[0],
783c7fbe46Matthew Ahrens	    sizeof (odn->dn_next_type));
784744947dTom Erickson	bcopy(&odn->dn_next_nblkptr[0], &ndn->dn_next_nblkptr[0],
785744947dTom Erickson	    sizeof (odn->dn_next_nblkptr));
786744947dTom Erickson	bcopy(&odn->dn_next_nlevels[0], &ndn->dn_next_nlevels[0],
787744947dTom Erickson	    sizeof (odn->dn_next_nlevels));
788744947dTom Erickson	bcopy(&odn->dn_next_indblkshift[0], &ndn->dn_next_indblkshift[0],
789744947dTom Erickson	    sizeof (odn->dn_next_indblkshift));
790744947dTom Erickson	bcopy(&odn->dn_next_bonustype[0], &ndn->dn_next_bonustype[0],
791744947dTom Erickson	    sizeof (odn->dn_next_bonustype));
792744947dTom Erickson	bcopy(&odn->dn_rm_spillblk[0], &ndn->dn_rm_spillblk[0],
793744947dTom Erickson	    sizeof (odn->dn_rm_spillblk));
794744947dTom Erickson	bcopy(&odn->dn_next_bonuslen[0], &ndn->dn_next_bonuslen[0],
795744947dTom Erickson	    sizeof (odn->dn_next_bonuslen));
796744947dTom Erickson	bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0],
797744947dTom Erickson	    sizeof (odn->dn_next_blksz));
798eb63303Tom Caputi	bcopy(&odn->dn_next_maxblkid[0], &ndn->dn_next_maxblkid[0],
799eb63303Tom Caputi	    sizeof (odn->dn_next_maxblkid));
800744947dTom Erickson	for (i = 0; i < TXG_SIZE; i++) {
801744947dTom Erickson		list_move_tail(&ndn->dn_dirty_records[i],
802744947dTom Erickson		    &odn->dn_dirty_records[i]);
803744947dTom Erickson	}
804bf16b11Matthew Ahrens	bcopy(&odn->dn_free_ranges[0], &ndn->dn_free_ranges[0],
805bf16b11Matthew Ahrens	    sizeof (odn->dn_free_ranges));
806744947dTom Erickson	ndn->dn_allocated_txg = odn->dn_allocated_txg;
807744947dTom Erickson	ndn->dn_free_txg = odn->dn_free_txg;
808744947dTom Erickson	ndn->dn_assigned_txg = odn->dn_assigned_txg;
809aa02ea0Tom Caputi	ndn->dn_dirty_txg = odn->dn_dirty_txg;
810744947dTom Erickson	ndn->dn_dirtyctx = odn->dn_dirtyctx;
811744947dTom Erickson	ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
812e914aceTim Schumacher	ASSERT(zfs_refcount_count(&odn->dn_tx_holds) == 0);
813e914aceTim Schumacher	zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds);
8140f6d88aAlex Reece	ASSERT(avl_is_empty(&ndn->dn_dbufs));
8150f6d88aAlex Reece	avl_swap(&ndn->dn_dbufs, &odn->dn_dbufs);
816744947dTom Erickson	ndn->dn_dbufs_count = odn->dn_dbufs_count;
817744947dTom Erickson	ndn->dn_bonus = odn->dn_bonus;
818744947dTom Erickson	ndn->dn_have_spill = odn->dn_have_spill;
819744947dTom Erickson	ndn->dn_zio = odn->dn_zio;
820744947dTom Erickson	ndn->dn_oldused = odn->dn_oldused;
821744947dTom Erickson	ndn->dn_oldflags = odn->dn_oldflags;
822744947dTom Erickson	ndn->dn_olduid = odn->dn_olduid;
823744947dTom Erickson	ndn->dn_oldgid = odn->dn_oldgid;
824f67950bNasf-Fan	ndn->dn_oldprojid = odn->dn_oldprojid;
825744947dTom Erickson	ndn->dn_newuid = odn->dn_newuid;
826744947dTom Erickson	ndn->dn_newgid = odn->dn_newgid;
827f67950bNasf-Fan	ndn->dn_newprojid = odn->dn_newprojid;
828744947dTom Erickson	ndn->dn_id_flags = odn->dn_id_flags;
829744947dTom Erickson	dmu_zfetch_init(&ndn->dn_zfetch, NULL);
830744947dTom Erickson	list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream);
831744947dTom Erickson	ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode;
832744947dTom Erickson
833744947dTom Erickson	/*
834744947dTom Erickson	 * Update back pointers. Updating the handle fixes the back pointer of
835744947dTom Erickson	 * every descendant dbuf as well as the bonus dbuf.
836744947dTom Erickson	 */
837744947dTom Erickson	ASSERT(ndn->dn_handle->dnh_dnode == odn);
838744947dTom Erickson	ndn->dn_handle->dnh_dnode = ndn;
839744947dTom Erickson	if (ndn->dn_zfetch.zf_dnode == odn) {
840744947dTom Erickson		ndn->dn_zfetch.zf_dnode = ndn;
841744947dTom Erickson	}
842744947dTom Erickson
843744947dTom Erickson	/*
844744947dTom Erickson	 * Invalidate the original dnode by clearing all of its back pointers.
845744947dTom Erickson	 */
846744947dTom Erickson	odn->dn_dbuf = NULL;
847744947dTom Erickson	odn->dn_handle = NULL;
8480f6d88aAlex Reece	avl_create(&odn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
849744947dTom Erickson	    offsetof(dmu_buf_impl_t, db_link));
850744947dTom Erickson	odn->dn_dbufs_count = 0;
851744947dTom Erickson	odn->dn_bonus = NULL;
852744947dTom Erickson	odn->dn_zfetch.zf_dnode = NULL;
853744947dTom Erickson
854744947dTom Erickson	/*
855744947dTom Erickson	 * Set the low bit of the objset pointer to ensure that dnode_move()
856744947dTom Erickson	 * recognizes the dnode as invalid in any subsequent callback.
857744947dTom Erickson	 */
858744947dTom Erickson	POINTER_INVALIDATE(&odn->dn_objset);
859744947dTom Erickson
860744947dTom Erickson	/*
861744947dTom Erickson	 * Satisfy the destructor.
862744947dTom Erickson	 */
863744947dTom Erickson	for (i = 0; i < TXG_SIZE; i++) {
864744947dTom Erickson		list_create(&odn->dn_dirty_records[i],
865744947dTom Erickson		    sizeof (dbuf_dirty_record_t),
866744947dTom Erickson		    offsetof(dbuf_dirty_record_t, dr_dirty_node));
867bf16b11Matthew Ahrens		odn->dn_free_ranges[i] = NULL;
868744947dTom Erickson		odn->dn_next_nlevels[i] = 0;
869744947dTom Erickson		odn->dn_next_indblkshift[i] = 0;
870744947dTom Erickson		odn->dn_next_bonustype[i] = 0;
871744947dTom Erickson		odn->dn_rm_spillblk[i] = 0;
872744947dTom Erickson		odn->dn_next_bonuslen[i] = 0;
873744947dTom Erickson		odn->dn_next_blksz[i] = 0;
874744947dTom Erickson	}
875744947dTom Erickson	odn->dn_allocated_txg = 0;
876744947dTom Erickson	odn->dn_free_txg = 0;
877744947dTom Erickson	odn->dn_assigned_txg = 0;
878aa02ea0Tom Caputi	odn->dn_dirty_txg = 0;
879744947dTom Erickson	odn->dn_dirtyctx = 0;
880744947dTom Erickson	odn->dn_dirtyctx_firstset = NULL;
881744947dTom Erickson	odn->dn_have_spill = B_FALSE;
882744947dTom Erickson	odn->dn_zio = NULL;
883744947dTom Erickson	odn->dn_oldused = 0;
884744947dTom Erickson	odn->dn_oldflags = 0;
885744947dTom Erickson	odn->dn_olduid = 0;
886744947dTom Erickson	odn->dn_oldgid = 0;
887f67950bNasf-Fan	odn->dn_oldprojid = ZFS_DEFAULT_PROJID;
888744947dTom Erickson	odn->dn_newuid = 0;
889744947dTom Erickson	odn->dn_newgid = 0;
890f67950bNasf-Fan	odn->dn_newprojid = ZFS_DEFAULT_PROJID;
891744947dTom Erickson	odn->dn_id_flags = 0;
892744947dTom Erickson
893744947dTom Erickson	/*
894744947dTom Erickson	 * Mark the dnode.
895744947dTom Erickson	 */
896744947dTom Erickson	ndn->dn_moved = 1;
897744947dTom Erickson	odn->dn_moved = (uint8_t)-1;
898744947dTom Erickson}
899744947dTom Erickson
900