xref: /illumos-gate/usr/src/uts/common/fs/zfs/dmu_object.c (revision eb633035)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
2206e0070dSMark Shellenbaum  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23bf26014cSMatthew Ahrens  * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
24e77d42eaSMatthew Ahrens  * Copyright 2014 HybridCluster. All rights reserved.
25fa9e4066Sahrens  */
26fa9e4066Sahrens 
27*eb633035STom Caputi #include <sys/dbuf.h>
28fa9e4066Sahrens #include <sys/dmu.h>
29fa9e4066Sahrens #include <sys/dmu_objset.h>
30fa9e4066Sahrens #include <sys/dmu_tx.h>
31fa9e4066Sahrens #include <sys/dnode.h>
322acef22dSMatthew Ahrens #include <sys/zap.h>
332acef22dSMatthew Ahrens #include <sys/zfeature.h>
3454811da5SToomas Soome #include <sys/dsl_dataset.h>
35fa9e4066Sahrens 
/*
 * Tunable: log2 of the number of dnode slots that each concurrent object
 * allocator reserves in one go.  With the default of 7 an allocator takes
 * 128 slots (4 blocks worth) at a time.  That value was found by experiment
 * to be the smallest one at which lock contention in the allocation path
 * no longer has a measurable effect.
 */
int dmu_object_alloc_chunk_shift = 7;
4454811da5SToomas Soome 
4554811da5SToomas Soome static uint64_t
dmu_object_alloc_impl(objset_t * os,dmu_object_type_t ot,int blocksize,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)4654811da5SToomas Soome dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
4754811da5SToomas Soome     int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
4854811da5SToomas Soome     int dnodesize, dmu_tx_t *tx)
49fa9e4066Sahrens {
50fa9e4066Sahrens 	uint64_t object;
51af346df5SNed Bass 	uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
52744947dcSTom Erickson 	    (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
53ea8dc4b6Seschrock 	dnode_t *dn = NULL;
5454811da5SToomas Soome 	int dn_slots = dnodesize >> DNODE_SHIFT;
5554811da5SToomas Soome 	boolean_t restarted = B_FALSE;
5654811da5SToomas Soome 	uint64_t *cpuobj = &os->os_obj_next_percpu[CPU_SEQID %
5754811da5SToomas Soome 	    os->os_obj_next_percpu_len];
5854811da5SToomas Soome 	int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
5954811da5SToomas Soome 	int error;
6054811da5SToomas Soome 
6154811da5SToomas Soome 	if (dn_slots == 0) {
6254811da5SToomas Soome 		dn_slots = DNODE_MIN_SLOTS;
6354811da5SToomas Soome 	} else {
6454811da5SToomas Soome 		ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
6554811da5SToomas Soome 		ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
6654811da5SToomas Soome 	}
6754811da5SToomas Soome 
6854811da5SToomas Soome 	/*
6954811da5SToomas Soome 	 * The "chunk" of dnodes that is assigned to a CPU-specific
7054811da5SToomas Soome 	 * allocator needs to be at least one block's worth, to avoid
7154811da5SToomas Soome 	 * lock contention on the dbuf.  It can be at most one L1 block's
7254811da5SToomas Soome 	 * worth, so that the "rescan after polishing off a L1's worth"
7354811da5SToomas Soome 	 * logic below will be sure to kick in.
7454811da5SToomas Soome 	 */
7554811da5SToomas Soome 	if (dnodes_per_chunk < DNODES_PER_BLOCK)
7654811da5SToomas Soome 		dnodes_per_chunk = DNODES_PER_BLOCK;
7754811da5SToomas Soome 	if (dnodes_per_chunk > L1_dnode_count)
7854811da5SToomas Soome 		dnodes_per_chunk = L1_dnode_count;
7954811da5SToomas Soome 
8054811da5SToomas Soome 	object = *cpuobj;
81fa9e4066Sahrens 
82fa9e4066Sahrens 	for (;;) {
83fa9e4066Sahrens 		/*
8454811da5SToomas Soome 		 * If we finished a chunk of dnodes, get a new one from
8554811da5SToomas Soome 		 * the global allocator.
86fa9e4066Sahrens 		 */
8754811da5SToomas Soome 		if ((P2PHASE(object, dnodes_per_chunk) == 0) ||
8854811da5SToomas Soome 		    (P2PHASE(object + dn_slots - 1, dnodes_per_chunk) <
8954811da5SToomas Soome 		    dn_slots)) {
9054811da5SToomas Soome 			DNODE_STAT_BUMP(dnode_alloc_next_chunk);
9154811da5SToomas Soome 			mutex_enter(&os->os_obj_lock);
9254811da5SToomas Soome 			ASSERT0(P2PHASE(os->os_obj_next_chunk,
9354811da5SToomas Soome 			    dnodes_per_chunk));
9454811da5SToomas Soome 			object = os->os_obj_next_chunk;
9554811da5SToomas Soome 
9654811da5SToomas Soome 			/*
9754811da5SToomas Soome 			 * Each time we polish off a L1 bp worth of dnodes
9854811da5SToomas Soome 			 * (2^12 objects), move to another L1 bp that's
9954811da5SToomas Soome 			 * still reasonably sparse (at most 1/4 full). Look
10054811da5SToomas Soome 			 * from the beginning at most once per txg. If we
10154811da5SToomas Soome 			 * still can't allocate from that L1 block, search
10254811da5SToomas Soome 			 * for an empty L0 block, which will quickly skip
10354811da5SToomas Soome 			 * to the end of the metadnode if the no nearby L0
10454811da5SToomas Soome 			 * blocks are empty. This fallback avoids a
10554811da5SToomas Soome 			 * pathology where full dnode blocks containing
10654811da5SToomas Soome 			 * large dnodes appear sparse because they have a
10754811da5SToomas Soome 			 * low blk_fill, leading to many failed allocation
10854811da5SToomas Soome 			 * attempts. In the long term a better mechanism to
10954811da5SToomas Soome 			 * search for sparse metadnode regions, such as
11054811da5SToomas Soome 			 * spacemaps, could be implemented.
11154811da5SToomas Soome 			 *
11254811da5SToomas Soome 			 * os_scan_dnodes is set during txg sync if enough
11354811da5SToomas Soome 			 * objects have been freed since the previous
11454811da5SToomas Soome 			 * rescan to justify backfilling again.
11554811da5SToomas Soome 			 *
11654811da5SToomas Soome 			 * Note that dmu_traverse depends on the behavior
11754811da5SToomas Soome 			 * that we use multiple blocks of the dnode object
11854811da5SToomas Soome 			 * before going back to reuse objects. Any change
11954811da5SToomas Soome 			 * to this algorithm should preserve that property
12054811da5SToomas Soome 			 * or find another solution to the issues described
12154811da5SToomas Soome 			 * in traverse_visitbp.
12254811da5SToomas Soome 			 */
12354811da5SToomas Soome 			if (P2PHASE(object, L1_dnode_count) == 0) {
12454811da5SToomas Soome 				uint64_t offset;
12554811da5SToomas Soome 				uint64_t blkfill;
12654811da5SToomas Soome 				int minlvl;
12754811da5SToomas Soome 				if (os->os_rescan_dnodes) {
12854811da5SToomas Soome 					offset = 0;
12954811da5SToomas Soome 					os->os_rescan_dnodes = B_FALSE;
13054811da5SToomas Soome 				} else {
13154811da5SToomas Soome 					offset = object << DNODE_SHIFT;
13254811da5SToomas Soome 				}
13354811da5SToomas Soome 				blkfill = restarted ? 1 : DNODES_PER_BLOCK >> 2;
13454811da5SToomas Soome 				minlvl = restarted ? 1 : 2;
13554811da5SToomas Soome 				restarted = B_TRUE;
13654811da5SToomas Soome 				error = dnode_next_offset(DMU_META_DNODE(os),
13754811da5SToomas Soome 				    DNODE_FIND_HOLE, &offset, minlvl,
13854811da5SToomas Soome 				    blkfill, 0);
13954811da5SToomas Soome 				if (error == 0) {
14054811da5SToomas Soome 					object = offset >> DNODE_SHIFT;
14154811da5SToomas Soome 				}
142af346df5SNed Bass 			}
14354811da5SToomas Soome 			/*
14454811da5SToomas Soome 			 * Note: if "restarted", we may find a L0 that
14554811da5SToomas Soome 			 * is not suitably aligned.
14654811da5SToomas Soome 			 */
14754811da5SToomas Soome 			os->os_obj_next_chunk =
14854811da5SToomas Soome 			    P2ALIGN(object, dnodes_per_chunk) +
14954811da5SToomas Soome 			    dnodes_per_chunk;
15054811da5SToomas Soome 			(void) atomic_swap_64(cpuobj, object);
15154811da5SToomas Soome 			mutex_exit(&os->os_obj_lock);
152fa9e4066Sahrens 		}
15354811da5SToomas Soome 
15454811da5SToomas Soome 		/*
15554811da5SToomas Soome 		 * The value of (*cpuobj) before adding dn_slots is the object
15654811da5SToomas Soome 		 * ID assigned to us.  The value afterwards is the object ID
15754811da5SToomas Soome 		 * assigned to whoever wants to do an allocation next.
15854811da5SToomas Soome 		 */
15954811da5SToomas Soome 		object = atomic_add_64_nv(cpuobj, dn_slots) - dn_slots;
160fa9e4066Sahrens 
161ea8dc4b6Seschrock 		/*
162ea8dc4b6Seschrock 		 * XXX We should check for an i/o error here and return
163ea8dc4b6Seschrock 		 * up to our caller.  Actually we should pre-read it in
164ea8dc4b6Seschrock 		 * dmu_tx_assign(), but there is currently no mechanism
165ea8dc4b6Seschrock 		 * to do so.
166ea8dc4b6Seschrock 		 */
16754811da5SToomas Soome 		error = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
16854811da5SToomas Soome 		    dn_slots, FTAG, &dn);
16954811da5SToomas Soome 		if (error == 0) {
17054811da5SToomas Soome 			rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
17154811da5SToomas Soome 			/*
17254811da5SToomas Soome 			 * Another thread could have allocated it; check
17354811da5SToomas Soome 			 * again now that we have the struct lock.
17454811da5SToomas Soome 			 */
17554811da5SToomas Soome 			if (dn->dn_type == DMU_OT_NONE) {
17654811da5SToomas Soome 				dnode_allocate(dn, ot, blocksize, 0,
17754811da5SToomas Soome 				    bonustype, bonuslen, dn_slots, tx);
17854811da5SToomas Soome 				rw_exit(&dn->dn_struct_rwlock);
17954811da5SToomas Soome 				dmu_tx_add_new_object(tx, dn);
18054811da5SToomas Soome 				dnode_rele(dn, FTAG);
18154811da5SToomas Soome 				return (object);
18254811da5SToomas Soome 			}
18354811da5SToomas Soome 			rw_exit(&dn->dn_struct_rwlock);
18454811da5SToomas Soome 			dnode_rele(dn, FTAG);
18554811da5SToomas Soome 			DNODE_STAT_BUMP(dnode_alloc_race);
18654811da5SToomas Soome 		}
187fa9e4066Sahrens 
18854811da5SToomas Soome 		/*
18954811da5SToomas Soome 		 * Skip to next known valid starting point on error. This
19054811da5SToomas Soome 		 * is the start of the next block of dnodes.
19154811da5SToomas Soome 		 */
19254811da5SToomas Soome 		if (dmu_object_next(os, &object, B_TRUE, 0) != 0) {
19354811da5SToomas Soome 			object = P2ROUNDUP(object + 1, DNODES_PER_BLOCK);
19454811da5SToomas Soome 			DNODE_STAT_BUMP(dnode_alloc_next_block);
19554811da5SToomas Soome 		}
19654811da5SToomas Soome 		(void) atomic_swap_64(cpuobj, object);
197fa9e4066Sahrens 	}
198fa9e4066Sahrens }
199fa9e4066Sahrens 
200221813c1SMatthew Ahrens uint64_t
dmu_object_alloc(objset_t * os,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)201221813c1SMatthew Ahrens dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
202221813c1SMatthew Ahrens     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
203221813c1SMatthew Ahrens {
20454811da5SToomas Soome 	return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
20554811da5SToomas Soome 	    bonuslen, 0, tx));
20654811da5SToomas Soome }
20754811da5SToomas Soome 
20854811da5SToomas Soome uint64_t
dmu_object_alloc_ibs(objset_t * os,dmu_object_type_t ot,int blocksize,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)20954811da5SToomas Soome dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize,
21054811da5SToomas Soome     int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
21154811da5SToomas Soome     dmu_tx_t *tx)
21254811da5SToomas Soome {
21354811da5SToomas Soome 	return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
21454811da5SToomas Soome 	    bonustype, bonuslen, 0, tx));
21554811da5SToomas Soome }
21654811da5SToomas Soome 
21754811da5SToomas Soome uint64_t
dmu_object_alloc_dnsize(objset_t * os,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)21854811da5SToomas Soome dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
21954811da5SToomas Soome     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
22054811da5SToomas Soome {
22154811da5SToomas Soome 	return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
22254811da5SToomas Soome 	    bonuslen, dnodesize, tx));
223221813c1SMatthew Ahrens }
224221813c1SMatthew Ahrens 
225fa9e4066Sahrens int
dmu_object_claim(objset_t * os,uint64_t object,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)226fa9e4066Sahrens dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
227fa9e4066Sahrens     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
22854811da5SToomas Soome {
22954811da5SToomas Soome 	return (dmu_object_claim_dnsize(os, object, ot, blocksize, bonustype,
23054811da5SToomas Soome 	    bonuslen, 0, tx));
23154811da5SToomas Soome }
23254811da5SToomas Soome 
23354811da5SToomas Soome int
dmu_object_claim_dnsize(objset_t * os,uint64_t object,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)23454811da5SToomas Soome dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
23554811da5SToomas Soome     int blocksize, dmu_object_type_t bonustype, int bonuslen,
23654811da5SToomas Soome     int dnodesize, dmu_tx_t *tx)
237fa9e4066Sahrens {
238fa9e4066Sahrens 	dnode_t *dn;
23954811da5SToomas Soome 	int dn_slots = dnodesize >> DNODE_SHIFT;
240ea8dc4b6Seschrock 	int err;
241fa9e4066Sahrens 
24254811da5SToomas Soome 	if (dn_slots == 0)
24354811da5SToomas Soome 		dn_slots = DNODE_MIN_SLOTS;
24454811da5SToomas Soome 	ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
24554811da5SToomas Soome 	ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
24654811da5SToomas Soome 
247ea8dc4b6Seschrock 	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
248be6fd75aSMatthew Ahrens 		return (SET_ERROR(EBADF));
249fa9e4066Sahrens 
25054811da5SToomas Soome 	err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, dn_slots,
25154811da5SToomas Soome 	    FTAG, &dn);
252ea8dc4b6Seschrock 	if (err)
253ea8dc4b6Seschrock 		return (err);
25454811da5SToomas Soome 	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, dn_slots, tx);
255b0c42cd4Sbzzz 	dmu_tx_add_new_object(tx, dn);
256b0c42cd4Sbzzz 
257fa9e4066Sahrens 	dnode_rele(dn, FTAG);
258fa9e4066Sahrens 
259fa9e4066Sahrens 	return (0);
260fa9e4066Sahrens }
261fa9e4066Sahrens 
262fa9e4066Sahrens int
dmu_object_reclaim(objset_t * os,uint64_t object,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)263fa9e4066Sahrens dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
264e77d42eaSMatthew Ahrens     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
26554811da5SToomas Soome {
26654811da5SToomas Soome 	return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
267*eb633035STom Caputi 	    bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
26854811da5SToomas Soome }
26954811da5SToomas Soome 
27054811da5SToomas Soome int
dmu_object_reclaim_dnsize(objset_t * os,uint64_t object,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,int dnodesize,boolean_t keep_spill,dmu_tx_t * tx)27154811da5SToomas Soome dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
27254811da5SToomas Soome     int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
273*eb633035STom Caputi     boolean_t keep_spill, dmu_tx_t *tx)
274fa9e4066Sahrens {
275fa9e4066Sahrens 	dnode_t *dn;
27654811da5SToomas Soome 	int dn_slots = dnodesize >> DNODE_SHIFT;
277ea8dc4b6Seschrock 	int err;
278fa9e4066Sahrens 
279811964cdSTom Caputi 	if (dn_slots == 0)
280811964cdSTom Caputi 		dn_slots = DNODE_MIN_SLOTS;
281811964cdSTom Caputi 
2822bf405a2SMark Maybee 	if (object == DMU_META_DNODE_OBJECT)
283be6fd75aSMatthew Ahrens 		return (SET_ERROR(EBADF));
284fa9e4066Sahrens 
28554811da5SToomas Soome 	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
286ea8dc4b6Seschrock 	    FTAG, &dn);
287ea8dc4b6Seschrock 	if (err)
288ea8dc4b6Seschrock 		return (err);
2892bf405a2SMark Maybee 
290*eb633035STom Caputi 	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
291*eb633035STom Caputi 	    keep_spill, tx);
292*eb633035STom Caputi 
293*eb633035STom Caputi 	dnode_rele(dn, FTAG);
294*eb633035STom Caputi 	return (err);
295*eb633035STom Caputi }
296*eb633035STom Caputi 
297*eb633035STom Caputi int
dmu_object_rm_spill(objset_t * os,uint64_t object,dmu_tx_t * tx)298*eb633035STom Caputi dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
299*eb633035STom Caputi {
300*eb633035STom Caputi 	dnode_t *dn;
301*eb633035STom Caputi 	int err;
302*eb633035STom Caputi 
303*eb633035STom Caputi 	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
304*eb633035STom Caputi 	    FTAG, &dn);
305*eb633035STom Caputi 	if (err)
306*eb633035STom Caputi 		return (err);
307*eb633035STom Caputi 
308*eb633035STom Caputi 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
309*eb633035STom Caputi 	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
310*eb633035STom Caputi 		dbuf_rm_spill(dn, tx);
311*eb633035STom Caputi 		dnode_rm_spill(dn, tx);
312*eb633035STom Caputi 	}
313*eb633035STom Caputi 	rw_exit(&dn->dn_struct_rwlock);
3142bf405a2SMark Maybee 
315fa9e4066Sahrens 	dnode_rele(dn, FTAG);
316cf04dda1SMark Maybee 	return (err);
317fa9e4066Sahrens }
318fa9e4066Sahrens 
319fa9e4066Sahrens int
dmu_object_free(objset_t * os,uint64_t object,dmu_tx_t * tx)320fa9e4066Sahrens dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
321fa9e4066Sahrens {
322fa9e4066Sahrens 	dnode_t *dn;
323ea8dc4b6Seschrock 	int err;
324fa9e4066Sahrens 
325ea8dc4b6Seschrock 	ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
326fa9e4066Sahrens 
32754811da5SToomas Soome 	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
328ea8dc4b6Seschrock 	    FTAG, &dn);
329ea8dc4b6Seschrock 	if (err)
330ea8dc4b6Seschrock 		return (err);
331fa9e4066Sahrens 
332fa9e4066Sahrens 	ASSERT(dn->dn_type != DMU_OT_NONE);
333738e2a3cSPaul Dagnelie 	/*
334738e2a3cSPaul Dagnelie 	 * If we don't create this free range, we'll leak indirect blocks when
335738e2a3cSPaul Dagnelie 	 * we get to freeing the dnode in syncing context.
336738e2a3cSPaul Dagnelie 	 */
337cdb0ab79Smaybee 	dnode_free_range(dn, 0, DMU_OBJECT_END, tx);
338fa9e4066Sahrens 	dnode_free(dn, tx);
339fa9e4066Sahrens 	dnode_rele(dn, FTAG);
340fa9e4066Sahrens 
341fa9e4066Sahrens 	return (0);
342fa9e4066Sahrens }
343fa9e4066Sahrens 
344a2cdcdd2SPaul Dagnelie /*
345a2cdcdd2SPaul Dagnelie  * Return (in *objectp) the next object which is allocated (or a hole)
346a2cdcdd2SPaul Dagnelie  * after *object, taking into account only objects that may have been modified
347a2cdcdd2SPaul Dagnelie  * after the specified txg.
348a2cdcdd2SPaul Dagnelie  */
349fa9e4066Sahrens int
dmu_object_next(objset_t * os,uint64_t * objectp,boolean_t hole,uint64_t txg)3506754306eSahrens dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
351fa9e4066Sahrens {
35254811da5SToomas Soome 	uint64_t offset;
35354811da5SToomas Soome 	uint64_t start_obj;
35454811da5SToomas Soome 	struct dsl_dataset *ds = os->os_dsl_dataset;
355fa9e4066Sahrens 	int error;
356fa9e4066Sahrens 
35754811da5SToomas Soome 	if (*objectp == 0) {
35854811da5SToomas Soome 		start_obj = 1;
35954811da5SToomas Soome 	} else if (ds && ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE]) {
36054811da5SToomas Soome 		uint64_t i = *objectp + 1;
36154811da5SToomas Soome 		uint64_t last_obj = *objectp | (DNODES_PER_BLOCK - 1);
36254811da5SToomas Soome 		dmu_object_info_t doi;
36354811da5SToomas Soome 
36454811da5SToomas Soome 		/*
36554811da5SToomas Soome 		 * Scan through the remaining meta dnode block. The contents
36654811da5SToomas Soome 		 * of each slot in the block are known so it can be quickly
36754811da5SToomas Soome 		 * checked. If the block is exhausted without a match then
36854811da5SToomas Soome 		 * hand off to dnode_next_offset() for further scanning.
36954811da5SToomas Soome 		 */
37054811da5SToomas Soome 		while (i <= last_obj) {
37154811da5SToomas Soome 			error = dmu_object_info(os, i, &doi);
37254811da5SToomas Soome 			if (error == ENOENT) {
37354811da5SToomas Soome 				if (hole) {
37454811da5SToomas Soome 					*objectp = i;
37554811da5SToomas Soome 					return (0);
37654811da5SToomas Soome 				} else {
37754811da5SToomas Soome 					i++;
37854811da5SToomas Soome 				}
37954811da5SToomas Soome 			} else if (error == EEXIST) {
38054811da5SToomas Soome 				i++;
38154811da5SToomas Soome 			} else if (error == 0) {
38254811da5SToomas Soome 				if (hole) {
38354811da5SToomas Soome 					i += doi.doi_dnodesize >> DNODE_SHIFT;
38454811da5SToomas Soome 				} else {
38554811da5SToomas Soome 					*objectp = i;
38654811da5SToomas Soome 					return (0);
38754811da5SToomas Soome 				}
38854811da5SToomas Soome 			} else {
38954811da5SToomas Soome 				return (error);
39054811da5SToomas Soome 			}
39154811da5SToomas Soome 		}
39254811da5SToomas Soome 
39354811da5SToomas Soome 		start_obj = i;
39454811da5SToomas Soome 	} else {
39554811da5SToomas Soome 		start_obj = *objectp + 1;
39654811da5SToomas Soome 	}
39754811da5SToomas Soome 
39854811da5SToomas Soome 	offset = start_obj << DNODE_SHIFT;
39954811da5SToomas Soome 
400744947dcSTom Erickson 	error = dnode_next_offset(DMU_META_DNODE(os),
401cdb0ab79Smaybee 	    (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);
402fa9e4066Sahrens 
403fa9e4066Sahrens 	*objectp = offset >> DNODE_SHIFT;
404fa9e4066Sahrens 
405fa9e4066Sahrens 	return (error);
406fa9e4066Sahrens }
4072acef22dSMatthew Ahrens 
4082acef22dSMatthew Ahrens /*
4092acef22dSMatthew Ahrens  * Turn this object from old_type into DMU_OTN_ZAP_METADATA, and bump the
4102acef22dSMatthew Ahrens  * refcount on SPA_FEATURE_EXTENSIBLE_DATASET.
4112acef22dSMatthew Ahrens  *
4122acef22dSMatthew Ahrens  * Only for use from syncing context, on MOS objects.
4132acef22dSMatthew Ahrens  */
4142acef22dSMatthew Ahrens void
dmu_object_zapify(objset_t * mos,uint64_t object,dmu_object_type_t old_type,dmu_tx_t * tx)4152acef22dSMatthew Ahrens dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type,
4162acef22dSMatthew Ahrens     dmu_tx_t *tx)
4172acef22dSMatthew Ahrens {
4182acef22dSMatthew Ahrens 	dnode_t *dn;
4192acef22dSMatthew Ahrens 
4202acef22dSMatthew Ahrens 	ASSERT(dmu_tx_is_syncing(tx));
4212acef22dSMatthew Ahrens 
4222acef22dSMatthew Ahrens 	VERIFY0(dnode_hold(mos, object, FTAG, &dn));
4232acef22dSMatthew Ahrens 	if (dn->dn_type == DMU_OTN_ZAP_METADATA) {
4242acef22dSMatthew Ahrens 		dnode_rele(dn, FTAG);
4252acef22dSMatthew Ahrens 		return;
4262acef22dSMatthew Ahrens 	}
4272acef22dSMatthew Ahrens 	ASSERT3U(dn->dn_type, ==, old_type);
4282acef22dSMatthew Ahrens 	ASSERT0(dn->dn_maxblkid);
429bf26014cSMatthew Ahrens 
430bf26014cSMatthew Ahrens 	/*
431bf26014cSMatthew Ahrens 	 * We must initialize the ZAP data before changing the type,
432bf26014cSMatthew Ahrens 	 * so that concurrent calls to *_is_zapified() can determine if
433bf26014cSMatthew Ahrens 	 * the object has been completely zapified by checking the type.
434bf26014cSMatthew Ahrens 	 */
435bf26014cSMatthew Ahrens 	mzap_create_impl(mos, object, 0, 0, tx);
436bf26014cSMatthew Ahrens 
4372acef22dSMatthew Ahrens 	dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type =
4382acef22dSMatthew Ahrens 	    DMU_OTN_ZAP_METADATA;
4392acef22dSMatthew Ahrens 	dnode_setdirty(dn, tx);
4402acef22dSMatthew Ahrens 	dnode_rele(dn, FTAG);
4412acef22dSMatthew Ahrens 
4422acef22dSMatthew Ahrens 	spa_feature_incr(dmu_objset_spa(mos),
4432acef22dSMatthew Ahrens 	    SPA_FEATURE_EXTENSIBLE_DATASET, tx);
4442acef22dSMatthew Ahrens }
4452acef22dSMatthew Ahrens 
4462acef22dSMatthew Ahrens void
dmu_object_free_zapified(objset_t * mos,uint64_t object,dmu_tx_t * tx)4472acef22dSMatthew Ahrens dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
4482acef22dSMatthew Ahrens {
4492acef22dSMatthew Ahrens 	dnode_t *dn;
4502acef22dSMatthew Ahrens 	dmu_object_type_t t;
4512acef22dSMatthew Ahrens 
4522acef22dSMatthew Ahrens 	ASSERT(dmu_tx_is_syncing(tx));
4532acef22dSMatthew Ahrens 
4542acef22dSMatthew Ahrens 	VERIFY0(dnode_hold(mos, object, FTAG, &dn));
4552acef22dSMatthew Ahrens 	t = dn->dn_type;
4562acef22dSMatthew Ahrens 	dnode_rele(dn, FTAG);
4572acef22dSMatthew Ahrens 
4582acef22dSMatthew Ahrens 	if (t == DMU_OTN_ZAP_METADATA) {
4592acef22dSMatthew Ahrens 		spa_feature_decr(dmu_objset_spa(mos),
4602acef22dSMatthew Ahrens 		    SPA_FEATURE_EXTENSIBLE_DATASET, tx);
4612acef22dSMatthew Ahrens 	}
4622acef22dSMatthew Ahrens 	VERIFY0(dmu_object_free(mos, object, tx));
4632acef22dSMatthew Ahrens }
464