1fa9e4066Sahrens /*
2fa9e4066Sahrens * CDDL HEADER START
3fa9e4066Sahrens *
4fa9e4066Sahrens * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock * You may not use this file except in compliance with the License.
7fa9e4066Sahrens *
8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens * See the License for the specific language governing permissions
11fa9e4066Sahrens * and limitations under the License.
12fa9e4066Sahrens *
13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens *
19fa9e4066Sahrens * CDDL HEADER END
20fa9e4066Sahrens */
21fa9e4066Sahrens /*
2206e0070dSMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23bf26014cSMatthew Ahrens * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
24e77d42eaSMatthew Ahrens * Copyright 2014 HybridCluster. All rights reserved.
25fa9e4066Sahrens */
26fa9e4066Sahrens
27*eb633035STom Caputi #include <sys/dbuf.h>
28fa9e4066Sahrens #include <sys/dmu.h>
29fa9e4066Sahrens #include <sys/dmu_objset.h>
30fa9e4066Sahrens #include <sys/dmu_tx.h>
31fa9e4066Sahrens #include <sys/dnode.h>
322acef22dSMatthew Ahrens #include <sys/zap.h>
332acef22dSMatthew Ahrens #include <sys/zfeature.h>
3454811da5SToomas Soome #include <sys/dsl_dataset.h>
35fa9e4066Sahrens
/*
 * Each of the concurrent object allocators will grab
 * 2^dmu_object_alloc_chunk_shift dnode slots at a time.  The default is to
 * grab 128 slots, which is 4 blocks worth.  This was experimentally
 * determined to be the lowest value that eliminates the measurable effect
 * of lock contention from this code path.
 */
/* Tunable: may be overridden at boot/module-load time. */
int dmu_object_alloc_chunk_shift = 7;
4454811da5SToomas Soome
4554811da5SToomas Soome static uint64_t
dmu_object_alloc_impl(objset_t * os,dmu_object_type_t ot,int blocksize,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)4654811da5SToomas Soome dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
4754811da5SToomas Soome int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
4854811da5SToomas Soome int dnodesize, dmu_tx_t *tx)
49fa9e4066Sahrens {
50fa9e4066Sahrens uint64_t object;
51af346df5SNed Bass uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
52744947dcSTom Erickson (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
53ea8dc4b6Seschrock dnode_t *dn = NULL;
5454811da5SToomas Soome int dn_slots = dnodesize >> DNODE_SHIFT;
5554811da5SToomas Soome boolean_t restarted = B_FALSE;
5654811da5SToomas Soome uint64_t *cpuobj = &os->os_obj_next_percpu[CPU_SEQID %
5754811da5SToomas Soome os->os_obj_next_percpu_len];
5854811da5SToomas Soome int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
5954811da5SToomas Soome int error;
6054811da5SToomas Soome
6154811da5SToomas Soome if (dn_slots == 0) {
6254811da5SToomas Soome dn_slots = DNODE_MIN_SLOTS;
6354811da5SToomas Soome } else {
6454811da5SToomas Soome ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
6554811da5SToomas Soome ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
6654811da5SToomas Soome }
6754811da5SToomas Soome
6854811da5SToomas Soome /*
6954811da5SToomas Soome * The "chunk" of dnodes that is assigned to a CPU-specific
7054811da5SToomas Soome * allocator needs to be at least one block's worth, to avoid
7154811da5SToomas Soome * lock contention on the dbuf. It can be at most one L1 block's
7254811da5SToomas Soome * worth, so that the "rescan after polishing off a L1's worth"
7354811da5SToomas Soome * logic below will be sure to kick in.
7454811da5SToomas Soome */
7554811da5SToomas Soome if (dnodes_per_chunk < DNODES_PER_BLOCK)
7654811da5SToomas Soome dnodes_per_chunk = DNODES_PER_BLOCK;
7754811da5SToomas Soome if (dnodes_per_chunk > L1_dnode_count)
7854811da5SToomas Soome dnodes_per_chunk = L1_dnode_count;
7954811da5SToomas Soome
8054811da5SToomas Soome object = *cpuobj;
81fa9e4066Sahrens
82fa9e4066Sahrens for (;;) {
83fa9e4066Sahrens /*
8454811da5SToomas Soome * If we finished a chunk of dnodes, get a new one from
8554811da5SToomas Soome * the global allocator.
86fa9e4066Sahrens */
8754811da5SToomas Soome if ((P2PHASE(object, dnodes_per_chunk) == 0) ||
8854811da5SToomas Soome (P2PHASE(object + dn_slots - 1, dnodes_per_chunk) <
8954811da5SToomas Soome dn_slots)) {
9054811da5SToomas Soome DNODE_STAT_BUMP(dnode_alloc_next_chunk);
9154811da5SToomas Soome mutex_enter(&os->os_obj_lock);
9254811da5SToomas Soome ASSERT0(P2PHASE(os->os_obj_next_chunk,
9354811da5SToomas Soome dnodes_per_chunk));
9454811da5SToomas Soome object = os->os_obj_next_chunk;
9554811da5SToomas Soome
9654811da5SToomas Soome /*
9754811da5SToomas Soome * Each time we polish off a L1 bp worth of dnodes
9854811da5SToomas Soome * (2^12 objects), move to another L1 bp that's
9954811da5SToomas Soome * still reasonably sparse (at most 1/4 full). Look
10054811da5SToomas Soome * from the beginning at most once per txg. If we
10154811da5SToomas Soome * still can't allocate from that L1 block, search
10254811da5SToomas Soome * for an empty L0 block, which will quickly skip
10354811da5SToomas Soome * to the end of the metadnode if the no nearby L0
10454811da5SToomas Soome * blocks are empty. This fallback avoids a
10554811da5SToomas Soome * pathology where full dnode blocks containing
10654811da5SToomas Soome * large dnodes appear sparse because they have a
10754811da5SToomas Soome * low blk_fill, leading to many failed allocation
10854811da5SToomas Soome * attempts. In the long term a better mechanism to
10954811da5SToomas Soome * search for sparse metadnode regions, such as
11054811da5SToomas Soome * spacemaps, could be implemented.
11154811da5SToomas Soome *
11254811da5SToomas Soome * os_scan_dnodes is set during txg sync if enough
11354811da5SToomas Soome * objects have been freed since the previous
11454811da5SToomas Soome * rescan to justify backfilling again.
11554811da5SToomas Soome *
11654811da5SToomas Soome * Note that dmu_traverse depends on the behavior
11754811da5SToomas Soome * that we use multiple blocks of the dnode object
11854811da5SToomas Soome * before going back to reuse objects. Any change
11954811da5SToomas Soome * to this algorithm should preserve that property
12054811da5SToomas Soome * or find another solution to the issues described
12154811da5SToomas Soome * in traverse_visitbp.
12254811da5SToomas Soome */
12354811da5SToomas Soome if (P2PHASE(object, L1_dnode_count) == 0) {
12454811da5SToomas Soome uint64_t offset;
12554811da5SToomas Soome uint64_t blkfill;
12654811da5SToomas Soome int minlvl;
12754811da5SToomas Soome if (os->os_rescan_dnodes) {
12854811da5SToomas Soome offset = 0;
12954811da5SToomas Soome os->os_rescan_dnodes = B_FALSE;
13054811da5SToomas Soome } else {
13154811da5SToomas Soome offset = object << DNODE_SHIFT;
13254811da5SToomas Soome }
13354811da5SToomas Soome blkfill = restarted ? 1 : DNODES_PER_BLOCK >> 2;
13454811da5SToomas Soome minlvl = restarted ? 1 : 2;
13554811da5SToomas Soome restarted = B_TRUE;
13654811da5SToomas Soome error = dnode_next_offset(DMU_META_DNODE(os),
13754811da5SToomas Soome DNODE_FIND_HOLE, &offset, minlvl,
13854811da5SToomas Soome blkfill, 0);
13954811da5SToomas Soome if (error == 0) {
14054811da5SToomas Soome object = offset >> DNODE_SHIFT;
14154811da5SToomas Soome }
142af346df5SNed Bass }
14354811da5SToomas Soome /*
14454811da5SToomas Soome * Note: if "restarted", we may find a L0 that
14554811da5SToomas Soome * is not suitably aligned.
14654811da5SToomas Soome */
14754811da5SToomas Soome os->os_obj_next_chunk =
14854811da5SToomas Soome P2ALIGN(object, dnodes_per_chunk) +
14954811da5SToomas Soome dnodes_per_chunk;
15054811da5SToomas Soome (void) atomic_swap_64(cpuobj, object);
15154811da5SToomas Soome mutex_exit(&os->os_obj_lock);
152fa9e4066Sahrens }
15354811da5SToomas Soome
15454811da5SToomas Soome /*
15554811da5SToomas Soome * The value of (*cpuobj) before adding dn_slots is the object
15654811da5SToomas Soome * ID assigned to us. The value afterwards is the object ID
15754811da5SToomas Soome * assigned to whoever wants to do an allocation next.
15854811da5SToomas Soome */
15954811da5SToomas Soome object = atomic_add_64_nv(cpuobj, dn_slots) - dn_slots;
160fa9e4066Sahrens
161ea8dc4b6Seschrock /*
162ea8dc4b6Seschrock * XXX We should check for an i/o error here and return
163ea8dc4b6Seschrock * up to our caller. Actually we should pre-read it in
164ea8dc4b6Seschrock * dmu_tx_assign(), but there is currently no mechanism
165ea8dc4b6Seschrock * to do so.
166ea8dc4b6Seschrock */
16754811da5SToomas Soome error = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
16854811da5SToomas Soome dn_slots, FTAG, &dn);
16954811da5SToomas Soome if (error == 0) {
17054811da5SToomas Soome rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
17154811da5SToomas Soome /*
17254811da5SToomas Soome * Another thread could have allocated it; check
17354811da5SToomas Soome * again now that we have the struct lock.
17454811da5SToomas Soome */
17554811da5SToomas Soome if (dn->dn_type == DMU_OT_NONE) {
17654811da5SToomas Soome dnode_allocate(dn, ot, blocksize, 0,
17754811da5SToomas Soome bonustype, bonuslen, dn_slots, tx);
17854811da5SToomas Soome rw_exit(&dn->dn_struct_rwlock);
17954811da5SToomas Soome dmu_tx_add_new_object(tx, dn);
18054811da5SToomas Soome dnode_rele(dn, FTAG);
18154811da5SToomas Soome return (object);
18254811da5SToomas Soome }
18354811da5SToomas Soome rw_exit(&dn->dn_struct_rwlock);
18454811da5SToomas Soome dnode_rele(dn, FTAG);
18554811da5SToomas Soome DNODE_STAT_BUMP(dnode_alloc_race);
18654811da5SToomas Soome }
187fa9e4066Sahrens
18854811da5SToomas Soome /*
18954811da5SToomas Soome * Skip to next known valid starting point on error. This
19054811da5SToomas Soome * is the start of the next block of dnodes.
19154811da5SToomas Soome */
19254811da5SToomas Soome if (dmu_object_next(os, &object, B_TRUE, 0) != 0) {
19354811da5SToomas Soome object = P2ROUNDUP(object + 1, DNODES_PER_BLOCK);
19454811da5SToomas Soome DNODE_STAT_BUMP(dnode_alloc_next_block);
19554811da5SToomas Soome }
19654811da5SToomas Soome (void) atomic_swap_64(cpuobj, object);
197fa9e4066Sahrens }
198fa9e4066Sahrens }
199fa9e4066Sahrens
200221813c1SMatthew Ahrens uint64_t
dmu_object_alloc(objset_t * os,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)201221813c1SMatthew Ahrens dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
202221813c1SMatthew Ahrens dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
203221813c1SMatthew Ahrens {
20454811da5SToomas Soome return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
20554811da5SToomas Soome bonuslen, 0, tx));
20654811da5SToomas Soome }
20754811da5SToomas Soome
20854811da5SToomas Soome uint64_t
dmu_object_alloc_ibs(objset_t * os,dmu_object_type_t ot,int blocksize,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)20954811da5SToomas Soome dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize,
21054811da5SToomas Soome int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
21154811da5SToomas Soome dmu_tx_t *tx)
21254811da5SToomas Soome {
21354811da5SToomas Soome return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
21454811da5SToomas Soome bonustype, bonuslen, 0, tx));
21554811da5SToomas Soome }
21654811da5SToomas Soome
21754811da5SToomas Soome uint64_t
dmu_object_alloc_dnsize(objset_t * os,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)21854811da5SToomas Soome dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
21954811da5SToomas Soome dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
22054811da5SToomas Soome {
22154811da5SToomas Soome return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
22254811da5SToomas Soome bonuslen, dnodesize, tx));
223221813c1SMatthew Ahrens }
224221813c1SMatthew Ahrens
225fa9e4066Sahrens int
dmu_object_claim(objset_t * os,uint64_t object,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)226fa9e4066Sahrens dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
227fa9e4066Sahrens int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
22854811da5SToomas Soome {
22954811da5SToomas Soome return (dmu_object_claim_dnsize(os, object, ot, blocksize, bonustype,
23054811da5SToomas Soome bonuslen, 0, tx));
23154811da5SToomas Soome }
23254811da5SToomas Soome
/*
 * Allocate a specific, caller-chosen object number with the given type,
 * block size, bonus buffer, and dnode size (bytes; 0 selects the default).
 * The object must currently be free.
 *
 * Returns 0 on success; EBADF if the caller tries to claim the meta-dnode
 * object without tx-private permission; otherwise the error from
 * dnode_hold_impl() (e.g. the slots are already in use).
 */
int
dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen,
    int dnodesize, dmu_tx_t *tx)
{
	dnode_t *dn;
	int dn_slots = dnodesize >> DNODE_SHIFT;	/* bytes -> slots */
	int err;

	if (dn_slots == 0)
		dn_slots = DNODE_MIN_SLOTS;
	ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
	ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);

	/* Only tx-private (internal) callers may touch the meta-dnode. */
	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
		return (SET_ERROR(EBADF));

	/* Hold all dn_slots slots; fails if any of them is in use. */
	err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, dn_slots,
	    FTAG, &dn);
	if (err)
		return (err);
	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, dn_slots, tx);
	dmu_tx_add_new_object(tx, dn);

	dnode_rele(dn, FTAG);

	return (0);
}
261fa9e4066Sahrens
262fa9e4066Sahrens int
dmu_object_reclaim(objset_t * os,uint64_t object,dmu_object_type_t ot,int blocksize,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)263fa9e4066Sahrens dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
264e77d42eaSMatthew Ahrens int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
26554811da5SToomas Soome {
26654811da5SToomas Soome return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
267*eb633035STom Caputi bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
26854811da5SToomas Soome }
26954811da5SToomas Soome
/*
 * Reallocate an existing, currently-allocated object in place: give it a
 * new type, block size, bonus buffer, and dnode size (bytes; 0 selects the
 * default).  If keep_spill is set the object's spill block, if any, is
 * preserved across the reallocation.
 *
 * Returns 0 on success; EBADF for the meta-dnode object; otherwise the
 * error from dnode_hold_impl().
 */
int
dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
    boolean_t keep_spill, dmu_tx_t *tx)
{
	dnode_t *dn;
	int dn_slots = dnodesize >> DNODE_SHIFT;	/* bytes -> slots */
	int err;

	if (dn_slots == 0)
		dn_slots = DNODE_MIN_SLOTS;

	/* The meta-dnode can never be reclaimed. */
	if (object == DMU_META_DNODE_OBJECT)
		return (SET_ERROR(EBADF));

	/*
	 * Slot count 0: the hold derives the existing dnode's actual slot
	 * usage, since the object is already allocated.
	 */
	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
	    FTAG, &dn);
	if (err)
		return (err);

	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
	    keep_spill, tx);

	dnode_rele(dn, FTAG);
	return (err);
}
296*eb633035STom Caputi
/*
 * Remove the spill block from an allocated object, if it has one.
 * A no-op (still returning success) when no spill block is present.
 *
 * Returns 0 on success or the error from dnode_hold_impl().
 */
int
dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	dnode_t *dn;
	int err;

	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
	    FTAG, &dn);
	if (err)
		return (err);

	/* Take the struct lock as writer while modifying the spill state. */
	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
		/* Free the spill dbuf first, then clear the dnode's pointer. */
		dbuf_rm_spill(dn, tx);
		dnode_rm_spill(dn, tx);
	}
	rw_exit(&dn->dn_struct_rwlock);

	dnode_rele(dn, FTAG);
	return (err);
}
318fa9e4066Sahrens
/*
 * Free an allocated object, releasing all of its storage.
 *
 * Returns 0 on success or the error from dnode_hold_impl().  Freeing the
 * meta-dnode object is only permitted for tx-private callers (asserted).
 */
int
dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	dnode_t *dn;
	int err;

	ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));

	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
	    FTAG, &dn);
	if (err)
		return (err);

	ASSERT(dn->dn_type != DMU_OT_NONE);
	/*
	 * If we don't create this free range, we'll leak indirect blocks when
	 * we get to freeing the dnode in syncing context.
	 */
	dnode_free_range(dn, 0, DMU_OBJECT_END, tx);
	dnode_free(dn, tx);
	dnode_rele(dn, FTAG);

	return (0);
}
343fa9e4066Sahrens
/*
 * Return (in *objectp) the next object which is allocated (or a hole)
 * after *object, taking into account only objects that may have been modified
 * after the specified txg.
 *
 * Returns 0 and updates *objectp on success; ESRCH (from
 * dnode_next_offset()) when no further match exists; other errno values
 * on I/O error.
 */
int
dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
{
	uint64_t offset;
	uint64_t start_obj;
	struct dsl_dataset *ds = os->os_dsl_dataset;
	int error;

	if (*objectp == 0) {
		/* Object 0 is the meta-dnode itself; begin the scan at 1. */
		start_obj = 1;
	} else if (ds && ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE]) {
		/*
		 * With large dnodes enabled, an object may span multiple
		 * slots, so the per-slot walk below is needed within the
		 * current metadnode block.
		 */
		uint64_t i = *objectp + 1;
		uint64_t last_obj = *objectp | (DNODES_PER_BLOCK - 1);
		dmu_object_info_t doi;

		/*
		 * Scan through the remaining meta dnode block.  The contents
		 * of each slot in the block are known so it can be quickly
		 * checked.  If the block is exhausted without a match then
		 * hand off to dnode_next_offset() for further scanning.
		 */
		while (i <= last_obj) {
			error = dmu_object_info(os, i, &doi);
			if (error == ENOENT) {
				/* Slot i is free. */
				if (hole) {
					*objectp = i;
					return (0);
				} else {
					i++;
				}
			} else if (error == EEXIST) {
				/* Interior slot of a multi-slot dnode. */
				i++;
			} else if (error == 0) {
				/* Slot i starts an allocated dnode. */
				if (hole) {
					/* Skip over all of its slots. */
					i += doi.doi_dnodesize >> DNODE_SHIFT;
				} else {
					*objectp = i;
					return (0);
				}
			} else {
				return (error);
			}
		}

		start_obj = i;
	} else {
		start_obj = *objectp + 1;
	}

	offset = start_obj << DNODE_SHIFT;

	/* Fall back to a metadnode-wide scan from start_obj onward. */
	error = dnode_next_offset(DMU_META_DNODE(os),
	    (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);

	*objectp = offset >> DNODE_SHIFT;

	return (error);
}
4072acef22dSMatthew Ahrens
/*
 * Turn this object from old_type into DMU_OTN_ZAP_METADATA, and bump the
 * refcount on SPA_FEATURE_EXTENSIBLE_DATASET.
 *
 * Only for use from syncing context, on MOS objects.  Idempotent: returns
 * early if the object is already zapified.
 */
void
dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type,
    dmu_tx_t *tx)
{
	dnode_t *dn;

	ASSERT(dmu_tx_is_syncing(tx));

	VERIFY0(dnode_hold(mos, object, FTAG, &dn));
	if (dn->dn_type == DMU_OTN_ZAP_METADATA) {
		/* Already zapified; nothing to do. */
		dnode_rele(dn, FTAG);
		return;
	}
	ASSERT3U(dn->dn_type, ==, old_type);
	/* Must have no data blocks yet; the ZAP takes over the object. */
	ASSERT0(dn->dn_maxblkid);

	/*
	 * We must initialize the ZAP data before changing the type,
	 * so that concurrent calls to *_is_zapified() can determine if
	 * the object has been completely zapified by checking the type.
	 */
	mzap_create_impl(mos, object, 0, 0, tx);

	dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type =
	    DMU_OTN_ZAP_METADATA;
	dnode_setdirty(dn, tx);
	dnode_rele(dn, FTAG);

	spa_feature_incr(dmu_objset_spa(mos),
	    SPA_FEATURE_EXTENSIBLE_DATASET, tx);
}
4452acef22dSMatthew Ahrens
/*
 * Free a MOS object, dropping the SPA_FEATURE_EXTENSIBLE_DATASET refcount
 * first if the object had been zapified by dmu_object_zapify().
 *
 * Only for use from syncing context.
 */
void
dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
{
	dnode_t *dn;
	dmu_object_type_t t;

	ASSERT(dmu_tx_is_syncing(tx));

	/* Peek at the type, then release the hold before freeing. */
	VERIFY0(dnode_hold(mos, object, FTAG, &dn));
	t = dn->dn_type;
	dnode_rele(dn, FTAG);

	if (t == DMU_OTN_ZAP_METADATA) {
		spa_feature_decr(dmu_objset_spa(mos),
		    SPA_FEATURE_EXTENSIBLE_DATASET, tx);
	}
	VERIFY0(dmu_object_free(mos, object, tx));
}
464