xref: /illumos-gate/usr/src/uts/common/fs/zfs/dmu_traverse.c (revision ad135b5d644628e791c3188a6ecbd9c257961ef8)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
223f9d6ad7SLin Ling  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23*ad135b5dSChristopher Siden  * Copyright (c) 2012 by Delphix. All rights reserved.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens #include <sys/zfs_context.h>
27fa9e4066Sahrens #include <sys/dmu_objset.h>
28fa9e4066Sahrens #include <sys/dmu_traverse.h>
29fa9e4066Sahrens #include <sys/dsl_dataset.h>
30fa9e4066Sahrens #include <sys/dsl_dir.h>
31fa9e4066Sahrens #include <sys/dsl_pool.h>
32fa9e4066Sahrens #include <sys/dnode.h>
33fa9e4066Sahrens #include <sys/spa.h>
34fa9e4066Sahrens #include <sys/zio.h>
35fa9e4066Sahrens #include <sys/dmu_impl.h>
360a586ceaSMark Shellenbaum #include <sys/sa.h>
370a586ceaSMark Shellenbaum #include <sys/sa_impl.h>
3888b7b0f2SMatthew Ahrens #include <sys/callb.h>
3988b7b0f2SMatthew Ahrens 
/*
 * Tunable: copied into prefetch_data_t.pd_blks_max at the start of each
 * traversal. The prefetcher blocks while the number of prefetched but not
 * yet consumed blocks is at or above this value.
 */
int zfs_pd_blks_max = 100;
4144f92b7dSChris Kirby 
/*
 * State shared between the main traversal thread and the prefetch thread.
 * pd_mtx and pd_cv are used to coordinate changes to pd_blks_fetched,
 * pd_cancel and pd_exited between the two threads.
 */
typedef struct prefetch_data {
	kmutex_t pd_mtx;
	kcondvar_t pd_cv;
	int pd_blks_max;	/* prefetcher waits at this many outstanding */
	int pd_blks_fetched;	/* prefetched blocks not yet consumed */
	int pd_flags;		/* TRAVERSE_* flags of this traversal */
	boolean_t pd_cancel;	/* set by main thread to stop prefetcher */
	boolean_t pd_exited;	/* prefetcher finished or was never started */
} prefetch_data_t;
5188b7b0f2SMatthew Ahrens 
/*
 * Per-traversal context handed down through the recursive visit functions.
 */
typedef struct traverse_data {
	spa_t *td_spa;
	uint64_t td_objset;	/* objset id placed in generated bookmarks */
	blkptr_t *td_rootbp;	/* block pointer the traversal starts from */
	uint64_t td_min_txg;	/* blocks born at or before this are skipped */
	zbookmark_t *td_resume;	/* resume/pause bookmark; may be NULL */
	int td_flags;		/* TRAVERSE_* flags */
	prefetch_data_t *td_pfd; /* NULL in the prefetch thread's own copy */
	blkptr_cb_t *td_func;	/* callback invoked for each visited block */
	void *td_arg;		/* opaque argument passed to td_func */
} traverse_data_t;
63fa9e4066Sahrens 
/*
 * Forward declaration: traverse_visitbp() and traverse_dnode() call each
 * other recursively.
 */
static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
    arc_buf_t *buf, uint64_t objset, uint64_t object);
6614843421SMatthew Ahrens 
67b24ab676SJeff Bonwick static int
685dabedeeSbonwick traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
69ea8dc4b6Seschrock {
706e0cbcaaSMatthew Ahrens 	traverse_data_t *td = arg;
7188b7b0f2SMatthew Ahrens 	zbookmark_t zb;
72ea8dc4b6Seschrock 
7388b7b0f2SMatthew Ahrens 	if (bp->blk_birth == 0)
74b24ab676SJeff Bonwick 		return (0);
755dabedeeSbonwick 
7688b7b0f2SMatthew Ahrens 	if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
77b24ab676SJeff Bonwick 		return (0);
7888b7b0f2SMatthew Ahrens 
79b24ab676SJeff Bonwick 	SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
80b24ab676SJeff Bonwick 	    bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
81b24ab676SJeff Bonwick 
823f9d6ad7SLin Ling 	(void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, td->td_arg);
83b24ab676SJeff Bonwick 
84b24ab676SJeff Bonwick 	return (0);
85ea8dc4b6Seschrock }
86ea8dc4b6Seschrock 
87b24ab676SJeff Bonwick static int
885dabedeeSbonwick traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
89ea8dc4b6Seschrock {
906e0cbcaaSMatthew Ahrens 	traverse_data_t *td = arg;
91ea8dc4b6Seschrock 
92ea8dc4b6Seschrock 	if (lrc->lrc_txtype == TX_WRITE) {
93ea8dc4b6Seschrock 		lr_write_t *lr = (lr_write_t *)lrc;
94ea8dc4b6Seschrock 		blkptr_t *bp = &lr->lr_blkptr;
9588b7b0f2SMatthew Ahrens 		zbookmark_t zb;
96ea8dc4b6Seschrock 
9788b7b0f2SMatthew Ahrens 		if (bp->blk_birth == 0)
98b24ab676SJeff Bonwick 			return (0);
995dabedeeSbonwick 
10088b7b0f2SMatthew Ahrens 		if (claim_txg == 0 || bp->blk_birth < claim_txg)
101b24ab676SJeff Bonwick 			return (0);
102b24ab676SJeff Bonwick 
1036e0cbcaaSMatthew Ahrens 		SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid,
1046e0cbcaaSMatthew Ahrens 		    ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
10588b7b0f2SMatthew Ahrens 
1063f9d6ad7SLin Ling 		(void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL,
107b24ab676SJeff Bonwick 		    td->td_arg);
108ea8dc4b6Seschrock 	}
109b24ab676SJeff Bonwick 	return (0);
110ea8dc4b6Seschrock }
111ea8dc4b6Seschrock 
112ea8dc4b6Seschrock static void
1136e0cbcaaSMatthew Ahrens traverse_zil(traverse_data_t *td, zil_header_t *zh)
114ea8dc4b6Seschrock {
1155dabedeeSbonwick 	uint64_t claim_txg = zh->zh_claim_txg;
116ea8dc4b6Seschrock 	zilog_t *zilog;
117ea8dc4b6Seschrock 
1185dabedeeSbonwick 	/*
1195dabedeeSbonwick 	 * We only want to visit blocks that have been claimed but not yet
120b24ab676SJeff Bonwick 	 * replayed; plus, in read-only mode, blocks that are already stable.
1215dabedeeSbonwick 	 */
1228ad4d6ddSJeff Bonwick 	if (claim_txg == 0 && spa_writeable(td->td_spa))
1235dabedeeSbonwick 		return;
1245dabedeeSbonwick 
12588b7b0f2SMatthew Ahrens 	zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);
126ea8dc4b6Seschrock 
12788b7b0f2SMatthew Ahrens 	(void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td,
1285dabedeeSbonwick 	    claim_txg);
129ea8dc4b6Seschrock 
130ea8dc4b6Seschrock 	zil_free(zilog);
131ea8dc4b6Seschrock }
132ea8dc4b6Seschrock 
/* Verdicts returned by resume_skip_check() for a block being visited. */
typedef enum resume_skip {
	RESUME_SKIP_ALL,	/* skip this block and everything below it */
	RESUME_SKIP_NONE,	/* visit this block normally */
	RESUME_SKIP_CHILDREN	/* visit this block but not its children */
} resume_skip_t;
138*ad135b5dSChristopher Siden 
139*ad135b5dSChristopher Siden /*
140*ad135b5dSChristopher Siden  * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and
141*ad135b5dSChristopher Siden  * the block indicated by zb does not need to be visited at all. Returns
142*ad135b5dSChristopher Siden  * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the
143*ad135b5dSChristopher Siden  * resume point. This indicates that this block should be visited but not its
144*ad135b5dSChristopher Siden  * children (since they must have been visited in a previous traversal).
145*ad135b5dSChristopher Siden  * Otherwise returns RESUME_SKIP_NONE.
146*ad135b5dSChristopher Siden  */
147*ad135b5dSChristopher Siden static resume_skip_t
148*ad135b5dSChristopher Siden resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
149*ad135b5dSChristopher Siden     const zbookmark_t *zb)
150*ad135b5dSChristopher Siden {
151*ad135b5dSChristopher Siden 	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) {
152*ad135b5dSChristopher Siden 		/*
153*ad135b5dSChristopher Siden 		 * If we already visited this bp & everything below,
154*ad135b5dSChristopher Siden 		 * don't bother doing it again.
155*ad135b5dSChristopher Siden 		 */
156*ad135b5dSChristopher Siden 		if (zbookmark_is_before(dnp, zb, td->td_resume))
157*ad135b5dSChristopher Siden 			return (RESUME_SKIP_ALL);
158*ad135b5dSChristopher Siden 
159*ad135b5dSChristopher Siden 		/*
160*ad135b5dSChristopher Siden 		 * If we found the block we're trying to resume from, zero
161*ad135b5dSChristopher Siden 		 * the bookmark out to indicate that we have resumed.
162*ad135b5dSChristopher Siden 		 */
163*ad135b5dSChristopher Siden 		ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);
164*ad135b5dSChristopher Siden 		if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
165*ad135b5dSChristopher Siden 			bzero(td->td_resume, sizeof (*zb));
166*ad135b5dSChristopher Siden 			if (td->td_flags & TRAVERSE_POST)
167*ad135b5dSChristopher Siden 				return (RESUME_SKIP_CHILDREN);
168*ad135b5dSChristopher Siden 		}
169*ad135b5dSChristopher Siden 	}
170*ad135b5dSChristopher Siden 	return (RESUME_SKIP_NONE);
171*ad135b5dSChristopher Siden }
172*ad135b5dSChristopher Siden 
173*ad135b5dSChristopher Siden static void
174*ad135b5dSChristopher Siden traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
175*ad135b5dSChristopher Siden {
176*ad135b5dSChristopher Siden 	ASSERT(td->td_resume != NULL);
177*ad135b5dSChristopher Siden 	ASSERT3U(zb->zb_level, ==, 0);
178*ad135b5dSChristopher Siden 	bcopy(zb, td->td_resume, sizeof (*td->td_resume));
179*ad135b5dSChristopher Siden }
180*ad135b5dSChristopher Siden 
181fa9e4066Sahrens static int
1826e0cbcaaSMatthew Ahrens traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
18388b7b0f2SMatthew Ahrens     arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
184fa9e4066Sahrens {
1856a0f0066SEric Taylor 	zbookmark_t czb;
186cd088ea4SVictor Latushkin 	int err = 0, lasterr = 0;
18788b7b0f2SMatthew Ahrens 	arc_buf_t *buf = NULL;
1886e0cbcaaSMatthew Ahrens 	prefetch_data_t *pd = td->td_pfd;
189cd088ea4SVictor Latushkin 	boolean_t hard = td->td_flags & TRAVERSE_HARD;
190*ad135b5dSChristopher Siden 	boolean_t pause = B_FALSE;
191*ad135b5dSChristopher Siden 
192*ad135b5dSChristopher Siden 	switch (resume_skip_check(td, dnp, zb)) {
193*ad135b5dSChristopher Siden 	case RESUME_SKIP_ALL:
194*ad135b5dSChristopher Siden 		return (0);
195*ad135b5dSChristopher Siden 	case RESUME_SKIP_CHILDREN:
196*ad135b5dSChristopher Siden 		goto post;
197*ad135b5dSChristopher Siden 	case RESUME_SKIP_NONE:
198*ad135b5dSChristopher Siden 		break;
199*ad135b5dSChristopher Siden 	default:
200*ad135b5dSChristopher Siden 		ASSERT(0);
201*ad135b5dSChristopher Siden 	}
202fa9e4066Sahrens 
203*ad135b5dSChristopher Siden 	if (BP_IS_HOLE(bp)) {
2043f9d6ad7SLin Ling 		err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
2053f9d6ad7SLin Ling 		    td->td_arg);
20688b7b0f2SMatthew Ahrens 		return (err);
207fa9e4066Sahrens 	}
208fa9e4066Sahrens 
20988b7b0f2SMatthew Ahrens 	if (bp->blk_birth <= td->td_min_txg)
21088b7b0f2SMatthew Ahrens 		return (0);
211fa9e4066Sahrens 
21288b7b0f2SMatthew Ahrens 	if (pd && !pd->pd_exited &&
21388b7b0f2SMatthew Ahrens 	    ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
21488b7b0f2SMatthew Ahrens 	    BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) {
21588b7b0f2SMatthew Ahrens 		mutex_enter(&pd->pd_mtx);
21688b7b0f2SMatthew Ahrens 		ASSERT(pd->pd_blks_fetched >= 0);
21788b7b0f2SMatthew Ahrens 		while (pd->pd_blks_fetched == 0 && !pd->pd_exited)
21888b7b0f2SMatthew Ahrens 			cv_wait(&pd->pd_cv, &pd->pd_mtx);
21988b7b0f2SMatthew Ahrens 		pd->pd_blks_fetched--;
22088b7b0f2SMatthew Ahrens 		cv_broadcast(&pd->pd_cv);
22188b7b0f2SMatthew Ahrens 		mutex_exit(&pd->pd_mtx);
222fa9e4066Sahrens 	}
223fa9e4066Sahrens 
22488b7b0f2SMatthew Ahrens 	if (td->td_flags & TRAVERSE_PRE) {
2253f9d6ad7SLin Ling 		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
2263f9d6ad7SLin Ling 		    td->td_arg);
22799d5e173STim Haley 		if (err == TRAVERSE_VISIT_NO_CHILDREN)
22899d5e173STim Haley 			return (0);
229*ad135b5dSChristopher Siden 		if (err == ERESTART)
230*ad135b5dSChristopher Siden 			pause = B_TRUE; /* handle pausing at a common point */
231*ad135b5dSChristopher Siden 		if (err != 0)
232*ad135b5dSChristopher Siden 			goto post;
233fa9e4066Sahrens 	}
234fa9e4066Sahrens 
23588b7b0f2SMatthew Ahrens 	if (BP_GET_LEVEL(bp) > 0) {
23688b7b0f2SMatthew Ahrens 		uint32_t flags = ARC_WAIT;
23788b7b0f2SMatthew Ahrens 		int i;
23888b7b0f2SMatthew Ahrens 		blkptr_t *cbp;
23988b7b0f2SMatthew Ahrens 		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
24088b7b0f2SMatthew Ahrens 
2413f9d6ad7SLin Ling 		err = dsl_read(NULL, td->td_spa, bp, pbuf,
24288b7b0f2SMatthew Ahrens 		    arc_getbuf_func, &buf,
24388b7b0f2SMatthew Ahrens 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
24488b7b0f2SMatthew Ahrens 		if (err)
24588b7b0f2SMatthew Ahrens 			return (err);
24688b7b0f2SMatthew Ahrens 
24788b7b0f2SMatthew Ahrens 		/* recursively visitbp() blocks below this */
24888b7b0f2SMatthew Ahrens 		cbp = buf->b_data;
24988b7b0f2SMatthew Ahrens 		for (i = 0; i < epb; i++, cbp++) {
25088b7b0f2SMatthew Ahrens 			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
25188b7b0f2SMatthew Ahrens 			    zb->zb_level - 1,
25288b7b0f2SMatthew Ahrens 			    zb->zb_blkid * epb + i);
25388b7b0f2SMatthew Ahrens 			err = traverse_visitbp(td, dnp, buf, cbp, &czb);
254cd088ea4SVictor Latushkin 			if (err) {
255cd088ea4SVictor Latushkin 				if (!hard)
256cd088ea4SVictor Latushkin 					break;
257cd088ea4SVictor Latushkin 				lasterr = err;
258cd088ea4SVictor Latushkin 			}
25988b7b0f2SMatthew Ahrens 		}
26088b7b0f2SMatthew Ahrens 	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
26188b7b0f2SMatthew Ahrens 		uint32_t flags = ARC_WAIT;
26214843421SMatthew Ahrens 		int i;
26388b7b0f2SMatthew Ahrens 		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
26488b7b0f2SMatthew Ahrens 
2653f9d6ad7SLin Ling 		err = dsl_read(NULL, td->td_spa, bp, pbuf,
26688b7b0f2SMatthew Ahrens 		    arc_getbuf_func, &buf,
26788b7b0f2SMatthew Ahrens 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
26888b7b0f2SMatthew Ahrens 		if (err)
26988b7b0f2SMatthew Ahrens 			return (err);
27088b7b0f2SMatthew Ahrens 
27188b7b0f2SMatthew Ahrens 		/* recursively visitbp() blocks below this */
27288b7b0f2SMatthew Ahrens 		dnp = buf->b_data;
273cd088ea4SVictor Latushkin 		for (i = 0; i < epb; i++, dnp++) {
27414843421SMatthew Ahrens 			err = traverse_dnode(td, dnp, buf, zb->zb_objset,
27514843421SMatthew Ahrens 			    zb->zb_blkid * epb + i);
276cd088ea4SVictor Latushkin 			if (err) {
277cd088ea4SVictor Latushkin 				if (!hard)
278cd088ea4SVictor Latushkin 					break;
279cd088ea4SVictor Latushkin 				lasterr = err;
280cd088ea4SVictor Latushkin 			}
281fa9e4066Sahrens 		}
28288b7b0f2SMatthew Ahrens 	} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
28388b7b0f2SMatthew Ahrens 		uint32_t flags = ARC_WAIT;
28488b7b0f2SMatthew Ahrens 		objset_phys_t *osp;
28514843421SMatthew Ahrens 		dnode_phys_t *dnp;
28688b7b0f2SMatthew Ahrens 
2873f9d6ad7SLin Ling 		err = dsl_read_nolock(NULL, td->td_spa, bp,
28888b7b0f2SMatthew Ahrens 		    arc_getbuf_func, &buf,
28988b7b0f2SMatthew Ahrens 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
29088b7b0f2SMatthew Ahrens 		if (err)
29188b7b0f2SMatthew Ahrens 			return (err);
29288b7b0f2SMatthew Ahrens 
29388b7b0f2SMatthew Ahrens 		osp = buf->b_data;
29414843421SMatthew Ahrens 		dnp = &osp->os_meta_dnode;
295b24ab676SJeff Bonwick 		err = traverse_dnode(td, dnp, buf, zb->zb_objset,
296b24ab676SJeff Bonwick 		    DMU_META_DNODE_OBJECT);
297cd088ea4SVictor Latushkin 		if (err && hard) {
298cd088ea4SVictor Latushkin 			lasterr = err;
299cd088ea4SVictor Latushkin 			err = 0;
300cd088ea4SVictor Latushkin 		}
30114843421SMatthew Ahrens 		if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
30214843421SMatthew Ahrens 			dnp = &osp->os_userused_dnode;
30314843421SMatthew Ahrens 			err = traverse_dnode(td, dnp, buf, zb->zb_objset,
30414843421SMatthew Ahrens 			    DMU_USERUSED_OBJECT);
30514843421SMatthew Ahrens 		}
306cd088ea4SVictor Latushkin 		if (err && hard) {
307cd088ea4SVictor Latushkin 			lasterr = err;
308cd088ea4SVictor Latushkin 			err = 0;
309cd088ea4SVictor Latushkin 		}
31014843421SMatthew Ahrens 		if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
31114843421SMatthew Ahrens 			dnp = &osp->os_groupused_dnode;
31214843421SMatthew Ahrens 			err = traverse_dnode(td, dnp, buf, zb->zb_objset,
31314843421SMatthew Ahrens 			    DMU_GROUPUSED_OBJECT);
31488b7b0f2SMatthew Ahrens 		}
31588b7b0f2SMatthew Ahrens 	}
316fa9e4066Sahrens 
31788b7b0f2SMatthew Ahrens 	if (buf)
31888b7b0f2SMatthew Ahrens 		(void) arc_buf_remove_ref(buf, &buf);
319fa9e4066Sahrens 
320*ad135b5dSChristopher Siden post:
3213f9d6ad7SLin Ling 	if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
3223f9d6ad7SLin Ling 		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
3233f9d6ad7SLin Ling 		    td->td_arg);
324*ad135b5dSChristopher Siden 		if (err == ERESTART)
325*ad135b5dSChristopher Siden 			pause = B_TRUE;
326*ad135b5dSChristopher Siden 	}
327*ad135b5dSChristopher Siden 
328*ad135b5dSChristopher Siden 	if (pause && td->td_resume != NULL) {
329*ad135b5dSChristopher Siden 		ASSERT3U(err, ==, ERESTART);
330*ad135b5dSChristopher Siden 		ASSERT(!hard);
331*ad135b5dSChristopher Siden 		traverse_pause(td, zb);
3323f9d6ad7SLin Ling 	}
333fa9e4066Sahrens 
334cd088ea4SVictor Latushkin 	return (err != 0 ? err : lasterr);
335fa9e4066Sahrens }
336fa9e4066Sahrens 
33714843421SMatthew Ahrens static int
3386e0cbcaaSMatthew Ahrens traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
33914843421SMatthew Ahrens     arc_buf_t *buf, uint64_t objset, uint64_t object)
34014843421SMatthew Ahrens {
341cd088ea4SVictor Latushkin 	int j, err = 0, lasterr = 0;
34214843421SMatthew Ahrens 	zbookmark_t czb;
343cd088ea4SVictor Latushkin 	boolean_t hard = (td->td_flags & TRAVERSE_HARD);
34414843421SMatthew Ahrens 
34514843421SMatthew Ahrens 	for (j = 0; j < dnp->dn_nblkptr; j++) {
34614843421SMatthew Ahrens 		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
34714843421SMatthew Ahrens 		err = traverse_visitbp(td, dnp, buf,
34814843421SMatthew Ahrens 		    (blkptr_t *)&dnp->dn_blkptr[j], &czb);
349cd088ea4SVictor Latushkin 		if (err) {
350cd088ea4SVictor Latushkin 			if (!hard)
351cd088ea4SVictor Latushkin 				break;
352cd088ea4SVictor Latushkin 			lasterr = err;
353cd088ea4SVictor Latushkin 		}
3543f9d6ad7SLin Ling 	}
3553f9d6ad7SLin Ling 
3563f9d6ad7SLin Ling 	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
3573f9d6ad7SLin Ling 		SET_BOOKMARK(&czb, objset,
3583f9d6ad7SLin Ling 		    object, 0, DMU_SPILL_BLKID);
3593f9d6ad7SLin Ling 		err = traverse_visitbp(td, dnp, buf,
3603f9d6ad7SLin Ling 		    (blkptr_t *)&dnp->dn_spill, &czb);
3613f9d6ad7SLin Ling 		if (err) {
3623f9d6ad7SLin Ling 			if (!hard)
3633f9d6ad7SLin Ling 				return (err);
3643f9d6ad7SLin Ling 			lasterr = err;
3650a586ceaSMark Shellenbaum 		}
36614843421SMatthew Ahrens 	}
367cd088ea4SVictor Latushkin 	return (err != 0 ? err : lasterr);
36814843421SMatthew Ahrens }
36914843421SMatthew Ahrens 
37088b7b0f2SMatthew Ahrens /* ARGSUSED */
37188b7b0f2SMatthew Ahrens static int
372b24ab676SJeff Bonwick traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
3733f9d6ad7SLin Ling     arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp,
3743f9d6ad7SLin Ling     void *arg)
375e7cbe64fSgw {
3766e0cbcaaSMatthew Ahrens 	prefetch_data_t *pfd = arg;
37788b7b0f2SMatthew Ahrens 	uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
378e7cbe64fSgw 
37988b7b0f2SMatthew Ahrens 	ASSERT(pfd->pd_blks_fetched >= 0);
38088b7b0f2SMatthew Ahrens 	if (pfd->pd_cancel)
38188b7b0f2SMatthew Ahrens 		return (EINTR);
382e7cbe64fSgw 
38388b7b0f2SMatthew Ahrens 	if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
3846e1f5caaSNeil Perrin 	    BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
3856e1f5caaSNeil Perrin 	    BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
386fa9e4066Sahrens 		return (0);
387fa9e4066Sahrens 
38888b7b0f2SMatthew Ahrens 	mutex_enter(&pfd->pd_mtx);
38988b7b0f2SMatthew Ahrens 	while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max)
39088b7b0f2SMatthew Ahrens 		cv_wait(&pfd->pd_cv, &pfd->pd_mtx);
39188b7b0f2SMatthew Ahrens 	pfd->pd_blks_fetched++;
39288b7b0f2SMatthew Ahrens 	cv_broadcast(&pfd->pd_cv);
39388b7b0f2SMatthew Ahrens 	mutex_exit(&pfd->pd_mtx);
394fa9e4066Sahrens 
3953f9d6ad7SLin Ling 	(void) dsl_read(NULL, spa, bp, pbuf, NULL, NULL,
39688b7b0f2SMatthew Ahrens 	    ZIO_PRIORITY_ASYNC_READ,
39788b7b0f2SMatthew Ahrens 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
39888b7b0f2SMatthew Ahrens 	    &aflags, zb);
399fa9e4066Sahrens 
40088b7b0f2SMatthew Ahrens 	return (0);
401fa9e4066Sahrens }
402fa9e4066Sahrens 
403fa9e4066Sahrens static void
40488b7b0f2SMatthew Ahrens traverse_prefetch_thread(void *arg)
405fa9e4066Sahrens {
4066e0cbcaaSMatthew Ahrens 	traverse_data_t *td_main = arg;
4076e0cbcaaSMatthew Ahrens 	traverse_data_t td = *td_main;
40888b7b0f2SMatthew Ahrens 	zbookmark_t czb;
409fa9e4066Sahrens 
41088b7b0f2SMatthew Ahrens 	td.td_func = traverse_prefetcher;
41188b7b0f2SMatthew Ahrens 	td.td_arg = td_main->td_pfd;
41288b7b0f2SMatthew Ahrens 	td.td_pfd = NULL;
413fa9e4066Sahrens 
414b24ab676SJeff Bonwick 	SET_BOOKMARK(&czb, td.td_objset,
415b24ab676SJeff Bonwick 	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
41688b7b0f2SMatthew Ahrens 	(void) traverse_visitbp(&td, NULL, NULL, td.td_rootbp, &czb);
417fa9e4066Sahrens 
41888b7b0f2SMatthew Ahrens 	mutex_enter(&td_main->td_pfd->pd_mtx);
41988b7b0f2SMatthew Ahrens 	td_main->td_pfd->pd_exited = B_TRUE;
42088b7b0f2SMatthew Ahrens 	cv_broadcast(&td_main->td_pfd->pd_cv);
42188b7b0f2SMatthew Ahrens 	mutex_exit(&td_main->td_pfd->pd_mtx);
422fa9e4066Sahrens }
423fa9e4066Sahrens 
42488b7b0f2SMatthew Ahrens /*
42588b7b0f2SMatthew Ahrens  * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
42688b7b0f2SMatthew Ahrens  * in syncing context).
42788b7b0f2SMatthew Ahrens  */
42888b7b0f2SMatthew Ahrens static int
429*ad135b5dSChristopher Siden traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
430*ad135b5dSChristopher Siden     uint64_t txg_start, zbookmark_t *resume, int flags,
431*ad135b5dSChristopher Siden     blkptr_cb_t func, void *arg)
432fa9e4066Sahrens {
4336e0cbcaaSMatthew Ahrens 	traverse_data_t td;
4346e0cbcaaSMatthew Ahrens 	prefetch_data_t pd = { 0 };
43588b7b0f2SMatthew Ahrens 	zbookmark_t czb;
43688b7b0f2SMatthew Ahrens 	int err;
437fa9e4066Sahrens 
438*ad135b5dSChristopher Siden 	ASSERT(ds == NULL || objset == ds->ds_object);
439*ad135b5dSChristopher Siden 	ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
440*ad135b5dSChristopher Siden 
44188b7b0f2SMatthew Ahrens 	td.td_spa = spa;
442*ad135b5dSChristopher Siden 	td.td_objset = objset;
44388b7b0f2SMatthew Ahrens 	td.td_rootbp = rootbp;
44488b7b0f2SMatthew Ahrens 	td.td_min_txg = txg_start;
445*ad135b5dSChristopher Siden 	td.td_resume = resume;
44688b7b0f2SMatthew Ahrens 	td.td_func = func;
44788b7b0f2SMatthew Ahrens 	td.td_arg = arg;
44888b7b0f2SMatthew Ahrens 	td.td_pfd = &pd;
44988b7b0f2SMatthew Ahrens 	td.td_flags = flags;
45088b7b0f2SMatthew Ahrens 
45144f92b7dSChris Kirby 	pd.pd_blks_max = zfs_pd_blks_max;
45288b7b0f2SMatthew Ahrens 	pd.pd_flags = flags;
45388b7b0f2SMatthew Ahrens 	mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL);
45488b7b0f2SMatthew Ahrens 	cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
45588b7b0f2SMatthew Ahrens 
4566e0cbcaaSMatthew Ahrens 	/* See comment on ZIL traversal in dsl_scan_visitds. */
4576e0cbcaaSMatthew Ahrens 	if (ds != NULL && !dsl_dataset_is_snapshot(ds)) {
4586e0cbcaaSMatthew Ahrens 		objset_t *os;
4596e0cbcaaSMatthew Ahrens 
4606e0cbcaaSMatthew Ahrens 		err = dmu_objset_from_ds(ds, &os);
4616e0cbcaaSMatthew Ahrens 		if (err)
4626e0cbcaaSMatthew Ahrens 			return (err);
4636e0cbcaaSMatthew Ahrens 
4646e0cbcaaSMatthew Ahrens 		traverse_zil(&td, &os->os_zil_header);
4656e0cbcaaSMatthew Ahrens 	}
4666e0cbcaaSMatthew Ahrens 
46788b7b0f2SMatthew Ahrens 	if (!(flags & TRAVERSE_PREFETCH) ||
46888b7b0f2SMatthew Ahrens 	    0 == taskq_dispatch(system_taskq, traverse_prefetch_thread,
46988b7b0f2SMatthew Ahrens 	    &td, TQ_NOQUEUE))
47088b7b0f2SMatthew Ahrens 		pd.pd_exited = B_TRUE;
47188b7b0f2SMatthew Ahrens 
4726e0cbcaaSMatthew Ahrens 	SET_BOOKMARK(&czb, td.td_objset,
473b24ab676SJeff Bonwick 	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
47488b7b0f2SMatthew Ahrens 	err = traverse_visitbp(&td, NULL, NULL, rootbp, &czb);
47588b7b0f2SMatthew Ahrens 
47688b7b0f2SMatthew Ahrens 	mutex_enter(&pd.pd_mtx);
47788b7b0f2SMatthew Ahrens 	pd.pd_cancel = B_TRUE;
47888b7b0f2SMatthew Ahrens 	cv_broadcast(&pd.pd_cv);
47988b7b0f2SMatthew Ahrens 	while (!pd.pd_exited)
48088b7b0f2SMatthew Ahrens 		cv_wait(&pd.pd_cv, &pd.pd_mtx);
48188b7b0f2SMatthew Ahrens 	mutex_exit(&pd.pd_mtx);
48288b7b0f2SMatthew Ahrens 
48388b7b0f2SMatthew Ahrens 	mutex_destroy(&pd.pd_mtx);
48488b7b0f2SMatthew Ahrens 	cv_destroy(&pd.pd_cv);
485fa9e4066Sahrens 
48688b7b0f2SMatthew Ahrens 	return (err);
487fa9e4066Sahrens }
488fa9e4066Sahrens 
48988b7b0f2SMatthew Ahrens /*
49088b7b0f2SMatthew Ahrens  * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
49188b7b0f2SMatthew Ahrens  * in syncing context).
49288b7b0f2SMatthew Ahrens  */
49388b7b0f2SMatthew Ahrens int
49488b7b0f2SMatthew Ahrens traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
49588b7b0f2SMatthew Ahrens     blkptr_cb_t func, void *arg)
496fa9e4066Sahrens {
497*ad135b5dSChristopher Siden 	return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
498*ad135b5dSChristopher Siden 	    &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg));
499*ad135b5dSChristopher Siden }
500*ad135b5dSChristopher Siden 
/*
 * Traverse the block tree rooted at blkptr without an associated dataset:
 * the bookmarks handed to func carry ZB_DESTROYED_OBJSET as their objset,
 * and no intent log is traversed. If resume is non-NULL the traversal
 * continues from that bookmark, and the bookmark is updated when func
 * returns ERESTART so a later call can pick up where this one stopped.
 */
int
traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
    uint64_t txg_start, zbookmark_t *resume, int flags,
    blkptr_cb_t func, void *arg)
{
	return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
	    blkptr, txg_start, resume, flags, func, arg));
}
509fa9e4066Sahrens 
/*
 * Traverse the whole pool: first the meta-objset (MOS), then every object
 * in the MOS whose type is DMU_OT_DSL_DATASET. func is invoked with arg for
 * each visited block.
 *
 * Without TRAVERSE_HARD the first error is returned immediately. With
 * TRAVERSE_HARD, errors from individual objects or datasets are remembered
 * and the walk continues; the last such error is returned at the end.
 *
 * NB: pool must not be changing on-disk (eg, from zdb or sync context).
 */
int
traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
    blkptr_cb_t func, void *arg)
{
	int err, lasterr = 0;
	uint64_t obj;
	dsl_pool_t *dp = spa_get_dsl(spa);
	objset_t *mos = dp->dp_meta_objset;
	boolean_t hard = (flags & TRAVERSE_HARD);

	/* visit the MOS */
	err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
	    txg_start, NULL, flags, func, arg);
	if (err)
		return (err);

	/*
	 * visit each dataset
	 *
	 * dmu_object_next() advances obj; ESRCH from it ends the loop.
	 * Note that "continue" below still runs that increment step.
	 */
	for (obj = 1; err == 0 || (err != ESRCH && hard);
	    err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
		dmu_object_info_t doi;

		err = dmu_object_info(mos, obj, &doi);
		if (err) {
			if (!hard)
				return (err);
			lasterr = err;
			continue;
		}

		if (doi.doi_type == DMU_OT_DSL_DATASET) {
			dsl_dataset_t *ds;
			uint64_t txg = txg_start;

			rw_enter(&dp->dp_config_rwlock, RW_READER);
			err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
			rw_exit(&dp->dp_config_rwlock);
			if (err) {
				if (!hard)
					return (err);
				lasterr = err;
				continue;
			}
			/*
			 * Start no earlier than the dataset's previous
			 * snapshot txg.
			 */
			if (ds->ds_phys->ds_prev_snap_txg > txg)
				txg = ds->ds_phys->ds_prev_snap_txg;
			err = traverse_dataset(ds, txg, flags, func, arg);
			dsl_dataset_rele(ds, FTAG);
			if (err) {
				if (!hard)
					return (err);
				lasterr = err;
			}
		}
	}
	/* ESRCH only means there are no more objects to look at. */
	if (err == ESRCH)
		err = 0;
	return (err != 0 ? err : lasterr);
}
570