1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 223f9d6ad7SLin Ling * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23*be6fd75aSMatthew Ahrens * Copyright (c) 2013 by Delphix. All rights reserved. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #include <sys/zfs_context.h> 27fa9e4066Sahrens #include <sys/dmu_objset.h> 28fa9e4066Sahrens #include <sys/dmu_traverse.h> 29fa9e4066Sahrens #include <sys/dsl_dataset.h> 30fa9e4066Sahrens #include <sys/dsl_dir.h> 31fa9e4066Sahrens #include <sys/dsl_pool.h> 32fa9e4066Sahrens #include <sys/dnode.h> 33fa9e4066Sahrens #include <sys/spa.h> 34fa9e4066Sahrens #include <sys/zio.h> 35fa9e4066Sahrens #include <sys/dmu_impl.h> 360a586ceaSMark Shellenbaum #include <sys/sa.h> 370a586ceaSMark Shellenbaum #include <sys/sa_impl.h> 3888b7b0f2SMatthew Ahrens #include <sys/callb.h> 3988b7b0f2SMatthew Ahrens 4044f92b7dSChris Kirby int zfs_pd_blks_max = 100; 4144f92b7dSChris Kirby 426e0cbcaaSMatthew Ahrens typedef struct prefetch_data { 4388b7b0f2SMatthew Ahrens kmutex_t pd_mtx; 4488b7b0f2SMatthew Ahrens kcondvar_t pd_cv; 4588b7b0f2SMatthew Ahrens int pd_blks_max; 4688b7b0f2SMatthew Ahrens int pd_blks_fetched; 4788b7b0f2SMatthew Ahrens int pd_flags; 4888b7b0f2SMatthew Ahrens boolean_t pd_cancel; 4988b7b0f2SMatthew Ahrens boolean_t pd_exited; 506e0cbcaaSMatthew Ahrens } prefetch_data_t; 5188b7b0f2SMatthew Ahrens 526e0cbcaaSMatthew Ahrens typedef struct traverse_data { 5388b7b0f2SMatthew Ahrens spa_t *td_spa; 5488b7b0f2SMatthew Ahrens uint64_t td_objset; 5588b7b0f2SMatthew Ahrens blkptr_t *td_rootbp; 5688b7b0f2SMatthew Ahrens uint64_t td_min_txg; 57ad135b5dSChristopher Siden zbookmark_t *td_resume; 5888b7b0f2SMatthew Ahrens int td_flags; 596e0cbcaaSMatthew Ahrens prefetch_data_t *td_pfd; 6088b7b0f2SMatthew Ahrens blkptr_cb_t *td_func; 6188b7b0f2SMatthew Ahrens void *td_arg; 626e0cbcaaSMatthew Ahrens } traverse_data_t; 63fa9e4066Sahrens 646e0cbcaaSMatthew Ahrens static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, 651b912ec7SGeorge Wilson uint64_t objset, uint64_t object); 66b4709335SMatthew Ahrens static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *, 671b912ec7SGeorge Wilson uint64_t objset, uint64_t object); 6814843421SMatthew Ahrens 69b24ab676SJeff Bonwick static int 705dabedeeSbonwick traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) 71ea8dc4b6Seschrock { 726e0cbcaaSMatthew Ahrens traverse_data_t *td = arg; 7388b7b0f2SMatthew Ahrens zbookmark_t zb; 74ea8dc4b6Seschrock 7588b7b0f2SMatthew Ahrens if (bp->blk_birth == 0) 76b24ab676SJeff Bonwick return (0); 775dabedeeSbonwick 7888b7b0f2SMatthew Ahrens if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa)) 79b24ab676SJeff Bonwick return (0); 8088b7b0f2SMatthew Ahrens 81b24ab676SJeff Bonwick SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, 82b24ab676SJeff Bonwick bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); 83b24ab676SJeff Bonwick 841b912ec7SGeorge Wilson (void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, td->td_arg); 85b24ab676SJeff Bonwick 86b24ab676SJeff Bonwick return (0); 87ea8dc4b6Seschrock } 88ea8dc4b6Seschrock 89b24ab676SJeff Bonwick static int 905dabedeeSbonwick traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) 91ea8dc4b6Seschrock { 926e0cbcaaSMatthew Ahrens traverse_data_t *td = arg; 93ea8dc4b6Seschrock 94ea8dc4b6Seschrock if (lrc->lrc_txtype == TX_WRITE) { 95ea8dc4b6Seschrock lr_write_t *lr = (lr_write_t *)lrc; 96ea8dc4b6Seschrock blkptr_t *bp = &lr->lr_blkptr; 9788b7b0f2SMatthew Ahrens zbookmark_t zb; 98ea8dc4b6Seschrock 9988b7b0f2SMatthew Ahrens if (bp->blk_birth == 0) 100b24ab676SJeff Bonwick return (0); 1015dabedeeSbonwick 10288b7b0f2SMatthew Ahrens if (claim_txg == 0 || bp->blk_birth < claim_txg) 103b24ab676SJeff Bonwick return (0); 104b24ab676SJeff Bonwick 1056e0cbcaaSMatthew Ahrens SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid, 1066e0cbcaaSMatthew Ahrens ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); 10788b7b0f2SMatthew Ahrens 1081b912ec7SGeorge Wilson (void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, 109b24ab676SJeff Bonwick td->td_arg); 110ea8dc4b6Seschrock } 111b24ab676SJeff Bonwick return (0); 112ea8dc4b6Seschrock } 113ea8dc4b6Seschrock 114ea8dc4b6Seschrock static void 1156e0cbcaaSMatthew Ahrens traverse_zil(traverse_data_t *td, zil_header_t *zh) 116ea8dc4b6Seschrock { 1175dabedeeSbonwick uint64_t claim_txg = zh->zh_claim_txg; 118ea8dc4b6Seschrock zilog_t *zilog; 119ea8dc4b6Seschrock 1205dabedeeSbonwick /* 1215dabedeeSbonwick * We only want to visit blocks that have been claimed but not yet 122b24ab676SJeff Bonwick * replayed; plus, in read-only mode, blocks that are already stable. 1235dabedeeSbonwick */ 1248ad4d6ddSJeff Bonwick if (claim_txg == 0 && spa_writeable(td->td_spa)) 1255dabedeeSbonwick return; 1265dabedeeSbonwick 12788b7b0f2SMatthew Ahrens zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); 128ea8dc4b6Seschrock 12988b7b0f2SMatthew Ahrens (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, 1305dabedeeSbonwick claim_txg); 131ea8dc4b6Seschrock 132ea8dc4b6Seschrock zil_free(zilog); 133ea8dc4b6Seschrock } 134ea8dc4b6Seschrock 135ad135b5dSChristopher Siden typedef enum resume_skip { 136ad135b5dSChristopher Siden RESUME_SKIP_ALL, 137ad135b5dSChristopher Siden RESUME_SKIP_NONE, 138ad135b5dSChristopher Siden RESUME_SKIP_CHILDREN 139ad135b5dSChristopher Siden } resume_skip_t; 140ad135b5dSChristopher Siden 141ad135b5dSChristopher Siden /* 142ad135b5dSChristopher Siden * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and 143ad135b5dSChristopher Siden * the block indicated by zb does not need to be visited at all. Returns 144ad135b5dSChristopher Siden * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the 145ad135b5dSChristopher Siden * resume point. This indicates that this block should be visited but not its 146ad135b5dSChristopher Siden * children (since they must have been visited in a previous traversal). 147ad135b5dSChristopher Siden * Otherwise returns RESUME_SKIP_NONE. 148ad135b5dSChristopher Siden */ 149ad135b5dSChristopher Siden static resume_skip_t 150ad135b5dSChristopher Siden resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp, 151ad135b5dSChristopher Siden const zbookmark_t *zb) 152ad135b5dSChristopher Siden { 153ad135b5dSChristopher Siden if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) { 154ad135b5dSChristopher Siden /* 155ad135b5dSChristopher Siden * If we already visited this bp & everything below, 156ad135b5dSChristopher Siden * don't bother doing it again. 157ad135b5dSChristopher Siden */ 158ad135b5dSChristopher Siden if (zbookmark_is_before(dnp, zb, td->td_resume)) 159ad135b5dSChristopher Siden return (RESUME_SKIP_ALL); 160ad135b5dSChristopher Siden 161ad135b5dSChristopher Siden /* 162ad135b5dSChristopher Siden * If we found the block we're trying to resume from, zero 163ad135b5dSChristopher Siden * the bookmark out to indicate that we have resumed. 164ad135b5dSChristopher Siden */ 165ad135b5dSChristopher Siden ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object); 166ad135b5dSChristopher Siden if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) { 167ad135b5dSChristopher Siden bzero(td->td_resume, sizeof (*zb)); 168ad135b5dSChristopher Siden if (td->td_flags & TRAVERSE_POST) 169ad135b5dSChristopher Siden return (RESUME_SKIP_CHILDREN); 170ad135b5dSChristopher Siden } 171ad135b5dSChristopher Siden } 172ad135b5dSChristopher Siden return (RESUME_SKIP_NONE); 173ad135b5dSChristopher Siden } 174ad135b5dSChristopher Siden 175ad135b5dSChristopher Siden static void 176ad135b5dSChristopher Siden traverse_pause(traverse_data_t *td, const zbookmark_t *zb) 177ad135b5dSChristopher Siden { 178ad135b5dSChristopher Siden ASSERT(td->td_resume != NULL); 179fb09f5aaSMadhav Suresh ASSERT0(zb->zb_level); 180ad135b5dSChristopher Siden bcopy(zb, td->td_resume, sizeof (*td->td_resume)); 181ad135b5dSChristopher Siden } 182ad135b5dSChristopher Siden 183b4709335SMatthew Ahrens static void 184b4709335SMatthew Ahrens traverse_prefetch_metadata(traverse_data_t *td, 1851b912ec7SGeorge Wilson const blkptr_t *bp, const zbookmark_t *zb) 186b4709335SMatthew Ahrens { 187b4709335SMatthew Ahrens uint32_t flags = ARC_NOWAIT | ARC_PREFETCH; 188b4709335SMatthew Ahrens 189b4709335SMatthew Ahrens if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) 190b4709335SMatthew Ahrens return; 191b4709335SMatthew Ahrens /* 192b4709335SMatthew Ahrens * If we are in the process of resuming, don't prefetch, because 193b4709335SMatthew Ahrens * some children will not be needed (and in fact may have already 194b4709335SMatthew Ahrens * been freed). 195b4709335SMatthew Ahrens */ 196b4709335SMatthew Ahrens if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) 197b4709335SMatthew Ahrens return; 198b4709335SMatthew Ahrens if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg) 199b4709335SMatthew Ahrens return; 200b4709335SMatthew Ahrens if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) 201b4709335SMatthew Ahrens return; 202b4709335SMatthew Ahrens 2031b912ec7SGeorge Wilson (void) arc_read(NULL, td->td_spa, bp, NULL, NULL, 2041b912ec7SGeorge Wilson ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 205b4709335SMatthew Ahrens } 206b4709335SMatthew Ahrens 207fa9e4066Sahrens static int 2086e0cbcaaSMatthew Ahrens traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, 2091b912ec7SGeorge Wilson const blkptr_t *bp, const zbookmark_t *zb) 210fa9e4066Sahrens { 2116a0f0066SEric Taylor zbookmark_t czb; 212cd088ea4SVictor Latushkin int err = 0, lasterr = 0; 21388b7b0f2SMatthew Ahrens arc_buf_t *buf = NULL; 2146e0cbcaaSMatthew Ahrens prefetch_data_t *pd = td->td_pfd; 215cd088ea4SVictor Latushkin boolean_t hard = td->td_flags & TRAVERSE_HARD; 216ad135b5dSChristopher Siden boolean_t pause = B_FALSE; 217ad135b5dSChristopher Siden 218ad135b5dSChristopher Siden switch (resume_skip_check(td, dnp, zb)) { 219ad135b5dSChristopher Siden case RESUME_SKIP_ALL: 220ad135b5dSChristopher Siden return (0); 221ad135b5dSChristopher Siden case RESUME_SKIP_CHILDREN: 222ad135b5dSChristopher Siden goto post; 223ad135b5dSChristopher Siden case RESUME_SKIP_NONE: 224ad135b5dSChristopher Siden break; 225ad135b5dSChristopher Siden default: 226ad135b5dSChristopher Siden ASSERT(0); 227ad135b5dSChristopher Siden } 228fa9e4066Sahrens 229ad135b5dSChristopher Siden if (BP_IS_HOLE(bp)) { 2301b912ec7SGeorge Wilson err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg); 23188b7b0f2SMatthew Ahrens return (err); 232fa9e4066Sahrens } 233fa9e4066Sahrens 23488b7b0f2SMatthew Ahrens if (bp->blk_birth <= td->td_min_txg) 23588b7b0f2SMatthew Ahrens return (0); 236fa9e4066Sahrens 23788b7b0f2SMatthew Ahrens if (pd && !pd->pd_exited && 23888b7b0f2SMatthew Ahrens ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) || 23988b7b0f2SMatthew Ahrens BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) { 24088b7b0f2SMatthew Ahrens mutex_enter(&pd->pd_mtx); 24188b7b0f2SMatthew Ahrens ASSERT(pd->pd_blks_fetched >= 0); 24288b7b0f2SMatthew Ahrens while (pd->pd_blks_fetched == 0 && !pd->pd_exited) 24388b7b0f2SMatthew Ahrens cv_wait(&pd->pd_cv, &pd->pd_mtx); 24488b7b0f2SMatthew Ahrens pd->pd_blks_fetched--; 24588b7b0f2SMatthew Ahrens cv_broadcast(&pd->pd_cv); 24688b7b0f2SMatthew Ahrens mutex_exit(&pd->pd_mtx); 247fa9e4066Sahrens } 248fa9e4066Sahrens 24988b7b0f2SMatthew Ahrens if (td->td_flags & TRAVERSE_PRE) { 2501b912ec7SGeorge Wilson err = td->td_func(td->td_spa, NULL, bp, zb, dnp, 2513f9d6ad7SLin Ling td->td_arg); 25299d5e173STim Haley if (err == TRAVERSE_VISIT_NO_CHILDREN) 25399d5e173STim Haley return (0); 254ad135b5dSChristopher Siden if (err == ERESTART) 255ad135b5dSChristopher Siden pause = B_TRUE; /* handle pausing at a common point */ 256ad135b5dSChristopher Siden if (err != 0) 257ad135b5dSChristopher Siden goto post; 258fa9e4066Sahrens } 259fa9e4066Sahrens 26088b7b0f2SMatthew Ahrens if (BP_GET_LEVEL(bp) > 0) { 26188b7b0f2SMatthew Ahrens uint32_t flags = ARC_WAIT; 26288b7b0f2SMatthew Ahrens int i; 26388b7b0f2SMatthew Ahrens blkptr_t *cbp; 26488b7b0f2SMatthew Ahrens int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; 26588b7b0f2SMatthew Ahrens 2661b912ec7SGeorge Wilson err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, 26788b7b0f2SMatthew Ahrens ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 2683b2aab18SMatthew Ahrens if (err != 0) 26988b7b0f2SMatthew Ahrens return (err); 270b4709335SMatthew Ahrens cbp = buf->b_data; 271b4709335SMatthew Ahrens 272b4709335SMatthew Ahrens for (i = 0; i < epb; i++) { 273b4709335SMatthew Ahrens SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, 274b4709335SMatthew Ahrens zb->zb_level - 1, 275b4709335SMatthew Ahrens zb->zb_blkid * epb + i); 2761b912ec7SGeorge Wilson traverse_prefetch_metadata(td, &cbp[i], &czb); 277b4709335SMatthew Ahrens } 27888b7b0f2SMatthew Ahrens 27988b7b0f2SMatthew Ahrens /* recursively visitbp() blocks below this */ 280b4709335SMatthew Ahrens for (i = 0; i < epb; i++) { 28188b7b0f2SMatthew Ahrens SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, 28288b7b0f2SMatthew Ahrens zb->zb_level - 1, 28388b7b0f2SMatthew Ahrens zb->zb_blkid * epb + i); 2841b912ec7SGeorge Wilson err = traverse_visitbp(td, dnp, &cbp[i], &czb); 2853b2aab18SMatthew Ahrens if (err != 0) { 286cd088ea4SVictor Latushkin if (!hard) 287cd088ea4SVictor Latushkin break; 288cd088ea4SVictor Latushkin lasterr = err; 289cd088ea4SVictor Latushkin } 29088b7b0f2SMatthew Ahrens } 29188b7b0f2SMatthew Ahrens } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { 29288b7b0f2SMatthew Ahrens uint32_t flags = ARC_WAIT; 29314843421SMatthew Ahrens int i; 29488b7b0f2SMatthew Ahrens int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; 29588b7b0f2SMatthew Ahrens 2961b912ec7SGeorge Wilson err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, 29788b7b0f2SMatthew Ahrens ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 2983b2aab18SMatthew Ahrens if (err != 0) 29988b7b0f2SMatthew Ahrens return (err); 300b4709335SMatthew Ahrens dnp = buf->b_data; 301b4709335SMatthew Ahrens 302b4709335SMatthew Ahrens for (i = 0; i < epb; i++) { 3031b912ec7SGeorge Wilson prefetch_dnode_metadata(td, &dnp[i], zb->zb_objset, 304b4709335SMatthew Ahrens zb->zb_blkid * epb + i); 305b4709335SMatthew Ahrens } 30688b7b0f2SMatthew Ahrens 30788b7b0f2SMatthew Ahrens /* recursively visitbp() blocks below this */ 308b4709335SMatthew Ahrens for (i = 0; i < epb; i++) { 3091b912ec7SGeorge Wilson err = traverse_dnode(td, &dnp[i], zb->zb_objset, 31014843421SMatthew Ahrens zb->zb_blkid * epb + i); 3113b2aab18SMatthew Ahrens if (err != 0) { 312cd088ea4SVictor Latushkin if (!hard) 313cd088ea4SVictor Latushkin break; 314cd088ea4SVictor Latushkin lasterr = err; 315cd088ea4SVictor Latushkin } 316fa9e4066Sahrens } 31788b7b0f2SMatthew Ahrens } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { 31888b7b0f2SMatthew Ahrens uint32_t flags = ARC_WAIT; 31988b7b0f2SMatthew Ahrens objset_phys_t *osp; 32014843421SMatthew Ahrens dnode_phys_t *dnp; 32188b7b0f2SMatthew Ahrens 3221b912ec7SGeorge Wilson err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, 32388b7b0f2SMatthew Ahrens ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 3243b2aab18SMatthew Ahrens if (err != 0) 32588b7b0f2SMatthew Ahrens return (err); 32688b7b0f2SMatthew Ahrens 32788b7b0f2SMatthew Ahrens osp = buf->b_data; 32814843421SMatthew Ahrens dnp = &osp->os_meta_dnode; 3291b912ec7SGeorge Wilson prefetch_dnode_metadata(td, dnp, zb->zb_objset, 330b4709335SMatthew Ahrens DMU_META_DNODE_OBJECT); 331b4709335SMatthew Ahrens if (arc_buf_size(buf) >= sizeof (objset_phys_t)) { 332b4709335SMatthew Ahrens prefetch_dnode_metadata(td, &osp->os_userused_dnode, 3331b912ec7SGeorge Wilson zb->zb_objset, DMU_USERUSED_OBJECT); 334b4709335SMatthew Ahrens prefetch_dnode_metadata(td, &osp->os_groupused_dnode, 3351b912ec7SGeorge Wilson zb->zb_objset, DMU_USERUSED_OBJECT); 336b4709335SMatthew Ahrens } 337b4709335SMatthew Ahrens 3381b912ec7SGeorge Wilson err = traverse_dnode(td, dnp, zb->zb_objset, 339b24ab676SJeff Bonwick DMU_META_DNODE_OBJECT); 340cd088ea4SVictor Latushkin if (err && hard) { 341cd088ea4SVictor Latushkin lasterr = err; 342cd088ea4SVictor Latushkin err = 0; 343cd088ea4SVictor Latushkin } 34414843421SMatthew Ahrens if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { 34514843421SMatthew Ahrens dnp = &osp->os_userused_dnode; 3461b912ec7SGeorge Wilson err = traverse_dnode(td, dnp, zb->zb_objset, 34714843421SMatthew Ahrens DMU_USERUSED_OBJECT); 34814843421SMatthew Ahrens } 349cd088ea4SVictor Latushkin if (err && hard) { 350cd088ea4SVictor Latushkin lasterr = err; 351cd088ea4SVictor Latushkin err = 0; 352cd088ea4SVictor Latushkin } 35314843421SMatthew Ahrens if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { 35414843421SMatthew Ahrens dnp = &osp->os_groupused_dnode; 3551b912ec7SGeorge Wilson err = traverse_dnode(td, dnp, zb->zb_objset, 35614843421SMatthew Ahrens DMU_GROUPUSED_OBJECT); 35788b7b0f2SMatthew Ahrens } 35888b7b0f2SMatthew Ahrens } 359fa9e4066Sahrens 36088b7b0f2SMatthew Ahrens if (buf) 36188b7b0f2SMatthew Ahrens (void) arc_buf_remove_ref(buf, &buf); 362fa9e4066Sahrens 363ad135b5dSChristopher Siden post: 3643f9d6ad7SLin Ling if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) { 3651b912ec7SGeorge Wilson err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg); 366ad135b5dSChristopher Siden if (err == ERESTART) 367ad135b5dSChristopher Siden pause = B_TRUE; 368ad135b5dSChristopher Siden } 369ad135b5dSChristopher Siden 370ad135b5dSChristopher Siden if (pause && td->td_resume != NULL) { 371ad135b5dSChristopher Siden ASSERT3U(err, ==, ERESTART); 372ad135b5dSChristopher Siden ASSERT(!hard); 373ad135b5dSChristopher Siden traverse_pause(td, zb); 3743f9d6ad7SLin Ling } 375fa9e4066Sahrens 376cd088ea4SVictor Latushkin return (err != 0 ? err : lasterr); 377fa9e4066Sahrens } 378fa9e4066Sahrens 379b4709335SMatthew Ahrens static void 380b4709335SMatthew Ahrens prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp, 3811b912ec7SGeorge Wilson uint64_t objset, uint64_t object) 382b4709335SMatthew Ahrens { 383b4709335SMatthew Ahrens int j; 384b4709335SMatthew Ahrens zbookmark_t czb; 385b4709335SMatthew Ahrens 386b4709335SMatthew Ahrens for (j = 0; j < dnp->dn_nblkptr; j++) { 387b4709335SMatthew Ahrens SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); 3881b912ec7SGeorge Wilson traverse_prefetch_metadata(td, &dnp->dn_blkptr[j], &czb); 389b4709335SMatthew Ahrens } 390b4709335SMatthew Ahrens 391b4709335SMatthew Ahrens if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { 392b4709335SMatthew Ahrens SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID); 3931b912ec7SGeorge Wilson traverse_prefetch_metadata(td, &dnp->dn_spill, &czb); 394b4709335SMatthew Ahrens } 395b4709335SMatthew Ahrens } 396b4709335SMatthew Ahrens 39714843421SMatthew Ahrens static int 3986e0cbcaaSMatthew Ahrens traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, 3991b912ec7SGeorge Wilson uint64_t objset, uint64_t object) 40014843421SMatthew Ahrens { 401cd088ea4SVictor Latushkin int j, err = 0, lasterr = 0; 40214843421SMatthew Ahrens zbookmark_t czb; 403cd088ea4SVictor Latushkin boolean_t hard = (td->td_flags & TRAVERSE_HARD); 40414843421SMatthew Ahrens 40514843421SMatthew Ahrens for (j = 0; j < dnp->dn_nblkptr; j++) { 40614843421SMatthew Ahrens SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); 4071b912ec7SGeorge Wilson err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb); 4083b2aab18SMatthew Ahrens if (err != 0) { 409cd088ea4SVictor Latushkin if (!hard) 410cd088ea4SVictor Latushkin break; 411cd088ea4SVictor Latushkin lasterr = err; 412cd088ea4SVictor Latushkin } 4133f9d6ad7SLin Ling } 4143f9d6ad7SLin Ling 4153f9d6ad7SLin Ling if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { 416b4709335SMatthew Ahrens SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID); 4171b912ec7SGeorge Wilson err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb); 4183b2aab18SMatthew Ahrens if (err != 0) { 4193f9d6ad7SLin Ling if (!hard) 4203f9d6ad7SLin Ling return (err); 4213f9d6ad7SLin Ling lasterr = err; 4220a586ceaSMark Shellenbaum } 42314843421SMatthew Ahrens } 424cd088ea4SVictor Latushkin return (err != 0 ? err : lasterr); 42514843421SMatthew Ahrens } 42614843421SMatthew Ahrens 42788b7b0f2SMatthew Ahrens /* ARGSUSED */ 42888b7b0f2SMatthew Ahrens static int 429b24ab676SJeff Bonwick traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 4301b912ec7SGeorge Wilson const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 431e7cbe64fSgw { 4326e0cbcaaSMatthew Ahrens prefetch_data_t *pfd = arg; 43388b7b0f2SMatthew Ahrens uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; 434e7cbe64fSgw 43588b7b0f2SMatthew Ahrens ASSERT(pfd->pd_blks_fetched >= 0); 43688b7b0f2SMatthew Ahrens if (pfd->pd_cancel) 437*be6fd75aSMatthew Ahrens return (SET_ERROR(EINTR)); 438e7cbe64fSgw 43988b7b0f2SMatthew Ahrens if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) || 4406e1f5caaSNeil Perrin BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) || 4416e1f5caaSNeil Perrin BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) 442fa9e4066Sahrens return (0); 443fa9e4066Sahrens 44488b7b0f2SMatthew Ahrens mutex_enter(&pfd->pd_mtx); 44588b7b0f2SMatthew Ahrens while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max) 44688b7b0f2SMatthew Ahrens cv_wait(&pfd->pd_cv, &pfd->pd_mtx); 44788b7b0f2SMatthew Ahrens pfd->pd_blks_fetched++; 44888b7b0f2SMatthew Ahrens cv_broadcast(&pfd->pd_cv); 44988b7b0f2SMatthew Ahrens mutex_exit(&pfd->pd_mtx); 450fa9e4066Sahrens 4511b912ec7SGeorge Wilson (void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, 4521b912ec7SGeorge Wilson ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb); 453fa9e4066Sahrens 45488b7b0f2SMatthew Ahrens return (0); 455fa9e4066Sahrens } 456fa9e4066Sahrens 457fa9e4066Sahrens static void 45888b7b0f2SMatthew Ahrens traverse_prefetch_thread(void *arg) 459fa9e4066Sahrens { 4606e0cbcaaSMatthew Ahrens traverse_data_t *td_main = arg; 4616e0cbcaaSMatthew Ahrens traverse_data_t td = *td_main; 46288b7b0f2SMatthew Ahrens zbookmark_t czb; 463fa9e4066Sahrens 46488b7b0f2SMatthew Ahrens td.td_func = traverse_prefetcher; 46588b7b0f2SMatthew Ahrens td.td_arg = td_main->td_pfd; 46688b7b0f2SMatthew Ahrens td.td_pfd = NULL; 467fa9e4066Sahrens 468b24ab676SJeff Bonwick SET_BOOKMARK(&czb, td.td_objset, 469b24ab676SJeff Bonwick ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 4701b912ec7SGeorge Wilson (void) traverse_visitbp(&td, NULL, td.td_rootbp, &czb); 471fa9e4066Sahrens 47288b7b0f2SMatthew Ahrens mutex_enter(&td_main->td_pfd->pd_mtx); 47388b7b0f2SMatthew Ahrens td_main->td_pfd->pd_exited = B_TRUE; 47488b7b0f2SMatthew Ahrens cv_broadcast(&td_main->td_pfd->pd_cv); 47588b7b0f2SMatthew Ahrens mutex_exit(&td_main->td_pfd->pd_mtx); 476fa9e4066Sahrens } 477fa9e4066Sahrens 47888b7b0f2SMatthew Ahrens /* 47988b7b0f2SMatthew Ahrens * NB: dataset must not be changing on-disk (eg, is a snapshot or we are 48088b7b0f2SMatthew Ahrens * in syncing context). 48188b7b0f2SMatthew Ahrens */ 48288b7b0f2SMatthew Ahrens static int 483ad135b5dSChristopher Siden traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, 484ad135b5dSChristopher Siden uint64_t txg_start, zbookmark_t *resume, int flags, 485ad135b5dSChristopher Siden blkptr_cb_t func, void *arg) 486fa9e4066Sahrens { 4876e0cbcaaSMatthew Ahrens traverse_data_t td; 4886e0cbcaaSMatthew Ahrens prefetch_data_t pd = { 0 }; 48988b7b0f2SMatthew Ahrens zbookmark_t czb; 49088b7b0f2SMatthew Ahrens int err; 491fa9e4066Sahrens 492ad135b5dSChristopher Siden ASSERT(ds == NULL || objset == ds->ds_object); 493ad135b5dSChristopher Siden ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST)); 494ad135b5dSChristopher Siden 495b4709335SMatthew Ahrens /* 496b4709335SMatthew Ahrens * The data prefetching mechanism (the prefetch thread) is incompatible 497b4709335SMatthew Ahrens * with resuming from a bookmark. 498b4709335SMatthew Ahrens */ 499b4709335SMatthew Ahrens ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA)); 500b4709335SMatthew Ahrens 50188b7b0f2SMatthew Ahrens td.td_spa = spa; 502ad135b5dSChristopher Siden td.td_objset = objset; 50388b7b0f2SMatthew Ahrens td.td_rootbp = rootbp; 50488b7b0f2SMatthew Ahrens td.td_min_txg = txg_start; 505ad135b5dSChristopher Siden td.td_resume = resume; 50688b7b0f2SMatthew Ahrens td.td_func = func; 50788b7b0f2SMatthew Ahrens td.td_arg = arg; 50888b7b0f2SMatthew Ahrens td.td_pfd = &pd; 50988b7b0f2SMatthew Ahrens td.td_flags = flags; 51088b7b0f2SMatthew Ahrens 51144f92b7dSChris Kirby pd.pd_blks_max = zfs_pd_blks_max; 51288b7b0f2SMatthew Ahrens pd.pd_flags = flags; 51388b7b0f2SMatthew Ahrens mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL); 51488b7b0f2SMatthew Ahrens cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL); 51588b7b0f2SMatthew Ahrens 5166e0cbcaaSMatthew Ahrens /* See comment on ZIL traversal in dsl_scan_visitds. */ 5173b2aab18SMatthew Ahrens if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) { 5183b2aab18SMatthew Ahrens uint32_t flags = ARC_WAIT; 5193b2aab18SMatthew Ahrens objset_phys_t *osp; 5203b2aab18SMatthew Ahrens arc_buf_t *buf; 5216e0cbcaaSMatthew Ahrens 5223b2aab18SMatthew Ahrens err = arc_read(NULL, td.td_spa, rootbp, 5233b2aab18SMatthew Ahrens arc_getbuf_func, &buf, 5243b2aab18SMatthew Ahrens ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL); 5253b2aab18SMatthew Ahrens if (err != 0) 5266e0cbcaaSMatthew Ahrens return (err); 5276e0cbcaaSMatthew Ahrens 5283b2aab18SMatthew Ahrens osp = buf->b_data; 5293b2aab18SMatthew Ahrens traverse_zil(&td, &osp->os_zil_header); 5303b2aab18SMatthew Ahrens (void) arc_buf_remove_ref(buf, &buf); 5316e0cbcaaSMatthew Ahrens } 5326e0cbcaaSMatthew Ahrens 533b4709335SMatthew Ahrens if (!(flags & TRAVERSE_PREFETCH_DATA) || 53488b7b0f2SMatthew Ahrens 0 == taskq_dispatch(system_taskq, traverse_prefetch_thread, 53588b7b0f2SMatthew Ahrens &td, TQ_NOQUEUE)) 53688b7b0f2SMatthew Ahrens pd.pd_exited = B_TRUE; 53788b7b0f2SMatthew Ahrens 5386e0cbcaaSMatthew Ahrens SET_BOOKMARK(&czb, td.td_objset, 539b24ab676SJeff Bonwick ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 5401b912ec7SGeorge Wilson err = traverse_visitbp(&td, NULL, rootbp, &czb); 54188b7b0f2SMatthew Ahrens 54288b7b0f2SMatthew Ahrens mutex_enter(&pd.pd_mtx); 54388b7b0f2SMatthew Ahrens pd.pd_cancel = B_TRUE; 54488b7b0f2SMatthew Ahrens cv_broadcast(&pd.pd_cv); 54588b7b0f2SMatthew Ahrens while (!pd.pd_exited) 54688b7b0f2SMatthew Ahrens cv_wait(&pd.pd_cv, &pd.pd_mtx); 54788b7b0f2SMatthew Ahrens mutex_exit(&pd.pd_mtx); 54888b7b0f2SMatthew Ahrens 54988b7b0f2SMatthew Ahrens mutex_destroy(&pd.pd_mtx); 55088b7b0f2SMatthew Ahrens cv_destroy(&pd.pd_cv); 551fa9e4066Sahrens 55288b7b0f2SMatthew Ahrens return (err); 553fa9e4066Sahrens } 554fa9e4066Sahrens 55588b7b0f2SMatthew Ahrens /* 55688b7b0f2SMatthew Ahrens * NB: dataset must not be changing on-disk (eg, is a snapshot or we are 55788b7b0f2SMatthew Ahrens * in syncing context). 55888b7b0f2SMatthew Ahrens */ 55988b7b0f2SMatthew Ahrens int 56088b7b0f2SMatthew Ahrens traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags, 56188b7b0f2SMatthew Ahrens blkptr_cb_t func, void *arg) 562fa9e4066Sahrens { 563ad135b5dSChristopher Siden return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object, 564ad135b5dSChristopher Siden &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg)); 565ad135b5dSChristopher Siden } 566ad135b5dSChristopher Siden 567ad135b5dSChristopher Siden int 568ad135b5dSChristopher Siden traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr, 569ad135b5dSChristopher Siden uint64_t txg_start, zbookmark_t *resume, int flags, 570ad135b5dSChristopher Siden blkptr_cb_t func, void *arg) 571ad135b5dSChristopher Siden { 572ad135b5dSChristopher Siden return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET, 573ad135b5dSChristopher Siden blkptr, txg_start, resume, flags, func, arg)); 574fa9e4066Sahrens } 575fa9e4066Sahrens 57688b7b0f2SMatthew Ahrens /* 57788b7b0f2SMatthew Ahrens * NB: pool must not be changing on-disk (eg, from zdb or sync context). 57888b7b0f2SMatthew Ahrens */ 57988b7b0f2SMatthew Ahrens int 580bbfd46c4SJeff Bonwick traverse_pool(spa_t *spa, uint64_t txg_start, int flags, 581bbfd46c4SJeff Bonwick blkptr_cb_t func, void *arg) 582fa9e4066Sahrens { 583cd088ea4SVictor Latushkin int err, lasterr = 0; 58488b7b0f2SMatthew Ahrens uint64_t obj; 58588b7b0f2SMatthew Ahrens dsl_pool_t *dp = spa_get_dsl(spa); 58688b7b0f2SMatthew Ahrens objset_t *mos = dp->dp_meta_objset; 587cd088ea4SVictor Latushkin boolean_t hard = (flags & TRAVERSE_HARD); 58888b7b0f2SMatthew Ahrens 58988b7b0f2SMatthew Ahrens /* visit the MOS */ 590ad135b5dSChristopher Siden err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa), 591ad135b5dSChristopher Siden txg_start, NULL, flags, func, arg); 5923b2aab18SMatthew Ahrens if (err != 0) 59388b7b0f2SMatthew Ahrens return (err); 59488b7b0f2SMatthew Ahrens 59588b7b0f2SMatthew Ahrens /* visit each dataset */ 596cd088ea4SVictor Latushkin for (obj = 1; err == 0 || (err != ESRCH && hard); 597cd088ea4SVictor Latushkin err = dmu_object_next(mos, &obj, FALSE, txg_start)) { 59888b7b0f2SMatthew Ahrens dmu_object_info_t doi; 59988b7b0f2SMatthew Ahrens 60088b7b0f2SMatthew Ahrens err = dmu_object_info(mos, obj, &doi); 6013b2aab18SMatthew Ahrens if (err != 0) { 602cd088ea4SVictor Latushkin if (!hard) 603cd088ea4SVictor Latushkin return (err); 604cd088ea4SVictor Latushkin lasterr = err; 605cd088ea4SVictor Latushkin continue; 606cd088ea4SVictor Latushkin } 60788b7b0f2SMatthew Ahrens 60888b7b0f2SMatthew Ahrens if (doi.doi_type == DMU_OT_DSL_DATASET) { 60988b7b0f2SMatthew Ahrens dsl_dataset_t *ds; 610468c413aSTim Haley uint64_t txg = txg_start; 611468c413aSTim Haley 6123b2aab18SMatthew Ahrens dsl_pool_config_enter(dp, FTAG); 61388b7b0f2SMatthew Ahrens err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 6143b2aab18SMatthew Ahrens dsl_pool_config_exit(dp, FTAG); 6153b2aab18SMatthew Ahrens if (err != 0) { 616cd088ea4SVictor Latushkin if (!hard) 617cd088ea4SVictor Latushkin return (err); 618cd088ea4SVictor Latushkin lasterr = err; 619cd088ea4SVictor Latushkin continue; 620cd088ea4SVictor Latushkin } 621468c413aSTim Haley if (ds->ds_phys->ds_prev_snap_txg > txg) 622468c413aSTim Haley txg = ds->ds_phys->ds_prev_snap_txg; 623bbfd46c4SJeff Bonwick err = traverse_dataset(ds, txg, flags, func, arg); 62488b7b0f2SMatthew Ahrens dsl_dataset_rele(ds, FTAG); 6253b2aab18SMatthew Ahrens if (err != 0) { 626cd088ea4SVictor Latushkin if (!hard) 627cd088ea4SVictor Latushkin return (err); 628cd088ea4SVictor Latushkin lasterr = err; 629cd088ea4SVictor Latushkin } 63088b7b0f2SMatthew Ahrens } 631fa9e4066Sahrens } 63288b7b0f2SMatthew Ahrens if (err == ESRCH) 63388b7b0f2SMatthew Ahrens err = 0; 634cd088ea4SVictor Latushkin return (err != 0 ? err : lasterr); 635fa9e4066Sahrens } 636