1*fa9e4066Sahrens /* 2*fa9e4066Sahrens * CDDL HEADER START 3*fa9e4066Sahrens * 4*fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*fa9e4066Sahrens * Common Development and Distribution License, Version 1.0 only 6*fa9e4066Sahrens * (the "License"). You may not use this file except in compliance 7*fa9e4066Sahrens * with the License. 8*fa9e4066Sahrens * 9*fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 11*fa9e4066Sahrens * See the License for the specific language governing permissions 12*fa9e4066Sahrens * and limitations under the License. 13*fa9e4066Sahrens * 14*fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*fa9e4066Sahrens * 20*fa9e4066Sahrens * CDDL HEADER END 21*fa9e4066Sahrens */ 22*fa9e4066Sahrens /* 23*fa9e4066Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*fa9e4066Sahrens * Use is subject to license terms. 25*fa9e4066Sahrens */ 26*fa9e4066Sahrens 27*fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*fa9e4066Sahrens 29*fa9e4066Sahrens #include <sys/dmu.h> 30*fa9e4066Sahrens #include <sys/dmu_impl.h> 31*fa9e4066Sahrens #include <sys/dmu_tx.h> 32*fa9e4066Sahrens #include <sys/dbuf.h> 33*fa9e4066Sahrens #include <sys/dnode.h> 34*fa9e4066Sahrens #include <sys/zfs_context.h> 35*fa9e4066Sahrens #include <sys/dmu_objset.h> 36*fa9e4066Sahrens #include <sys/dmu_traverse.h> 37*fa9e4066Sahrens #include <sys/dsl_dataset.h> 38*fa9e4066Sahrens #include <sys/dsl_dir.h> 39*fa9e4066Sahrens #include <sys/dsl_pool.h> 40*fa9e4066Sahrens #include <sys/dmu_zfetch.h> 41*fa9e4066Sahrens #include <sys/zfs_ioctl.h> 42*fa9e4066Sahrens #include <sys/zap.h> 43*fa9e4066Sahrens 44*fa9e4066Sahrens const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { 45*fa9e4066Sahrens { byteswap_uint8_array, TRUE, "unallocated" }, 46*fa9e4066Sahrens { zap_byteswap, TRUE, "object directory" }, 47*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "object array" }, 48*fa9e4066Sahrens { byteswap_uint8_array, TRUE, "packed nvlist" }, 49*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "packed nvlist size" }, 50*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist" }, 51*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist header" }, 52*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map header" }, 53*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map" }, 54*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "ZIL intent log" }, 55*fa9e4066Sahrens { dnode_buf_byteswap, TRUE, "DMU dnode" }, 56*fa9e4066Sahrens { dmu_objset_byteswap, TRUE, "DMU objset" }, 57*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL directory" }, 58*fa9e4066Sahrens { zap_byteswap, TRUE, "DSL directory child map"}, 59*fa9e4066Sahrens { zap_byteswap, TRUE, "DSL dataset snap map" }, 60*fa9e4066Sahrens { zap_byteswap, TRUE, "DSL props" }, 61*fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL dataset" }, 62*fa9e4066Sahrens { zfs_znode_byteswap, TRUE, "ZFS znode" }, 63*fa9e4066Sahrens { zfs_acl_byteswap, TRUE, "ZFS ACL" }, 64*fa9e4066Sahrens { byteswap_uint8_array, FALSE, "ZFS plain file" }, 65*fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS directory" }, 66*fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS master node" }, 67*fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS delete queue" }, 68*fa9e4066Sahrens { byteswap_uint8_array, FALSE, "zvol object" }, 69*fa9e4066Sahrens { zap_byteswap, TRUE, "zvol prop" }, 70*fa9e4066Sahrens { byteswap_uint8_array, FALSE, "other uint8[]" }, 71*fa9e4066Sahrens { byteswap_uint64_array, FALSE, "other uint64[]" }, 72*fa9e4066Sahrens { zap_byteswap, TRUE, "other ZAP" }, 73*fa9e4066Sahrens }; 74*fa9e4066Sahrens 75*fa9e4066Sahrens static int 76*fa9e4066Sahrens dmu_buf_read_array_impl(dmu_buf_impl_t **dbp, int numbufs, uint32_t flags) 77*fa9e4066Sahrens { 78*fa9e4066Sahrens int i, err = 0; 79*fa9e4066Sahrens dnode_t *dn; 80*fa9e4066Sahrens zio_t *zio; 81*fa9e4066Sahrens int canfail; 82*fa9e4066Sahrens uint64_t rd_sz; 83*fa9e4066Sahrens 84*fa9e4066Sahrens if (numbufs == 0) 85*fa9e4066Sahrens return (0); 86*fa9e4066Sahrens 87*fa9e4066Sahrens rd_sz = numbufs * dbp[0]->db.db_size; 88*fa9e4066Sahrens ASSERT(rd_sz <= DMU_MAX_ACCESS); 89*fa9e4066Sahrens 90*fa9e4066Sahrens dn = dbp[0]->db_dnode; 91*fa9e4066Sahrens if (flags & DB_RF_CANFAIL) { 92*fa9e4066Sahrens canfail = 1; 93*fa9e4066Sahrens } else { 94*fa9e4066Sahrens canfail = 0; 95*fa9e4066Sahrens } 96*fa9e4066Sahrens zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, canfail); 97*fa9e4066Sahrens 98*fa9e4066Sahrens /* don't prefetch if read the read is large */ 99*fa9e4066Sahrens if (rd_sz >= zfetch_array_rd_sz) { 100*fa9e4066Sahrens flags |= DB_RF_NOPREFETCH; 101*fa9e4066Sahrens } 102*fa9e4066Sahrens 103*fa9e4066Sahrens /* initiate async reads */ 104*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 105*fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 106*fa9e4066Sahrens if (dbp[i]->db_state == DB_UNCACHED) 107*fa9e4066Sahrens dbuf_read_impl(dbp[i], zio, flags); 108*fa9e4066Sahrens } 109*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 110*fa9e4066Sahrens err = zio_wait(zio); 111*fa9e4066Sahrens 112*fa9e4066Sahrens if (err) 113*fa9e4066Sahrens return (err); 114*fa9e4066Sahrens 115*fa9e4066Sahrens /* wait for other io to complete */ 116*fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 117*fa9e4066Sahrens mutex_enter(&dbp[i]->db_mtx); 118*fa9e4066Sahrens while (dbp[i]->db_state == DB_READ || 119*fa9e4066Sahrens dbp[i]->db_state == DB_FILL) 120*fa9e4066Sahrens cv_wait(&dbp[i]->db_changed, &dbp[i]->db_mtx); 121*fa9e4066Sahrens ASSERT(dbp[i]->db_state == DB_CACHED); 122*fa9e4066Sahrens mutex_exit(&dbp[i]->db_mtx); 123*fa9e4066Sahrens } 124*fa9e4066Sahrens 125*fa9e4066Sahrens return (0); 126*fa9e4066Sahrens } 127*fa9e4066Sahrens 128*fa9e4066Sahrens void 129*fa9e4066Sahrens dmu_buf_read_array(dmu_buf_t **dbp_fake, int numbufs) 130*fa9e4066Sahrens { 131*fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 132*fa9e4066Sahrens int err; 133*fa9e4066Sahrens 134*fa9e4066Sahrens err = dmu_buf_read_array_impl(dbp, numbufs, DB_RF_MUST_SUCCEED); 135*fa9e4066Sahrens ASSERT(err == 0); 136*fa9e4066Sahrens } 137*fa9e4066Sahrens 138*fa9e4066Sahrens int 139*fa9e4066Sahrens dmu_buf_read_array_canfail(dmu_buf_t **dbp_fake, int numbufs) 140*fa9e4066Sahrens { 141*fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 142*fa9e4066Sahrens 143*fa9e4066Sahrens return (dmu_buf_read_array_impl(dbp, numbufs, DB_RF_CANFAIL)); 144*fa9e4066Sahrens } 145*fa9e4066Sahrens 146*fa9e4066Sahrens dmu_buf_t * 147*fa9e4066Sahrens dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset) 148*fa9e4066Sahrens { 149*fa9e4066Sahrens dnode_t *dn; 150*fa9e4066Sahrens uint64_t blkid; 151*fa9e4066Sahrens dmu_buf_impl_t *db; 152*fa9e4066Sahrens 153*fa9e4066Sahrens /* dataset_verify(dd); */ 154*fa9e4066Sahrens 155*fa9e4066Sahrens dn = dnode_hold(os->os, object, FTAG); 156*fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 157*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 158*fa9e4066Sahrens db = dbuf_hold(dn, blkid); 159*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 160*fa9e4066Sahrens dnode_rele(dn, FTAG); 161*fa9e4066Sahrens return (&db->db); 162*fa9e4066Sahrens } 163*fa9e4066Sahrens 164*fa9e4066Sahrens dmu_buf_t * 165*fa9e4066Sahrens dmu_bonus_hold(objset_t *os, uint64_t object) 166*fa9e4066Sahrens { 167*fa9e4066Sahrens return (dmu_bonus_hold_tag(os, object, NULL)); 168*fa9e4066Sahrens } 169*fa9e4066Sahrens 170*fa9e4066Sahrens int 171*fa9e4066Sahrens dmu_bonus_max(void) 172*fa9e4066Sahrens { 173*fa9e4066Sahrens return (DN_MAX_BONUSLEN); 174*fa9e4066Sahrens } 175*fa9e4066Sahrens 176*fa9e4066Sahrens /* 177*fa9e4066Sahrens * Returns held bonus buffer if the object exists, NULL if it doesn't. 178*fa9e4066Sahrens */ 179*fa9e4066Sahrens dmu_buf_t * 180*fa9e4066Sahrens dmu_bonus_hold_tag(objset_t *os, uint64_t object, void *tag) 181*fa9e4066Sahrens { 182*fa9e4066Sahrens dnode_t *dn = dnode_hold(os->os, object, FTAG); 183*fa9e4066Sahrens dmu_buf_impl_t *db; 184*fa9e4066Sahrens 185*fa9e4066Sahrens if (dn == NULL) 186*fa9e4066Sahrens return (NULL); 187*fa9e4066Sahrens 188*fa9e4066Sahrens db = dbuf_hold_bonus(dn, tag); 189*fa9e4066Sahrens /* XXX - hack: hold the first block if this is a ZAP object */ 190*fa9e4066Sahrens if (dmu_ot[dn->dn_type].ot_byteswap == zap_byteswap) { 191*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 192*fa9e4066Sahrens dn->dn_db0 = dbuf_hold(dn, 0); 193*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 194*fa9e4066Sahrens } 195*fa9e4066Sahrens dnode_rele(dn, FTAG); 196*fa9e4066Sahrens return (&db->db); 197*fa9e4066Sahrens } 198*fa9e4066Sahrens 199*fa9e4066Sahrens static dmu_buf_t ** 200*fa9e4066Sahrens dbuf_hold_array(dnode_t *dn, 201*fa9e4066Sahrens uint64_t offset, uint64_t length, int *numbufsp) 202*fa9e4066Sahrens { 203*fa9e4066Sahrens dmu_buf_t **dbp; 204*fa9e4066Sahrens uint64_t blkid, nblks, i; 205*fa9e4066Sahrens 206*fa9e4066Sahrens if (length == 0) { 207*fa9e4066Sahrens if (numbufsp) 208*fa9e4066Sahrens *numbufsp = 0; 209*fa9e4066Sahrens return (NULL); 210*fa9e4066Sahrens } 211*fa9e4066Sahrens 212*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 213*fa9e4066Sahrens if (dn->dn_datablkshift) { 214*fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 215*fa9e4066Sahrens nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - 216*fa9e4066Sahrens P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; 217*fa9e4066Sahrens } else { 218*fa9e4066Sahrens ASSERT3U(offset + length, <=, dn->dn_datablksz); 219*fa9e4066Sahrens nblks = 1; 220*fa9e4066Sahrens } 221*fa9e4066Sahrens dbp = kmem_alloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); 222*fa9e4066Sahrens 223*fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 224*fa9e4066Sahrens for (i = 0; i < nblks; i++) { 225*fa9e4066Sahrens dmu_buf_impl_t *dbuf; 226*fa9e4066Sahrens dbuf = dbuf_hold(dn, blkid+i); 227*fa9e4066Sahrens dbp[i] = &dbuf->db; 228*fa9e4066Sahrens } 229*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 230*fa9e4066Sahrens 231*fa9e4066Sahrens if (numbufsp) 232*fa9e4066Sahrens *numbufsp = nblks; 233*fa9e4066Sahrens return (dbp); 234*fa9e4066Sahrens } 235*fa9e4066Sahrens 236*fa9e4066Sahrens dmu_buf_t ** 237*fa9e4066Sahrens dmu_buf_hold_array(objset_t *os, uint64_t object, 238*fa9e4066Sahrens uint64_t offset, uint64_t length, int *numbufsp) 239*fa9e4066Sahrens { 240*fa9e4066Sahrens dnode_t *dn; 241*fa9e4066Sahrens dmu_buf_t **dbp; 242*fa9e4066Sahrens 243*fa9e4066Sahrens ASSERT(length <= DMU_MAX_ACCESS); 244*fa9e4066Sahrens 245*fa9e4066Sahrens if (length == 0) { 246*fa9e4066Sahrens if (numbufsp) 247*fa9e4066Sahrens *numbufsp = 0; 248*fa9e4066Sahrens return (NULL); 249*fa9e4066Sahrens } 250*fa9e4066Sahrens 251*fa9e4066Sahrens dn = dnode_hold(os->os, object, FTAG); 252*fa9e4066Sahrens dbp = dbuf_hold_array(dn, offset, length, numbufsp); 253*fa9e4066Sahrens dnode_rele(dn, FTAG); 254*fa9e4066Sahrens 255*fa9e4066Sahrens return (dbp); 256*fa9e4066Sahrens } 257*fa9e4066Sahrens 258*fa9e4066Sahrens void 259*fa9e4066Sahrens dmu_buf_add_ref(dmu_buf_t *dbuf, void *tag) 260*fa9e4066Sahrens { 261*fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; 262*fa9e4066Sahrens dbuf_add_ref(db, tag); 263*fa9e4066Sahrens } 264*fa9e4066Sahrens 265*fa9e4066Sahrens void 266*fa9e4066Sahrens dmu_buf_remove_ref(dmu_buf_t *dbuf, void *tag) 267*fa9e4066Sahrens { 268*fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; 269*fa9e4066Sahrens dbuf_remove_ref(db, tag); 270*fa9e4066Sahrens } 271*fa9e4066Sahrens 272*fa9e4066Sahrens void 273*fa9e4066Sahrens dmu_buf_rele(dmu_buf_t *dbuf_fake) 274*fa9e4066Sahrens { 275*fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf_fake; 276*fa9e4066Sahrens 277*fa9e4066Sahrens /* XXX - hack: hold the first block if this is a ZAP object */ 278*fa9e4066Sahrens if (db->db_blkid == DB_BONUS_BLKID && 279*fa9e4066Sahrens dmu_ot[db->db_dnode->dn_type].ot_byteswap == zap_byteswap) 280*fa9e4066Sahrens dbuf_rele(db->db_dnode->dn_db0); 281*fa9e4066Sahrens dbuf_rele(db); 282*fa9e4066Sahrens } 283*fa9e4066Sahrens 284*fa9e4066Sahrens void 285*fa9e4066Sahrens dmu_buf_rele_tag(dmu_buf_t *dbuf_fake, void *tag) 286*fa9e4066Sahrens { 287*fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf_fake; 288*fa9e4066Sahrens 289*fa9e4066Sahrens /* XXX - hack: hold the first block if this is a ZAP object */ 290*fa9e4066Sahrens if (db->db_blkid == DB_BONUS_BLKID && 291*fa9e4066Sahrens dmu_ot[db->db_dnode->dn_type].ot_byteswap == zap_byteswap) 292*fa9e4066Sahrens dbuf_rele(db->db_dnode->dn_db0); 293*fa9e4066Sahrens dbuf_remove_ref(db, tag); 294*fa9e4066Sahrens } 295*fa9e4066Sahrens 296*fa9e4066Sahrens void 297*fa9e4066Sahrens dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs) 298*fa9e4066Sahrens { 299*fa9e4066Sahrens int i; 300*fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 301*fa9e4066Sahrens 302*fa9e4066Sahrens if (numbufs == 0) 303*fa9e4066Sahrens return; 304*fa9e4066Sahrens 305*fa9e4066Sahrens ASSERT((numbufs * dbp[0]->db.db_size) <= DMU_MAX_ACCESS); 306*fa9e4066Sahrens 307*fa9e4066Sahrens for (i = 0; i < numbufs; i++) 308*fa9e4066Sahrens dbuf_rele(dbp[i]); 309*fa9e4066Sahrens 310*fa9e4066Sahrens kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); 311*fa9e4066Sahrens } 312*fa9e4066Sahrens 313*fa9e4066Sahrens void 314*fa9e4066Sahrens dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) 315*fa9e4066Sahrens { 316*fa9e4066Sahrens dnode_t *dn; 317*fa9e4066Sahrens uint64_t blkid; 318*fa9e4066Sahrens int nblks, i; 319*fa9e4066Sahrens 320*fa9e4066Sahrens if (len == 0) { /* they're interested in the bonus buffer */ 321*fa9e4066Sahrens dn = os->os->os_meta_dnode; 322*fa9e4066Sahrens 323*fa9e4066Sahrens if (object == 0 || object >= DN_MAX_OBJECT) 324*fa9e4066Sahrens return; 325*fa9e4066Sahrens 326*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 327*fa9e4066Sahrens blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); 328*fa9e4066Sahrens dbuf_prefetch(dn, blkid); 329*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 330*fa9e4066Sahrens return; 331*fa9e4066Sahrens } 332*fa9e4066Sahrens 333*fa9e4066Sahrens /* 334*fa9e4066Sahrens * XXX - Note, if the dnode for the requested object is not 335*fa9e4066Sahrens * already cached, we will do a *synchronous* read in the 336*fa9e4066Sahrens * dnode_hold() call. The same is true for any indirects. 337*fa9e4066Sahrens */ 338*fa9e4066Sahrens dn = dnode_hold(os->os, object, FTAG); 339*fa9e4066Sahrens if (dn == NULL) 340*fa9e4066Sahrens return; 341*fa9e4066Sahrens 342*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 343*fa9e4066Sahrens if (dn->dn_datablkshift) { 344*fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 345*fa9e4066Sahrens nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - 346*fa9e4066Sahrens P2ALIGN(offset, 1<<blkshift)) >> blkshift; 347*fa9e4066Sahrens } else { 348*fa9e4066Sahrens nblks = (offset < dn->dn_datablksz); 349*fa9e4066Sahrens } 350*fa9e4066Sahrens 351*fa9e4066Sahrens if (nblks != 0) { 352*fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 353*fa9e4066Sahrens for (i = 0; i < nblks; i++) 354*fa9e4066Sahrens dbuf_prefetch(dn, blkid+i); 355*fa9e4066Sahrens } 356*fa9e4066Sahrens 357*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 358*fa9e4066Sahrens 359*fa9e4066Sahrens dnode_rele(dn, FTAG); 360*fa9e4066Sahrens } 361*fa9e4066Sahrens 362*fa9e4066Sahrens void 363*fa9e4066Sahrens dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, 364*fa9e4066Sahrens uint64_t size, dmu_tx_t *tx) 365*fa9e4066Sahrens { 366*fa9e4066Sahrens dnode_t *dn = dnode_hold(os->os, object, FTAG); 367*fa9e4066Sahrens ASSERT(offset < UINT64_MAX); 368*fa9e4066Sahrens ASSERT(size == -1ULL || size <= UINT64_MAX - offset); 369*fa9e4066Sahrens dnode_free_range(dn, offset, size, tx); 370*fa9e4066Sahrens dnode_rele(dn, FTAG); 371*fa9e4066Sahrens } 372*fa9e4066Sahrens 373*fa9e4066Sahrens static int 374*fa9e4066Sahrens dmu_read_impl(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 375*fa9e4066Sahrens void *buf, uint32_t flags) 376*fa9e4066Sahrens { 377*fa9e4066Sahrens dnode_t *dn; 378*fa9e4066Sahrens dmu_buf_t **dbp; 379*fa9e4066Sahrens int numbufs, i; 380*fa9e4066Sahrens 381*fa9e4066Sahrens dn = dnode_hold(os->os, object, FTAG); 382*fa9e4066Sahrens 383*fa9e4066Sahrens if (dn->dn_datablkshift == 0) { 384*fa9e4066Sahrens int newsz = offset > dn->dn_datablksz ? 0 : 385*fa9e4066Sahrens MIN(size, dn->dn_datablksz - offset); 386*fa9e4066Sahrens bzero((char *)buf + newsz, size - newsz); 387*fa9e4066Sahrens size = newsz; 388*fa9e4066Sahrens } 389*fa9e4066Sahrens 390*fa9e4066Sahrens dnode_rele(dn, FTAG); 391*fa9e4066Sahrens 392*fa9e4066Sahrens if (size == 0) 393*fa9e4066Sahrens return (0); 394*fa9e4066Sahrens 395*fa9e4066Sahrens while (size > 0) { 396*fa9e4066Sahrens uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); 397*fa9e4066Sahrens int err; 398*fa9e4066Sahrens 399*fa9e4066Sahrens /* 400*fa9e4066Sahrens * NB: we could do this block-at-a-time, but it's nice 401*fa9e4066Sahrens * to be reading in parallel. 402*fa9e4066Sahrens */ 403*fa9e4066Sahrens dbp = dmu_buf_hold_array(os, object, offset, mylen, &numbufs); 404*fa9e4066Sahrens err = dmu_buf_read_array_impl((dmu_buf_impl_t **)dbp, numbufs, 405*fa9e4066Sahrens flags); 406*fa9e4066Sahrens if (err) { 407*fa9e4066Sahrens dmu_buf_rele_array(dbp, numbufs); 408*fa9e4066Sahrens return (err); 409*fa9e4066Sahrens } 410*fa9e4066Sahrens 411*fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 412*fa9e4066Sahrens int tocpy; 413*fa9e4066Sahrens int bufoff; 414*fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 415*fa9e4066Sahrens 416*fa9e4066Sahrens ASSERT(size > 0); 417*fa9e4066Sahrens 418*fa9e4066Sahrens bufoff = offset - db->db_offset; 419*fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 420*fa9e4066Sahrens 421*fa9e4066Sahrens bcopy((char *)db->db_data + bufoff, buf, tocpy); 422*fa9e4066Sahrens 423*fa9e4066Sahrens offset += tocpy; 424*fa9e4066Sahrens size -= tocpy; 425*fa9e4066Sahrens buf = (char *)buf + tocpy; 426*fa9e4066Sahrens } 427*fa9e4066Sahrens dmu_buf_rele_array(dbp, numbufs); 428*fa9e4066Sahrens } 429*fa9e4066Sahrens return (0); 430*fa9e4066Sahrens } 431*fa9e4066Sahrens 432*fa9e4066Sahrens void 433*fa9e4066Sahrens dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 434*fa9e4066Sahrens void *buf) 435*fa9e4066Sahrens { 436*fa9e4066Sahrens int err; 437*fa9e4066Sahrens 438*fa9e4066Sahrens err = dmu_read_impl(os, object, offset, size, buf, DB_RF_MUST_SUCCEED); 439*fa9e4066Sahrens ASSERT3U(err, ==, 0); 440*fa9e4066Sahrens } 441*fa9e4066Sahrens 442*fa9e4066Sahrens int 443*fa9e4066Sahrens dmu_read_canfail(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 444*fa9e4066Sahrens void *buf) 445*fa9e4066Sahrens { 446*fa9e4066Sahrens return (dmu_read_impl(os, object, offset, size, buf, DB_RF_CANFAIL)); 447*fa9e4066Sahrens } 448*fa9e4066Sahrens 449*fa9e4066Sahrens void 450*fa9e4066Sahrens dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 451*fa9e4066Sahrens const void *buf, dmu_tx_t *tx) 452*fa9e4066Sahrens { 453*fa9e4066Sahrens dmu_buf_t **dbp; 454*fa9e4066Sahrens int numbufs, i; 455*fa9e4066Sahrens 456*fa9e4066Sahrens dbp = dmu_buf_hold_array(os, object, offset, size, &numbufs); 457*fa9e4066Sahrens 458*fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 459*fa9e4066Sahrens int tocpy; 460*fa9e4066Sahrens int bufoff; 461*fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 462*fa9e4066Sahrens 463*fa9e4066Sahrens ASSERT(size > 0); 464*fa9e4066Sahrens 465*fa9e4066Sahrens bufoff = offset - db->db_offset; 466*fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 467*fa9e4066Sahrens 468*fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 469*fa9e4066Sahrens 470*fa9e4066Sahrens if (tocpy == db->db_size) 471*fa9e4066Sahrens dmu_buf_will_fill(db, tx); 472*fa9e4066Sahrens else 473*fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 474*fa9e4066Sahrens 475*fa9e4066Sahrens bcopy(buf, (char *)db->db_data + bufoff, tocpy); 476*fa9e4066Sahrens 477*fa9e4066Sahrens if (tocpy == db->db_size) 478*fa9e4066Sahrens dmu_buf_fill_done(db, tx); 479*fa9e4066Sahrens 480*fa9e4066Sahrens offset += tocpy; 481*fa9e4066Sahrens size -= tocpy; 482*fa9e4066Sahrens buf = (char *)buf + tocpy; 483*fa9e4066Sahrens } 484*fa9e4066Sahrens dmu_buf_rele_array(dbp, numbufs); 485*fa9e4066Sahrens } 486*fa9e4066Sahrens 487*fa9e4066Sahrens #ifdef _KERNEL 488*fa9e4066Sahrens int 489*fa9e4066Sahrens dmu_write_uio(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 490*fa9e4066Sahrens uio_t *uio, dmu_tx_t *tx) 491*fa9e4066Sahrens { 492*fa9e4066Sahrens dmu_buf_t **dbp; 493*fa9e4066Sahrens int numbufs, i; 494*fa9e4066Sahrens int err = 0; 495*fa9e4066Sahrens 496*fa9e4066Sahrens dbp = dmu_buf_hold_array(os, object, offset, size, &numbufs); 497*fa9e4066Sahrens 498*fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 499*fa9e4066Sahrens int tocpy; 500*fa9e4066Sahrens int bufoff; 501*fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 502*fa9e4066Sahrens 503*fa9e4066Sahrens ASSERT(size > 0); 504*fa9e4066Sahrens 505*fa9e4066Sahrens bufoff = offset - db->db_offset; 506*fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 507*fa9e4066Sahrens 508*fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 509*fa9e4066Sahrens 510*fa9e4066Sahrens if (tocpy == db->db_size) 511*fa9e4066Sahrens dmu_buf_will_fill(db, tx); 512*fa9e4066Sahrens else 513*fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 514*fa9e4066Sahrens 515*fa9e4066Sahrens /* 516*fa9e4066Sahrens * XXX uiomove could block forever (eg. nfs-backed 517*fa9e4066Sahrens * pages). There needs to be a uiolockdown() function 518*fa9e4066Sahrens * to lock the pages in memory, so that uiomove won't 519*fa9e4066Sahrens * block. 520*fa9e4066Sahrens */ 521*fa9e4066Sahrens err = uiomove((char *)db->db_data + bufoff, tocpy, 522*fa9e4066Sahrens UIO_WRITE, uio); 523*fa9e4066Sahrens 524*fa9e4066Sahrens if (tocpy == db->db_size) 525*fa9e4066Sahrens dmu_buf_fill_done(db, tx); 526*fa9e4066Sahrens 527*fa9e4066Sahrens if (err) 528*fa9e4066Sahrens break; 529*fa9e4066Sahrens 530*fa9e4066Sahrens offset += tocpy; 531*fa9e4066Sahrens size -= tocpy; 532*fa9e4066Sahrens } 533*fa9e4066Sahrens dmu_buf_rele_array(dbp, numbufs); 534*fa9e4066Sahrens return (err); 535*fa9e4066Sahrens } 536*fa9e4066Sahrens #endif 537*fa9e4066Sahrens 538*fa9e4066Sahrens struct backuparg { 539*fa9e4066Sahrens dmu_replay_record_t *drr; 540*fa9e4066Sahrens vnode_t *vp; 541*fa9e4066Sahrens objset_t *os; 542*fa9e4066Sahrens int err; 543*fa9e4066Sahrens }; 544*fa9e4066Sahrens 545*fa9e4066Sahrens static int 546*fa9e4066Sahrens dump_bytes(struct backuparg *ba, void *buf, int len) 547*fa9e4066Sahrens { 548*fa9e4066Sahrens ssize_t resid; /* have to get resid to get detailed errno */ 549*fa9e4066Sahrens /* Need to compute checksum here */ 550*fa9e4066Sahrens ASSERT3U(len % 8, ==, 0); 551*fa9e4066Sahrens ba->err = vn_rdwr(UIO_WRITE, ba->vp, 552*fa9e4066Sahrens (caddr_t)buf, len, 553*fa9e4066Sahrens 0, UIO_SYSSPACE, FAPPEND, RLIM_INFINITY, CRED(), &resid); 554*fa9e4066Sahrens return (ba->err); 555*fa9e4066Sahrens } 556*fa9e4066Sahrens 557*fa9e4066Sahrens static int 558*fa9e4066Sahrens dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, 559*fa9e4066Sahrens uint64_t length) 560*fa9e4066Sahrens { 561*fa9e4066Sahrens /* write a FREE record */ 562*fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 563*fa9e4066Sahrens ba->drr->drr_type = DRR_FREE; 564*fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_object = object; 565*fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_offset = offset; 566*fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_length = length; 567*fa9e4066Sahrens 568*fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 569*fa9e4066Sahrens return (EINTR); 570*fa9e4066Sahrens return (0); 571*fa9e4066Sahrens } 572*fa9e4066Sahrens 573*fa9e4066Sahrens static int 574*fa9e4066Sahrens dump_data(struct backuparg *ba, dmu_object_type_t type, 575*fa9e4066Sahrens uint64_t object, uint64_t offset, int blksz, void *data) 576*fa9e4066Sahrens { 577*fa9e4066Sahrens /* write a DATA record */ 578*fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 579*fa9e4066Sahrens ba->drr->drr_type = DRR_WRITE; 580*fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_object = object; 581*fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_type = type; 582*fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_offset = offset; 583*fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_length = blksz; 584*fa9e4066Sahrens 585*fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 586*fa9e4066Sahrens return (EINTR); 587*fa9e4066Sahrens if (dump_bytes(ba, data, blksz)) 588*fa9e4066Sahrens return (EINTR); 589*fa9e4066Sahrens return (0); 590*fa9e4066Sahrens } 591*fa9e4066Sahrens 592*fa9e4066Sahrens static int 593*fa9e4066Sahrens dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) 594*fa9e4066Sahrens { 595*fa9e4066Sahrens /* write a FREEOBJECTS record */ 596*fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 597*fa9e4066Sahrens ba->drr->drr_type = DRR_FREEOBJECTS; 598*fa9e4066Sahrens ba->drr->drr_u.drr_freeobjects.drr_firstobj = firstobj; 599*fa9e4066Sahrens ba->drr->drr_u.drr_freeobjects.drr_numobjs = numobjs; 600*fa9e4066Sahrens 601*fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 602*fa9e4066Sahrens return (EINTR); 603*fa9e4066Sahrens return (0); 604*fa9e4066Sahrens } 605*fa9e4066Sahrens 606*fa9e4066Sahrens static int 607*fa9e4066Sahrens dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) 608*fa9e4066Sahrens { 609*fa9e4066Sahrens if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 610*fa9e4066Sahrens return (dump_freeobjects(ba, object, 1)); 611*fa9e4066Sahrens 612*fa9e4066Sahrens /* write an OBJECT record */ 613*fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 614*fa9e4066Sahrens ba->drr->drr_type = DRR_OBJECT; 615*fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_object = object; 616*fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_type = dnp->dn_type; 617*fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_bonustype = dnp->dn_bonustype; 618*fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_blksz = 619*fa9e4066Sahrens dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 620*fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_bonuslen = dnp->dn_bonuslen; 621*fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_checksum = dnp->dn_checksum; 622*fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_compress = dnp->dn_compress; 623*fa9e4066Sahrens 624*fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 625*fa9e4066Sahrens return (EINTR); 626*fa9e4066Sahrens 627*fa9e4066Sahrens if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8))) 628*fa9e4066Sahrens return (EINTR); 629*fa9e4066Sahrens 630*fa9e4066Sahrens /* free anything past the end of the file */ 631*fa9e4066Sahrens if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * 632*fa9e4066Sahrens (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) 633*fa9e4066Sahrens return (EINTR); 634*fa9e4066Sahrens if (ba->err) 635*fa9e4066Sahrens return (EINTR); 636*fa9e4066Sahrens return (0); 637*fa9e4066Sahrens } 638*fa9e4066Sahrens 639*fa9e4066Sahrens #define BP_SPAN(dnp, level) \ 640*fa9e4066Sahrens (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 641*fa9e4066Sahrens (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 642*fa9e4066Sahrens 643*fa9e4066Sahrens static int 644*fa9e4066Sahrens backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 645*fa9e4066Sahrens { 646*fa9e4066Sahrens struct backuparg *ba = arg; 647*fa9e4066Sahrens uint64_t object = bc->bc_bookmark.zb_object; 648*fa9e4066Sahrens int level = bc->bc_bookmark.zb_level; 649*fa9e4066Sahrens uint64_t blkid = bc->bc_bookmark.zb_blkid; 650*fa9e4066Sahrens blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL; 651*fa9e4066Sahrens dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 652*fa9e4066Sahrens void *data = bc->bc_data; 653*fa9e4066Sahrens int err = 0; 654*fa9e4066Sahrens 655*fa9e4066Sahrens if (issig(JUSTLOOKING)) 656*fa9e4066Sahrens return (EINTR); 657*fa9e4066Sahrens 658*fa9e4066Sahrens ASSERT(data || bp == NULL); 659*fa9e4066Sahrens 660*fa9e4066Sahrens if (bp == NULL && object == 0) { 661*fa9e4066Sahrens uint64_t span = BP_SPAN(bc->bc_dnode, level); 662*fa9e4066Sahrens uint64_t dnobj = (blkid * span) >> DNODE_SHIFT; 663*fa9e4066Sahrens err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); 664*fa9e4066Sahrens } else if (bp == NULL) { 665*fa9e4066Sahrens uint64_t span = BP_SPAN(bc->bc_dnode, level); 666*fa9e4066Sahrens err = dump_free(ba, object, blkid * span, span); 667*fa9e4066Sahrens } else if (data && level == 0 && type == DMU_OT_DNODE) { 668*fa9e4066Sahrens dnode_phys_t *blk = data; 669*fa9e4066Sahrens int i; 670*fa9e4066Sahrens int blksz = BP_GET_LSIZE(bp); 671*fa9e4066Sahrens 672*fa9e4066Sahrens for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 673*fa9e4066Sahrens uint64_t dnobj = 674*fa9e4066Sahrens (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 675*fa9e4066Sahrens err = dump_dnode(ba, dnobj, blk+i); 676*fa9e4066Sahrens if (err) 677*fa9e4066Sahrens break; 678*fa9e4066Sahrens } 679*fa9e4066Sahrens } else if (level == 0 && 680*fa9e4066Sahrens type != DMU_OT_DNODE && type != DMU_OT_OBJSET) { 681*fa9e4066Sahrens int blksz = BP_GET_LSIZE(bp); 682*fa9e4066Sahrens if (data == NULL) { 683*fa9e4066Sahrens arc_buf_t *abuf; 684*fa9e4066Sahrens 685*fa9e4066Sahrens (void) arc_read(NULL, spa, bp, 686*fa9e4066Sahrens dmu_ot[type].ot_byteswap, arc_getbuf_func, &abuf, 687*fa9e4066Sahrens ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_MUSTSUCCEED, 688*fa9e4066Sahrens ARC_WAIT); 689*fa9e4066Sahrens 690*fa9e4066Sahrens if (abuf) { 691*fa9e4066Sahrens err = dump_data(ba, type, object, blkid * blksz, 692*fa9e4066Sahrens blksz, abuf->b_data); 693*fa9e4066Sahrens arc_buf_free(abuf, &abuf); 694*fa9e4066Sahrens } 695*fa9e4066Sahrens } else { 696*fa9e4066Sahrens err = dump_data(ba, type, object, blkid * blksz, 697*fa9e4066Sahrens blksz, data); 698*fa9e4066Sahrens } 699*fa9e4066Sahrens } 700*fa9e4066Sahrens 701*fa9e4066Sahrens ASSERT(err == 0 || err == EINTR); 702*fa9e4066Sahrens return (err); 703*fa9e4066Sahrens } 704*fa9e4066Sahrens 705*fa9e4066Sahrens int 706*fa9e4066Sahrens dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp) 707*fa9e4066Sahrens { 708*fa9e4066Sahrens dsl_dataset_t *ds = tosnap->os->os_dsl_dataset; 709*fa9e4066Sahrens dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL; 710*fa9e4066Sahrens dmu_replay_record_t *drr; 711*fa9e4066Sahrens struct backuparg ba; 712*fa9e4066Sahrens int err; 713*fa9e4066Sahrens 714*fa9e4066Sahrens /* tosnap must be a snapshot */ 715*fa9e4066Sahrens if (ds->ds_phys->ds_next_snap_obj == 0) 716*fa9e4066Sahrens return (EINVAL); 717*fa9e4066Sahrens 718*fa9e4066Sahrens /* fromsnap must be an earlier snapshot from the same fs as tosnap */ 719*fa9e4066Sahrens if (fromds && (ds->ds_dir != fromds->ds_dir || 720*fa9e4066Sahrens fromds->ds_phys->ds_creation_txg >= 721*fa9e4066Sahrens ds->ds_phys->ds_creation_txg)) 722*fa9e4066Sahrens return (EXDEV); 723*fa9e4066Sahrens 724*fa9e4066Sahrens drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 725*fa9e4066Sahrens drr->drr_type = DRR_BEGIN; 726*fa9e4066Sahrens drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 727*fa9e4066Sahrens drr->drr_u.drr_begin.drr_version = DMU_BACKUP_VERSION; 728*fa9e4066Sahrens drr->drr_u.drr_begin.drr_creation_time = 729*fa9e4066Sahrens ds->ds_phys->ds_creation_time; 730*fa9e4066Sahrens drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type; 731*fa9e4066Sahrens drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 732*fa9e4066Sahrens if (fromds) 733*fa9e4066Sahrens drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; 734*fa9e4066Sahrens dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 735*fa9e4066Sahrens 736*fa9e4066Sahrens ba.drr = drr; 737*fa9e4066Sahrens ba.vp = vp; 738*fa9e4066Sahrens ba.os = tosnap; 739*fa9e4066Sahrens 740*fa9e4066Sahrens if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) { 741*fa9e4066Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 742*fa9e4066Sahrens return (ba.err); 743*fa9e4066Sahrens } 744*fa9e4066Sahrens 745*fa9e4066Sahrens err = traverse_dsl_dataset(ds, 746*fa9e4066Sahrens fromds ? fromds->ds_phys->ds_creation_txg : 0, 747*fa9e4066Sahrens ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK, 748*fa9e4066Sahrens backup_cb, &ba); 749*fa9e4066Sahrens 750*fa9e4066Sahrens if (err) { 751*fa9e4066Sahrens if (err == EINTR && ba.err) 752*fa9e4066Sahrens err = ba.err; 753*fa9e4066Sahrens return (err); 754*fa9e4066Sahrens } 755*fa9e4066Sahrens 756*fa9e4066Sahrens bzero(drr, sizeof (dmu_replay_record_t)); 757*fa9e4066Sahrens drr->drr_type = DRR_END; 758*fa9e4066Sahrens 759*fa9e4066Sahrens if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) 760*fa9e4066Sahrens return (ba.err); 761*fa9e4066Sahrens 762*fa9e4066Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 763*fa9e4066Sahrens 764*fa9e4066Sahrens return (0); 765*fa9e4066Sahrens } 766*fa9e4066Sahrens 767*fa9e4066Sahrens struct restorearg { 768*fa9e4066Sahrens int err; 769*fa9e4066Sahrens int byteswap; 770*fa9e4066Sahrens vnode_t *vp; 771*fa9e4066Sahrens char *buf; 772*fa9e4066Sahrens uint64_t voff; 773*fa9e4066Sahrens int buflen; /* number of valid bytes in buf */ 774*fa9e4066Sahrens int bufoff; /* next offset to read */ 775*fa9e4066Sahrens int bufsize; /* amount of memory allocated for buf */ 776*fa9e4066Sahrens }; 777*fa9e4066Sahrens 778*fa9e4066Sahrens static int 779*fa9e4066Sahrens replay_incremental_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 780*fa9e4066Sahrens { 781*fa9e4066Sahrens struct drr_begin *drrb = arg; 782*fa9e4066Sahrens dsl_dataset_t *ds = NULL; 783*fa9e4066Sahrens dsl_dataset_t *ds_prev = NULL; 784*fa9e4066Sahrens const char *snapname; 785*fa9e4066Sahrens int err = EINVAL; 786*fa9e4066Sahrens uint64_t val; 787*fa9e4066Sahrens 788*fa9e4066Sahrens /* this must be a filesytem */ 789*fa9e4066Sahrens if (dd->dd_phys->dd_head_dataset_obj == 0) 790*fa9e4066Sahrens goto die; 791*fa9e4066Sahrens 792*fa9e4066Sahrens ds = dsl_dataset_open_obj(dd->dd_pool, dd->dd_phys->dd_head_dataset_obj, 793*fa9e4066Sahrens NULL, DS_MODE_EXCLUSIVE, FTAG); 794*fa9e4066Sahrens 795*fa9e4066Sahrens if (ds == NULL) { 796*fa9e4066Sahrens err = EBUSY; 797*fa9e4066Sahrens goto die; 798*fa9e4066Sahrens } 799*fa9e4066Sahrens 800*fa9e4066Sahrens /* must already be a snapshot of this fs */ 801*fa9e4066Sahrens if (ds->ds_phys->ds_prev_snap_obj == 0) { 802*fa9e4066Sahrens err = ENODEV; 803*fa9e4066Sahrens goto die; 804*fa9e4066Sahrens } 805*fa9e4066Sahrens 806*fa9e4066Sahrens /* most recent snapshot must match fromguid */ 807*fa9e4066Sahrens ds_prev = dsl_dataset_open_obj(dd->dd_pool, 808*fa9e4066Sahrens ds->ds_phys->ds_prev_snap_obj, NULL, 809*fa9e4066Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); 810*fa9e4066Sahrens if (ds_prev->ds_phys->ds_guid != drrb->drr_fromguid) { 811*fa9e4066Sahrens err = ENODEV; 812*fa9e4066Sahrens goto die; 813*fa9e4066Sahrens } 814*fa9e4066Sahrens 815*fa9e4066Sahrens /* must not have any changes since most recent snapshot */ 816*fa9e4066Sahrens if (ds->ds_phys->ds_bp.blk_birth > 817*fa9e4066Sahrens ds_prev->ds_phys->ds_creation_txg) { 818*fa9e4066Sahrens err = ETXTBSY; 819*fa9e4066Sahrens goto die; 820*fa9e4066Sahrens } 821*fa9e4066Sahrens 822*fa9e4066Sahrens /* new snapshot name must not exist */ 823*fa9e4066Sahrens snapname = strrchr(drrb->drr_toname, '@'); 824*fa9e4066Sahrens if (snapname == NULL) { 825*fa9e4066Sahrens err = EEXIST; 826*fa9e4066Sahrens goto die; 827*fa9e4066Sahrens } 828*fa9e4066Sahrens snapname++; 829*fa9e4066Sahrens err = zap_lookup(dd->dd_pool->dp_meta_objset, 830*fa9e4066Sahrens ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &val); 831*fa9e4066Sahrens if (err != ENOENT) { 832*fa9e4066Sahrens if (err == 0) 833*fa9e4066Sahrens err = EEXIST; 834*fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 835*fa9e4066Sahrens dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); 836*fa9e4066Sahrens return (err); 837*fa9e4066Sahrens } 838*fa9e4066Sahrens 839*fa9e4066Sahrens dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); 840*fa9e4066Sahrens 841*fa9e4066Sahrens /* The point of no (unsuccessful) return. */ 842*fa9e4066Sahrens 843*fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 844*fa9e4066Sahrens ds->ds_phys->ds_restoring = TRUE; 845*fa9e4066Sahrens 846*fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 847*fa9e4066Sahrens return (0); 848*fa9e4066Sahrens 849*fa9e4066Sahrens die: 850*fa9e4066Sahrens if (ds_prev) 851*fa9e4066Sahrens dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); 852*fa9e4066Sahrens if (ds) 853*fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 854*fa9e4066Sahrens return (err); 855*fa9e4066Sahrens } 856*fa9e4066Sahrens 857*fa9e4066Sahrens static int 858*fa9e4066Sahrens replay_full_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 859*fa9e4066Sahrens { 860*fa9e4066Sahrens struct drr_begin *drrb = arg; 861*fa9e4066Sahrens int err; 862*fa9e4066Sahrens char *fsfullname, *fslastname, *cp; 863*fa9e4066Sahrens dsl_dataset_t *ds; 864*fa9e4066Sahrens 865*fa9e4066Sahrens fsfullname = kmem_alloc(MAXNAMELEN, KM_SLEEP); 866*fa9e4066Sahrens (void) strncpy(fsfullname, drrb->drr_toname, MAXNAMELEN); 867*fa9e4066Sahrens cp = strchr(fsfullname, '@'); 868*fa9e4066Sahrens if (cp == NULL) { 869*fa9e4066Sahrens kmem_free(fsfullname, MAXNAMELEN); 870*fa9e4066Sahrens return (EINVAL); 871*fa9e4066Sahrens } 872*fa9e4066Sahrens *cp = '\0'; 873*fa9e4066Sahrens fslastname = strrchr(fsfullname, '/'); 874*fa9e4066Sahrens if (fslastname == NULL) { 875*fa9e4066Sahrens kmem_free(fsfullname, MAXNAMELEN); 876*fa9e4066Sahrens return (EINVAL); 877*fa9e4066Sahrens } 878*fa9e4066Sahrens fslastname++; 879*fa9e4066Sahrens 880*fa9e4066Sahrens err = dsl_dataset_create_sync(dd, fsfullname, fslastname, NULL, tx); 881*fa9e4066Sahrens if (err) { 882*fa9e4066Sahrens kmem_free(fsfullname, MAXNAMELEN); 883*fa9e4066Sahrens return (err); 884*fa9e4066Sahrens } 885*fa9e4066Sahrens 886*fa9e4066Sahrens /* the point of no (unsuccessful) return */ 887*fa9e4066Sahrens 888*fa9e4066Sahrens err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, fsfullname, 889*fa9e4066Sahrens DS_MODE_EXCLUSIVE, FTAG, &ds); 890*fa9e4066Sahrens ASSERT3U(err, ==, 0); 891*fa9e4066Sahrens kmem_free(fsfullname, MAXNAMELEN); 892*fa9e4066Sahrens 893*fa9e4066Sahrens (void) dmu_objset_create_impl(dsl_dataset_get_spa(ds), 894*fa9e4066Sahrens ds, drrb->drr_type, tx); 895*fa9e4066Sahrens 896*fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 897*fa9e4066Sahrens ds->ds_phys->ds_restoring = TRUE; 898*fa9e4066Sahrens 899*fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 900*fa9e4066Sahrens return (0); 901*fa9e4066Sahrens } 902*fa9e4066Sahrens 903*fa9e4066Sahrens static int 904*fa9e4066Sahrens replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 905*fa9e4066Sahrens { 906*fa9e4066Sahrens struct drr_begin *drrb = arg; 907*fa9e4066Sahrens int err; 908*fa9e4066Sahrens char *snapname; 909*fa9e4066Sahrens dsl_dataset_t *ds; 910*fa9e4066Sahrens 911*fa9e4066Sahrens /* XXX verify that drr_toname is in dd */ 912*fa9e4066Sahrens 913*fa9e4066Sahrens snapname = strchr(drrb->drr_toname, '@'); 914*fa9e4066Sahrens if (snapname == NULL) 915*fa9e4066Sahrens return (EINVAL); 916*fa9e4066Sahrens snapname++; 917*fa9e4066Sahrens 918*fa9e4066Sahrens /* create snapshot */ 919*fa9e4066Sahrens err = dsl_dataset_snapshot_sync(dd, snapname, tx); 920*fa9e4066Sahrens if (err) 921*fa9e4066Sahrens return (err); 922*fa9e4066Sahrens 923*fa9e4066Sahrens /* set snapshot's creation time and guid */ 924*fa9e4066Sahrens err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, drrb->drr_toname, 925*fa9e4066Sahrens DS_MODE_PRIMARY | DS_MODE_READONLY | DS_MODE_RESTORE, FTAG, &ds); 926*fa9e4066Sahrens ASSERT3U(err, ==, 0); 927*fa9e4066Sahrens 928*fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 929*fa9e4066Sahrens ds->ds_phys->ds_creation_time = drrb->drr_creation_time; 930*fa9e4066Sahrens ds->ds_phys->ds_guid = drrb->drr_toguid; 931*fa9e4066Sahrens ds->ds_phys->ds_restoring = FALSE; 932*fa9e4066Sahrens 933*fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG); 934*fa9e4066Sahrens 935*fa9e4066Sahrens ds = dsl_dataset_open_obj(dd->dd_pool, dd->dd_phys->dd_head_dataset_obj, 936*fa9e4066Sahrens NULL, DS_MODE_STANDARD | DS_MODE_RESTORE, FTAG); 937*fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 938*fa9e4066Sahrens ds->ds_phys->ds_restoring = FALSE; 939*fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 940*fa9e4066Sahrens 941*fa9e4066Sahrens return (0); 942*fa9e4066Sahrens } 943*fa9e4066Sahrens 944*fa9e4066Sahrens void * 945*fa9e4066Sahrens restore_read(struct restorearg *ra, int len) 946*fa9e4066Sahrens { 947*fa9e4066Sahrens void *rv; 948*fa9e4066Sahrens 949*fa9e4066Sahrens /* some things will require 8-byte alignment, so everything must */ 950*fa9e4066Sahrens ASSERT3U(len % 8, ==, 0); 951*fa9e4066Sahrens 952*fa9e4066Sahrens while (ra->buflen - ra->bufoff < len) { 953*fa9e4066Sahrens ssize_t resid; 954*fa9e4066Sahrens int leftover = ra->buflen - ra->bufoff; 955*fa9e4066Sahrens 956*fa9e4066Sahrens (void) memmove(ra->buf, ra->buf + ra->bufoff, leftover); 957*fa9e4066Sahrens ra->err = vn_rdwr(UIO_READ, ra->vp, 958*fa9e4066Sahrens (caddr_t)ra->buf + leftover, ra->bufsize - leftover, 959*fa9e4066Sahrens ra->voff, UIO_SYSSPACE, FAPPEND, 960*fa9e4066Sahrens RLIM_INFINITY, CRED(), &resid); 961*fa9e4066Sahrens 962*fa9e4066Sahrens /* Need to compute checksum */ 963*fa9e4066Sahrens 964*fa9e4066Sahrens ra->voff += ra->bufsize - leftover - resid; 965*fa9e4066Sahrens ra->buflen = ra->bufsize - resid; 966*fa9e4066Sahrens ra->bufoff = 0; 967*fa9e4066Sahrens if (resid == ra->bufsize - leftover) 968*fa9e4066Sahrens ra->err = EINVAL; 969*fa9e4066Sahrens if (ra->err) 970*fa9e4066Sahrens return (NULL); 971*fa9e4066Sahrens } 972*fa9e4066Sahrens 973*fa9e4066Sahrens ASSERT3U(ra->bufoff % 8, ==, 0); 974*fa9e4066Sahrens ASSERT3U(ra->buflen - ra->bufoff, >=, len); 975*fa9e4066Sahrens rv = ra->buf + ra->bufoff; 976*fa9e4066Sahrens ra->bufoff += len; 977*fa9e4066Sahrens return (rv); 978*fa9e4066Sahrens } 979*fa9e4066Sahrens 980*fa9e4066Sahrens static void 981*fa9e4066Sahrens backup_byteswap(dmu_replay_record_t *drr) 982*fa9e4066Sahrens { 983*fa9e4066Sahrens #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 984*fa9e4066Sahrens #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 985*fa9e4066Sahrens drr->drr_type = BSWAP_32(drr->drr_type); 986*fa9e4066Sahrens switch (drr->drr_type) { 987*fa9e4066Sahrens case DRR_BEGIN: 988*fa9e4066Sahrens DO64(drr_begin.drr_magic); 989*fa9e4066Sahrens DO64(drr_begin.drr_version); 990*fa9e4066Sahrens DO64(drr_begin.drr_creation_time); 991*fa9e4066Sahrens DO32(drr_begin.drr_type); 992*fa9e4066Sahrens DO64(drr_begin.drr_toguid); 993*fa9e4066Sahrens DO64(drr_begin.drr_fromguid); 994*fa9e4066Sahrens break; 995*fa9e4066Sahrens case DRR_OBJECT: 996*fa9e4066Sahrens DO64(drr_object.drr_object); 997*fa9e4066Sahrens /* DO64(drr_object.drr_allocation_txg); */ 998*fa9e4066Sahrens DO32(drr_object.drr_type); 999*fa9e4066Sahrens DO32(drr_object.drr_bonustype); 1000*fa9e4066Sahrens DO32(drr_object.drr_blksz); 1001*fa9e4066Sahrens DO32(drr_object.drr_bonuslen); 1002*fa9e4066Sahrens break; 1003*fa9e4066Sahrens case DRR_FREEOBJECTS: 1004*fa9e4066Sahrens DO64(drr_freeobjects.drr_firstobj); 1005*fa9e4066Sahrens DO64(drr_freeobjects.drr_numobjs); 1006*fa9e4066Sahrens break; 1007*fa9e4066Sahrens case DRR_WRITE: 1008*fa9e4066Sahrens DO64(drr_write.drr_object); 1009*fa9e4066Sahrens DO32(drr_write.drr_type); 1010*fa9e4066Sahrens DO64(drr_write.drr_offset); 1011*fa9e4066Sahrens DO64(drr_write.drr_length); 1012*fa9e4066Sahrens break; 1013*fa9e4066Sahrens case DRR_FREE: 1014*fa9e4066Sahrens DO64(drr_free.drr_object); 1015*fa9e4066Sahrens DO64(drr_free.drr_offset); 1016*fa9e4066Sahrens DO64(drr_free.drr_length); 1017*fa9e4066Sahrens break; 1018*fa9e4066Sahrens case DRR_END: 1019*fa9e4066Sahrens DO64(drr_end.drr_checksum); 1020*fa9e4066Sahrens break; 1021*fa9e4066Sahrens } 1022*fa9e4066Sahrens #undef DO64 1023*fa9e4066Sahrens #undef DO32 1024*fa9e4066Sahrens } 1025*fa9e4066Sahrens 1026*fa9e4066Sahrens static int 1027*fa9e4066Sahrens restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 1028*fa9e4066Sahrens { 1029*fa9e4066Sahrens int err; 1030*fa9e4066Sahrens dmu_tx_t *tx; 1031*fa9e4066Sahrens 1032*fa9e4066Sahrens err = dmu_object_info(os, drro->drr_object, NULL); 1033*fa9e4066Sahrens 1034*fa9e4066Sahrens if (err != 0 && err != ENOENT) 1035*fa9e4066Sahrens return (EINVAL); 1036*fa9e4066Sahrens 1037*fa9e4066Sahrens if (drro->drr_type == DMU_OT_NONE || 1038*fa9e4066Sahrens drro->drr_type >= DMU_OT_NUMTYPES || 1039*fa9e4066Sahrens drro->drr_bonustype >= DMU_OT_NUMTYPES || 1040*fa9e4066Sahrens drro->drr_checksum >= ZIO_CHECKSUM_FUNCTIONS || 1041*fa9e4066Sahrens drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1042*fa9e4066Sahrens P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1043*fa9e4066Sahrens drro->drr_blksz < SPA_MINBLOCKSIZE || 1044*fa9e4066Sahrens drro->drr_blksz > SPA_MAXBLOCKSIZE || 1045*fa9e4066Sahrens drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1046*fa9e4066Sahrens return (EINVAL); 1047*fa9e4066Sahrens } 1048*fa9e4066Sahrens 1049*fa9e4066Sahrens tx = dmu_tx_create(os); 1050*fa9e4066Sahrens 1051*fa9e4066Sahrens if (err == ENOENT) { 1052*fa9e4066Sahrens /* currently free, want to be allocated */ 1053*fa9e4066Sahrens dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1054*fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1); 1055*fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1056*fa9e4066Sahrens if (err) { 1057*fa9e4066Sahrens dmu_tx_abort(tx); 1058*fa9e4066Sahrens return (err); 1059*fa9e4066Sahrens } 1060*fa9e4066Sahrens err = dmu_object_claim(os, drro->drr_object, 1061*fa9e4066Sahrens drro->drr_type, drro->drr_blksz, 1062*fa9e4066Sahrens drro->drr_bonustype, drro->drr_bonuslen, tx); 1063*fa9e4066Sahrens } else { 1064*fa9e4066Sahrens /* currently allocated, want to be allocated */ 1065*fa9e4066Sahrens dmu_tx_hold_bonus(tx, drro->drr_object); 1066*fa9e4066Sahrens /* 1067*fa9e4066Sahrens * We may change blocksize, so need to 1068*fa9e4066Sahrens * hold_write 1069*fa9e4066Sahrens */ 1070*fa9e4066Sahrens dmu_tx_hold_write(tx, drro->drr_object, 0, 1); 1071*fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1072*fa9e4066Sahrens if (err) { 1073*fa9e4066Sahrens dmu_tx_abort(tx); 1074*fa9e4066Sahrens return (err); 1075*fa9e4066Sahrens } 1076*fa9e4066Sahrens 1077*fa9e4066Sahrens err = dmu_object_reclaim(os, drro->drr_object, 1078*fa9e4066Sahrens drro->drr_type, drro->drr_blksz, 1079*fa9e4066Sahrens drro->drr_bonustype, drro->drr_bonuslen, tx); 1080*fa9e4066Sahrens } 1081*fa9e4066Sahrens if (err) { 1082*fa9e4066Sahrens dmu_tx_commit(tx); 1083*fa9e4066Sahrens return (EINVAL); 1084*fa9e4066Sahrens } 1085*fa9e4066Sahrens 1086*fa9e4066Sahrens dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx); 1087*fa9e4066Sahrens dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1088*fa9e4066Sahrens 1089*fa9e4066Sahrens if (drro->drr_bonuslen) { 1090*fa9e4066Sahrens dmu_buf_t *db; 1091*fa9e4066Sahrens void *data; 1092*fa9e4066Sahrens db = dmu_bonus_hold(os, drro->drr_object); 1093*fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 1094*fa9e4066Sahrens 1095*fa9e4066Sahrens ASSERT3U(db->db_size, ==, drro->drr_bonuslen); 1096*fa9e4066Sahrens data = restore_read(ra, P2ROUNDUP(db->db_size, 8)); 1097*fa9e4066Sahrens if (data == NULL) { 1098*fa9e4066Sahrens dmu_tx_commit(tx); 1099*fa9e4066Sahrens return (ra->err); 1100*fa9e4066Sahrens } 1101*fa9e4066Sahrens bcopy(data, db->db_data, db->db_size); 1102*fa9e4066Sahrens if (ra->byteswap) { 1103*fa9e4066Sahrens dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, 1104*fa9e4066Sahrens drro->drr_bonuslen); 1105*fa9e4066Sahrens } 1106*fa9e4066Sahrens dmu_buf_rele(db); 1107*fa9e4066Sahrens } 1108*fa9e4066Sahrens dmu_tx_commit(tx); 1109*fa9e4066Sahrens return (0); 1110*fa9e4066Sahrens } 1111*fa9e4066Sahrens 1112*fa9e4066Sahrens /* ARGSUSED */ 1113*fa9e4066Sahrens static int 1114*fa9e4066Sahrens restore_freeobjects(struct restorearg *ra, objset_t *os, 1115*fa9e4066Sahrens struct drr_freeobjects *drrfo) 1116*fa9e4066Sahrens { 1117*fa9e4066Sahrens uint64_t obj; 1118*fa9e4066Sahrens 1119*fa9e4066Sahrens if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1120*fa9e4066Sahrens return (EINVAL); 1121*fa9e4066Sahrens 1122*fa9e4066Sahrens for (obj = drrfo->drr_firstobj; 1123*fa9e4066Sahrens obj < drrfo->drr_firstobj + drrfo->drr_numobjs; obj++) { 1124*fa9e4066Sahrens dmu_tx_t *tx; 1125*fa9e4066Sahrens int err; 1126*fa9e4066Sahrens 1127*fa9e4066Sahrens if (dmu_object_info(os, obj, NULL) != 0) 1128*fa9e4066Sahrens continue; 1129*fa9e4066Sahrens 1130*fa9e4066Sahrens tx = dmu_tx_create(os); 1131*fa9e4066Sahrens dmu_tx_hold_bonus(tx, obj); 1132*fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1133*fa9e4066Sahrens if (err) { 1134*fa9e4066Sahrens dmu_tx_abort(tx); 1135*fa9e4066Sahrens return (err); 1136*fa9e4066Sahrens } 1137*fa9e4066Sahrens err = dmu_object_free(os, obj, tx); 1138*fa9e4066Sahrens dmu_tx_commit(tx); 1139*fa9e4066Sahrens if (err && err != ENOENT) 1140*fa9e4066Sahrens return (EINVAL); 1141*fa9e4066Sahrens } 1142*fa9e4066Sahrens return (0); 1143*fa9e4066Sahrens } 1144*fa9e4066Sahrens 1145*fa9e4066Sahrens static int 1146*fa9e4066Sahrens restore_write(struct restorearg *ra, objset_t *os, 1147*fa9e4066Sahrens struct drr_write *drrw) 1148*fa9e4066Sahrens { 1149*fa9e4066Sahrens dmu_tx_t *tx; 1150*fa9e4066Sahrens void *data; 1151*fa9e4066Sahrens int err; 1152*fa9e4066Sahrens 1153*fa9e4066Sahrens if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1154*fa9e4066Sahrens drrw->drr_type >= DMU_OT_NUMTYPES) 1155*fa9e4066Sahrens return (EINVAL); 1156*fa9e4066Sahrens 1157*fa9e4066Sahrens data = restore_read(ra, drrw->drr_length); 1158*fa9e4066Sahrens if (data == NULL) 1159*fa9e4066Sahrens return (ra->err); 1160*fa9e4066Sahrens 1161*fa9e4066Sahrens if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1162*fa9e4066Sahrens return (EINVAL); 1163*fa9e4066Sahrens 1164*fa9e4066Sahrens tx = dmu_tx_create(os); 1165*fa9e4066Sahrens 1166*fa9e4066Sahrens dmu_tx_hold_write(tx, drrw->drr_object, 1167*fa9e4066Sahrens drrw->drr_offset, drrw->drr_length); 1168*fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1169*fa9e4066Sahrens if (err) { 1170*fa9e4066Sahrens dmu_tx_abort(tx); 1171*fa9e4066Sahrens return (err); 1172*fa9e4066Sahrens } 1173*fa9e4066Sahrens if (ra->byteswap) 1174*fa9e4066Sahrens dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); 1175*fa9e4066Sahrens dmu_write(os, drrw->drr_object, 1176*fa9e4066Sahrens drrw->drr_offset, drrw->drr_length, data, tx); 1177*fa9e4066Sahrens dmu_tx_commit(tx); 1178*fa9e4066Sahrens return (0); 1179*fa9e4066Sahrens } 1180*fa9e4066Sahrens 1181*fa9e4066Sahrens /* ARGSUSED */ 1182*fa9e4066Sahrens static int 1183*fa9e4066Sahrens restore_free(struct restorearg *ra, objset_t *os, 1184*fa9e4066Sahrens struct drr_free *drrf) 1185*fa9e4066Sahrens { 1186*fa9e4066Sahrens dmu_tx_t *tx; 1187*fa9e4066Sahrens int err; 1188*fa9e4066Sahrens 1189*fa9e4066Sahrens if (drrf->drr_length != -1ULL && 1190*fa9e4066Sahrens drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1191*fa9e4066Sahrens return (EINVAL); 1192*fa9e4066Sahrens 1193*fa9e4066Sahrens if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1194*fa9e4066Sahrens return (EINVAL); 1195*fa9e4066Sahrens 1196*fa9e4066Sahrens tx = dmu_tx_create(os); 1197*fa9e4066Sahrens 1198*fa9e4066Sahrens dmu_tx_hold_free(tx, drrf->drr_object, 1199*fa9e4066Sahrens drrf->drr_offset, drrf->drr_length); 1200*fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1201*fa9e4066Sahrens if (err) { 1202*fa9e4066Sahrens dmu_tx_abort(tx); 1203*fa9e4066Sahrens return (err); 1204*fa9e4066Sahrens } 1205*fa9e4066Sahrens dmu_free_range(os, drrf->drr_object, 1206*fa9e4066Sahrens drrf->drr_offset, drrf->drr_length, tx); 1207*fa9e4066Sahrens dmu_tx_commit(tx); 1208*fa9e4066Sahrens return (0); 1209*fa9e4066Sahrens } 1210*fa9e4066Sahrens 1211*fa9e4066Sahrens int 1212*fa9e4066Sahrens dmu_recvbackup(struct drr_begin *drrb, uint64_t *sizep, 1213*fa9e4066Sahrens vnode_t *vp, uint64_t voffset) 1214*fa9e4066Sahrens { 1215*fa9e4066Sahrens struct restorearg ra; 1216*fa9e4066Sahrens dmu_replay_record_t *drr; 1217*fa9e4066Sahrens char *cp, *tosnap; 1218*fa9e4066Sahrens dsl_dir_t *dd = NULL; 1219*fa9e4066Sahrens objset_t *os = NULL; 1220*fa9e4066Sahrens 1221*fa9e4066Sahrens bzero(&ra, sizeof (ra)); 1222*fa9e4066Sahrens ra.vp = vp; 1223*fa9e4066Sahrens ra.voff = voffset; 1224*fa9e4066Sahrens ra.bufsize = 1<<20; 1225*fa9e4066Sahrens ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1226*fa9e4066Sahrens 1227*fa9e4066Sahrens if (drrb->drr_magic == DMU_BACKUP_MAGIC) { 1228*fa9e4066Sahrens ra.byteswap = FALSE; 1229*fa9e4066Sahrens } else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { 1230*fa9e4066Sahrens ra.byteswap = TRUE; 1231*fa9e4066Sahrens } else { 1232*fa9e4066Sahrens ra.err = EINVAL; 1233*fa9e4066Sahrens goto out; 1234*fa9e4066Sahrens } 1235*fa9e4066Sahrens 1236*fa9e4066Sahrens if (ra.byteswap) { 1237*fa9e4066Sahrens drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1238*fa9e4066Sahrens drrb->drr_version = BSWAP_64(drrb->drr_version); 1239*fa9e4066Sahrens drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1240*fa9e4066Sahrens drrb->drr_type = BSWAP_32(drrb->drr_type); 1241*fa9e4066Sahrens drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1242*fa9e4066Sahrens drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1243*fa9e4066Sahrens } 1244*fa9e4066Sahrens 1245*fa9e4066Sahrens ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 1246*fa9e4066Sahrens 1247*fa9e4066Sahrens tosnap = drrb->drr_toname; 1248*fa9e4066Sahrens if (drrb->drr_version != DMU_BACKUP_VERSION || 1249*fa9e4066Sahrens drrb->drr_type >= DMU_OST_NUMTYPES || 1250*fa9e4066Sahrens strchr(drrb->drr_toname, '@') == NULL) { 1251*fa9e4066Sahrens ra.err = EINVAL; 1252*fa9e4066Sahrens goto out; 1253*fa9e4066Sahrens } 1254*fa9e4066Sahrens 1255*fa9e4066Sahrens /* 1256*fa9e4066Sahrens * Process the begin in syncing context. 1257*fa9e4066Sahrens */ 1258*fa9e4066Sahrens if (drrb->drr_fromguid) { 1259*fa9e4066Sahrens /* incremental backup */ 1260*fa9e4066Sahrens 1261*fa9e4066Sahrens cp = strchr(tosnap, '@'); 1262*fa9e4066Sahrens *cp = '\0'; 1263*fa9e4066Sahrens dd = dsl_dir_open(tosnap, FTAG, NULL); 1264*fa9e4066Sahrens *cp = '@'; 1265*fa9e4066Sahrens if (dd == NULL) { 1266*fa9e4066Sahrens ra.err = ENOENT; 1267*fa9e4066Sahrens goto out; 1268*fa9e4066Sahrens } 1269*fa9e4066Sahrens 1270*fa9e4066Sahrens ra.err = dsl_dir_sync_task(dd, replay_incremental_sync, 1271*fa9e4066Sahrens drrb, 1<<20); 1272*fa9e4066Sahrens } else { 1273*fa9e4066Sahrens /* full backup */ 1274*fa9e4066Sahrens const char *tail; 1275*fa9e4066Sahrens 1276*fa9e4066Sahrens cp = strchr(tosnap, '@'); 1277*fa9e4066Sahrens *cp = '\0'; 1278*fa9e4066Sahrens dd = dsl_dir_open(tosnap, FTAG, &tail); 1279*fa9e4066Sahrens *cp = '@'; 1280*fa9e4066Sahrens if (dd == NULL) { 1281*fa9e4066Sahrens ra.err = ENOENT; 1282*fa9e4066Sahrens goto out; 1283*fa9e4066Sahrens } 1284*fa9e4066Sahrens if (tail == NULL) { 1285*fa9e4066Sahrens ra.err = EEXIST; 1286*fa9e4066Sahrens goto out; 1287*fa9e4066Sahrens } 1288*fa9e4066Sahrens 1289*fa9e4066Sahrens ra.err = dsl_dir_sync_task(dd, replay_full_sync, 1290*fa9e4066Sahrens drrb, 1<<20); 1291*fa9e4066Sahrens } 1292*fa9e4066Sahrens if (ra.err) 1293*fa9e4066Sahrens goto out; 1294*fa9e4066Sahrens 1295*fa9e4066Sahrens /* 1296*fa9e4066Sahrens * Open the objset we are modifying. 1297*fa9e4066Sahrens */ 1298*fa9e4066Sahrens 1299*fa9e4066Sahrens cp = strchr(tosnap, '@'); 1300*fa9e4066Sahrens *cp = '\0'; 1301*fa9e4066Sahrens ra.err = dmu_objset_open(tosnap, DMU_OST_ANY, 1302*fa9e4066Sahrens DS_MODE_PRIMARY | DS_MODE_RESTORE, &os); 1303*fa9e4066Sahrens *cp = '@'; 1304*fa9e4066Sahrens ASSERT3U(ra.err, ==, 0); 1305*fa9e4066Sahrens 1306*fa9e4066Sahrens /* 1307*fa9e4066Sahrens * Read records and process them. 1308*fa9e4066Sahrens */ 1309*fa9e4066Sahrens while (ra.err == 0 && 1310*fa9e4066Sahrens NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1311*fa9e4066Sahrens if (issig(JUSTLOOKING)) { 1312*fa9e4066Sahrens ra.err = EINTR; 1313*fa9e4066Sahrens goto out; 1314*fa9e4066Sahrens } 1315*fa9e4066Sahrens 1316*fa9e4066Sahrens if (ra.byteswap) 1317*fa9e4066Sahrens backup_byteswap(drr); 1318*fa9e4066Sahrens 1319*fa9e4066Sahrens switch (drr->drr_type) { 1320*fa9e4066Sahrens case DRR_OBJECT: 1321*fa9e4066Sahrens { 1322*fa9e4066Sahrens /* 1323*fa9e4066Sahrens * We need to make a copy of the record header, 1324*fa9e4066Sahrens * because restore_{object,write} may need to 1325*fa9e4066Sahrens * restore_read(), which will invalidate drr. 1326*fa9e4066Sahrens */ 1327*fa9e4066Sahrens struct drr_object drro = drr->drr_u.drr_object; 1328*fa9e4066Sahrens ra.err = restore_object(&ra, os, &drro); 1329*fa9e4066Sahrens break; 1330*fa9e4066Sahrens } 1331*fa9e4066Sahrens case DRR_FREEOBJECTS: 1332*fa9e4066Sahrens { 1333*fa9e4066Sahrens struct drr_freeobjects drrfo = 1334*fa9e4066Sahrens drr->drr_u.drr_freeobjects; 1335*fa9e4066Sahrens ra.err = restore_freeobjects(&ra, os, &drrfo); 1336*fa9e4066Sahrens break; 1337*fa9e4066Sahrens } 1338*fa9e4066Sahrens case DRR_WRITE: 1339*fa9e4066Sahrens { 1340*fa9e4066Sahrens struct drr_write drrw = drr->drr_u.drr_write; 1341*fa9e4066Sahrens ra.err = restore_write(&ra, os, &drrw); 1342*fa9e4066Sahrens break; 1343*fa9e4066Sahrens } 1344*fa9e4066Sahrens case DRR_FREE: 1345*fa9e4066Sahrens { 1346*fa9e4066Sahrens struct drr_free drrf = drr->drr_u.drr_free; 1347*fa9e4066Sahrens ra.err = restore_free(&ra, os, &drrf); 1348*fa9e4066Sahrens break; 1349*fa9e4066Sahrens } 1350*fa9e4066Sahrens case DRR_END: 1351*fa9e4066Sahrens /* Need to verify checksum. */ 1352*fa9e4066Sahrens /* 1353*fa9e4066Sahrens * dd may be the parent of the dd we are 1354*fa9e4066Sahrens * restoring into (eg. if it's a full backup). 1355*fa9e4066Sahrens */ 1356*fa9e4066Sahrens ra.err = dsl_dir_sync_task(dmu_objset_ds(os)-> 1357*fa9e4066Sahrens ds_dir, replay_end_sync, drrb, 1<<20); 1358*fa9e4066Sahrens goto out; 1359*fa9e4066Sahrens default: 1360*fa9e4066Sahrens ra.err = EINVAL; 1361*fa9e4066Sahrens goto out; 1362*fa9e4066Sahrens } 1363*fa9e4066Sahrens } 1364*fa9e4066Sahrens 1365*fa9e4066Sahrens out: 1366*fa9e4066Sahrens if (os) 1367*fa9e4066Sahrens dmu_objset_close(os); 1368*fa9e4066Sahrens 1369*fa9e4066Sahrens /* 1370*fa9e4066Sahrens * Make sure we don't rollback/destroy unless we actually 1371*fa9e4066Sahrens * processed the begin properly. 'os' will only be set if this 1372*fa9e4066Sahrens * is the case. 1373*fa9e4066Sahrens */ 1374*fa9e4066Sahrens if (ra.err && os && dd && tosnap && strchr(tosnap, '@')) { 1375*fa9e4066Sahrens /* 1376*fa9e4066Sahrens * rollback or destroy what we created, so we don't 1377*fa9e4066Sahrens * leave it in the restoring state. 1378*fa9e4066Sahrens */ 1379*fa9e4066Sahrens txg_wait_synced(dd->dd_pool, 0); 1380*fa9e4066Sahrens if (drrb->drr_fromguid) { 1381*fa9e4066Sahrens /* incremental: rollback to most recent snapshot */ 1382*fa9e4066Sahrens (void) dsl_dir_sync_task(dd, 1383*fa9e4066Sahrens dsl_dataset_rollback_sync, NULL, 0); 1384*fa9e4066Sahrens } else { 1385*fa9e4066Sahrens /* full: destroy whole fs */ 1386*fa9e4066Sahrens cp = strchr(tosnap, '@'); 1387*fa9e4066Sahrens *cp = '\0'; 1388*fa9e4066Sahrens cp = strchr(tosnap, '/'); 1389*fa9e4066Sahrens if (cp) { 1390*fa9e4066Sahrens (void) dsl_dir_sync_task(dd, 1391*fa9e4066Sahrens dsl_dir_destroy_sync, cp+1, 0); 1392*fa9e4066Sahrens } 1393*fa9e4066Sahrens cp = strchr(tosnap, '\0'); 1394*fa9e4066Sahrens *cp = '@'; 1395*fa9e4066Sahrens } 1396*fa9e4066Sahrens 1397*fa9e4066Sahrens } 1398*fa9e4066Sahrens 1399*fa9e4066Sahrens if (dd) 1400*fa9e4066Sahrens dsl_dir_close(dd, FTAG); 1401*fa9e4066Sahrens kmem_free(ra.buf, ra.bufsize); 1402*fa9e4066Sahrens if (sizep) 1403*fa9e4066Sahrens *sizep = ra.voff; 1404*fa9e4066Sahrens return (ra.err); 1405*fa9e4066Sahrens } 1406*fa9e4066Sahrens 1407*fa9e4066Sahrens /* 1408*fa9e4066Sahrens * Intent log support: sync the block at <os, object, offset> to disk. 1409*fa9e4066Sahrens * N.B. and XXX: the caller is responsible for serializing dmu_sync()s 1410*fa9e4066Sahrens * of the same block, and for making sure that the data isn't changing 1411*fa9e4066Sahrens * while dmu_sync() is writing it. 1412*fa9e4066Sahrens * 1413*fa9e4066Sahrens * Return values: 1414*fa9e4066Sahrens * 1415*fa9e4066Sahrens * EALREADY: this txg has already been synced, so there's nothing to to. 1416*fa9e4066Sahrens * The caller should not log the write. 1417*fa9e4066Sahrens * 1418*fa9e4066Sahrens * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. 1419*fa9e4066Sahrens * The caller should not log the write. 1420*fa9e4066Sahrens * 1421*fa9e4066Sahrens * EINPROGRESS: the block is in the process of being synced by the 1422*fa9e4066Sahrens * usual mechanism (spa_sync()), so we can't sync it here. 1423*fa9e4066Sahrens * The caller should txg_wait_synced() and not log the write. 1424*fa9e4066Sahrens * 1425*fa9e4066Sahrens * EBUSY: another thread is trying to dmu_sync() the same dbuf. 1426*fa9e4066Sahrens * (This case cannot arise under the current locking rules.) 1427*fa9e4066Sahrens * The caller should txg_wait_synced() and not log the write. 1428*fa9e4066Sahrens * 1429*fa9e4066Sahrens * ESTALE: the block was dirtied or freed while we were writing it, 1430*fa9e4066Sahrens * so the data is no longer valid. 1431*fa9e4066Sahrens * The caller should txg_wait_synced() and not log the write. 1432*fa9e4066Sahrens * 1433*fa9e4066Sahrens * 0: success. Sets *bp to the blkptr just written, and sets 1434*fa9e4066Sahrens * *blkoff to the data's offset within that block. 1435*fa9e4066Sahrens * The caller should log this blkptr/blkoff in its lr_write_t. 1436*fa9e4066Sahrens */ 1437*fa9e4066Sahrens int 1438*fa9e4066Sahrens dmu_sync(objset_t *os, uint64_t object, uint64_t offset, uint64_t *blkoff, 1439*fa9e4066Sahrens blkptr_t *bp, uint64_t txg) 1440*fa9e4066Sahrens { 1441*fa9e4066Sahrens dsl_pool_t *dp = os->os->os_dsl_dataset->ds_dir->dd_pool; 1442*fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 1443*fa9e4066Sahrens dmu_buf_impl_t *db; 1444*fa9e4066Sahrens blkptr_t *blk; 1445*fa9e4066Sahrens int err; 1446*fa9e4066Sahrens 1447*fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&tx->tx_suspend)); 1448*fa9e4066Sahrens ASSERT(BP_IS_HOLE(bp)); 1449*fa9e4066Sahrens ASSERT(txg != 0); 1450*fa9e4066Sahrens 1451*fa9e4066Sahrens dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n", 1452*fa9e4066Sahrens txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg); 1453*fa9e4066Sahrens 1454*fa9e4066Sahrens /* 1455*fa9e4066Sahrens * If this txg already synced, there's nothing to do. 1456*fa9e4066Sahrens */ 1457*fa9e4066Sahrens if (txg <= tx->tx_synced_txg) { 1458*fa9e4066Sahrens /* 1459*fa9e4066Sahrens * If we're running ziltest, we need the blkptr regardless. 1460*fa9e4066Sahrens */ 1461*fa9e4066Sahrens if (txg > spa_freeze_txg(dp->dp_spa)) { 1462*fa9e4066Sahrens db = (dmu_buf_impl_t *)dmu_buf_hold(os, object, offset); 1463*fa9e4066Sahrens /* if db_blkptr == NULL, this was an empty write */ 1464*fa9e4066Sahrens if (db->db_blkptr) 1465*fa9e4066Sahrens *bp = *db->db_blkptr; /* structure assignment */ 1466*fa9e4066Sahrens else 1467*fa9e4066Sahrens bzero(bp, sizeof (blkptr_t)); 1468*fa9e4066Sahrens *blkoff = offset - db->db.db_offset; 1469*fa9e4066Sahrens ASSERT3U(*blkoff, <, db->db.db_size); 1470*fa9e4066Sahrens dmu_buf_rele((dmu_buf_t *)db); 1471*fa9e4066Sahrens return (0); 1472*fa9e4066Sahrens } 1473*fa9e4066Sahrens return (EALREADY); 1474*fa9e4066Sahrens } 1475*fa9e4066Sahrens 1476*fa9e4066Sahrens /* 1477*fa9e4066Sahrens * If this txg is in the middle of syncing, just wait for it. 1478*fa9e4066Sahrens */ 1479*fa9e4066Sahrens if (txg == tx->tx_syncing_txg) { 1480*fa9e4066Sahrens ASSERT(txg != tx->tx_open_txg); 1481*fa9e4066Sahrens return (EINPROGRESS); 1482*fa9e4066Sahrens } 1483*fa9e4066Sahrens 1484*fa9e4066Sahrens db = (dmu_buf_impl_t *)dmu_buf_hold(os, object, offset); 1485*fa9e4066Sahrens 1486*fa9e4066Sahrens mutex_enter(&db->db_mtx); 1487*fa9e4066Sahrens 1488*fa9e4066Sahrens /* 1489*fa9e4066Sahrens * If this dbuf isn't dirty, must have been free_range'd. 1490*fa9e4066Sahrens * There's no need to log writes to freed blocks, so we're done. 1491*fa9e4066Sahrens */ 1492*fa9e4066Sahrens if (!list_link_active(&db->db_dirty_node[txg&TXG_MASK])) { 1493*fa9e4066Sahrens mutex_exit(&db->db_mtx); 1494*fa9e4066Sahrens dmu_buf_rele((dmu_buf_t *)db); 1495*fa9e4066Sahrens return (ENOENT); 1496*fa9e4066Sahrens } 1497*fa9e4066Sahrens 1498*fa9e4066Sahrens blk = db->db_d.db_overridden_by[txg&TXG_MASK]; 1499*fa9e4066Sahrens 1500*fa9e4066Sahrens /* 1501*fa9e4066Sahrens * If we already did a dmu_sync() of this dbuf in this txg, 1502*fa9e4066Sahrens * free the old block before writing the new one. 1503*fa9e4066Sahrens */ 1504*fa9e4066Sahrens if (blk != NULL) { 1505*fa9e4066Sahrens ASSERT(blk != IN_DMU_SYNC); 1506*fa9e4066Sahrens if (blk == IN_DMU_SYNC) { 1507*fa9e4066Sahrens mutex_exit(&db->db_mtx); 1508*fa9e4066Sahrens dmu_buf_rele((dmu_buf_t *)db); 1509*fa9e4066Sahrens return (EBUSY); 1510*fa9e4066Sahrens } 1511*fa9e4066Sahrens arc_release(db->db_d.db_data_old[txg&TXG_MASK], db); 1512*fa9e4066Sahrens if (!BP_IS_HOLE(blk)) { 1513*fa9e4066Sahrens (void) arc_free(NULL, os->os->os_spa, txg, blk, 1514*fa9e4066Sahrens NULL, NULL, ARC_WAIT); 1515*fa9e4066Sahrens } 1516*fa9e4066Sahrens kmem_free(blk, sizeof (blkptr_t)); 1517*fa9e4066Sahrens } 1518*fa9e4066Sahrens 1519*fa9e4066Sahrens db->db_d.db_overridden_by[txg&TXG_MASK] = IN_DMU_SYNC; 1520*fa9e4066Sahrens mutex_exit(&db->db_mtx); 1521*fa9e4066Sahrens 1522*fa9e4066Sahrens blk = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); 1523*fa9e4066Sahrens blk->blk_birth = 0; /* mark as invalid */ 1524*fa9e4066Sahrens 1525*fa9e4066Sahrens err = arc_write(NULL, os->os->os_spa, 1526*fa9e4066Sahrens zio_checksum_select(db->db_dnode->dn_checksum, os->os->os_checksum), 1527*fa9e4066Sahrens zio_compress_select(db->db_dnode->dn_compress, os->os->os_compress), 1528*fa9e4066Sahrens txg, blk, db->db_d.db_data_old[txg&TXG_MASK], NULL, NULL, 1529*fa9e4066Sahrens ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT); 1530*fa9e4066Sahrens ASSERT(err == 0); 1531*fa9e4066Sahrens 1532*fa9e4066Sahrens if (!BP_IS_HOLE(blk)) { 1533*fa9e4066Sahrens blk->blk_fill = 1; 1534*fa9e4066Sahrens BP_SET_TYPE(blk, db->db_dnode->dn_type); 1535*fa9e4066Sahrens BP_SET_LEVEL(blk, 0); 1536*fa9e4066Sahrens } 1537*fa9e4066Sahrens 1538*fa9e4066Sahrens /* copy the block pointer back to caller */ 1539*fa9e4066Sahrens *bp = *blk; /* structure assignment */ 1540*fa9e4066Sahrens *blkoff = offset - db->db.db_offset; 1541*fa9e4066Sahrens ASSERT3U(*blkoff, <, db->db.db_size); 1542*fa9e4066Sahrens 1543*fa9e4066Sahrens mutex_enter(&db->db_mtx); 1544*fa9e4066Sahrens if (db->db_d.db_overridden_by[txg&TXG_MASK] != IN_DMU_SYNC) { 1545*fa9e4066Sahrens /* we were dirtied/freed during the sync */ 1546*fa9e4066Sahrens ASSERT3P(db->db_d.db_overridden_by[txg&TXG_MASK], ==, NULL); 1547*fa9e4066Sahrens arc_release(db->db_d.db_data_old[txg&TXG_MASK], db); 1548*fa9e4066Sahrens mutex_exit(&db->db_mtx); 1549*fa9e4066Sahrens dmu_buf_rele((dmu_buf_t *)db); 1550*fa9e4066Sahrens /* Note that this block does not free on disk until txg syncs */ 1551*fa9e4066Sahrens 1552*fa9e4066Sahrens /* 1553*fa9e4066Sahrens * XXX can we use ARC_NOWAIT here? 1554*fa9e4066Sahrens * XXX should we be ignoring the return code? 1555*fa9e4066Sahrens */ 1556*fa9e4066Sahrens if (!BP_IS_HOLE(blk)) { 1557*fa9e4066Sahrens (void) arc_free(NULL, os->os->os_spa, txg, blk, 1558*fa9e4066Sahrens NULL, NULL, ARC_WAIT); 1559*fa9e4066Sahrens } 1560*fa9e4066Sahrens kmem_free(blk, sizeof (blkptr_t)); 1561*fa9e4066Sahrens return (ESTALE); 1562*fa9e4066Sahrens } 1563*fa9e4066Sahrens 1564*fa9e4066Sahrens db->db_d.db_overridden_by[txg&TXG_MASK] = blk; 1565*fa9e4066Sahrens mutex_exit(&db->db_mtx); 1566*fa9e4066Sahrens dmu_buf_rele((dmu_buf_t *)db); 1567*fa9e4066Sahrens ASSERT3U(txg, >, tx->tx_syncing_txg); 1568*fa9e4066Sahrens return (0); 1569*fa9e4066Sahrens } 1570*fa9e4066Sahrens 1571*fa9e4066Sahrens uint64_t 1572*fa9e4066Sahrens dmu_object_max_nonzero_offset(objset_t *os, uint64_t object) 1573*fa9e4066Sahrens { 1574*fa9e4066Sahrens dnode_t *dn = dnode_hold(os->os, object, FTAG); 1575*fa9e4066Sahrens uint64_t rv = dnode_max_nonzero_offset(dn); 1576*fa9e4066Sahrens dnode_rele(dn, FTAG); 1577*fa9e4066Sahrens return (rv); 1578*fa9e4066Sahrens } 1579*fa9e4066Sahrens 1580*fa9e4066Sahrens int 1581*fa9e4066Sahrens dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, 1582*fa9e4066Sahrens dmu_tx_t *tx) 1583*fa9e4066Sahrens { 1584*fa9e4066Sahrens dnode_t *dn = dnode_hold(os->os, object, FTAG); 1585*fa9e4066Sahrens int err = dnode_set_blksz(dn, size, ibs, tx); 1586*fa9e4066Sahrens dnode_rele(dn, FTAG); 1587*fa9e4066Sahrens return (err); 1588*fa9e4066Sahrens } 1589*fa9e4066Sahrens 1590*fa9e4066Sahrens void 1591*fa9e4066Sahrens dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, 1592*fa9e4066Sahrens dmu_tx_t *tx) 1593*fa9e4066Sahrens { 1594*fa9e4066Sahrens dnode_t *dn = dnode_hold(os->os, object, FTAG); 1595*fa9e4066Sahrens ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); 1596*fa9e4066Sahrens dn->dn_checksum = checksum; 1597*fa9e4066Sahrens dnode_setdirty(dn, tx); 1598*fa9e4066Sahrens dnode_rele(dn, FTAG); 1599*fa9e4066Sahrens } 1600*fa9e4066Sahrens 1601*fa9e4066Sahrens void 1602*fa9e4066Sahrens dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, 1603*fa9e4066Sahrens dmu_tx_t *tx) 1604*fa9e4066Sahrens { 1605*fa9e4066Sahrens dnode_t *dn = dnode_hold(os->os, object, FTAG); 1606*fa9e4066Sahrens ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); 1607*fa9e4066Sahrens dn->dn_compress = compress; 1608*fa9e4066Sahrens dnode_setdirty(dn, tx); 1609*fa9e4066Sahrens dnode_rele(dn, FTAG); 1610*fa9e4066Sahrens } 1611*fa9e4066Sahrens 1612*fa9e4066Sahrens int 1613*fa9e4066Sahrens dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) 1614*fa9e4066Sahrens { 1615*fa9e4066Sahrens dnode_t *dn; 1616*fa9e4066Sahrens int i, err; 1617*fa9e4066Sahrens 1618*fa9e4066Sahrens dn = dnode_hold(os->os, object, FTAG); 1619*fa9e4066Sahrens /* 1620*fa9e4066Sahrens * Sync any current changes before 1621*fa9e4066Sahrens * we go trundling through the block pointers. 1622*fa9e4066Sahrens */ 1623*fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 1624*fa9e4066Sahrens if (dn->dn_dirtyblksz[i]) 1625*fa9e4066Sahrens break; 1626*fa9e4066Sahrens } 1627*fa9e4066Sahrens if (i != TXG_SIZE) { 1628*fa9e4066Sahrens dnode_rele(dn, FTAG); 1629*fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 1630*fa9e4066Sahrens dn = dnode_hold(os->os, object, FTAG); 1631*fa9e4066Sahrens } 1632*fa9e4066Sahrens 1633*fa9e4066Sahrens err = dnode_next_offset(dn, hole, off, 1, 1); 1634*fa9e4066Sahrens dnode_rele(dn, FTAG); 1635*fa9e4066Sahrens 1636*fa9e4066Sahrens return (err); 1637*fa9e4066Sahrens } 1638*fa9e4066Sahrens 1639*fa9e4066Sahrens void 1640*fa9e4066Sahrens dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) 1641*fa9e4066Sahrens { 1642*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 1643*fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 1644*fa9e4066Sahrens 1645*fa9e4066Sahrens doi->doi_data_block_size = dn->dn_datablksz; 1646*fa9e4066Sahrens doi->doi_metadata_block_size = dn->dn_indblkshift ? 1647*fa9e4066Sahrens 1ULL << dn->dn_indblkshift : 0; 1648*fa9e4066Sahrens doi->doi_indirection = dn->dn_nlevels; 1649*fa9e4066Sahrens doi->doi_checksum = dn->dn_checksum; 1650*fa9e4066Sahrens doi->doi_compress = dn->dn_compress; 1651*fa9e4066Sahrens doi->doi_physical_blks = dn->dn_phys->dn_secphys; 1652*fa9e4066Sahrens doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; 1653*fa9e4066Sahrens doi->doi_type = dn->dn_type; 1654*fa9e4066Sahrens doi->doi_bonus_size = dn->dn_bonuslen; 1655*fa9e4066Sahrens doi->doi_bonus_type = dn->dn_bonustype; 1656*fa9e4066Sahrens 1657*fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 1658*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 1659*fa9e4066Sahrens } 1660*fa9e4066Sahrens 1661*fa9e4066Sahrens /* 1662*fa9e4066Sahrens * Get information on a DMU object. 1663*fa9e4066Sahrens * If doi is NULL, just indicates whether the object exists. 1664*fa9e4066Sahrens */ 1665*fa9e4066Sahrens int 1666*fa9e4066Sahrens dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) 1667*fa9e4066Sahrens { 1668*fa9e4066Sahrens dnode_t *dn = dnode_hold(os->os, object, FTAG); 1669*fa9e4066Sahrens 1670*fa9e4066Sahrens if (dn == NULL) 1671*fa9e4066Sahrens return (ENOENT); 1672*fa9e4066Sahrens 1673*fa9e4066Sahrens if (doi != NULL) 1674*fa9e4066Sahrens dmu_object_info_from_dnode(dn, doi); 1675*fa9e4066Sahrens 1676*fa9e4066Sahrens dnode_rele(dn, FTAG); 1677*fa9e4066Sahrens return (0); 1678*fa9e4066Sahrens } 1679*fa9e4066Sahrens 1680*fa9e4066Sahrens /* 1681*fa9e4066Sahrens * As above, but faster; can be used when you have a held dbuf in hand. 1682*fa9e4066Sahrens */ 1683*fa9e4066Sahrens void 1684*fa9e4066Sahrens dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi) 1685*fa9e4066Sahrens { 1686*fa9e4066Sahrens dmu_object_info_from_dnode(((dmu_buf_impl_t *)db)->db_dnode, doi); 1687*fa9e4066Sahrens } 1688*fa9e4066Sahrens 1689*fa9e4066Sahrens /* 1690*fa9e4066Sahrens * Faster still when you only care about the size. 1691*fa9e4066Sahrens * This is specifically optimized for zfs_getattr(). 1692*fa9e4066Sahrens */ 1693*fa9e4066Sahrens void 1694*fa9e4066Sahrens dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) 1695*fa9e4066Sahrens { 1696*fa9e4066Sahrens dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 1697*fa9e4066Sahrens 1698*fa9e4066Sahrens *blksize = dn->dn_datablksz; 1699*fa9e4066Sahrens *nblk512 = dn->dn_phys->dn_secphys + 1; /* add 1 for dnode space */ 1700*fa9e4066Sahrens } 1701*fa9e4066Sahrens 1702*fa9e4066Sahrens void 1703*fa9e4066Sahrens byteswap_uint64_array(void *vbuf, size_t size) 1704*fa9e4066Sahrens { 1705*fa9e4066Sahrens uint64_t *buf = vbuf; 1706*fa9e4066Sahrens size_t count = size >> 3; 1707*fa9e4066Sahrens int i; 1708*fa9e4066Sahrens 1709*fa9e4066Sahrens ASSERT((size & 7) == 0); 1710*fa9e4066Sahrens 1711*fa9e4066Sahrens for (i = 0; i < count; i++) 1712*fa9e4066Sahrens buf[i] = BSWAP_64(buf[i]); 1713*fa9e4066Sahrens } 1714*fa9e4066Sahrens 1715*fa9e4066Sahrens void 1716*fa9e4066Sahrens byteswap_uint32_array(void *vbuf, size_t size) 1717*fa9e4066Sahrens { 1718*fa9e4066Sahrens uint32_t *buf = vbuf; 1719*fa9e4066Sahrens size_t count = size >> 2; 1720*fa9e4066Sahrens int i; 1721*fa9e4066Sahrens 1722*fa9e4066Sahrens ASSERT((size & 3) == 0); 1723*fa9e4066Sahrens 1724*fa9e4066Sahrens for (i = 0; i < count; i++) 1725*fa9e4066Sahrens buf[i] = BSWAP_32(buf[i]); 1726*fa9e4066Sahrens } 1727*fa9e4066Sahrens 1728*fa9e4066Sahrens void 1729*fa9e4066Sahrens byteswap_uint16_array(void *vbuf, size_t size) 1730*fa9e4066Sahrens { 1731*fa9e4066Sahrens uint16_t *buf = vbuf; 1732*fa9e4066Sahrens size_t count = size >> 1; 1733*fa9e4066Sahrens int i; 1734*fa9e4066Sahrens 1735*fa9e4066Sahrens ASSERT((size & 1) == 0); 1736*fa9e4066Sahrens 1737*fa9e4066Sahrens for (i = 0; i < count; i++) 1738*fa9e4066Sahrens buf[i] = BSWAP_16(buf[i]); 1739*fa9e4066Sahrens } 1740*fa9e4066Sahrens 1741*fa9e4066Sahrens /* ARGSUSED */ 1742*fa9e4066Sahrens void 1743*fa9e4066Sahrens byteswap_uint8_array(void *vbuf, size_t size) 1744*fa9e4066Sahrens { 1745*fa9e4066Sahrens } 1746*fa9e4066Sahrens 1747*fa9e4066Sahrens void 1748*fa9e4066Sahrens dmu_init(void) 1749*fa9e4066Sahrens { 1750*fa9e4066Sahrens dbuf_init(); 1751*fa9e4066Sahrens dnode_init(); 1752*fa9e4066Sahrens arc_init(); 1753*fa9e4066Sahrens } 1754*fa9e4066Sahrens 1755*fa9e4066Sahrens void 1756*fa9e4066Sahrens dmu_fini(void) 1757*fa9e4066Sahrens { 1758*fa9e4066Sahrens arc_fini(); 1759*fa9e4066Sahrens dnode_fini(); 1760*fa9e4066Sahrens dbuf_fini(); 1761*fa9e4066Sahrens } 1762