1*fa9e4066Sahrens /* 2*fa9e4066Sahrens * CDDL HEADER START 3*fa9e4066Sahrens * 4*fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*fa9e4066Sahrens * Common Development and Distribution License, Version 1.0 only 6*fa9e4066Sahrens * (the "License"). You may not use this file except in compliance 7*fa9e4066Sahrens * with the License. 8*fa9e4066Sahrens * 9*fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 11*fa9e4066Sahrens * See the License for the specific language governing permissions 12*fa9e4066Sahrens * and limitations under the License. 13*fa9e4066Sahrens * 14*fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*fa9e4066Sahrens * 20*fa9e4066Sahrens * CDDL HEADER END 21*fa9e4066Sahrens */ 22*fa9e4066Sahrens /* 23*fa9e4066Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*fa9e4066Sahrens * Use is subject to license terms. 25*fa9e4066Sahrens */ 26*fa9e4066Sahrens 27*fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*fa9e4066Sahrens 29*fa9e4066Sahrens #include <sys/zfs_context.h> 30*fa9e4066Sahrens #include <sys/dmu_objset.h> 31*fa9e4066Sahrens #include <sys/dsl_dir.h> 32*fa9e4066Sahrens #include <sys/dsl_dataset.h> 33*fa9e4066Sahrens #include <sys/dsl_prop.h> 34*fa9e4066Sahrens #include <sys/dsl_pool.h> 35*fa9e4066Sahrens #include <sys/dnode.h> 36*fa9e4066Sahrens #include <sys/dbuf.h> 37*fa9e4066Sahrens #include <sys/dmu_tx.h> 38*fa9e4066Sahrens #include <sys/zio_checksum.h> 39*fa9e4066Sahrens #include <sys/zap.h> 40*fa9e4066Sahrens #include <sys/zil.h> 41*fa9e4066Sahrens #include <sys/dmu_impl.h> 42*fa9e4066Sahrens 43*fa9e4066Sahrens 44*fa9e4066Sahrens spa_t * 45*fa9e4066Sahrens dmu_objset_spa(objset_t *os) 46*fa9e4066Sahrens { 47*fa9e4066Sahrens return (os->os->os_spa); 48*fa9e4066Sahrens } 49*fa9e4066Sahrens 50*fa9e4066Sahrens zilog_t * 51*fa9e4066Sahrens dmu_objset_zil(objset_t *os) 52*fa9e4066Sahrens { 53*fa9e4066Sahrens return (os->os->os_zil); 54*fa9e4066Sahrens } 55*fa9e4066Sahrens 56*fa9e4066Sahrens dsl_pool_t * 57*fa9e4066Sahrens dmu_objset_pool(objset_t *os) 58*fa9e4066Sahrens { 59*fa9e4066Sahrens dsl_dataset_t *ds; 60*fa9e4066Sahrens 61*fa9e4066Sahrens if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) 62*fa9e4066Sahrens return (ds->ds_dir->dd_pool); 63*fa9e4066Sahrens else 64*fa9e4066Sahrens return (spa_get_dsl(os->os->os_spa)); 65*fa9e4066Sahrens } 66*fa9e4066Sahrens 67*fa9e4066Sahrens dsl_dataset_t * 68*fa9e4066Sahrens dmu_objset_ds(objset_t *os) 69*fa9e4066Sahrens { 70*fa9e4066Sahrens return (os->os->os_dsl_dataset); 71*fa9e4066Sahrens } 72*fa9e4066Sahrens 73*fa9e4066Sahrens dmu_objset_type_t 74*fa9e4066Sahrens dmu_objset_type(objset_t *os) 75*fa9e4066Sahrens { 76*fa9e4066Sahrens return (os->os->os_phys->os_type); 77*fa9e4066Sahrens } 78*fa9e4066Sahrens 79*fa9e4066Sahrens void 80*fa9e4066Sahrens dmu_objset_name(objset_t *os, char *buf) 81*fa9e4066Sahrens { 82*fa9e4066Sahrens dsl_dataset_name(os->os->os_dsl_dataset, buf); 83*fa9e4066Sahrens } 84*fa9e4066Sahrens 85*fa9e4066Sahrens uint64_t 86*fa9e4066Sahrens dmu_objset_id(objset_t *os) 87*fa9e4066Sahrens { 88*fa9e4066Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 89*fa9e4066Sahrens 90*fa9e4066Sahrens return (ds ? ds->ds_object : 0); 91*fa9e4066Sahrens } 92*fa9e4066Sahrens 93*fa9e4066Sahrens static void 94*fa9e4066Sahrens checksum_changed_cb(void *arg, uint64_t newval) 95*fa9e4066Sahrens { 96*fa9e4066Sahrens objset_impl_t *osi = arg; 97*fa9e4066Sahrens 98*fa9e4066Sahrens /* 99*fa9e4066Sahrens * Inheritance should have been done by now. 100*fa9e4066Sahrens */ 101*fa9e4066Sahrens ASSERT(newval != ZIO_CHECKSUM_INHERIT); 102*fa9e4066Sahrens 103*fa9e4066Sahrens osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 104*fa9e4066Sahrens } 105*fa9e4066Sahrens 106*fa9e4066Sahrens static void 107*fa9e4066Sahrens compression_changed_cb(void *arg, uint64_t newval) 108*fa9e4066Sahrens { 109*fa9e4066Sahrens objset_impl_t *osi = arg; 110*fa9e4066Sahrens 111*fa9e4066Sahrens /* 112*fa9e4066Sahrens * Inheritance and range checking should have been done by now. 113*fa9e4066Sahrens */ 114*fa9e4066Sahrens ASSERT(newval != ZIO_COMPRESS_INHERIT); 115*fa9e4066Sahrens 116*fa9e4066Sahrens osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); 117*fa9e4066Sahrens } 118*fa9e4066Sahrens 119*fa9e4066Sahrens void 120*fa9e4066Sahrens dmu_objset_byteswap(void *buf, size_t size) 121*fa9e4066Sahrens { 122*fa9e4066Sahrens objset_phys_t *osp = buf; 123*fa9e4066Sahrens 124*fa9e4066Sahrens ASSERT(size == sizeof (objset_phys_t)); 125*fa9e4066Sahrens dnode_byteswap(&osp->os_meta_dnode); 126*fa9e4066Sahrens byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 127*fa9e4066Sahrens osp->os_type = BSWAP_64(osp->os_type); 128*fa9e4066Sahrens } 129*fa9e4066Sahrens 130*fa9e4066Sahrens objset_impl_t * 131*fa9e4066Sahrens dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp) 132*fa9e4066Sahrens { 133*fa9e4066Sahrens objset_impl_t *winner, *osi; 134*fa9e4066Sahrens int i, err, checksum; 135*fa9e4066Sahrens 136*fa9e4066Sahrens osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); 137*fa9e4066Sahrens osi->os.os = osi; 138*fa9e4066Sahrens osi->os_dsl_dataset = ds; 139*fa9e4066Sahrens osi->os_spa = spa; 140*fa9e4066Sahrens if (bp) 141*fa9e4066Sahrens osi->os_rootbp = *bp; 142*fa9e4066Sahrens osi->os_phys = zio_buf_alloc(sizeof (objset_phys_t)); 143*fa9e4066Sahrens if (!BP_IS_HOLE(&osi->os_rootbp)) { 144*fa9e4066Sahrens dprintf_bp(&osi->os_rootbp, "reading %s", ""); 145*fa9e4066Sahrens (void) arc_read(NULL, spa, &osi->os_rootbp, 146*fa9e4066Sahrens dmu_ot[DMU_OT_OBJSET].ot_byteswap, 147*fa9e4066Sahrens arc_bcopy_func, osi->os_phys, 148*fa9e4066Sahrens ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT); 149*fa9e4066Sahrens } else { 150*fa9e4066Sahrens bzero(osi->os_phys, sizeof (objset_phys_t)); 151*fa9e4066Sahrens } 152*fa9e4066Sahrens osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); 153*fa9e4066Sahrens 154*fa9e4066Sahrens /* 155*fa9e4066Sahrens * Note: the changed_cb will be called once before the register 156*fa9e4066Sahrens * func returns, thus changing the checksum/compression from the 157*fa9e4066Sahrens * default (fletcher2/off). 158*fa9e4066Sahrens */ 159*fa9e4066Sahrens if (ds) { 160*fa9e4066Sahrens err = dsl_prop_register(ds, "checksum", 161*fa9e4066Sahrens checksum_changed_cb, osi); 162*fa9e4066Sahrens ASSERT(err == 0); 163*fa9e4066Sahrens 164*fa9e4066Sahrens err = dsl_prop_register(ds, "compression", 165*fa9e4066Sahrens compression_changed_cb, osi); 166*fa9e4066Sahrens ASSERT(err == 0); 167*fa9e4066Sahrens } else { 168*fa9e4066Sahrens /* It's the meta-objset. */ 169*fa9e4066Sahrens osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 170*fa9e4066Sahrens osi->os_compress = ZIO_COMPRESS_LZJB; 171*fa9e4066Sahrens } 172*fa9e4066Sahrens 173*fa9e4066Sahrens /* 174*fa9e4066Sahrens * Metadata always gets compressed and checksummed. 175*fa9e4066Sahrens * If the data checksum is multi-bit correctable, and it's not 176*fa9e4066Sahrens * a ZBT-style checksum, then it's suitable for metadata as well. 177*fa9e4066Sahrens * Otherwise, the metadata checksum defaults to fletcher4. 178*fa9e4066Sahrens */ 179*fa9e4066Sahrens checksum = osi->os_checksum; 180*fa9e4066Sahrens 181*fa9e4066Sahrens if (zio_checksum_table[checksum].ci_correctable && 182*fa9e4066Sahrens !zio_checksum_table[checksum].ci_zbt) 183*fa9e4066Sahrens osi->os_md_checksum = checksum; 184*fa9e4066Sahrens else 185*fa9e4066Sahrens osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; 186*fa9e4066Sahrens 187*fa9e4066Sahrens osi->os_md_compress = ZIO_COMPRESS_LZJB; 188*fa9e4066Sahrens 189*fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 190*fa9e4066Sahrens list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), 191*fa9e4066Sahrens offsetof(dnode_t, dn_dirty_link[i])); 192*fa9e4066Sahrens list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), 193*fa9e4066Sahrens offsetof(dnode_t, dn_dirty_link[i])); 194*fa9e4066Sahrens } 195*fa9e4066Sahrens list_create(&osi->os_dnodes, sizeof (dnode_t), 196*fa9e4066Sahrens offsetof(dnode_t, dn_link)); 197*fa9e4066Sahrens list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 198*fa9e4066Sahrens offsetof(dmu_buf_impl_t, db_link)); 199*fa9e4066Sahrens 200*fa9e4066Sahrens osi->os_meta_dnode = dnode_special_open(osi, 201*fa9e4066Sahrens &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); 202*fa9e4066Sahrens 203*fa9e4066Sahrens if (ds != NULL) { 204*fa9e4066Sahrens winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); 205*fa9e4066Sahrens if (winner) { 206*fa9e4066Sahrens dmu_objset_evict(ds, osi); 207*fa9e4066Sahrens osi = winner; 208*fa9e4066Sahrens } 209*fa9e4066Sahrens } 210*fa9e4066Sahrens 211*fa9e4066Sahrens return (osi); 212*fa9e4066Sahrens } 213*fa9e4066Sahrens 214*fa9e4066Sahrens /* called from zpl */ 215*fa9e4066Sahrens int 216*fa9e4066Sahrens dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, 217*fa9e4066Sahrens objset_t **osp) 218*fa9e4066Sahrens { 219*fa9e4066Sahrens dsl_dataset_t *ds; 220*fa9e4066Sahrens int err; 221*fa9e4066Sahrens objset_t *os; 222*fa9e4066Sahrens objset_impl_t *osi; 223*fa9e4066Sahrens 224*fa9e4066Sahrens os = kmem_alloc(sizeof (objset_t), KM_SLEEP); 225*fa9e4066Sahrens err = dsl_dataset_open(name, mode, os, &ds); 226*fa9e4066Sahrens if (err) { 227*fa9e4066Sahrens kmem_free(os, sizeof (objset_t)); 228*fa9e4066Sahrens return (err); 229*fa9e4066Sahrens } 230*fa9e4066Sahrens 231*fa9e4066Sahrens osi = dsl_dataset_get_user_ptr(ds); 232*fa9e4066Sahrens if (osi == NULL) { 233*fa9e4066Sahrens blkptr_t bp; 234*fa9e4066Sahrens 235*fa9e4066Sahrens dsl_dataset_get_blkptr(ds, &bp); 236*fa9e4066Sahrens osi = dmu_objset_open_impl(dsl_dataset_get_spa(ds), ds, &bp); 237*fa9e4066Sahrens } 238*fa9e4066Sahrens 239*fa9e4066Sahrens os->os = osi; 240*fa9e4066Sahrens os->os_mode = mode; 241*fa9e4066Sahrens 242*fa9e4066Sahrens if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { 243*fa9e4066Sahrens dmu_objset_close(os); 244*fa9e4066Sahrens return (EINVAL); 245*fa9e4066Sahrens } 246*fa9e4066Sahrens *osp = os; 247*fa9e4066Sahrens return (0); 248*fa9e4066Sahrens } 249*fa9e4066Sahrens 250*fa9e4066Sahrens void 251*fa9e4066Sahrens dmu_objset_close(objset_t *os) 252*fa9e4066Sahrens { 253*fa9e4066Sahrens dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); 254*fa9e4066Sahrens kmem_free(os, sizeof (objset_t)); 255*fa9e4066Sahrens } 256*fa9e4066Sahrens 257*fa9e4066Sahrens void 258*fa9e4066Sahrens dmu_objset_evict(dsl_dataset_t *ds, void *arg) 259*fa9e4066Sahrens { 260*fa9e4066Sahrens objset_impl_t *osi = arg; 261*fa9e4066Sahrens int err, i; 262*fa9e4066Sahrens 263*fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 264*fa9e4066Sahrens ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); 265*fa9e4066Sahrens ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); 266*fa9e4066Sahrens } 267*fa9e4066Sahrens 268*fa9e4066Sahrens if (ds) { 269*fa9e4066Sahrens err = dsl_prop_unregister(ds, "checksum", 270*fa9e4066Sahrens checksum_changed_cb, osi); 271*fa9e4066Sahrens ASSERT(err == 0); 272*fa9e4066Sahrens 273*fa9e4066Sahrens err = dsl_prop_unregister(ds, "compression", 274*fa9e4066Sahrens compression_changed_cb, osi); 275*fa9e4066Sahrens ASSERT(err == 0); 276*fa9e4066Sahrens } 277*fa9e4066Sahrens 278*fa9e4066Sahrens ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode); 279*fa9e4066Sahrens ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode); 280*fa9e4066Sahrens ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL); 281*fa9e4066Sahrens 282*fa9e4066Sahrens dnode_special_close(osi->os_meta_dnode); 283*fa9e4066Sahrens zil_free(osi->os_zil); 284*fa9e4066Sahrens 285*fa9e4066Sahrens zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); 286*fa9e4066Sahrens kmem_free(osi, sizeof (objset_impl_t)); 287*fa9e4066Sahrens } 288*fa9e4066Sahrens 289*fa9e4066Sahrens /* called from dsl for meta-objset */ 290*fa9e4066Sahrens objset_impl_t * 291*fa9e4066Sahrens dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, dmu_objset_type_t type, 292*fa9e4066Sahrens dmu_tx_t *tx) 293*fa9e4066Sahrens { 294*fa9e4066Sahrens objset_impl_t *osi; 295*fa9e4066Sahrens dnode_t *mdn; 296*fa9e4066Sahrens 297*fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 298*fa9e4066Sahrens osi = dmu_objset_open_impl(spa, ds, NULL); 299*fa9e4066Sahrens mdn = osi->os_meta_dnode; 300*fa9e4066Sahrens 301*fa9e4066Sahrens dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 302*fa9e4066Sahrens DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 303*fa9e4066Sahrens 304*fa9e4066Sahrens /* 305*fa9e4066Sahrens * We don't want to have to increase the meta-dnode's nlevels 306*fa9e4066Sahrens * later, because then we could do it in quescing context while 307*fa9e4066Sahrens * we are also accessing it in open context. 308*fa9e4066Sahrens * 309*fa9e4066Sahrens * This precaution is not necessary for the MOS (ds == NULL), 310*fa9e4066Sahrens * because the MOS is only updated in syncing context. 311*fa9e4066Sahrens * This is most fortunate: the MOS is the only objset that 312*fa9e4066Sahrens * needs to be synced multiple times as spa_sync() iterates 313*fa9e4066Sahrens * to convergence, so minimizing its dn_nlevels matters. 314*fa9e4066Sahrens */ 315*fa9e4066Sahrens if (ds != NULL) 316*fa9e4066Sahrens mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 317*fa9e4066Sahrens mdn->dn_nlevels = DN_META_DNODE_LEVELS; 318*fa9e4066Sahrens 319*fa9e4066Sahrens ASSERT(type != DMU_OST_NONE); 320*fa9e4066Sahrens ASSERT(type != DMU_OST_ANY); 321*fa9e4066Sahrens ASSERT(type < DMU_OST_NUMTYPES); 322*fa9e4066Sahrens osi->os_phys->os_type = type; 323*fa9e4066Sahrens 324*fa9e4066Sahrens dsl_dataset_dirty(ds, tx); 325*fa9e4066Sahrens 326*fa9e4066Sahrens return (osi); 327*fa9e4066Sahrens } 328*fa9e4066Sahrens 329*fa9e4066Sahrens struct oscarg { 330*fa9e4066Sahrens void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 331*fa9e4066Sahrens void *userarg; 332*fa9e4066Sahrens dsl_dataset_t *clone_parent; 333*fa9e4066Sahrens const char *fullname; 334*fa9e4066Sahrens const char *lastname; 335*fa9e4066Sahrens dmu_objset_type_t type; 336*fa9e4066Sahrens }; 337*fa9e4066Sahrens 338*fa9e4066Sahrens static int 339*fa9e4066Sahrens dmu_objset_create_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 340*fa9e4066Sahrens { 341*fa9e4066Sahrens struct oscarg *oa = arg; 342*fa9e4066Sahrens dsl_dataset_t *ds; 343*fa9e4066Sahrens int err; 344*fa9e4066Sahrens blkptr_t bp; 345*fa9e4066Sahrens 346*fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 347*fa9e4066Sahrens 348*fa9e4066Sahrens err = dsl_dataset_create_sync(dd, oa->fullname, oa->lastname, 349*fa9e4066Sahrens oa->clone_parent, tx); 350*fa9e4066Sahrens dprintf_dd(dd, "fn=%s ln=%s err=%d\n", 351*fa9e4066Sahrens oa->fullname, oa->lastname, err); 352*fa9e4066Sahrens if (err) 353*fa9e4066Sahrens return (err); 354*fa9e4066Sahrens 355*fa9e4066Sahrens err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, oa->fullname, 356*fa9e4066Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds); 357*fa9e4066Sahrens ASSERT3U(err, ==, 0); 358*fa9e4066Sahrens dsl_dataset_get_blkptr(ds, &bp); 359*fa9e4066Sahrens if (BP_IS_HOLE(&bp)) { 360*fa9e4066Sahrens objset_impl_t *osi; 361*fa9e4066Sahrens 362*fa9e4066Sahrens /* This is an empty dmu_objset; not a clone. */ 363*fa9e4066Sahrens osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), 364*fa9e4066Sahrens ds, oa->type, tx); 365*fa9e4066Sahrens 366*fa9e4066Sahrens if (oa->userfunc) 367*fa9e4066Sahrens oa->userfunc(&osi->os, oa->userarg, tx); 368*fa9e4066Sahrens } 369*fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); 370*fa9e4066Sahrens 371*fa9e4066Sahrens return (0); 372*fa9e4066Sahrens } 373*fa9e4066Sahrens 374*fa9e4066Sahrens int 375*fa9e4066Sahrens dmu_objset_create(const char *name, dmu_objset_type_t type, 376*fa9e4066Sahrens objset_t *clone_parent, 377*fa9e4066Sahrens void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) 378*fa9e4066Sahrens { 379*fa9e4066Sahrens dsl_dir_t *pds; 380*fa9e4066Sahrens const char *tail; 381*fa9e4066Sahrens int err = 0; 382*fa9e4066Sahrens 383*fa9e4066Sahrens pds = dsl_dir_open(name, FTAG, &tail); 384*fa9e4066Sahrens if (pds == NULL) 385*fa9e4066Sahrens return (ENOENT); 386*fa9e4066Sahrens if (tail == NULL) { 387*fa9e4066Sahrens dsl_dir_close(pds, FTAG); 388*fa9e4066Sahrens return (EEXIST); 389*fa9e4066Sahrens } 390*fa9e4066Sahrens 391*fa9e4066Sahrens dprintf("name=%s\n", name); 392*fa9e4066Sahrens 393*fa9e4066Sahrens if (tail[0] == '@') { 394*fa9e4066Sahrens /* 395*fa9e4066Sahrens * If we're creating a snapshot, make sure everything 396*fa9e4066Sahrens * they might want is on disk. XXX Sketchy to know 397*fa9e4066Sahrens * about snapshots here, better to put in DSL. 398*fa9e4066Sahrens */ 399*fa9e4066Sahrens objset_t *os; 400*fa9e4066Sahrens size_t plen = strchr(name, '@') - name + 1; 401*fa9e4066Sahrens char *pbuf = kmem_alloc(plen, KM_SLEEP); 402*fa9e4066Sahrens bcopy(name, pbuf, plen - 1); 403*fa9e4066Sahrens pbuf[plen - 1] = '\0'; 404*fa9e4066Sahrens 405*fa9e4066Sahrens err = dmu_objset_open(pbuf, DMU_OST_ANY, DS_MODE_STANDARD, &os); 406*fa9e4066Sahrens if (err == 0) { 407*fa9e4066Sahrens err = zil_suspend(dmu_objset_zil(os)); 408*fa9e4066Sahrens if (err == 0) { 409*fa9e4066Sahrens err = dsl_dir_sync_task(pds, 410*fa9e4066Sahrens dsl_dataset_snapshot_sync, 411*fa9e4066Sahrens (void*)(tail+1), 16*1024); 412*fa9e4066Sahrens zil_resume(dmu_objset_zil(os)); 413*fa9e4066Sahrens } 414*fa9e4066Sahrens dmu_objset_close(os); 415*fa9e4066Sahrens } 416*fa9e4066Sahrens kmem_free(pbuf, plen); 417*fa9e4066Sahrens } else { 418*fa9e4066Sahrens struct oscarg oa = { 0 }; 419*fa9e4066Sahrens oa.userfunc = func; 420*fa9e4066Sahrens oa.userarg = arg; 421*fa9e4066Sahrens oa.fullname = name; 422*fa9e4066Sahrens oa.lastname = tail; 423*fa9e4066Sahrens oa.type = type; 424*fa9e4066Sahrens if (clone_parent != NULL) { 425*fa9e4066Sahrens /* 426*fa9e4066Sahrens * You can't clone to a different type. 427*fa9e4066Sahrens */ 428*fa9e4066Sahrens if (clone_parent->os->os_phys->os_type != type) { 429*fa9e4066Sahrens dsl_dir_close(pds, FTAG); 430*fa9e4066Sahrens return (EINVAL); 431*fa9e4066Sahrens } 432*fa9e4066Sahrens oa.clone_parent = clone_parent->os->os_dsl_dataset; 433*fa9e4066Sahrens } 434*fa9e4066Sahrens err = dsl_dir_sync_task(pds, dmu_objset_create_sync, &oa, 435*fa9e4066Sahrens 256*1024); 436*fa9e4066Sahrens } 437*fa9e4066Sahrens dsl_dir_close(pds, FTAG); 438*fa9e4066Sahrens return (err); 439*fa9e4066Sahrens } 440*fa9e4066Sahrens 441*fa9e4066Sahrens int 442*fa9e4066Sahrens dmu_objset_destroy(const char *name) 443*fa9e4066Sahrens { 444*fa9e4066Sahrens objset_t *os; 445*fa9e4066Sahrens int error; 446*fa9e4066Sahrens 447*fa9e4066Sahrens /* 448*fa9e4066Sahrens * If it looks like we'll be able to destroy it, and there's 449*fa9e4066Sahrens * an unplayed replay log sitting around, destroy the log. 450*fa9e4066Sahrens * It would be nicer to do this in dsl_dataset_destroy_sync(), 451*fa9e4066Sahrens * but the replay log objset is modified in open context. 452*fa9e4066Sahrens */ 453*fa9e4066Sahrens error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 454*fa9e4066Sahrens if (error == 0) { 455*fa9e4066Sahrens zil_destroy(dmu_objset_zil(os)); 456*fa9e4066Sahrens dmu_objset_close(os); 457*fa9e4066Sahrens } 458*fa9e4066Sahrens 459*fa9e4066Sahrens /* XXX uncache everything? */ 460*fa9e4066Sahrens return (dsl_dataset_destroy(name)); 461*fa9e4066Sahrens } 462*fa9e4066Sahrens 463*fa9e4066Sahrens int 464*fa9e4066Sahrens dmu_objset_rollback(const char *name) 465*fa9e4066Sahrens { 466*fa9e4066Sahrens int err; 467*fa9e4066Sahrens objset_t *os; 468*fa9e4066Sahrens 469*fa9e4066Sahrens err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); 470*fa9e4066Sahrens if (err == 0) { 471*fa9e4066Sahrens err = zil_suspend(dmu_objset_zil(os)); 472*fa9e4066Sahrens if (err == 0) 473*fa9e4066Sahrens zil_resume(dmu_objset_zil(os)); 474*fa9e4066Sahrens dmu_objset_close(os); 475*fa9e4066Sahrens if (err == 0) { 476*fa9e4066Sahrens /* XXX uncache everything? */ 477*fa9e4066Sahrens err = dsl_dataset_rollback(name); 478*fa9e4066Sahrens } 479*fa9e4066Sahrens } 480*fa9e4066Sahrens return (err); 481*fa9e4066Sahrens } 482*fa9e4066Sahrens 483*fa9e4066Sahrens static void 484*fa9e4066Sahrens dmu_objset_sync_dnodes(objset_impl_t *os, list_t *list, dmu_tx_t *tx) 485*fa9e4066Sahrens { 486*fa9e4066Sahrens dnode_t *dn = list_head(list); 487*fa9e4066Sahrens int level, err; 488*fa9e4066Sahrens 489*fa9e4066Sahrens for (level = 0; dn = list_head(list); level++) { 490*fa9e4066Sahrens zio_t *zio; 491*fa9e4066Sahrens zio = zio_root(os->os_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 492*fa9e4066Sahrens 493*fa9e4066Sahrens ASSERT3U(level, <=, DN_MAX_LEVELS); 494*fa9e4066Sahrens 495*fa9e4066Sahrens while (dn) { 496*fa9e4066Sahrens dnode_t *next = list_next(list, dn); 497*fa9e4066Sahrens 498*fa9e4066Sahrens list_remove(list, dn); 499*fa9e4066Sahrens if (dnode_sync(dn, level, zio, tx) == 0) { 500*fa9e4066Sahrens /* 501*fa9e4066Sahrens * This dnode requires syncing at higher 502*fa9e4066Sahrens * levels; put it back onto the list. 503*fa9e4066Sahrens */ 504*fa9e4066Sahrens if (next) 505*fa9e4066Sahrens list_insert_before(list, next, dn); 506*fa9e4066Sahrens else 507*fa9e4066Sahrens list_insert_tail(list, dn); 508*fa9e4066Sahrens } 509*fa9e4066Sahrens dn = next; 510*fa9e4066Sahrens } 511*fa9e4066Sahrens err = zio_wait(zio); 512*fa9e4066Sahrens ASSERT(err == 0); 513*fa9e4066Sahrens } 514*fa9e4066Sahrens } 515*fa9e4066Sahrens 516*fa9e4066Sahrens /* ARGSUSED */ 517*fa9e4066Sahrens static void 518*fa9e4066Sahrens killer(zio_t *zio, arc_buf_t *abuf, void *arg) 519*fa9e4066Sahrens { 520*fa9e4066Sahrens objset_impl_t *os = arg; 521*fa9e4066Sahrens objset_phys_t *osphys = zio->io_data; 522*fa9e4066Sahrens dnode_phys_t *dnp = &osphys->os_meta_dnode; 523*fa9e4066Sahrens int i; 524*fa9e4066Sahrens 525*fa9e4066Sahrens ASSERT3U(zio->io_error, ==, 0); 526*fa9e4066Sahrens 527*fa9e4066Sahrens /* 528*fa9e4066Sahrens * Update rootbp fill count. 529*fa9e4066Sahrens */ 530*fa9e4066Sahrens os->os_rootbp.blk_fill = 1; /* count the meta-dnode */ 531*fa9e4066Sahrens for (i = 0; i < dnp->dn_nblkptr; i++) 532*fa9e4066Sahrens os->os_rootbp.blk_fill += dnp->dn_blkptr[i].blk_fill; 533*fa9e4066Sahrens 534*fa9e4066Sahrens BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET); 535*fa9e4066Sahrens BP_SET_LEVEL(zio->io_bp, 0); 536*fa9e4066Sahrens 537*fa9e4066Sahrens if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), 538*fa9e4066Sahrens BP_IDENTITY(&zio->io_bp_orig))) { 539*fa9e4066Sahrens dsl_dataset_block_kill(os->os_dsl_dataset, &zio->io_bp_orig, 540*fa9e4066Sahrens os->os_synctx); 541*fa9e4066Sahrens dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp, 542*fa9e4066Sahrens os->os_synctx); 543*fa9e4066Sahrens } 544*fa9e4066Sahrens } 545*fa9e4066Sahrens 546*fa9e4066Sahrens 547*fa9e4066Sahrens /* called from dsl */ 548*fa9e4066Sahrens void 549*fa9e4066Sahrens dmu_objset_sync(objset_impl_t *os, dmu_tx_t *tx) 550*fa9e4066Sahrens { 551*fa9e4066Sahrens extern taskq_t *dbuf_tq; 552*fa9e4066Sahrens int txgoff; 553*fa9e4066Sahrens list_t *dirty_list; 554*fa9e4066Sahrens int err; 555*fa9e4066Sahrens arc_buf_t *abuf = 556*fa9e4066Sahrens arc_buf_alloc(os->os_spa, sizeof (objset_phys_t), FTAG); 557*fa9e4066Sahrens 558*fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 559*fa9e4066Sahrens ASSERT(os->os_synctx == NULL); 560*fa9e4066Sahrens /* XXX the write_done callback should really give us the tx... */ 561*fa9e4066Sahrens os->os_synctx = tx; 562*fa9e4066Sahrens 563*fa9e4066Sahrens dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 564*fa9e4066Sahrens 565*fa9e4066Sahrens txgoff = tx->tx_txg & TXG_MASK; 566*fa9e4066Sahrens 567*fa9e4066Sahrens dmu_objset_sync_dnodes(os, &os->os_free_dnodes[txgoff], tx); 568*fa9e4066Sahrens dmu_objset_sync_dnodes(os, &os->os_dirty_dnodes[txgoff], tx); 569*fa9e4066Sahrens 570*fa9e4066Sahrens /* 571*fa9e4066Sahrens * Free intent log blocks up to this tx. 572*fa9e4066Sahrens */ 573*fa9e4066Sahrens zil_sync(os->os_zil, tx); 574*fa9e4066Sahrens 575*fa9e4066Sahrens /* 576*fa9e4066Sahrens * Sync meta-dnode 577*fa9e4066Sahrens */ 578*fa9e4066Sahrens dirty_list = &os->os_dirty_dnodes[txgoff]; 579*fa9e4066Sahrens ASSERT(list_head(dirty_list) == NULL); 580*fa9e4066Sahrens list_insert_tail(dirty_list, os->os_meta_dnode); 581*fa9e4066Sahrens dmu_objset_sync_dnodes(os, dirty_list, tx); 582*fa9e4066Sahrens 583*fa9e4066Sahrens /* 584*fa9e4066Sahrens * Sync the root block. 585*fa9e4066Sahrens */ 586*fa9e4066Sahrens bcopy(os->os_phys, abuf->b_data, sizeof (objset_phys_t)); 587*fa9e4066Sahrens err = arc_write(NULL, os->os_spa, os->os_md_checksum, 588*fa9e4066Sahrens os->os_md_compress, tx->tx_txg, &os->os_rootbp, abuf, killer, os, 589*fa9e4066Sahrens ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT); 590*fa9e4066Sahrens ASSERT(err == 0); 591*fa9e4066Sahrens arc_buf_free(abuf, FTAG); 592*fa9e4066Sahrens 593*fa9e4066Sahrens dsl_dataset_set_blkptr(os->os_dsl_dataset, &os->os_rootbp, tx); 594*fa9e4066Sahrens 595*fa9e4066Sahrens ASSERT3P(os->os_synctx, ==, tx); 596*fa9e4066Sahrens taskq_wait(dbuf_tq); 597*fa9e4066Sahrens os->os_synctx = NULL; 598*fa9e4066Sahrens } 599*fa9e4066Sahrens 600*fa9e4066Sahrens void 601*fa9e4066Sahrens dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds) 602*fa9e4066Sahrens { 603*fa9e4066Sahrens if (os->os->os_dsl_dataset != NULL) { 604*fa9e4066Sahrens dsl_dataset_stats(os->os->os_dsl_dataset, dds); 605*fa9e4066Sahrens } else { 606*fa9e4066Sahrens ASSERT(os->os->os_phys->os_type == DMU_OST_META); 607*fa9e4066Sahrens bzero(dds, sizeof (*dds)); 608*fa9e4066Sahrens } 609*fa9e4066Sahrens dds->dds_type = os->os->os_phys->os_type; 610*fa9e4066Sahrens } 611*fa9e4066Sahrens 612*fa9e4066Sahrens int 613*fa9e4066Sahrens dmu_objset_is_snapshot(objset_t *os) 614*fa9e4066Sahrens { 615*fa9e4066Sahrens if (os->os->os_dsl_dataset != NULL) 616*fa9e4066Sahrens return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); 617*fa9e4066Sahrens else 618*fa9e4066Sahrens return (B_FALSE); 619*fa9e4066Sahrens } 620*fa9e4066Sahrens 621*fa9e4066Sahrens int 622*fa9e4066Sahrens dmu_snapshot_list_next(objset_t *os, int namelen, char *name, 623*fa9e4066Sahrens uint64_t *id, uint64_t *offp) 624*fa9e4066Sahrens { 625*fa9e4066Sahrens dsl_dataset_t *ds = os->os->os_dsl_dataset; 626*fa9e4066Sahrens zap_cursor_t cursor; 627*fa9e4066Sahrens zap_attribute_t attr; 628*fa9e4066Sahrens 629*fa9e4066Sahrens if (ds->ds_phys->ds_snapnames_zapobj == 0) 630*fa9e4066Sahrens return (ENOENT); 631*fa9e4066Sahrens 632*fa9e4066Sahrens zap_cursor_init_serialized(&cursor, 633*fa9e4066Sahrens ds->ds_dir->dd_pool->dp_meta_objset, 634*fa9e4066Sahrens ds->ds_phys->ds_snapnames_zapobj, *offp); 635*fa9e4066Sahrens 636*fa9e4066Sahrens if (zap_cursor_retrieve(&cursor, &attr) != 0) 637*fa9e4066Sahrens return (ENOENT); 638*fa9e4066Sahrens 639*fa9e4066Sahrens if (strlen(attr.za_name) + 1 > namelen) 640*fa9e4066Sahrens return (ENAMETOOLONG); 641*fa9e4066Sahrens 642*fa9e4066Sahrens (void) strcpy(name, attr.za_name); 643*fa9e4066Sahrens *id = attr.za_first_integer; 644*fa9e4066Sahrens zap_cursor_advance(&cursor); 645*fa9e4066Sahrens *offp = zap_cursor_serialize(&cursor); 646*fa9e4066Sahrens 647*fa9e4066Sahrens return (0); 648*fa9e4066Sahrens } 649*fa9e4066Sahrens 650*fa9e4066Sahrens /* 651*fa9e4066Sahrens * Find all objsets under name, and for each, call 'func(child_name, arg)'. 652*fa9e4066Sahrens */ 653*fa9e4066Sahrens void 654*fa9e4066Sahrens dmu_objset_find(char *name, void func(char *, void *), void *arg, int flags) 655*fa9e4066Sahrens { 656*fa9e4066Sahrens dsl_dir_t *dd; 657*fa9e4066Sahrens objset_t *os; 658*fa9e4066Sahrens uint64_t snapobj; 659*fa9e4066Sahrens zap_cursor_t zc; 660*fa9e4066Sahrens zap_attribute_t attr; 661*fa9e4066Sahrens char *child; 662*fa9e4066Sahrens int do_self; 663*fa9e4066Sahrens 664*fa9e4066Sahrens dd = dsl_dir_open(name, FTAG, NULL); 665*fa9e4066Sahrens if (dd == NULL) 666*fa9e4066Sahrens return; 667*fa9e4066Sahrens 668*fa9e4066Sahrens do_self = (dd->dd_phys->dd_head_dataset_obj != 0); 669*fa9e4066Sahrens 670*fa9e4066Sahrens /* 671*fa9e4066Sahrens * Iterate over all children. 672*fa9e4066Sahrens */ 673*fa9e4066Sahrens if (dd->dd_phys->dd_child_dir_zapobj != 0) { 674*fa9e4066Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, 675*fa9e4066Sahrens dd->dd_phys->dd_child_dir_zapobj); 676*fa9e4066Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 677*fa9e4066Sahrens (void) zap_cursor_advance(&zc)) { 678*fa9e4066Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 679*fa9e4066Sahrens ASSERT(attr.za_num_integers == 1); 680*fa9e4066Sahrens 681*fa9e4066Sahrens /* 682*fa9e4066Sahrens * No separating '/' because parent's name ends in /. 683*fa9e4066Sahrens */ 684*fa9e4066Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 685*fa9e4066Sahrens /* XXX could probably just use name here */ 686*fa9e4066Sahrens dsl_dir_name(dd, child); 687*fa9e4066Sahrens (void) strcat(child, "/"); 688*fa9e4066Sahrens (void) strcat(child, attr.za_name); 689*fa9e4066Sahrens dmu_objset_find(child, func, arg, flags); 690*fa9e4066Sahrens kmem_free(child, MAXPATHLEN); 691*fa9e4066Sahrens } 692*fa9e4066Sahrens } 693*fa9e4066Sahrens 694*fa9e4066Sahrens /* 695*fa9e4066Sahrens * Iterate over all snapshots. 696*fa9e4066Sahrens */ 697*fa9e4066Sahrens if ((flags & DS_FIND_SNAPSHOTS) && 698*fa9e4066Sahrens dmu_objset_open(name, DMU_OST_ANY, 699*fa9e4066Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { 700*fa9e4066Sahrens 701*fa9e4066Sahrens snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; 702*fa9e4066Sahrens dmu_objset_close(os); 703*fa9e4066Sahrens 704*fa9e4066Sahrens for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); 705*fa9e4066Sahrens zap_cursor_retrieve(&zc, &attr) == 0; 706*fa9e4066Sahrens (void) zap_cursor_advance(&zc)) { 707*fa9e4066Sahrens ASSERT(attr.za_integer_length == sizeof (uint64_t)); 708*fa9e4066Sahrens ASSERT(attr.za_num_integers == 1); 709*fa9e4066Sahrens 710*fa9e4066Sahrens child = kmem_alloc(MAXPATHLEN, KM_SLEEP); 711*fa9e4066Sahrens /* XXX could probably just use name here */ 712*fa9e4066Sahrens dsl_dir_name(dd, child); 713*fa9e4066Sahrens (void) strcat(child, "@"); 714*fa9e4066Sahrens (void) strcat(child, attr.za_name); 715*fa9e4066Sahrens func(child, arg); 716*fa9e4066Sahrens kmem_free(child, MAXPATHLEN); 717*fa9e4066Sahrens } 718*fa9e4066Sahrens } 719*fa9e4066Sahrens 720*fa9e4066Sahrens dsl_dir_close(dd, FTAG); 721*fa9e4066Sahrens 722*fa9e4066Sahrens /* 723*fa9e4066Sahrens * Apply to self if appropriate. 724*fa9e4066Sahrens */ 725*fa9e4066Sahrens if (do_self) 726*fa9e4066Sahrens func(name, arg); 727*fa9e4066Sahrens } 728