1*fa9e4066Sahrens /* 2*fa9e4066Sahrens * CDDL HEADER START 3*fa9e4066Sahrens * 4*fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*fa9e4066Sahrens * Common Development and Distribution License, Version 1.0 only 6*fa9e4066Sahrens * (the "License"). You may not use this file except in compliance 7*fa9e4066Sahrens * with the License. 8*fa9e4066Sahrens * 9*fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 11*fa9e4066Sahrens * See the License for the specific language governing permissions 12*fa9e4066Sahrens * and limitations under the License. 13*fa9e4066Sahrens * 14*fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*fa9e4066Sahrens * 20*fa9e4066Sahrens * CDDL HEADER END 21*fa9e4066Sahrens */ 22*fa9e4066Sahrens /* 23*fa9e4066Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*fa9e4066Sahrens * Use is subject to license terms. 25*fa9e4066Sahrens */ 26*fa9e4066Sahrens 27*fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*fa9e4066Sahrens 29*fa9e4066Sahrens #include <sys/zfs_context.h> 30*fa9e4066Sahrens #include <sys/dbuf.h> 31*fa9e4066Sahrens #include <sys/dnode.h> 32*fa9e4066Sahrens #include <sys/dmu.h> 33*fa9e4066Sahrens #include <sys/dmu_tx.h> 34*fa9e4066Sahrens #include <sys/dmu_objset.h> 35*fa9e4066Sahrens #include <sys/dsl_dataset.h> 36*fa9e4066Sahrens #include <sys/spa.h> 37*fa9e4066Sahrens #include <sys/zio.h> 38*fa9e4066Sahrens 39*fa9e4066Sahrens 40*fa9e4066Sahrens static void 41*fa9e4066Sahrens dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) 42*fa9e4066Sahrens { 43*fa9e4066Sahrens dmu_buf_impl_t *db; 44*fa9e4066Sahrens int i; 45*fa9e4066Sahrens uint64_t txg = tx->tx_txg; 46*fa9e4066Sahrens 47*fa9e4066Sahrens ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); 48*fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 49*fa9e4066Sahrens /* this dnode can't be paged out because it's dirty */ 50*fa9e4066Sahrens 51*fa9e4066Sahrens db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG); 52*fa9e4066Sahrens for (i = 0; i < dn->dn_phys->dn_nblkptr; i++) 53*fa9e4066Sahrens if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i])) 54*fa9e4066Sahrens break; 55*fa9e4066Sahrens if (i != dn->dn_phys->dn_nblkptr) { 56*fa9e4066Sahrens ASSERT(list_link_active(&db->db_dirty_node[txg&TXG_MASK])); 57*fa9e4066Sahrens 58*fa9e4066Sahrens dbuf_read_havestruct(db); 59*fa9e4066Sahrens arc_release(db->db_buf, db); 60*fa9e4066Sahrens /* copy dnode's block pointers to new indirect block */ 61*fa9e4066Sahrens ASSERT3U(sizeof (blkptr_t) * dn->dn_phys->dn_nblkptr, <=, 62*fa9e4066Sahrens db->db.db_size); 63*fa9e4066Sahrens bcopy(dn->dn_phys->dn_blkptr, db->db.db_data, 64*fa9e4066Sahrens sizeof (blkptr_t) * dn->dn_phys->dn_nblkptr); 65*fa9e4066Sahrens } 66*fa9e4066Sahrens 67*fa9e4066Sahrens dn->dn_phys->dn_nlevels += 1; 68*fa9e4066Sahrens dprintf("os=%p obj=%llu, increase to %d\n", 69*fa9e4066Sahrens dn->dn_objset, dn->dn_object, 70*fa9e4066Sahrens dn->dn_phys->dn_nlevels); 71*fa9e4066Sahrens 72*fa9e4066Sahrens /* set dbuf's parent pointers to new indirect buf */ 73*fa9e4066Sahrens for (i = 0; i < dn->dn_phys->dn_nblkptr; i++) { 74*fa9e4066Sahrens dmu_buf_impl_t *child = 75*fa9e4066Sahrens dbuf_find(dn, dn->dn_phys->dn_nlevels-2, i); 76*fa9e4066Sahrens if (child == NULL) 77*fa9e4066Sahrens continue; 78*fa9e4066Sahrens if (child->db_dnode == NULL) { 79*fa9e4066Sahrens mutex_exit(&child->db_mtx); 80*fa9e4066Sahrens continue; 81*fa9e4066Sahrens } 82*fa9e4066Sahrens 83*fa9e4066Sahrens if (child->db_parent == NULL || 84*fa9e4066Sahrens child->db_parent == dn->dn_dbuf) { 85*fa9e4066Sahrens dprintf_dbuf_bp(child, child->db_blkptr, 86*fa9e4066Sahrens "changing db_blkptr to new indirect %s", ""); 87*fa9e4066Sahrens child->db_parent = db; 88*fa9e4066Sahrens dbuf_add_ref(db, child); 89*fa9e4066Sahrens if (db->db.db_data) { 90*fa9e4066Sahrens child->db_blkptr = 91*fa9e4066Sahrens (blkptr_t *)db->db.db_data + i; 92*fa9e4066Sahrens } else { 93*fa9e4066Sahrens child->db_blkptr = NULL; 94*fa9e4066Sahrens } 95*fa9e4066Sahrens dprintf_dbuf_bp(child, child->db_blkptr, 96*fa9e4066Sahrens "changed db_blkptr to new indirect %s", ""); 97*fa9e4066Sahrens } 98*fa9e4066Sahrens ASSERT3P(child->db_parent, ==, db); 99*fa9e4066Sahrens 100*fa9e4066Sahrens mutex_exit(&child->db_mtx); 101*fa9e4066Sahrens } 102*fa9e4066Sahrens 103*fa9e4066Sahrens bzero(dn->dn_phys->dn_blkptr, 104*fa9e4066Sahrens sizeof (blkptr_t) * dn->dn_phys->dn_nblkptr); 105*fa9e4066Sahrens 106*fa9e4066Sahrens dbuf_remove_ref(db, FTAG); 107*fa9e4066Sahrens } 108*fa9e4066Sahrens 109*fa9e4066Sahrens static void 110*fa9e4066Sahrens free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) 111*fa9e4066Sahrens { 112*fa9e4066Sahrens objset_impl_t *os = dn->dn_objset; 113*fa9e4066Sahrens uint64_t bytesfreed = 0; 114*fa9e4066Sahrens int i; 115*fa9e4066Sahrens 116*fa9e4066Sahrens dprintf("os=%p obj=%llx num=%d\n", os, dn->dn_object, num); 117*fa9e4066Sahrens 118*fa9e4066Sahrens for (i = 0; i < num; i++, bp++) { 119*fa9e4066Sahrens if (BP_IS_HOLE(bp)) 120*fa9e4066Sahrens continue; 121*fa9e4066Sahrens 122*fa9e4066Sahrens bytesfreed += BP_GET_ASIZE(bp); 123*fa9e4066Sahrens ASSERT3U(bytesfreed >> DEV_BSHIFT, <=, dn->dn_phys->dn_secphys); 124*fa9e4066Sahrens dsl_dataset_block_kill(os->os_dsl_dataset, bp, tx); 125*fa9e4066Sahrens } 126*fa9e4066Sahrens dnode_diduse_space(dn, -bytesfreed); 127*fa9e4066Sahrens } 128*fa9e4066Sahrens 129*fa9e4066Sahrens static void 130*fa9e4066Sahrens free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) 131*fa9e4066Sahrens { 132*fa9e4066Sahrens #ifdef ZFS_DEBUG 133*fa9e4066Sahrens int off, num; 134*fa9e4066Sahrens int i, err, epbs; 135*fa9e4066Sahrens uint64_t txg = tx->tx_txg; 136*fa9e4066Sahrens 137*fa9e4066Sahrens epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 138*fa9e4066Sahrens off = start - (db->db_blkid * 1<<epbs); 139*fa9e4066Sahrens num = end - start + 1; 140*fa9e4066Sahrens 141*fa9e4066Sahrens ASSERT3U(off, >=, 0); 142*fa9e4066Sahrens ASSERT3U(num, >=, 0); 143*fa9e4066Sahrens ASSERT3U(db->db_level, >, 0); 144*fa9e4066Sahrens ASSERT3U(db->db.db_size, ==, 1<<db->db_dnode->dn_phys->dn_indblkshift); 145*fa9e4066Sahrens ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT); 146*fa9e4066Sahrens ASSERT(db->db_blkptr != NULL); 147*fa9e4066Sahrens 148*fa9e4066Sahrens for (i = off; i < off+num; i++) { 149*fa9e4066Sahrens uint64_t *buf; 150*fa9e4066Sahrens int j; 151*fa9e4066Sahrens dmu_buf_impl_t *child; 152*fa9e4066Sahrens 153*fa9e4066Sahrens ASSERT(db->db_level == 1); 154*fa9e4066Sahrens 155*fa9e4066Sahrens rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER); 156*fa9e4066Sahrens err = dbuf_hold_impl(db->db_dnode, db->db_level-1, 157*fa9e4066Sahrens (db->db_blkid << epbs) + i, TRUE, FTAG, &child); 158*fa9e4066Sahrens rw_exit(&db->db_dnode->dn_struct_rwlock); 159*fa9e4066Sahrens if (err == ENOENT) 160*fa9e4066Sahrens continue; 161*fa9e4066Sahrens ASSERT(err == 0); 162*fa9e4066Sahrens ASSERT(child->db_level == 0); 163*fa9e4066Sahrens ASSERT(!list_link_active(&child->db_dirty_node[txg&TXG_MASK])); 164*fa9e4066Sahrens 165*fa9e4066Sahrens /* db_data_old better be zeroed */ 166*fa9e4066Sahrens if (child->db_d.db_data_old[txg & TXG_MASK]) { 167*fa9e4066Sahrens buf = (child->db_d.db_data_old[txg & TXG_MASK])->b_data; 168*fa9e4066Sahrens for (j = 0; j < child->db.db_size >> 3; j++) { 169*fa9e4066Sahrens if (buf[j] != 0) { 170*fa9e4066Sahrens panic("freed data not zero: " 171*fa9e4066Sahrens "child=%p i=%d off=%d num=%d\n", 172*fa9e4066Sahrens child, i, off, num); 173*fa9e4066Sahrens } 174*fa9e4066Sahrens } 175*fa9e4066Sahrens } 176*fa9e4066Sahrens 177*fa9e4066Sahrens /* 178*fa9e4066Sahrens * db_data better be zeroed unless it's dirty in a 179*fa9e4066Sahrens * future txg. 180*fa9e4066Sahrens */ 181*fa9e4066Sahrens mutex_enter(&child->db_mtx); 182*fa9e4066Sahrens buf = child->db.db_data; 183*fa9e4066Sahrens if (buf != NULL && child->db_state != DB_FILL && 184*fa9e4066Sahrens !list_link_active(&child->db_dirty_node 185*fa9e4066Sahrens [(txg+1) & TXG_MASK]) && 186*fa9e4066Sahrens !list_link_active(&child->db_dirty_node 187*fa9e4066Sahrens [(txg+2) & TXG_MASK])) { 188*fa9e4066Sahrens for (j = 0; j < child->db.db_size >> 3; j++) { 189*fa9e4066Sahrens if (buf[j] != 0) { 190*fa9e4066Sahrens panic("freed data not zero: " 191*fa9e4066Sahrens "child=%p i=%d off=%d num=%d\n", 192*fa9e4066Sahrens child, i, off, num); 193*fa9e4066Sahrens } 194*fa9e4066Sahrens } 195*fa9e4066Sahrens } 196*fa9e4066Sahrens mutex_exit(&child->db_mtx); 197*fa9e4066Sahrens 198*fa9e4066Sahrens dbuf_remove_ref(child, FTAG); 199*fa9e4066Sahrens } 200*fa9e4066Sahrens #endif 201*fa9e4066Sahrens } 202*fa9e4066Sahrens 203*fa9e4066Sahrens static int 204*fa9e4066Sahrens free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, 205*fa9e4066Sahrens dmu_tx_t *tx) 206*fa9e4066Sahrens { 207*fa9e4066Sahrens dnode_t *dn = db->db_dnode; 208*fa9e4066Sahrens blkptr_t *bp; 209*fa9e4066Sahrens dmu_buf_impl_t *subdb; 210*fa9e4066Sahrens uint64_t start, end, dbstart, dbend, i; 211*fa9e4066Sahrens int epbs, shift, err; 212*fa9e4066Sahrens int txg_index = tx->tx_txg&TXG_MASK; 213*fa9e4066Sahrens int all = TRUE; 214*fa9e4066Sahrens 215*fa9e4066Sahrens dbuf_read(db); 216*fa9e4066Sahrens arc_release(db->db_buf, db); 217*fa9e4066Sahrens bp = (blkptr_t *)db->db.db_data; 218*fa9e4066Sahrens 219*fa9e4066Sahrens epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 220*fa9e4066Sahrens shift = (db->db_level - 1) * epbs; 221*fa9e4066Sahrens dbstart = db->db_blkid << epbs; 222*fa9e4066Sahrens start = blkid >> shift; 223*fa9e4066Sahrens if (dbstart < start) { 224*fa9e4066Sahrens bp += start - dbstart; 225*fa9e4066Sahrens all = FALSE; 226*fa9e4066Sahrens } else { 227*fa9e4066Sahrens start = dbstart; 228*fa9e4066Sahrens } 229*fa9e4066Sahrens dbend = ((db->db_blkid + 1) << epbs) - 1; 230*fa9e4066Sahrens end = (blkid + nblks - 1) >> shift; 231*fa9e4066Sahrens if (dbend <= end) 232*fa9e4066Sahrens end = dbend; 233*fa9e4066Sahrens else if (all) 234*fa9e4066Sahrens all = trunc; 235*fa9e4066Sahrens ASSERT3U(start, <=, end); 236*fa9e4066Sahrens 237*fa9e4066Sahrens if (db->db_level == 1) { 238*fa9e4066Sahrens free_verify(db, start, end, tx); 239*fa9e4066Sahrens free_blocks(dn, bp, end-start+1, tx); 240*fa9e4066Sahrens ASSERT(all || list_link_active(&db->db_dirty_node[txg_index])); 241*fa9e4066Sahrens return (all); 242*fa9e4066Sahrens } 243*fa9e4066Sahrens 244*fa9e4066Sahrens for (i = start; i <= end; i++, bp++) { 245*fa9e4066Sahrens if (BP_IS_HOLE(bp)) 246*fa9e4066Sahrens continue; 247*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 248*fa9e4066Sahrens err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb); 249*fa9e4066Sahrens ASSERT3U(err, ==, 0); 250*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 251*fa9e4066Sahrens 252*fa9e4066Sahrens if (free_children(subdb, blkid, nblks, trunc, tx)) { 253*fa9e4066Sahrens ASSERT3P(subdb->db_blkptr, ==, bp); 254*fa9e4066Sahrens free_blocks(dn, bp, 1, tx); 255*fa9e4066Sahrens } 256*fa9e4066Sahrens dbuf_remove_ref(subdb, FTAG); 257*fa9e4066Sahrens } 258*fa9e4066Sahrens #ifdef ZFS_DEBUG 259*fa9e4066Sahrens bp -= (end-start)+1; 260*fa9e4066Sahrens for (i = start; i <= end; i++, bp++) { 261*fa9e4066Sahrens if (i == start && blkid != 0) 262*fa9e4066Sahrens continue; 263*fa9e4066Sahrens else if (i == end && !trunc) 264*fa9e4066Sahrens continue; 265*fa9e4066Sahrens ASSERT3U(bp->blk_birth, ==, 0); 266*fa9e4066Sahrens } 267*fa9e4066Sahrens #endif 268*fa9e4066Sahrens ASSERT(all || list_link_active(&db->db_dirty_node[txg_index])); 269*fa9e4066Sahrens return (all); 270*fa9e4066Sahrens } 271*fa9e4066Sahrens 272*fa9e4066Sahrens /* 273*fa9e4066Sahrens * free_range: Traverse the indicated range of the provided file 274*fa9e4066Sahrens * and "free" all the blocks contained there. 275*fa9e4066Sahrens */ 276*fa9e4066Sahrens static void 277*fa9e4066Sahrens dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) 278*fa9e4066Sahrens { 279*fa9e4066Sahrens blkptr_t *bp = dn->dn_phys->dn_blkptr; 280*fa9e4066Sahrens dmu_buf_impl_t *db; 281*fa9e4066Sahrens int trunc, start, end, shift, i, err; 282*fa9e4066Sahrens int dnlevel = dn->dn_phys->dn_nlevels; 283*fa9e4066Sahrens 284*fa9e4066Sahrens if (blkid > dn->dn_phys->dn_maxblkid) 285*fa9e4066Sahrens return; 286*fa9e4066Sahrens 287*fa9e4066Sahrens ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX); 288*fa9e4066Sahrens trunc = blkid + nblks > dn->dn_phys->dn_maxblkid; 289*fa9e4066Sahrens if (trunc) 290*fa9e4066Sahrens nblks = dn->dn_phys->dn_maxblkid - blkid + 1; 291*fa9e4066Sahrens 292*fa9e4066Sahrens /* There are no indirect blocks in the object */ 293*fa9e4066Sahrens if (dnlevel == 1) { 294*fa9e4066Sahrens if (blkid >= dn->dn_phys->dn_nblkptr) { 295*fa9e4066Sahrens /* this range was never made persistent */ 296*fa9e4066Sahrens return; 297*fa9e4066Sahrens } 298*fa9e4066Sahrens ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr); 299*fa9e4066Sahrens free_blocks(dn, bp + blkid, nblks, tx); 300*fa9e4066Sahrens if (trunc) { 301*fa9e4066Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 302*fa9e4066Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 303*fa9e4066Sahrens dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); 304*fa9e4066Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 305*fa9e4066Sahrens dn->dn_phys->dn_maxblkid == 0 || 306*fa9e4066Sahrens dnode_next_offset(dn, FALSE, &off, 1, 1) == ESRCH); 307*fa9e4066Sahrens } 308*fa9e4066Sahrens return; 309*fa9e4066Sahrens } 310*fa9e4066Sahrens 311*fa9e4066Sahrens shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT); 312*fa9e4066Sahrens start = blkid >> shift; 313*fa9e4066Sahrens ASSERT(start < dn->dn_phys->dn_nblkptr); 314*fa9e4066Sahrens end = (blkid + nblks - 1) >> shift; 315*fa9e4066Sahrens bp += start; 316*fa9e4066Sahrens for (i = start; i <= end; i++, bp++) { 317*fa9e4066Sahrens if (BP_IS_HOLE(bp)) 318*fa9e4066Sahrens continue; 319*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 320*fa9e4066Sahrens err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db); 321*fa9e4066Sahrens ASSERT3U(err, ==, 0); 322*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 323*fa9e4066Sahrens 324*fa9e4066Sahrens if (free_children(db, blkid, nblks, trunc, tx)) { 325*fa9e4066Sahrens ASSERT3P(db->db_blkptr, ==, bp); 326*fa9e4066Sahrens free_blocks(dn, bp, 1, tx); 327*fa9e4066Sahrens } 328*fa9e4066Sahrens dbuf_remove_ref(db, FTAG); 329*fa9e4066Sahrens } 330*fa9e4066Sahrens if (trunc) { 331*fa9e4066Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 332*fa9e4066Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 333*fa9e4066Sahrens dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); 334*fa9e4066Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 335*fa9e4066Sahrens dn->dn_phys->dn_maxblkid == 0 || 336*fa9e4066Sahrens dnode_next_offset(dn, FALSE, &off, 1, 1) == ESRCH); 337*fa9e4066Sahrens } 338*fa9e4066Sahrens } 339*fa9e4066Sahrens 340*fa9e4066Sahrens static int 341*fa9e4066Sahrens dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) 342*fa9e4066Sahrens { 343*fa9e4066Sahrens dmu_buf_impl_t *db; 344*fa9e4066Sahrens int txgoff = tx->tx_txg & TXG_MASK; 345*fa9e4066Sahrens 346*fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 347*fa9e4066Sahrens 348*fa9e4066Sahrens /* Undirty all buffers */ 349*fa9e4066Sahrens while (db = list_head(&dn->dn_dirty_dbufs[txgoff])) { 350*fa9e4066Sahrens mutex_enter(&db->db_mtx); 351*fa9e4066Sahrens /* XXX - use dbuf_undirty()? */ 352*fa9e4066Sahrens list_remove(&dn->dn_dirty_dbufs[txgoff], db); 353*fa9e4066Sahrens if (db->db_level == 0) { 354*fa9e4066Sahrens ASSERT3P(db->db_d.db_data_old[txgoff], ==, db->db_buf); 355*fa9e4066Sahrens if (db->db_d.db_overridden_by[txgoff]) 356*fa9e4066Sahrens dbuf_unoverride(db, tx->tx_txg); 357*fa9e4066Sahrens db->db_d.db_data_old[txgoff] = NULL; 358*fa9e4066Sahrens } 359*fa9e4066Sahrens db->db_dirtycnt -= 1; 360*fa9e4066Sahrens mutex_exit(&db->db_mtx); 361*fa9e4066Sahrens dbuf_remove_ref(db, (void *)(uintptr_t)tx->tx_txg); 362*fa9e4066Sahrens } 363*fa9e4066Sahrens 364*fa9e4066Sahrens ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); 365*fa9e4066Sahrens 366*fa9e4066Sahrens /* Undirty next bits */ 367*fa9e4066Sahrens dn->dn_next_nlevels[txgoff] = 0; 368*fa9e4066Sahrens dn->dn_next_indblkshift[txgoff] = 0; 369*fa9e4066Sahrens 370*fa9e4066Sahrens /* free up all the blocks in the file. */ 371*fa9e4066Sahrens dbuf_free_range(dn, 0, -1, tx); 372*fa9e4066Sahrens dnode_sync_free_range(dn, 0, dn->dn_phys->dn_maxblkid+1, tx); 373*fa9e4066Sahrens ASSERT3U(dn->dn_phys->dn_secphys, ==, 0); 374*fa9e4066Sahrens 375*fa9e4066Sahrens /* 376*fa9e4066Sahrens * All dbufs should be gone, since all holds are gone... 377*fa9e4066Sahrens */ 378*fa9e4066Sahrens ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); 379*fa9e4066Sahrens 380*fa9e4066Sahrens /* ASSERT(blkptrs are zero); */ 381*fa9e4066Sahrens ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); 382*fa9e4066Sahrens ASSERT(dn->dn_type != DMU_OT_NONE); 383*fa9e4066Sahrens 384*fa9e4066Sahrens ASSERT(dn->dn_free_txg > 0); 385*fa9e4066Sahrens if (dn->dn_allocated_txg != dn->dn_free_txg) 386*fa9e4066Sahrens dbuf_will_dirty(dn->dn_dbuf, tx); 387*fa9e4066Sahrens bzero(dn->dn_phys, sizeof (dnode_phys_t)); 388*fa9e4066Sahrens 389*fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 390*fa9e4066Sahrens dn->dn_type = DMU_OT_NONE; 391*fa9e4066Sahrens dn->dn_dirtyblksz[txgoff] = 0; 392*fa9e4066Sahrens dn->dn_maxblkid = 0; 393*fa9e4066Sahrens dn->dn_allocated_txg = 0; 394*fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 395*fa9e4066Sahrens 396*fa9e4066Sahrens ASSERT(!IS_DNODE_DNODE(dn->dn_object)); 397*fa9e4066Sahrens 398*fa9e4066Sahrens dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); 399*fa9e4066Sahrens /* 400*fa9e4066Sahrens * Now that we've released our hold, the dnode may 401*fa9e4066Sahrens * be evicted, so we musn't access it. 402*fa9e4066Sahrens */ 403*fa9e4066Sahrens return (1); 404*fa9e4066Sahrens } 405*fa9e4066Sahrens 406*fa9e4066Sahrens /* 407*fa9e4066Sahrens * Write out the dnode's dirty buffers at the specified level. 408*fa9e4066Sahrens * This may create more dirty buffers at the next level up. 409*fa9e4066Sahrens * 410*fa9e4066Sahrens * NOTE: The dnode is kept in memory by being dirty. Once the 411*fa9e4066Sahrens * dirty bit is cleared, it may be evicted. Beware of this! 412*fa9e4066Sahrens */ 413*fa9e4066Sahrens int 414*fa9e4066Sahrens dnode_sync(dnode_t *dn, int level, zio_t *zio, dmu_tx_t *tx) 415*fa9e4066Sahrens { 416*fa9e4066Sahrens free_range_t *rp; 417*fa9e4066Sahrens int txgoff = tx->tx_txg & TXG_MASK; 418*fa9e4066Sahrens dnode_phys_t *dnp = dn->dn_phys; 419*fa9e4066Sahrens 420*fa9e4066Sahrens /* ASSERT(dn->dn_objset->dd_snapshot == NULL); */ 421*fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 422*fa9e4066Sahrens ASSERT(IS_DNODE_DNODE(dn->dn_object) || 423*fa9e4066Sahrens dn->dn_dirtyblksz[txgoff] > 0); 424*fa9e4066Sahrens 425*fa9e4066Sahrens ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg); 426*fa9e4066Sahrens dnode_verify(dn); 427*fa9e4066Sahrens /* 428*fa9e4066Sahrens * Make sure the dbuf for the dn_phys is released before we modify it. 429*fa9e4066Sahrens */ 430*fa9e4066Sahrens if (dn->dn_dbuf) 431*fa9e4066Sahrens arc_release(dn->dn_dbuf->db_buf, dn->dn_dbuf); 432*fa9e4066Sahrens 433*fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 434*fa9e4066Sahrens if (dn->dn_allocated_txg == tx->tx_txg) { 435*fa9e4066Sahrens /* The dnode is newly allocated or reallocated */ 436*fa9e4066Sahrens if (dnp->dn_type == DMU_OT_NONE) { 437*fa9e4066Sahrens /* this is a first alloc, not a realloc */ 438*fa9e4066Sahrens /* XXX shouldn't the phys already be zeroed? */ 439*fa9e4066Sahrens bzero(dnp, DNODE_CORE_SIZE); 440*fa9e4066Sahrens dnp->dn_datablkszsec = dn->dn_datablkszsec; 441*fa9e4066Sahrens dnp->dn_indblkshift = dn->dn_indblkshift; 442*fa9e4066Sahrens dnp->dn_nlevels = 1; 443*fa9e4066Sahrens } 444*fa9e4066Sahrens 445*fa9e4066Sahrens if (dn->dn_nblkptr > dnp->dn_nblkptr) { 446*fa9e4066Sahrens /* zero the new blkptrs we are gaining */ 447*fa9e4066Sahrens bzero(dnp->dn_blkptr + dnp->dn_nblkptr, 448*fa9e4066Sahrens sizeof (blkptr_t) * 449*fa9e4066Sahrens (dn->dn_nblkptr - dnp->dn_nblkptr)); 450*fa9e4066Sahrens } 451*fa9e4066Sahrens dnp->dn_type = dn->dn_type; 452*fa9e4066Sahrens dnp->dn_bonustype = dn->dn_bonustype; 453*fa9e4066Sahrens dnp->dn_bonuslen = dn->dn_bonuslen; 454*fa9e4066Sahrens dnp->dn_nblkptr = dn->dn_nblkptr; 455*fa9e4066Sahrens } 456*fa9e4066Sahrens 457*fa9e4066Sahrens if (dn->dn_dirtyblksz[txgoff]) { 458*fa9e4066Sahrens ASSERT(P2PHASE(dn->dn_dirtyblksz[txgoff], 459*fa9e4066Sahrens SPA_MINBLOCKSIZE) == 0); 460*fa9e4066Sahrens dnp->dn_datablkszsec = 461*fa9e4066Sahrens dn->dn_dirtyblksz[txgoff] >> SPA_MINBLOCKSHIFT; 462*fa9e4066Sahrens } 463*fa9e4066Sahrens 464*fa9e4066Sahrens if (dn->dn_next_indblkshift[txgoff]) { 465*fa9e4066Sahrens ASSERT(dnp->dn_nlevels == 1); 466*fa9e4066Sahrens dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff]; 467*fa9e4066Sahrens dn->dn_next_indblkshift[txgoff] = 0; 468*fa9e4066Sahrens } 469*fa9e4066Sahrens 470*fa9e4066Sahrens /* 471*fa9e4066Sahrens * Just take the live (open-context) values for checksum and compress. 472*fa9e4066Sahrens * Strictly speaking it's a future leak, but nothing bad happens if we 473*fa9e4066Sahrens * start using the new checksum or compress algorithm a little early. 474*fa9e4066Sahrens */ 475*fa9e4066Sahrens dnp->dn_checksum = dn->dn_checksum; 476*fa9e4066Sahrens dnp->dn_compress = dn->dn_compress; 477*fa9e4066Sahrens 478*fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 479*fa9e4066Sahrens 480*fa9e4066Sahrens /* process all the "freed" ranges in the file */ 481*fa9e4066Sahrens if (dn->dn_free_txg == 0 || dn->dn_free_txg > tx->tx_txg) { 482*fa9e4066Sahrens for (rp = avl_first(&dn->dn_ranges[txgoff]); rp != NULL; 483*fa9e4066Sahrens rp = AVL_NEXT(&dn->dn_ranges[txgoff], rp)) 484*fa9e4066Sahrens dnode_sync_free_range(dn, 485*fa9e4066Sahrens rp->fr_blkid, rp->fr_nblks, tx); 486*fa9e4066Sahrens } 487*fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 488*fa9e4066Sahrens for (rp = avl_first(&dn->dn_ranges[txgoff]); rp; ) { 489*fa9e4066Sahrens free_range_t *last = rp; 490*fa9e4066Sahrens rp = AVL_NEXT(&dn->dn_ranges[txgoff], rp); 491*fa9e4066Sahrens avl_remove(&dn->dn_ranges[txgoff], last); 492*fa9e4066Sahrens kmem_free(last, sizeof (free_range_t)); 493*fa9e4066Sahrens } 494*fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 495*fa9e4066Sahrens 496*fa9e4066Sahrens if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) { 497*fa9e4066Sahrens ASSERT3U(level, ==, 0); 498*fa9e4066Sahrens return (dnode_sync_free(dn, tx)); 499*fa9e4066Sahrens } 500*fa9e4066Sahrens 501*fa9e4066Sahrens if (dn->dn_next_nlevels[txgoff]) { 502*fa9e4066Sahrens int new_lvl = dn->dn_next_nlevels[txgoff]; 503*fa9e4066Sahrens 504*fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 505*fa9e4066Sahrens while (new_lvl > dnp->dn_nlevels) 506*fa9e4066Sahrens dnode_increase_indirection(dn, tx); 507*fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 508*fa9e4066Sahrens dn->dn_next_nlevels[txgoff] = 0; 509*fa9e4066Sahrens } 510*fa9e4066Sahrens 511*fa9e4066Sahrens if (level == dnp->dn_nlevels) { 512*fa9e4066Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 513*fa9e4066Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 514*fa9e4066Sahrens 515*fa9e4066Sahrens /* we've already synced out all data and indirect blocks */ 516*fa9e4066Sahrens /* there are no more dirty dbufs under this dnode */ 517*fa9e4066Sahrens ASSERT3P(list_head(&dn->dn_dirty_dbufs[txgoff]), ==, NULL); 518*fa9e4066Sahrens ASSERT(dn->dn_free_txg == 0 || dn->dn_free_txg >= tx->tx_txg); 519*fa9e4066Sahrens 520*fa9e4066Sahrens /* XXX this is expensive. remove once 6343073 is closed. */ 521*fa9e4066Sahrens /* NB: the "off < maxblkid" is to catch overflow */ 522*fa9e4066Sahrens /* 523*fa9e4066Sahrens * NB: if blocksize is changing, we could get confused, 524*fa9e4066Sahrens * so only bother if there are multiple blocks and thus 525*fa9e4066Sahrens * it can't be changing. 526*fa9e4066Sahrens */ 527*fa9e4066Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 528*fa9e4066Sahrens dn->dn_phys->dn_maxblkid == 0 || 529*fa9e4066Sahrens dnode_next_offset(dn, FALSE, &off, 1, 1) == ESRCH); 530*fa9e4066Sahrens 531*fa9e4066Sahrens dn->dn_dirtyblksz[txgoff] = 0; 532*fa9e4066Sahrens 533*fa9e4066Sahrens 534*fa9e4066Sahrens if (!IS_DNODE_DNODE(dn->dn_object)) { 535*fa9e4066Sahrens dbuf_will_dirty(dn->dn_dbuf, tx); 536*fa9e4066Sahrens dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); 537*fa9e4066Sahrens } 538*fa9e4066Sahrens 539*fa9e4066Sahrens /* 540*fa9e4066Sahrens * Now that we've dropped the reference, the dnode may 541*fa9e4066Sahrens * be evicted, so we musn't access it. 542*fa9e4066Sahrens */ 543*fa9e4066Sahrens return (1); 544*fa9e4066Sahrens } else { 545*fa9e4066Sahrens dmu_buf_impl_t *db, *db_next; 546*fa9e4066Sahrens list_t *list = &dn->dn_dirty_dbufs[txgoff]; 547*fa9e4066Sahrens /* 548*fa9e4066Sahrens * Iterate over the list, removing and sync'ing dbufs 549*fa9e4066Sahrens * which are on the level we want, and leaving others. 550*fa9e4066Sahrens */ 551*fa9e4066Sahrens for (db = list_head(list); db; db = db_next) { 552*fa9e4066Sahrens db_next = list_next(list, db); 553*fa9e4066Sahrens if (db->db_level == level) { 554*fa9e4066Sahrens list_remove(list, db); 555*fa9e4066Sahrens dbuf_sync(db, zio, tx); 556*fa9e4066Sahrens } 557*fa9e4066Sahrens } 558*fa9e4066Sahrens return (0); 559*fa9e4066Sahrens } 560*fa9e4066Sahrens } 561