1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5f65e61c0Sahrens * Common Development and Distribution License (the "License"). 6f65e61c0Sahrens * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 2206e0070dSMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 233f2366c2SGordon Ross * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24bf16b11eSMatthew Ahrens * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 25aad02571SSaso Kiselkov * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 26810e43b2SBill Pijewski * Copyright (c) 2013, Joyent, Inc. All rights reserved. 27*bc9014e6SJustin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 
28fa9e4066Sahrens */ 29fa9e4066Sahrens 30fa9e4066Sahrens #include <sys/zfs_context.h> 31fa9e4066Sahrens #include <sys/dmu.h> 322f3d8780SMatthew Ahrens #include <sys/dmu_send.h> 33fa9e4066Sahrens #include <sys/dmu_impl.h> 34fa9e4066Sahrens #include <sys/dbuf.h> 35fa9e4066Sahrens #include <sys/dmu_objset.h> 36fa9e4066Sahrens #include <sys/dsl_dataset.h> 37fa9e4066Sahrens #include <sys/dsl_dir.h> 38fa9e4066Sahrens #include <sys/dmu_tx.h> 39fa9e4066Sahrens #include <sys/spa.h> 40fa9e4066Sahrens #include <sys/zio.h> 41fa9e4066Sahrens #include <sys/dmu_zfetch.h> 420a586ceaSMark Shellenbaum #include <sys/sa.h> 430a586ceaSMark Shellenbaum #include <sys/sa_impl.h> 445d7b4d43SMatthew Ahrens #include <sys/zfeature.h> 455d7b4d43SMatthew Ahrens #include <sys/blkptr.h> 46bf16b11eSMatthew Ahrens #include <sys/range_tree.h> 47fa9e4066Sahrens 48713d6c20SMatthew Ahrens /* 49713d6c20SMatthew Ahrens * Number of times that zfs_free_range() took the slow path while doing 50713d6c20SMatthew Ahrens * a zfs receive. A nonzero value indicates a potential performance problem. 51713d6c20SMatthew Ahrens */ 52713d6c20SMatthew Ahrens uint64_t zfs_free_range_recv_miss; 53713d6c20SMatthew Ahrens 54fa9e4066Sahrens static void dbuf_destroy(dmu_buf_impl_t *db); 553b2aab18SMatthew Ahrens static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); 56088f3894Sahrens static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); 57fa9e4066Sahrens 58*bc9014e6SJustin Gibbs #ifndef __lint 59*bc9014e6SJustin Gibbs extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu, 60*bc9014e6SJustin Gibbs dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp); 61*bc9014e6SJustin Gibbs #endif /* ! __lint */ 62*bc9014e6SJustin Gibbs 63fa9e4066Sahrens /* 64fa9e4066Sahrens * Global data structures and functions for the dbuf cache. 
65fa9e4066Sahrens */ 66fa9e4066Sahrens static kmem_cache_t *dbuf_cache; 67*bc9014e6SJustin Gibbs static taskq_t *dbu_evict_taskq; 68fa9e4066Sahrens 69fa9e4066Sahrens /* ARGSUSED */ 70fa9e4066Sahrens static int 71fa9e4066Sahrens dbuf_cons(void *vdb, void *unused, int kmflag) 72fa9e4066Sahrens { 73fa9e4066Sahrens dmu_buf_impl_t *db = vdb; 74fa9e4066Sahrens bzero(db, sizeof (dmu_buf_impl_t)); 75fa9e4066Sahrens 76fa9e4066Sahrens mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL); 77fa9e4066Sahrens cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL); 78fa9e4066Sahrens refcount_create(&db->db_holds); 790f6d88adSAlex Reece 80fa9e4066Sahrens return (0); 81fa9e4066Sahrens } 82fa9e4066Sahrens 83fa9e4066Sahrens /* ARGSUSED */ 84fa9e4066Sahrens static void 85fa9e4066Sahrens dbuf_dest(void *vdb, void *unused) 86fa9e4066Sahrens { 87fa9e4066Sahrens dmu_buf_impl_t *db = vdb; 88fa9e4066Sahrens mutex_destroy(&db->db_mtx); 89fa9e4066Sahrens cv_destroy(&db->db_changed); 90fa9e4066Sahrens refcount_destroy(&db->db_holds); 91fa9e4066Sahrens } 92fa9e4066Sahrens 93fa9e4066Sahrens /* 94fa9e4066Sahrens * dbuf hash table routines 95fa9e4066Sahrens */ 96fa9e4066Sahrens static dbuf_hash_table_t dbuf_hash_table; 97fa9e4066Sahrens 98fa9e4066Sahrens static uint64_t dbuf_hash_count; 99fa9e4066Sahrens 100fa9e4066Sahrens static uint64_t 101fa9e4066Sahrens dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid) 102fa9e4066Sahrens { 103fa9e4066Sahrens uintptr_t osv = (uintptr_t)os; 104fa9e4066Sahrens uint64_t crc = -1ULL; 105fa9e4066Sahrens 106fa9e4066Sahrens ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 107fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (lvl)) & 0xFF]; 108fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF]; 109fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF]; 110fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF]; 111fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 0)) & 
0xFF]; 112fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 8)) & 0xFF]; 113fa9e4066Sahrens 114fa9e4066Sahrens crc ^= (osv>>14) ^ (obj>>16) ^ (blkid>>16); 115fa9e4066Sahrens 116fa9e4066Sahrens return (crc); 117fa9e4066Sahrens } 118fa9e4066Sahrens 119fa9e4066Sahrens #define DBUF_HASH(os, obj, level, blkid) dbuf_hash(os, obj, level, blkid); 120fa9e4066Sahrens 121fa9e4066Sahrens #define DBUF_EQUAL(dbuf, os, obj, level, blkid) \ 122fa9e4066Sahrens ((dbuf)->db.db_object == (obj) && \ 123fa9e4066Sahrens (dbuf)->db_objset == (os) && \ 124fa9e4066Sahrens (dbuf)->db_level == (level) && \ 125fa9e4066Sahrens (dbuf)->db_blkid == (blkid)) 126fa9e4066Sahrens 127fa9e4066Sahrens dmu_buf_impl_t * 128fa9e4066Sahrens dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid) 129fa9e4066Sahrens { 130fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 131503ad85cSMatthew Ahrens objset_t *os = dn->dn_objset; 132fa9e4066Sahrens uint64_t obj = dn->dn_object; 133fa9e4066Sahrens uint64_t hv = DBUF_HASH(os, obj, level, blkid); 134fa9e4066Sahrens uint64_t idx = hv & h->hash_table_mask; 135fa9e4066Sahrens dmu_buf_impl_t *db; 136fa9e4066Sahrens 137fa9e4066Sahrens mutex_enter(DBUF_HASH_MUTEX(h, idx)); 138fa9e4066Sahrens for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) { 139fa9e4066Sahrens if (DBUF_EQUAL(db, os, obj, level, blkid)) { 140fa9e4066Sahrens mutex_enter(&db->db_mtx); 141ea8dc4b6Seschrock if (db->db_state != DB_EVICTING) { 142fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 143fa9e4066Sahrens return (db); 144fa9e4066Sahrens } 145fa9e4066Sahrens mutex_exit(&db->db_mtx); 146fa9e4066Sahrens } 147fa9e4066Sahrens } 148fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 149fa9e4066Sahrens return (NULL); 150fa9e4066Sahrens } 151fa9e4066Sahrens 152fa9e4066Sahrens /* 153fa9e4066Sahrens * Insert an entry into the hash table. 
If there is already an element 154fa9e4066Sahrens * equal to elem in the hash table, then the already existing element 155fa9e4066Sahrens * will be returned and the new element will not be inserted. 156fa9e4066Sahrens * Otherwise returns NULL. 157fa9e4066Sahrens */ 158fa9e4066Sahrens static dmu_buf_impl_t * 159fa9e4066Sahrens dbuf_hash_insert(dmu_buf_impl_t *db) 160fa9e4066Sahrens { 161fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 162503ad85cSMatthew Ahrens objset_t *os = db->db_objset; 163fa9e4066Sahrens uint64_t obj = db->db.db_object; 164fa9e4066Sahrens int level = db->db_level; 165fa9e4066Sahrens uint64_t blkid = db->db_blkid; 166fa9e4066Sahrens uint64_t hv = DBUF_HASH(os, obj, level, blkid); 167fa9e4066Sahrens uint64_t idx = hv & h->hash_table_mask; 168fa9e4066Sahrens dmu_buf_impl_t *dbf; 169fa9e4066Sahrens 170fa9e4066Sahrens mutex_enter(DBUF_HASH_MUTEX(h, idx)); 171fa9e4066Sahrens for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) { 172fa9e4066Sahrens if (DBUF_EQUAL(dbf, os, obj, level, blkid)) { 173fa9e4066Sahrens mutex_enter(&dbf->db_mtx); 174ea8dc4b6Seschrock if (dbf->db_state != DB_EVICTING) { 175fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 176fa9e4066Sahrens return (dbf); 177fa9e4066Sahrens } 178fa9e4066Sahrens mutex_exit(&dbf->db_mtx); 179fa9e4066Sahrens } 180fa9e4066Sahrens } 181fa9e4066Sahrens 182fa9e4066Sahrens mutex_enter(&db->db_mtx); 183fa9e4066Sahrens db->db_hash_next = h->hash_table[idx]; 184fa9e4066Sahrens h->hash_table[idx] = db; 185fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 1861a5e258fSJosef 'Jeff' Sipek atomic_inc_64(&dbuf_hash_count); 187fa9e4066Sahrens 188fa9e4066Sahrens return (NULL); 189fa9e4066Sahrens } 190fa9e4066Sahrens 191fa9e4066Sahrens /* 192bbfa8ea8SMatthew Ahrens * Remove an entry from the hash table. It must be in the EVICTING state. 
193fa9e4066Sahrens */ 194fa9e4066Sahrens static void 195fa9e4066Sahrens dbuf_hash_remove(dmu_buf_impl_t *db) 196fa9e4066Sahrens { 197fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 198fa9e4066Sahrens uint64_t hv = DBUF_HASH(db->db_objset, db->db.db_object, 199fa9e4066Sahrens db->db_level, db->db_blkid); 200fa9e4066Sahrens uint64_t idx = hv & h->hash_table_mask; 201fa9e4066Sahrens dmu_buf_impl_t *dbf, **dbp; 202fa9e4066Sahrens 203fa9e4066Sahrens /* 204bbfa8ea8SMatthew Ahrens * We musn't hold db_mtx to maintain lock ordering: 205fa9e4066Sahrens * DBUF_HASH_MUTEX > db_mtx. 206fa9e4066Sahrens */ 207fa9e4066Sahrens ASSERT(refcount_is_zero(&db->db_holds)); 208ea8dc4b6Seschrock ASSERT(db->db_state == DB_EVICTING); 209fa9e4066Sahrens ASSERT(!MUTEX_HELD(&db->db_mtx)); 210fa9e4066Sahrens 211fa9e4066Sahrens mutex_enter(DBUF_HASH_MUTEX(h, idx)); 212fa9e4066Sahrens dbp = &h->hash_table[idx]; 213fa9e4066Sahrens while ((dbf = *dbp) != db) { 214fa9e4066Sahrens dbp = &dbf->db_hash_next; 215fa9e4066Sahrens ASSERT(dbf != NULL); 216fa9e4066Sahrens } 217fa9e4066Sahrens *dbp = db->db_hash_next; 218fa9e4066Sahrens db->db_hash_next = NULL; 219fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 2201a5e258fSJosef 'Jeff' Sipek atomic_dec_64(&dbuf_hash_count); 221fa9e4066Sahrens } 222fa9e4066Sahrens 223ea8dc4b6Seschrock static arc_evict_func_t dbuf_do_evict; 224fa9e4066Sahrens 225*bc9014e6SJustin Gibbs typedef enum { 226*bc9014e6SJustin Gibbs DBVU_EVICTING, 227*bc9014e6SJustin Gibbs DBVU_NOT_EVICTING 228*bc9014e6SJustin Gibbs } dbvu_verify_type_t; 229*bc9014e6SJustin Gibbs 230*bc9014e6SJustin Gibbs static void 231*bc9014e6SJustin Gibbs dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type) 232*bc9014e6SJustin Gibbs { 233*bc9014e6SJustin Gibbs #ifdef ZFS_DEBUG 234*bc9014e6SJustin Gibbs int64_t holds; 235*bc9014e6SJustin Gibbs 236*bc9014e6SJustin Gibbs if (db->db_user == NULL) 237*bc9014e6SJustin Gibbs return; 238*bc9014e6SJustin Gibbs 239*bc9014e6SJustin Gibbs /* Only 
data blocks support the attachment of user data. */ 240*bc9014e6SJustin Gibbs ASSERT(db->db_level == 0); 241*bc9014e6SJustin Gibbs 242*bc9014e6SJustin Gibbs /* Clients must resolve a dbuf before attaching user data. */ 243*bc9014e6SJustin Gibbs ASSERT(db->db.db_data != NULL); 244*bc9014e6SJustin Gibbs ASSERT3U(db->db_state, ==, DB_CACHED); 245*bc9014e6SJustin Gibbs 246*bc9014e6SJustin Gibbs holds = refcount_count(&db->db_holds); 247*bc9014e6SJustin Gibbs if (verify_type == DBVU_EVICTING) { 248*bc9014e6SJustin Gibbs /* 249*bc9014e6SJustin Gibbs * Immediate eviction occurs when holds == dirtycnt. 250*bc9014e6SJustin Gibbs * For normal eviction buffers, holds is zero on 251*bc9014e6SJustin Gibbs * eviction, except when dbuf_fix_old_data() calls 252*bc9014e6SJustin Gibbs * dbuf_clear_data(). However, the hold count can grow 253*bc9014e6SJustin Gibbs * during eviction even though db_mtx is held (see 254*bc9014e6SJustin Gibbs * dmu_bonus_hold() for an example), so we can only 255*bc9014e6SJustin Gibbs * test the generic invariant that holds >= dirtycnt. 
256*bc9014e6SJustin Gibbs */ 257*bc9014e6SJustin Gibbs ASSERT3U(holds, >=, db->db_dirtycnt); 258*bc9014e6SJustin Gibbs } else { 259*bc9014e6SJustin Gibbs if (db->db_immediate_evict == TRUE) 260*bc9014e6SJustin Gibbs ASSERT3U(holds, >=, db->db_dirtycnt); 261*bc9014e6SJustin Gibbs else 262*bc9014e6SJustin Gibbs ASSERT3U(holds, >, 0); 263*bc9014e6SJustin Gibbs } 264*bc9014e6SJustin Gibbs #endif 265*bc9014e6SJustin Gibbs } 266*bc9014e6SJustin Gibbs 267fa9e4066Sahrens static void 268fa9e4066Sahrens dbuf_evict_user(dmu_buf_impl_t *db) 269fa9e4066Sahrens { 270*bc9014e6SJustin Gibbs dmu_buf_user_t *dbu = db->db_user; 271*bc9014e6SJustin Gibbs 272fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 273fa9e4066Sahrens 274*bc9014e6SJustin Gibbs if (dbu == NULL) 275fa9e4066Sahrens return; 276fa9e4066Sahrens 277*bc9014e6SJustin Gibbs dbuf_verify_user(db, DBVU_EVICTING); 278*bc9014e6SJustin Gibbs db->db_user = NULL; 279*bc9014e6SJustin Gibbs 280*bc9014e6SJustin Gibbs #ifdef ZFS_DEBUG 281*bc9014e6SJustin Gibbs if (dbu->dbu_clear_on_evict_dbufp != NULL) 282*bc9014e6SJustin Gibbs *dbu->dbu_clear_on_evict_dbufp = NULL; 283*bc9014e6SJustin Gibbs #endif 284*bc9014e6SJustin Gibbs 285*bc9014e6SJustin Gibbs /* 286*bc9014e6SJustin Gibbs * Invoke the callback from a taskq to avoid lock order reversals 287*bc9014e6SJustin Gibbs * and limit stack depth. 
288*bc9014e6SJustin Gibbs */ 289*bc9014e6SJustin Gibbs taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0, 290*bc9014e6SJustin Gibbs &dbu->dbu_tqent); 291fa9e4066Sahrens } 292fa9e4066Sahrens 293744947dcSTom Erickson boolean_t 294744947dcSTom Erickson dbuf_is_metadata(dmu_buf_impl_t *db) 295744947dcSTom Erickson { 296744947dcSTom Erickson if (db->db_level > 0) { 297744947dcSTom Erickson return (B_TRUE); 298744947dcSTom Erickson } else { 299744947dcSTom Erickson boolean_t is_metadata; 300744947dcSTom Erickson 301744947dcSTom Erickson DB_DNODE_ENTER(db); 302ad135b5dSChristopher Siden is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type); 303744947dcSTom Erickson DB_DNODE_EXIT(db); 304744947dcSTom Erickson 305744947dcSTom Erickson return (is_metadata); 306744947dcSTom Erickson } 307744947dcSTom Erickson } 308744947dcSTom Erickson 309ea8dc4b6Seschrock void 310ea8dc4b6Seschrock dbuf_evict(dmu_buf_impl_t *db) 311ea8dc4b6Seschrock { 312ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&db->db_mtx)); 313ea8dc4b6Seschrock ASSERT(db->db_buf == NULL); 314c717a561Smaybee ASSERT(db->db_data_pending == NULL); 315ea8dc4b6Seschrock 316ea8dc4b6Seschrock dbuf_clear(db); 317ea8dc4b6Seschrock dbuf_destroy(db); 318ea8dc4b6Seschrock } 319ea8dc4b6Seschrock 320fa9e4066Sahrens void 321fa9e4066Sahrens dbuf_init(void) 322fa9e4066Sahrens { 323ea8dc4b6Seschrock uint64_t hsize = 1ULL << 16; 324fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 325fa9e4066Sahrens int i; 326fa9e4066Sahrens 327fa9e4066Sahrens /* 328fa9e4066Sahrens * The hash table is big enough to fill all of physical memory 329ea8dc4b6Seschrock * with an average 4K block size. The table will take up 330ea8dc4b6Seschrock * totalmem*sizeof(void*)/4K (i.e. 2MB/GB with 8-byte pointers). 
331fa9e4066Sahrens */ 332ea8dc4b6Seschrock while (hsize * 4096 < physmem * PAGESIZE) 333fa9e4066Sahrens hsize <<= 1; 334fa9e4066Sahrens 335ea8dc4b6Seschrock retry: 336fa9e4066Sahrens h->hash_table_mask = hsize - 1; 337ea8dc4b6Seschrock h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP); 338ea8dc4b6Seschrock if (h->hash_table == NULL) { 339ea8dc4b6Seschrock /* XXX - we should really return an error instead of assert */ 340ea8dc4b6Seschrock ASSERT(hsize > (1ULL << 10)); 341ea8dc4b6Seschrock hsize >>= 1; 342ea8dc4b6Seschrock goto retry; 343ea8dc4b6Seschrock } 344fa9e4066Sahrens 345fa9e4066Sahrens dbuf_cache = kmem_cache_create("dmu_buf_impl_t", 346fa9e4066Sahrens sizeof (dmu_buf_impl_t), 347fa9e4066Sahrens 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); 348fa9e4066Sahrens 349fa9e4066Sahrens for (i = 0; i < DBUF_MUTEXES; i++) 350fa9e4066Sahrens mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); 351*bc9014e6SJustin Gibbs 352*bc9014e6SJustin Gibbs /* 353*bc9014e6SJustin Gibbs * All entries are queued via taskq_dispatch_ent(), so min/maxalloc 354*bc9014e6SJustin Gibbs * configuration is not required. 355*bc9014e6SJustin Gibbs */ 356*bc9014e6SJustin Gibbs dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0); 357fa9e4066Sahrens } 358fa9e4066Sahrens 359fa9e4066Sahrens void 360fa9e4066Sahrens dbuf_fini(void) 361fa9e4066Sahrens { 362fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 363fa9e4066Sahrens int i; 364fa9e4066Sahrens 365fa9e4066Sahrens for (i = 0; i < DBUF_MUTEXES; i++) 366fa9e4066Sahrens mutex_destroy(&h->hash_mutexes[i]); 367fa9e4066Sahrens kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); 368fa9e4066Sahrens kmem_cache_destroy(dbuf_cache); 369*bc9014e6SJustin Gibbs taskq_destroy(dbu_evict_taskq); 370fa9e4066Sahrens } 371fa9e4066Sahrens 372fa9e4066Sahrens /* 373fa9e4066Sahrens * Other stuff. 
374fa9e4066Sahrens */ 375fa9e4066Sahrens 3769c9dc39aSek #ifdef ZFS_DEBUG 377fa9e4066Sahrens static void 378fa9e4066Sahrens dbuf_verify(dmu_buf_impl_t *db) 379fa9e4066Sahrens { 380744947dcSTom Erickson dnode_t *dn; 381b24ab676SJeff Bonwick dbuf_dirty_record_t *dr; 382fa9e4066Sahrens 383fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 384fa9e4066Sahrens 385fa9e4066Sahrens if (!(zfs_flags & ZFS_DEBUG_DBUF_VERIFY)) 386fa9e4066Sahrens return; 387fa9e4066Sahrens 388fa9e4066Sahrens ASSERT(db->db_objset != NULL); 389744947dcSTom Erickson DB_DNODE_ENTER(db); 390744947dcSTom Erickson dn = DB_DNODE(db); 391fa9e4066Sahrens if (dn == NULL) { 392fa9e4066Sahrens ASSERT(db->db_parent == NULL); 393fa9e4066Sahrens ASSERT(db->db_blkptr == NULL); 394fa9e4066Sahrens } else { 395fa9e4066Sahrens ASSERT3U(db->db.db_object, ==, dn->dn_object); 396fa9e4066Sahrens ASSERT3P(db->db_objset, ==, dn->dn_objset); 397fa9e4066Sahrens ASSERT3U(db->db_level, <, dn->dn_nlevels); 398744947dcSTom Erickson ASSERT(db->db_blkid == DMU_BONUS_BLKID || 399744947dcSTom Erickson db->db_blkid == DMU_SPILL_BLKID || 4000f6d88adSAlex Reece !avl_is_empty(&dn->dn_dbufs)); 401fa9e4066Sahrens } 4020a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 403fa9e4066Sahrens ASSERT(dn != NULL); 4041934e92fSmaybee ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); 4050a586ceaSMark Shellenbaum ASSERT3U(db->db.db_offset, ==, DMU_BONUS_BLKID); 4060a586ceaSMark Shellenbaum } else if (db->db_blkid == DMU_SPILL_BLKID) { 4070a586ceaSMark Shellenbaum ASSERT(dn != NULL); 4080a586ceaSMark Shellenbaum ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); 409fb09f5aaSMadhav Suresh ASSERT0(db->db.db_offset); 410fa9e4066Sahrens } else { 411fa9e4066Sahrens ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); 412fa9e4066Sahrens } 413fa9e4066Sahrens 414b24ab676SJeff Bonwick for (dr = db->db_data_pending; dr != NULL; dr = dr->dr_next) 415b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 416b24ab676SJeff Bonwick 417b24ab676SJeff Bonwick 
for (dr = db->db_last_dirty; dr != NULL; dr = dr->dr_next) 418b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 419b24ab676SJeff Bonwick 42088b7b0f2SMatthew Ahrens /* 42188b7b0f2SMatthew Ahrens * We can't assert that db_size matches dn_datablksz because it 42288b7b0f2SMatthew Ahrens * can be momentarily different when another thread is doing 42388b7b0f2SMatthew Ahrens * dnode_set_blksz(). 42488b7b0f2SMatthew Ahrens */ 42588b7b0f2SMatthew Ahrens if (db->db_level == 0 && db->db.db_object == DMU_META_DNODE_OBJECT) { 426b24ab676SJeff Bonwick dr = db->db_data_pending; 42788b7b0f2SMatthew Ahrens /* 42888b7b0f2SMatthew Ahrens * It should only be modified in syncing context, so 42988b7b0f2SMatthew Ahrens * make sure we only have one copy of the data. 43088b7b0f2SMatthew Ahrens */ 43188b7b0f2SMatthew Ahrens ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf); 432fa9e4066Sahrens } 433fa9e4066Sahrens 434fa9e4066Sahrens /* verify db->db_blkptr */ 435fa9e4066Sahrens if (db->db_blkptr) { 436fa9e4066Sahrens if (db->db_parent == dn->dn_dbuf) { 437fa9e4066Sahrens /* db is pointed to by the dnode */ 438fa9e4066Sahrens /* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */ 43914843421SMatthew Ahrens if (DMU_OBJECT_IS_SPECIAL(db->db.db_object)) 440fa9e4066Sahrens ASSERT(db->db_parent == NULL); 441fa9e4066Sahrens else 442fa9e4066Sahrens ASSERT(db->db_parent != NULL); 4430a586ceaSMark Shellenbaum if (db->db_blkid != DMU_SPILL_BLKID) 4440a586ceaSMark Shellenbaum ASSERT3P(db->db_blkptr, ==, 4450a586ceaSMark Shellenbaum &dn->dn_phys->dn_blkptr[db->db_blkid]); 446fa9e4066Sahrens } else { 447fa9e4066Sahrens /* db is pointed to by an indirect block */ 448fa9e4066Sahrens int epb = db->db_parent->db.db_size >> SPA_BLKPTRSHIFT; 449fa9e4066Sahrens ASSERT3U(db->db_parent->db_level, ==, db->db_level+1); 450fa9e4066Sahrens ASSERT3U(db->db_parent->db.db_object, ==, 451fa9e4066Sahrens db->db.db_object); 452fa9e4066Sahrens /* 453fa9e4066Sahrens * dnode_grow_indblksz() can make this fail if we don't 
454fa9e4066Sahrens * have the struct_rwlock. XXX indblksz no longer 455fa9e4066Sahrens * grows. safe to do this now? 456fa9e4066Sahrens */ 457744947dcSTom Erickson if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) { 458fa9e4066Sahrens ASSERT3P(db->db_blkptr, ==, 459fa9e4066Sahrens ((blkptr_t *)db->db_parent->db.db_data + 460fa9e4066Sahrens db->db_blkid % epb)); 461fa9e4066Sahrens } 462fa9e4066Sahrens } 463fa9e4066Sahrens } 464fa9e4066Sahrens if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) && 4653f9d6ad7SLin Ling (db->db_buf == NULL || db->db_buf->b_data) && 4660a586ceaSMark Shellenbaum db->db.db_data && db->db_blkid != DMU_BONUS_BLKID && 467fa9e4066Sahrens db->db_state != DB_FILL && !dn->dn_free_txg) { 468fa9e4066Sahrens /* 469fa9e4066Sahrens * If the blkptr isn't set but they have nonzero data, 470fa9e4066Sahrens * it had better be dirty, otherwise we'll lose that 471fa9e4066Sahrens * data when we evict this buffer. 472fa9e4066Sahrens */ 473fa9e4066Sahrens if (db->db_dirtycnt == 0) { 474fa9e4066Sahrens uint64_t *buf = db->db.db_data; 475fa9e4066Sahrens int i; 476fa9e4066Sahrens 477fa9e4066Sahrens for (i = 0; i < db->db.db_size >> 3; i++) { 478fa9e4066Sahrens ASSERT(buf[i] == 0); 479fa9e4066Sahrens } 480fa9e4066Sahrens } 481fa9e4066Sahrens } 482744947dcSTom Erickson DB_DNODE_EXIT(db); 483fa9e4066Sahrens } 4849c9dc39aSek #endif 485fa9e4066Sahrens 486*bc9014e6SJustin Gibbs static void 487*bc9014e6SJustin Gibbs dbuf_clear_data(dmu_buf_impl_t *db) 488*bc9014e6SJustin Gibbs { 489*bc9014e6SJustin Gibbs ASSERT(MUTEX_HELD(&db->db_mtx)); 490*bc9014e6SJustin Gibbs dbuf_evict_user(db); 491*bc9014e6SJustin Gibbs db->db_buf = NULL; 492*bc9014e6SJustin Gibbs db->db.db_data = NULL; 493*bc9014e6SJustin Gibbs if (db->db_state != DB_NOFILL) 494*bc9014e6SJustin Gibbs db->db_state = DB_UNCACHED; 495*bc9014e6SJustin Gibbs } 496*bc9014e6SJustin Gibbs 497fa9e4066Sahrens static void 498fa9e4066Sahrens dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) 499fa9e4066Sahrens { 
500fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 501*bc9014e6SJustin Gibbs ASSERT(buf != NULL); 502*bc9014e6SJustin Gibbs 503fa9e4066Sahrens db->db_buf = buf; 504*bc9014e6SJustin Gibbs ASSERT(buf->b_data != NULL); 505*bc9014e6SJustin Gibbs db->db.db_data = buf->b_data; 506*bc9014e6SJustin Gibbs if (!arc_released(buf)) 507*bc9014e6SJustin Gibbs arc_set_callback(buf, dbuf_do_evict, db); 508fa9e4066Sahrens } 509fa9e4066Sahrens 510c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 511c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for read. Return the loaned arc_buf. 512c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 513c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t * 514c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dbuf_loan_arcbuf(dmu_buf_impl_t *db) 515c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 516c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 517c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 518c242f9a0Schunli zhang - Sun Microsystems - Irvine United States mutex_enter(&db->db_mtx); 519c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) { 520c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz = db->db.db_size; 52143466aaeSMax Grossman spa_t *spa = db->db_objset->os_spa; 522744947dcSTom Erickson 523c242f9a0Schunli zhang - Sun Microsystems - Irvine United States mutex_exit(&db->db_mtx); 524744947dcSTom Erickson abuf = arc_loan_buf(spa, blksz); 525c242f9a0Schunli zhang - Sun Microsystems - Irvine United States bcopy(db->db.db_data, abuf->b_data, blksz); 526c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 527c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = db->db_buf; 528c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 
arc_loan_inuse_buf(abuf, db); 529*bc9014e6SJustin Gibbs dbuf_clear_data(db); 530c242f9a0Schunli zhang - Sun Microsystems - Irvine United States mutex_exit(&db->db_mtx); 531c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 532c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (abuf); 533c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 534c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 535fa9e4066Sahrens uint64_t 536fa9e4066Sahrens dbuf_whichblock(dnode_t *dn, uint64_t offset) 537fa9e4066Sahrens { 538fa9e4066Sahrens if (dn->dn_datablkshift) { 539fa9e4066Sahrens return (offset >> dn->dn_datablkshift); 540fa9e4066Sahrens } else { 541fa9e4066Sahrens ASSERT3U(offset, <, dn->dn_datablksz); 542fa9e4066Sahrens return (0); 543fa9e4066Sahrens } 544fa9e4066Sahrens } 545fa9e4066Sahrens 546fa9e4066Sahrens static void 547fa9e4066Sahrens dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) 548fa9e4066Sahrens { 549fa9e4066Sahrens dmu_buf_impl_t *db = vdb; 550fa9e4066Sahrens 551fa9e4066Sahrens mutex_enter(&db->db_mtx); 552fa9e4066Sahrens ASSERT3U(db->db_state, ==, DB_READ); 553fa9e4066Sahrens /* 554fa9e4066Sahrens * All reads are synchronous, so we must have a hold on the dbuf 555fa9e4066Sahrens */ 556fa9e4066Sahrens ASSERT(refcount_count(&db->db_holds) > 0); 557ea8dc4b6Seschrock ASSERT(db->db_buf == NULL); 558fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 559c717a561Smaybee if (db->db_level == 0 && db->db_freed_in_flight) { 560fa9e4066Sahrens /* we were freed in flight; disregard any error */ 561fa9e4066Sahrens arc_release(buf, db); 562fa9e4066Sahrens bzero(buf->b_data, db->db.db_size); 5636b4acc8bSahrens arc_buf_freeze(buf); 564c717a561Smaybee db->db_freed_in_flight = FALSE; 565fa9e4066Sahrens dbuf_set_data(db, buf); 566fa9e4066Sahrens db->db_state = DB_CACHED; 567fa9e4066Sahrens } else if (zio == NULL || zio->io_error == 0) { 568fa9e4066Sahrens dbuf_set_data(db, buf); 569fa9e4066Sahrens db->db_state = 
DB_CACHED; 570fa9e4066Sahrens } else { 5710a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 572fa9e4066Sahrens ASSERT3P(db->db_buf, ==, NULL); 5733b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(buf, db)); 574ea8dc4b6Seschrock db->db_state = DB_UNCACHED; 575fa9e4066Sahrens } 576fa9e4066Sahrens cv_broadcast(&db->db_changed); 5773f9d6ad7SLin Ling dbuf_rele_and_unlock(db, NULL); 578fa9e4066Sahrens } 579fa9e4066Sahrens 580ea8dc4b6Seschrock static void 58113506d1eSmaybee dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags) 582fa9e4066Sahrens { 583744947dcSTom Erickson dnode_t *dn; 5847802d7bfSMatthew Ahrens zbookmark_phys_t zb; 5857adb730bSGeorge Wilson arc_flags_t aflags = ARC_FLAG_NOWAIT; 586fa9e4066Sahrens 587744947dcSTom Erickson DB_DNODE_ENTER(db); 588744947dcSTom Erickson dn = DB_DNODE(db); 589fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 590fa9e4066Sahrens /* We need the struct_rwlock to prevent db_blkptr from changing. */ 591088f3894Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 592ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&db->db_mtx)); 593ea8dc4b6Seschrock ASSERT(db->db_state == DB_UNCACHED); 594ea8dc4b6Seschrock ASSERT(db->db_buf == NULL); 595fa9e4066Sahrens 5960a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 597cf04dda1SMark Maybee int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); 5981934e92fSmaybee 5991934e92fSmaybee ASSERT3U(bonuslen, <=, db->db.db_size); 600ea8dc4b6Seschrock db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); 6015a98e54bSBrendan Gregg - Sun Microsystems arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); 6021934e92fSmaybee if (bonuslen < DN_MAX_BONUSLEN) 603ea8dc4b6Seschrock bzero(db->db.db_data, DN_MAX_BONUSLEN); 604cf04dda1SMark Maybee if (bonuslen) 605cf04dda1SMark Maybee bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen); 606744947dcSTom Erickson DB_DNODE_EXIT(db); 607fa9e4066Sahrens db->db_state = DB_CACHED; 608fa9e4066Sahrens mutex_exit(&db->db_mtx); 
609fa9e4066Sahrens return; 610fa9e4066Sahrens } 611fa9e4066Sahrens 6121c8564a7SMark Maybee /* 6131c8564a7SMark Maybee * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync() 6141c8564a7SMark Maybee * processes the delete record and clears the bp while we are waiting 6151c8564a7SMark Maybee * for the dn_mtx (resulting in a "no" from block_freed). 6161c8564a7SMark Maybee */ 617088f3894Sahrens if (db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr) || 6181c8564a7SMark Maybee (db->db_level == 0 && (dnode_block_freed(dn, db->db_blkid) || 6191c8564a7SMark Maybee BP_IS_HOLE(db->db_blkptr)))) { 620ad23a2dbSjohansen arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 621ad23a2dbSjohansen 622744947dcSTom Erickson DB_DNODE_EXIT(db); 62343466aaeSMax Grossman dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa, 62443466aaeSMax Grossman db->db.db_size, db, type)); 625fa9e4066Sahrens bzero(db->db.db_data, db->db.db_size); 626fa9e4066Sahrens db->db_state = DB_CACHED; 62713506d1eSmaybee *flags |= DB_RF_CACHED; 628fa9e4066Sahrens mutex_exit(&db->db_mtx); 629fa9e4066Sahrens return; 630fa9e4066Sahrens } 631fa9e4066Sahrens 632744947dcSTom Erickson DB_DNODE_EXIT(db); 633744947dcSTom Erickson 634fa9e4066Sahrens db->db_state = DB_READ; 635fa9e4066Sahrens mutex_exit(&db->db_mtx); 636fa9e4066Sahrens 6373baa08fcSek if (DBUF_IS_L2CACHEABLE(db)) 6387adb730bSGeorge Wilson aflags |= ARC_FLAG_L2CACHE; 639aad02571SSaso Kiselkov if (DBUF_IS_L2COMPRESSIBLE(db)) 6407adb730bSGeorge Wilson aflags |= ARC_FLAG_L2COMPRESS; 6413baa08fcSek 642b24ab676SJeff Bonwick SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ? 
643b24ab676SJeff Bonwick db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET, 644b24ab676SJeff Bonwick db->db.db_object, db->db_level, db->db_blkid); 645ea8dc4b6Seschrock 646ea8dc4b6Seschrock dbuf_add_ref(db, NULL); 647088f3894Sahrens 64843466aaeSMax Grossman (void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr, 649fa9e4066Sahrens dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, 65013506d1eSmaybee (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, 65113506d1eSmaybee &aflags, &zb); 6527adb730bSGeorge Wilson if (aflags & ARC_FLAG_CACHED) 65313506d1eSmaybee *flags |= DB_RF_CACHED; 654fa9e4066Sahrens } 655fa9e4066Sahrens 656ea8dc4b6Seschrock int 657ea8dc4b6Seschrock dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) 658fa9e4066Sahrens { 659ea8dc4b6Seschrock int err = 0; 66043466aaeSMax Grossman boolean_t havepzio = (zio != NULL); 66143466aaeSMax Grossman boolean_t prefetch; 662744947dcSTom Erickson dnode_t *dn; 663fa9e4066Sahrens 664fa9e4066Sahrens /* 665fa9e4066Sahrens * We don't have to hold the mutex to check db_state because it 666fa9e4066Sahrens * can't be freed while we have a hold on the buffer. 
667fa9e4066Sahrens */ 668fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 669fa9e4066Sahrens 67082c9918fSTim Haley if (db->db_state == DB_NOFILL) 671be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 67282c9918fSTim Haley 673744947dcSTom Erickson DB_DNODE_ENTER(db); 674744947dcSTom Erickson dn = DB_DNODE(db); 675ea8dc4b6Seschrock if ((flags & DB_RF_HAVESTRUCT) == 0) 676744947dcSTom Erickson rw_enter(&dn->dn_struct_rwlock, RW_READER); 677ea8dc4b6Seschrock 6780a586ceaSMark Shellenbaum prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && 679744947dcSTom Erickson (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL && 6803baa08fcSek DBUF_IS_CACHEABLE(db); 68113506d1eSmaybee 682ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 683ea8dc4b6Seschrock if (db->db_state == DB_CACHED) { 684ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 68513506d1eSmaybee if (prefetch) 686744947dcSTom Erickson dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, 68713506d1eSmaybee db->db.db_size, TRUE); 688fa9e4066Sahrens if ((flags & DB_RF_HAVESTRUCT) == 0) 689744947dcSTom Erickson rw_exit(&dn->dn_struct_rwlock); 690744947dcSTom Erickson DB_DNODE_EXIT(db); 691ea8dc4b6Seschrock } else if (db->db_state == DB_UNCACHED) { 692744947dcSTom Erickson spa_t *spa = dn->dn_objset->os_spa; 693744947dcSTom Erickson 694744947dcSTom Erickson if (zio == NULL) 695744947dcSTom Erickson zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 69613506d1eSmaybee dbuf_read_impl(db, zio, &flags); 69713506d1eSmaybee 698ea8dc4b6Seschrock /* dbuf_read_impl has dropped db_mtx for us */ 699ea8dc4b6Seschrock 70013506d1eSmaybee if (prefetch) 701744947dcSTom Erickson dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, 70213506d1eSmaybee db->db.db_size, flags & DB_RF_CACHED); 703ea8dc4b6Seschrock 704fa9e4066Sahrens if ((flags & DB_RF_HAVESTRUCT) == 0) 705744947dcSTom Erickson rw_exit(&dn->dn_struct_rwlock); 706744947dcSTom Erickson DB_DNODE_EXIT(db); 707fa9e4066Sahrens 708ea8dc4b6Seschrock if (!havepzio) 709ea8dc4b6Seschrock err = 
zio_wait(zio); 710ea8dc4b6Seschrock } else { 7113e30c24aSWill Andrews /* 7123e30c24aSWill Andrews * Another reader came in while the dbuf was in flight 7133e30c24aSWill Andrews * between UNCACHED and CACHED. Either a writer will finish 7143e30c24aSWill Andrews * writing the buffer (sending the dbuf to CACHED) or the 7153e30c24aSWill Andrews * first reader's request will reach the read_done callback 7163e30c24aSWill Andrews * and send the dbuf to CACHED. Otherwise, a failure 7173e30c24aSWill Andrews * occurred and the dbuf went to UNCACHED. 7183e30c24aSWill Andrews */ 71913506d1eSmaybee mutex_exit(&db->db_mtx); 72013506d1eSmaybee if (prefetch) 721744947dcSTom Erickson dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, 72213506d1eSmaybee db->db.db_size, TRUE); 723ea8dc4b6Seschrock if ((flags & DB_RF_HAVESTRUCT) == 0) 724744947dcSTom Erickson rw_exit(&dn->dn_struct_rwlock); 725744947dcSTom Erickson DB_DNODE_EXIT(db); 72613506d1eSmaybee 7273e30c24aSWill Andrews /* Skip the wait per the caller's request. */ 72813506d1eSmaybee mutex_enter(&db->db_mtx); 729ea8dc4b6Seschrock if ((flags & DB_RF_NEVERWAIT) == 0) { 730ea8dc4b6Seschrock while (db->db_state == DB_READ || 731ea8dc4b6Seschrock db->db_state == DB_FILL) { 732ea8dc4b6Seschrock ASSERT(db->db_state == DB_READ || 733ea8dc4b6Seschrock (flags & DB_RF_HAVESTRUCT) == 0); 734f6164ad6SAdam H. Leventhal DTRACE_PROBE2(blocked__read, dmu_buf_impl_t *, 735f6164ad6SAdam H. 
Leventhal db, zio_t *, zio); 736ea8dc4b6Seschrock cv_wait(&db->db_changed, &db->db_mtx); 737ea8dc4b6Seschrock } 738ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED) 739be6fd75aSMatthew Ahrens err = SET_ERROR(EIO); 740ea8dc4b6Seschrock } 741ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 742fa9e4066Sahrens } 743fa9e4066Sahrens 744ea8dc4b6Seschrock ASSERT(err || havepzio || db->db_state == DB_CACHED); 745ea8dc4b6Seschrock return (err); 746fa9e4066Sahrens } 747fa9e4066Sahrens 748fa9e4066Sahrens static void 749fa9e4066Sahrens dbuf_noread(dmu_buf_impl_t *db) 750fa9e4066Sahrens { 751fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 7520a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 753fa9e4066Sahrens mutex_enter(&db->db_mtx); 754fa9e4066Sahrens while (db->db_state == DB_READ || db->db_state == DB_FILL) 755fa9e4066Sahrens cv_wait(&db->db_changed, &db->db_mtx); 756fa9e4066Sahrens if (db->db_state == DB_UNCACHED) { 757ad23a2dbSjohansen arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 75843466aaeSMax Grossman spa_t *spa = db->db_objset->os_spa; 759ad23a2dbSjohansen 760ea8dc4b6Seschrock ASSERT(db->db_buf == NULL); 761fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 762744947dcSTom Erickson dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type)); 763fa9e4066Sahrens db->db_state = DB_FILL; 76482c9918fSTim Haley } else if (db->db_state == DB_NOFILL) { 765*bc9014e6SJustin Gibbs dbuf_clear_data(db); 766fa9e4066Sahrens } else { 767fa9e4066Sahrens ASSERT3U(db->db_state, ==, DB_CACHED); 768fa9e4066Sahrens } 769fa9e4066Sahrens mutex_exit(&db->db_mtx); 770fa9e4066Sahrens } 771fa9e4066Sahrens 772fa9e4066Sahrens /* 773fa9e4066Sahrens * This is our just-in-time copy function. It makes a copy of 774fa9e4066Sahrens * buffers, that have been modified in a previous transaction 775fa9e4066Sahrens * group, before we modify them in the current active group. 
776fa9e4066Sahrens * 777fa9e4066Sahrens * This function is used in two places: when we are dirtying a 778fa9e4066Sahrens * buffer for the first time in a txg, and when we are freeing 779fa9e4066Sahrens * a range in a dnode that includes this buffer. 780fa9e4066Sahrens * 781fa9e4066Sahrens * Note that when we are called from dbuf_free_range() we do 782fa9e4066Sahrens * not put a hold on the buffer, we just traverse the active 783fa9e4066Sahrens * dbuf list for the dnode. 784fa9e4066Sahrens */ 785fa9e4066Sahrens static void 786fa9e4066Sahrens dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) 787fa9e4066Sahrens { 788c717a561Smaybee dbuf_dirty_record_t *dr = db->db_last_dirty; 789fa9e4066Sahrens 790fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 791fa9e4066Sahrens ASSERT(db->db.db_data != NULL); 792c717a561Smaybee ASSERT(db->db_level == 0); 793c717a561Smaybee ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); 794fa9e4066Sahrens 7954d31c452Smaybee if (dr == NULL || 7964d31c452Smaybee (dr->dt.dl.dr_data != 7970a586ceaSMark Shellenbaum ((db->db_blkid == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf))) 798fa9e4066Sahrens return; 799fa9e4066Sahrens 800fa9e4066Sahrens /* 801c717a561Smaybee * If the last dirty record for this dbuf has not yet synced 802c717a561Smaybee * and it's referencing the dbuf data, either: 803744947dcSTom Erickson * reset the reference to point to a new copy, 804c717a561Smaybee * or (if there are no active holders) 805c717a561Smaybee * just null out the current db_data pointer. 
806fa9e4066Sahrens */ 807c717a561Smaybee ASSERT(dr->dr_txg >= txg - 2); 8080a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 809c717a561Smaybee /* Note that the data bufs here are zio_bufs */ 810c717a561Smaybee dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN); 8115a98e54bSBrendan Gregg - Sun Microsystems arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); 812c717a561Smaybee bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); 813c717a561Smaybee } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { 814c717a561Smaybee int size = db->db.db_size; 815c717a561Smaybee arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 81643466aaeSMax Grossman spa_t *spa = db->db_objset->os_spa; 817744947dcSTom Erickson 818744947dcSTom Erickson dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type); 819c717a561Smaybee bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); 820c717a561Smaybee } else { 821*bc9014e6SJustin Gibbs dbuf_clear_data(db); 822fa9e4066Sahrens } 823fa9e4066Sahrens } 824fa9e4066Sahrens 825c717a561Smaybee void 826c717a561Smaybee dbuf_unoverride(dbuf_dirty_record_t *dr) 827ea8dc4b6Seschrock { 828c717a561Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 829b24ab676SJeff Bonwick blkptr_t *bp = &dr->dt.dl.dr_overridden_by; 830c717a561Smaybee uint64_t txg = dr->dr_txg; 831ea8dc4b6Seschrock 832ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&db->db_mtx)); 833c717a561Smaybee ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC); 834c717a561Smaybee ASSERT(db->db_level == 0); 835ea8dc4b6Seschrock 8360a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID || 837c717a561Smaybee dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN) 838c717a561Smaybee return; 839ea8dc4b6Seschrock 840b24ab676SJeff Bonwick ASSERT(db->db_data_pending != dr); 841b24ab676SJeff Bonwick 842c717a561Smaybee /* free this block */ 84343466aaeSMax Grossman if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) 84443466aaeSMax Grossman zio_free(db->db_objset->os_spa, txg, bp); 845b24ab676SJeff Bonwick 
846c717a561Smaybee dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; 84780901aeaSGeorge Wilson dr->dt.dl.dr_nopwrite = B_FALSE; 84880901aeaSGeorge Wilson 849c717a561Smaybee /* 850c717a561Smaybee * Release the already-written buffer, so we leave it in 851c717a561Smaybee * a consistent dirty state. Note that all callers are 852c717a561Smaybee * modifying the buffer, so they will immediately do 853c717a561Smaybee * another (redundant) arc_release(). Therefore, leave 854c717a561Smaybee * the buf thawed to save the effort of freezing & 855c717a561Smaybee * immediately re-thawing it. 856c717a561Smaybee */ 857c717a561Smaybee arc_release(dr->dt.dl.dr_data, db); 858fa9e4066Sahrens } 859fa9e4066Sahrens 860cdb0ab79Smaybee /* 861cdb0ab79Smaybee * Evict (if it's unreferenced) or clear (if it's referenced) any level-0 862cdb0ab79Smaybee * data blocks in the free range, so that any future readers will find 86343466aaeSMax Grossman * empty blocks. 8642f3d8780SMatthew Ahrens * 8652f3d8780SMatthew Ahrens * This is a no-op if the dataset is in the middle of an incremental 8662f3d8780SMatthew Ahrens * receive; see comment below for details. 
867cdb0ab79Smaybee */ 868fa9e4066Sahrens void 8690f6d88adSAlex Reece dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, 8700f6d88adSAlex Reece dmu_tx_t *tx) 871fa9e4066Sahrens { 872*bc9014e6SJustin Gibbs dmu_buf_impl_t db_search; 873*bc9014e6SJustin Gibbs dmu_buf_impl_t *db, *db_next; 874fa9e4066Sahrens uint64_t txg = tx->tx_txg; 8750f6d88adSAlex Reece avl_index_t where; 876fa9e4066Sahrens 8770f6d88adSAlex Reece if (end_blkid > dn->dn_maxblkid && (end_blkid != DMU_SPILL_BLKID)) 8780f6d88adSAlex Reece end_blkid = dn->dn_maxblkid; 8790f6d88adSAlex Reece dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid); 8800f6d88adSAlex Reece 8810f6d88adSAlex Reece db_search.db_level = 0; 8820f6d88adSAlex Reece db_search.db_blkid = start_blkid; 88386bb58aeSAlex Reece db_search.db_state = DB_SEARCH; 8842f3d8780SMatthew Ahrens 885713d6c20SMatthew Ahrens mutex_enter(&dn->dn_dbufs_mtx); 8860f6d88adSAlex Reece if (start_blkid >= dn->dn_unlisted_l0_blkid) { 887713d6c20SMatthew Ahrens /* There can't be any dbufs in this range; no need to search. */ 8880f6d88adSAlex Reece #ifdef DEBUG 8890f6d88adSAlex Reece db = avl_find(&dn->dn_dbufs, &db_search, &where); 8900f6d88adSAlex Reece ASSERT3P(db, ==, NULL); 8910f6d88adSAlex Reece db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); 8920f6d88adSAlex Reece ASSERT(db == NULL || db->db_level > 0); 8930f6d88adSAlex Reece #endif 894713d6c20SMatthew Ahrens mutex_exit(&dn->dn_dbufs_mtx); 895713d6c20SMatthew Ahrens return; 896713d6c20SMatthew Ahrens } else if (dmu_objset_is_receiving(dn->dn_objset)) { 8972f3d8780SMatthew Ahrens /* 898713d6c20SMatthew Ahrens * If we are receiving, we expect there to be no dbufs in 899713d6c20SMatthew Ahrens * the range to be freed, because receive modifies each 900713d6c20SMatthew Ahrens * block at most once, and in offset order. 
If this is 901713d6c20SMatthew Ahrens * not the case, it can lead to performance problems, 902713d6c20SMatthew Ahrens * so note that we unexpectedly took the slow path. 9032f3d8780SMatthew Ahrens */ 904713d6c20SMatthew Ahrens atomic_inc_64(&zfs_free_range_recv_miss); 9052f3d8780SMatthew Ahrens } 9062f3d8780SMatthew Ahrens 9070f6d88adSAlex Reece db = avl_find(&dn->dn_dbufs, &db_search, &where); 9080f6d88adSAlex Reece ASSERT3P(db, ==, NULL); 9090f6d88adSAlex Reece db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); 9100f6d88adSAlex Reece 9110f6d88adSAlex Reece for (; db != NULL; db = db_next) { 9120f6d88adSAlex Reece db_next = AVL_NEXT(&dn->dn_dbufs, db); 9130a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 914cdb0ab79Smaybee 9150f6d88adSAlex Reece if (db->db_level != 0 || db->db_blkid > end_blkid) { 9160f6d88adSAlex Reece break; 9170f6d88adSAlex Reece } 9180f6d88adSAlex Reece ASSERT3U(db->db_blkid, >=, start_blkid); 919fa9e4066Sahrens 920fa9e4066Sahrens /* found a level 0 buffer in the range */ 9213b2aab18SMatthew Ahrens mutex_enter(&db->db_mtx); 9223b2aab18SMatthew Ahrens if (dbuf_undirty(db, tx)) { 9233b2aab18SMatthew Ahrens /* mutex has been dropped and dbuf destroyed */ 924fa9e4066Sahrens continue; 9253b2aab18SMatthew Ahrens } 926fa9e4066Sahrens 927ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED || 92882c9918fSTim Haley db->db_state == DB_NOFILL || 929ea8dc4b6Seschrock db->db_state == DB_EVICTING) { 930fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 931fa9e4066Sahrens mutex_exit(&db->db_mtx); 932fa9e4066Sahrens continue; 933fa9e4066Sahrens } 934c543ec06Sahrens if (db->db_state == DB_READ || db->db_state == DB_FILL) { 935c543ec06Sahrens /* will be handled in dbuf_read_done or dbuf_rele */ 936c717a561Smaybee db->db_freed_in_flight = TRUE; 937fa9e4066Sahrens mutex_exit(&db->db_mtx); 938fa9e4066Sahrens continue; 939fa9e4066Sahrens } 940ea8dc4b6Seschrock if (refcount_count(&db->db_holds) == 0) { 941ea8dc4b6Seschrock ASSERT(db->db_buf); 
942ea8dc4b6Seschrock dbuf_clear(db); 943ea8dc4b6Seschrock continue; 944ea8dc4b6Seschrock } 945c717a561Smaybee /* The dbuf is referenced */ 946fa9e4066Sahrens 947c717a561Smaybee if (db->db_last_dirty != NULL) { 948c717a561Smaybee dbuf_dirty_record_t *dr = db->db_last_dirty; 949c717a561Smaybee 950c717a561Smaybee if (dr->dr_txg == txg) { 95144eda4d7Smaybee /* 952c717a561Smaybee * This buffer is "in-use", re-adjust the file 953c717a561Smaybee * size to reflect that this buffer may 954c717a561Smaybee * contain new data when we sync. 95544eda4d7Smaybee */ 95606e0070dSMark Shellenbaum if (db->db_blkid != DMU_SPILL_BLKID && 95706e0070dSMark Shellenbaum db->db_blkid > dn->dn_maxblkid) 958c717a561Smaybee dn->dn_maxblkid = db->db_blkid; 959c717a561Smaybee dbuf_unoverride(dr); 960c717a561Smaybee } else { 961c717a561Smaybee /* 962c717a561Smaybee * This dbuf is not dirty in the open context. 963c717a561Smaybee * Either uncache it (if it's not referenced in 964c717a561Smaybee * the open context) or reset its contents to 965c717a561Smaybee * empty. 
966c717a561Smaybee */ 967c717a561Smaybee dbuf_fix_old_data(db, txg); 96844eda4d7Smaybee } 969ea8dc4b6Seschrock } 970c717a561Smaybee /* clear the contents if it's cached */ 971ea8dc4b6Seschrock if (db->db_state == DB_CACHED) { 972ea8dc4b6Seschrock ASSERT(db->db.db_data != NULL); 973fa9e4066Sahrens arc_release(db->db_buf, db); 974fa9e4066Sahrens bzero(db->db.db_data, db->db.db_size); 9756b4acc8bSahrens arc_buf_freeze(db->db_buf); 976fa9e4066Sahrens } 977ea8dc4b6Seschrock 978fa9e4066Sahrens mutex_exit(&db->db_mtx); 979fa9e4066Sahrens } 980fa9e4066Sahrens mutex_exit(&dn->dn_dbufs_mtx); 981fa9e4066Sahrens } 982fa9e4066Sahrens 983fa9e4066Sahrens static int 9841934e92fSmaybee dbuf_block_freeable(dmu_buf_impl_t *db) 985fa9e4066Sahrens { 986fa9e4066Sahrens dsl_dataset_t *ds = db->db_objset->os_dsl_dataset; 987fa9e4066Sahrens uint64_t birth_txg = 0; 988fa9e4066Sahrens 989fa9e4066Sahrens /* 990fa9e4066Sahrens * We don't need any locking to protect db_blkptr: 991c717a561Smaybee * If it's syncing, then db_last_dirty will be set 992c717a561Smaybee * so we'll ignore db_blkptr. 99343466aaeSMax Grossman * 99443466aaeSMax Grossman * This logic ensures that only block births for 99543466aaeSMax Grossman * filled blocks are considered. 996fa9e4066Sahrens */ 997c717a561Smaybee ASSERT(MUTEX_HELD(&db->db_mtx)); 99843466aaeSMax Grossman if (db->db_last_dirty && (db->db_blkptr == NULL || 99943466aaeSMax Grossman !BP_IS_HOLE(db->db_blkptr))) { 1000c717a561Smaybee birth_txg = db->db_last_dirty->dr_txg; 100143466aaeSMax Grossman } else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) { 1002fa9e4066Sahrens birth_txg = db->db_blkptr->blk_birth; 100343466aaeSMax Grossman } 1004fa9e4066Sahrens 1005837b568bSGeorge Wilson /* 100643466aaeSMax Grossman * If this block doesn't exist or is in a snapshot, it can't be freed. 
1007837b568bSGeorge Wilson * Don't pass the bp to dsl_dataset_block_freeable() since we 1008837b568bSGeorge Wilson * are holding the db_mtx lock and might deadlock if we are 1009837b568bSGeorge Wilson * prefetching a dedup-ed block. 1010837b568bSGeorge Wilson */ 101143466aaeSMax Grossman if (birth_txg != 0) 10121934e92fSmaybee return (ds == NULL || 1013837b568bSGeorge Wilson dsl_dataset_block_freeable(ds, NULL, birth_txg)); 1014fa9e4066Sahrens else 101543466aaeSMax Grossman return (B_FALSE); 1016fa9e4066Sahrens } 1017fa9e4066Sahrens 1018fa9e4066Sahrens void 1019fa9e4066Sahrens dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) 1020fa9e4066Sahrens { 1021fa9e4066Sahrens arc_buf_t *buf, *obuf; 1022fa9e4066Sahrens int osize = db->db.db_size; 1023ad23a2dbSjohansen arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 1024744947dcSTom Erickson dnode_t *dn; 1025fa9e4066Sahrens 10260a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 1027ea8dc4b6Seschrock 1028744947dcSTom Erickson DB_DNODE_ENTER(db); 1029744947dcSTom Erickson dn = DB_DNODE(db); 1030744947dcSTom Erickson 1031fa9e4066Sahrens /* XXX does *this* func really need the lock? */ 1032744947dcSTom Erickson ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 1033fa9e4066Sahrens 1034fa9e4066Sahrens /* 103543466aaeSMax Grossman * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held 1036fa9e4066Sahrens * is OK, because there can be no other references to the db 1037fa9e4066Sahrens * when we are changing its size, so no concurrent DB_FILL can 1038fa9e4066Sahrens * be happening. 
1039fa9e4066Sahrens */ 1040ea8dc4b6Seschrock /* 1041ea8dc4b6Seschrock * XXX we should be doing a dbuf_read, checking the return 1042ea8dc4b6Seschrock * value and returning that up to our callers 1043ea8dc4b6Seschrock */ 104443466aaeSMax Grossman dmu_buf_will_dirty(&db->db, tx); 1045fa9e4066Sahrens 1046fa9e4066Sahrens /* create the data buffer for the new block */ 1047744947dcSTom Erickson buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type); 1048fa9e4066Sahrens 1049fa9e4066Sahrens /* copy old block data to the new block */ 1050fa9e4066Sahrens obuf = db->db_buf; 1051f65e61c0Sahrens bcopy(obuf->b_data, buf->b_data, MIN(osize, size)); 1052fa9e4066Sahrens /* zero the remainder */ 1053f65e61c0Sahrens if (size > osize) 1054f65e61c0Sahrens bzero((uint8_t *)buf->b_data + osize, size - osize); 1055fa9e4066Sahrens 1056fa9e4066Sahrens mutex_enter(&db->db_mtx); 1057fa9e4066Sahrens dbuf_set_data(db, buf); 10583b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(obuf, db)); 1059fa9e4066Sahrens db->db.db_size = size; 1060fa9e4066Sahrens 1061c717a561Smaybee if (db->db_level == 0) { 1062c717a561Smaybee ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); 1063c717a561Smaybee db->db_last_dirty->dt.dl.dr_data = buf; 1064c717a561Smaybee } 1065fa9e4066Sahrens mutex_exit(&db->db_mtx); 1066fa9e4066Sahrens 1067744947dcSTom Erickson dnode_willuse_space(dn, size-osize, tx); 1068744947dcSTom Erickson DB_DNODE_EXIT(db); 1069fa9e4066Sahrens } 1070fa9e4066Sahrens 10713f9d6ad7SLin Ling void 10723f9d6ad7SLin Ling dbuf_release_bp(dmu_buf_impl_t *db) 10733f9d6ad7SLin Ling { 107443466aaeSMax Grossman objset_t *os = db->db_objset; 10753f9d6ad7SLin Ling 10763f9d6ad7SLin Ling ASSERT(dsl_pool_sync_context(dmu_objset_pool(os))); 10773f9d6ad7SLin Ling ASSERT(arc_released(os->os_phys_buf) || 10783f9d6ad7SLin Ling list_link_active(&os->os_dsl_dataset->ds_synced_link)); 10793f9d6ad7SLin Ling ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf)); 10803f9d6ad7SLin Ling 10811b912ec7SGeorge 
Wilson (void) arc_release(db->db_buf, db); 10823f9d6ad7SLin Ling } 10833f9d6ad7SLin Ling 1084c717a561Smaybee dbuf_dirty_record_t * 1085fa9e4066Sahrens dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 1086fa9e4066Sahrens { 1087744947dcSTom Erickson dnode_t *dn; 1088744947dcSTom Erickson objset_t *os; 1089c717a561Smaybee dbuf_dirty_record_t **drp, *dr; 1090fa9e4066Sahrens int drop_struct_lock = FALSE; 1091d3469faaSMark Maybee boolean_t do_free_accounting = B_FALSE; 1092fa9e4066Sahrens int txgoff = tx->tx_txg & TXG_MASK; 1093fa9e4066Sahrens 1094fa9e4066Sahrens ASSERT(tx->tx_txg != 0); 1095fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 10969c9dc39aSek DMU_TX_DIRTY_BUF(tx, db); 1097fa9e4066Sahrens 1098744947dcSTom Erickson DB_DNODE_ENTER(db); 1099744947dcSTom Erickson dn = DB_DNODE(db); 1100fa9e4066Sahrens /* 1101fa9e4066Sahrens * Shouldn't dirty a regular buffer in syncing context. Private 1102fa9e4066Sahrens * objects may be dirtied in syncing context, but only if they 1103fa9e4066Sahrens * were already pre-dirtied in open context. 1104fa9e4066Sahrens */ 1105c717a561Smaybee ASSERT(!dmu_tx_is_syncing(tx) || 1106c717a561Smaybee BP_IS_HOLE(dn->dn_objset->os_rootbp) || 110714843421SMatthew Ahrens DMU_OBJECT_IS_SPECIAL(dn->dn_object) || 110814843421SMatthew Ahrens dn->dn_objset->os_dsl_dataset == NULL); 1109fa9e4066Sahrens /* 1110fa9e4066Sahrens * We make this assert for private objects as well, but after we 1111fa9e4066Sahrens * check if we're already dirty. They are allowed to re-dirty 1112fa9e4066Sahrens * in syncing context. 1113fa9e4066Sahrens */ 1114ea8dc4b6Seschrock ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || 1115c717a561Smaybee dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == 1116fa9e4066Sahrens (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); 1117fa9e4066Sahrens 1118fa9e4066Sahrens mutex_enter(&db->db_mtx); 1119fa9e4066Sahrens /* 1120c717a561Smaybee * XXX make this true for indirects too? 
The problem is that 1121c717a561Smaybee * transactions created with dmu_tx_create_assigned() from 1122c717a561Smaybee * syncing context don't bother holding ahead. 1123fa9e4066Sahrens */ 1124c717a561Smaybee ASSERT(db->db_level != 0 || 112582c9918fSTim Haley db->db_state == DB_CACHED || db->db_state == DB_FILL || 112682c9918fSTim Haley db->db_state == DB_NOFILL); 1127fa9e4066Sahrens 1128fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 1129fa9e4066Sahrens /* 1130fa9e4066Sahrens * Don't set dirtyctx to SYNC if we're just modifying this as we 1131fa9e4066Sahrens * initialize the objset. 1132fa9e4066Sahrens */ 1133fa9e4066Sahrens if (dn->dn_dirtyctx == DN_UNDIRTIED && 1134c717a561Smaybee !BP_IS_HOLE(dn->dn_objset->os_rootbp)) { 1135fa9e4066Sahrens dn->dn_dirtyctx = 1136fa9e4066Sahrens (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN); 1137fa9e4066Sahrens ASSERT(dn->dn_dirtyctx_firstset == NULL); 1138fa9e4066Sahrens dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); 1139fa9e4066Sahrens } 1140fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 1141fa9e4066Sahrens 11420a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) 11430a586ceaSMark Shellenbaum dn->dn_have_spill = B_TRUE; 11440a586ceaSMark Shellenbaum 1145fa9e4066Sahrens /* 1146fa9e4066Sahrens * If this buffer is already dirty, we're done. 1147fa9e4066Sahrens */ 1148c717a561Smaybee drp = &db->db_last_dirty; 1149c717a561Smaybee ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg || 1150c717a561Smaybee db->db.db_object == DMU_META_DNODE_OBJECT); 11517e2186e3Sbonwick while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg) 11527e2186e3Sbonwick drp = &dr->dr_next; 11537e2186e3Sbonwick if (dr && dr->dr_txg == tx->tx_txg) { 1154744947dcSTom Erickson DB_DNODE_EXIT(db); 1155744947dcSTom Erickson 11560a586ceaSMark Shellenbaum if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) { 1157c717a561Smaybee /* 1158c717a561Smaybee * If this buffer has already been written out, 1159c717a561Smaybee * we now need to reset its state. 
1160c717a561Smaybee */ 11617e2186e3Sbonwick dbuf_unoverride(dr); 1162b24ab676SJeff Bonwick if (db->db.db_object != DMU_META_DNODE_OBJECT && 1163b24ab676SJeff Bonwick db->db_state != DB_NOFILL) 1164c717a561Smaybee arc_buf_thaw(db->db_buf); 1165c717a561Smaybee } 1166fa9e4066Sahrens mutex_exit(&db->db_mtx); 11677e2186e3Sbonwick return (dr); 1168fa9e4066Sahrens } 1169fa9e4066Sahrens 1170fa9e4066Sahrens /* 1171fa9e4066Sahrens * Only valid if not already dirty. 1172fa9e4066Sahrens */ 117314843421SMatthew Ahrens ASSERT(dn->dn_object == 0 || 117414843421SMatthew Ahrens dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == 1175fa9e4066Sahrens (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); 1176fa9e4066Sahrens 1177fa9e4066Sahrens ASSERT3U(dn->dn_nlevels, >, db->db_level); 1178fa9e4066Sahrens ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) || 1179fa9e4066Sahrens dn->dn_phys->dn_nlevels > db->db_level || 1180fa9e4066Sahrens dn->dn_next_nlevels[txgoff] > db->db_level || 1181fa9e4066Sahrens dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level || 1182fa9e4066Sahrens dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level); 1183fa9e4066Sahrens 1184fa9e4066Sahrens /* 1185fa9e4066Sahrens * We should only be dirtying in syncing context if it's the 118614843421SMatthew Ahrens * mos or we're initializing the os or it's a special object. 118714843421SMatthew Ahrens * However, we are allowed to dirty in syncing context provided 118814843421SMatthew Ahrens * we already dirtied it in open context. Hence we must make 118914843421SMatthew Ahrens * this assertion only if we're not already dirty. 
1190fa9e4066Sahrens */ 1191744947dcSTom Erickson os = dn->dn_objset; 119214843421SMatthew Ahrens ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || 119314843421SMatthew Ahrens os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp)); 1194fa9e4066Sahrens ASSERT(db->db.db_size != 0); 1195fa9e4066Sahrens 1196fa9e4066Sahrens dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); 1197fa9e4066Sahrens 11980a586ceaSMark Shellenbaum if (db->db_blkid != DMU_BONUS_BLKID) { 11991934e92fSmaybee /* 12001934e92fSmaybee * Update the accounting. 1201d3469faaSMark Maybee * Note: we delay "free accounting" until after we drop 1202d3469faaSMark Maybee * the db_mtx. This keeps us from grabbing other locks 1203b24ab676SJeff Bonwick * (and possibly deadlocking) in bp_get_dsize() while 1204d3469faaSMark Maybee * also holding the db_mtx. 12051934e92fSmaybee */ 12061934e92fSmaybee dnode_willuse_space(dn, db->db.db_size, tx); 1207d3469faaSMark Maybee do_free_accounting = dbuf_block_freeable(db); 12081934e92fSmaybee } 12091934e92fSmaybee 1210ea8dc4b6Seschrock /* 1211ea8dc4b6Seschrock * If this buffer is dirty in an old transaction group we need 1212ea8dc4b6Seschrock * to make a copy of it so that the changes we make in this 1213ea8dc4b6Seschrock * transaction group won't leak out when we sync the older txg. 
1214ea8dc4b6Seschrock */ 1215c717a561Smaybee dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); 1216c717a561Smaybee if (db->db_level == 0) { 1217c717a561Smaybee void *data_old = db->db_buf; 1218c717a561Smaybee 121982c9918fSTim Haley if (db->db_state != DB_NOFILL) { 12200a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 122182c9918fSTim Haley dbuf_fix_old_data(db, tx->tx_txg); 122282c9918fSTim Haley data_old = db->db.db_data; 122382c9918fSTim Haley } else if (db->db.db_object != DMU_META_DNODE_OBJECT) { 122482c9918fSTim Haley /* 122582c9918fSTim Haley * Release the data buffer from the cache so 122682c9918fSTim Haley * that we can modify it without impacting 122782c9918fSTim Haley * possible other users of this cached data 122882c9918fSTim Haley * block. Note that indirect blocks and 122982c9918fSTim Haley * private objects are not released until the 123082c9918fSTim Haley * syncing state (since they are only modified 123182c9918fSTim Haley * then). 123282c9918fSTim Haley */ 123382c9918fSTim Haley arc_release(db->db_buf, db); 123482c9918fSTim Haley dbuf_fix_old_data(db, tx->tx_txg); 123582c9918fSTim Haley data_old = db->db_buf; 123682c9918fSTim Haley } 123782c9918fSTim Haley ASSERT(data_old != NULL); 1238fa9e4066Sahrens } 1239c717a561Smaybee dr->dt.dl.dr_data = data_old; 1240c717a561Smaybee } else { 1241c717a561Smaybee mutex_init(&dr->dt.di.dr_mtx, NULL, MUTEX_DEFAULT, NULL); 1242c717a561Smaybee list_create(&dr->dt.di.dr_children, 1243c717a561Smaybee sizeof (dbuf_dirty_record_t), 1244c717a561Smaybee offsetof(dbuf_dirty_record_t, dr_dirty_node)); 1245fa9e4066Sahrens } 124669962b56SMatthew Ahrens if (db->db_blkid != DMU_BONUS_BLKID && os->os_dsl_dataset != NULL) 124769962b56SMatthew Ahrens dr->dr_accounted = db->db.db_size; 1248c717a561Smaybee dr->dr_dbuf = db; 1249c717a561Smaybee dr->dr_txg = tx->tx_txg; 1250c717a561Smaybee dr->dr_next = *drp; 1251c717a561Smaybee *drp = dr; 1252fa9e4066Sahrens 1253fa9e4066Sahrens /* 1254fa9e4066Sahrens * We could 
have been freed_in_flight between the dbuf_noread 1255fa9e4066Sahrens * and dbuf_dirty. We win, as though the dbuf_noread() had 1256fa9e4066Sahrens * happened after the free. 1257fa9e4066Sahrens */ 12580a586ceaSMark Shellenbaum if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && 12590a586ceaSMark Shellenbaum db->db_blkid != DMU_SPILL_BLKID) { 1260c717a561Smaybee mutex_enter(&dn->dn_mtx); 1261bf16b11eSMatthew Ahrens if (dn->dn_free_ranges[txgoff] != NULL) { 1262bf16b11eSMatthew Ahrens range_tree_clear(dn->dn_free_ranges[txgoff], 1263bf16b11eSMatthew Ahrens db->db_blkid, 1); 1264bf16b11eSMatthew Ahrens } 1265c717a561Smaybee mutex_exit(&dn->dn_mtx); 1266c717a561Smaybee db->db_freed_in_flight = FALSE; 1267fa9e4066Sahrens } 1268fa9e4066Sahrens 1269fa9e4066Sahrens /* 1270fa9e4066Sahrens * This buffer is now part of this txg 1271fa9e4066Sahrens */ 1272fa9e4066Sahrens dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg); 1273fa9e4066Sahrens db->db_dirtycnt += 1; 1274fa9e4066Sahrens ASSERT3U(db->db_dirtycnt, <=, 3); 1275fa9e4066Sahrens 1276fa9e4066Sahrens mutex_exit(&db->db_mtx); 1277fa9e4066Sahrens 12780a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID || 12790a586ceaSMark Shellenbaum db->db_blkid == DMU_SPILL_BLKID) { 1280c717a561Smaybee mutex_enter(&dn->dn_mtx); 1281c717a561Smaybee ASSERT(!list_link_active(&dr->dr_dirty_node)); 1282c717a561Smaybee list_insert_tail(&dn->dn_dirty_records[txgoff], dr); 1283c717a561Smaybee mutex_exit(&dn->dn_mtx); 1284fa9e4066Sahrens dnode_setdirty(dn, tx); 1285744947dcSTom Erickson DB_DNODE_EXIT(db); 1286c717a561Smaybee return (dr); 1287d3469faaSMark Maybee } else if (do_free_accounting) { 1288d3469faaSMark Maybee blkptr_t *bp = db->db_blkptr; 1289d3469faaSMark Maybee int64_t willfree = (bp && !BP_IS_HOLE(bp)) ? 
1290b24ab676SJeff Bonwick bp_get_dsize(os->os_spa, bp) : db->db.db_size; 1291d3469faaSMark Maybee /* 1292d3469faaSMark Maybee * This is only a guess -- if the dbuf is dirty 1293d3469faaSMark Maybee * in a previous txg, we don't know how much 1294d3469faaSMark Maybee * space it will use on disk yet. We should 1295d3469faaSMark Maybee * really have the struct_rwlock to access 1296d3469faaSMark Maybee * db_blkptr, but since this is just a guess, 1297d3469faaSMark Maybee * it's OK if we get an odd answer. 1298d3469faaSMark Maybee */ 1299837b568bSGeorge Wilson ddt_prefetch(os->os_spa, bp); 1300d3469faaSMark Maybee dnode_willuse_space(dn, -willfree, tx); 1301fa9e4066Sahrens } 1302fa9e4066Sahrens 1303fa9e4066Sahrens if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { 1304fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 1305fa9e4066Sahrens drop_struct_lock = TRUE; 1306fa9e4066Sahrens } 1307fa9e4066Sahrens 13088346f03fSJonathan W Adams if (db->db_level == 0) { 13098346f03fSJonathan W Adams dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock); 13108346f03fSJonathan W Adams ASSERT(dn->dn_maxblkid >= db->db_blkid); 13118346f03fSJonathan W Adams } 13128346f03fSJonathan W Adams 131344eda4d7Smaybee if (db->db_level+1 < dn->dn_nlevels) { 1314c717a561Smaybee dmu_buf_impl_t *parent = db->db_parent; 1315c717a561Smaybee dbuf_dirty_record_t *di; 1316c717a561Smaybee int parent_held = FALSE; 1317c717a561Smaybee 1318c717a561Smaybee if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) { 1319c717a561Smaybee int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 1320c717a561Smaybee 1321c717a561Smaybee parent = dbuf_hold_level(dn, db->db_level+1, 1322c717a561Smaybee db->db_blkid >> epbs, FTAG); 132301025c89SJohn Harres ASSERT(parent != NULL); 1324c717a561Smaybee parent_held = TRUE; 1325c717a561Smaybee } 1326fa9e4066Sahrens if (drop_struct_lock) 1327fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 1328c717a561Smaybee ASSERT3U(db->db_level+1, ==, parent->db_level); 1329c717a561Smaybee di 
= dbuf_dirty(parent, tx); 1330c717a561Smaybee if (parent_held) 1331c717a561Smaybee dbuf_rele(parent, FTAG); 1332c717a561Smaybee 1333c717a561Smaybee mutex_enter(&db->db_mtx); 133469962b56SMatthew Ahrens /* 133569962b56SMatthew Ahrens * Since we've dropped the mutex, it's possible that 133669962b56SMatthew Ahrens * dbuf_undirty() might have changed this out from under us. 133769962b56SMatthew Ahrens */ 1338c717a561Smaybee if (db->db_last_dirty == dr || 1339c717a561Smaybee dn->dn_object == DMU_META_DNODE_OBJECT) { 1340c717a561Smaybee mutex_enter(&di->dt.di.dr_mtx); 1341c717a561Smaybee ASSERT3U(di->dr_txg, ==, tx->tx_txg); 1342c717a561Smaybee ASSERT(!list_link_active(&dr->dr_dirty_node)); 1343c717a561Smaybee list_insert_tail(&di->dt.di.dr_children, dr); 1344c717a561Smaybee mutex_exit(&di->dt.di.dr_mtx); 1345c717a561Smaybee dr->dr_parent = di; 1346c717a561Smaybee } 1347c717a561Smaybee mutex_exit(&db->db_mtx); 1348fa9e4066Sahrens } else { 1349c717a561Smaybee ASSERT(db->db_level+1 == dn->dn_nlevels); 1350c717a561Smaybee ASSERT(db->db_blkid < dn->dn_nblkptr); 1351744947dcSTom Erickson ASSERT(db->db_parent == NULL || db->db_parent == dn->dn_dbuf); 1352c717a561Smaybee mutex_enter(&dn->dn_mtx); 1353c717a561Smaybee ASSERT(!list_link_active(&dr->dr_dirty_node)); 1354c717a561Smaybee list_insert_tail(&dn->dn_dirty_records[txgoff], dr); 1355c717a561Smaybee mutex_exit(&dn->dn_mtx); 1356fa9e4066Sahrens if (drop_struct_lock) 1357fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 1358fa9e4066Sahrens } 1359fa9e4066Sahrens 1360fa9e4066Sahrens dnode_setdirty(dn, tx); 1361744947dcSTom Erickson DB_DNODE_EXIT(db); 1362c717a561Smaybee return (dr); 1363fa9e4066Sahrens } 1364fa9e4066Sahrens 13653b2aab18SMatthew Ahrens /* 13663e30c24aSWill Andrews * Undirty a buffer in the transaction group referenced by the given 13673e30c24aSWill Andrews * transaction. Return whether this evicted the dbuf. 
13683b2aab18SMatthew Ahrens */ 13693b2aab18SMatthew Ahrens static boolean_t 1370fa9e4066Sahrens dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 1371fa9e4066Sahrens { 1372744947dcSTom Erickson dnode_t *dn; 1373c717a561Smaybee uint64_t txg = tx->tx_txg; 137417f17c2dSbonwick dbuf_dirty_record_t *dr, **drp; 1375fa9e4066Sahrens 1376c717a561Smaybee ASSERT(txg != 0); 13770a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 13783b2aab18SMatthew Ahrens ASSERT0(db->db_level); 13793b2aab18SMatthew Ahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 1380fa9e4066Sahrens 1381fa9e4066Sahrens /* 1382fa9e4066Sahrens * If this buffer is not dirty, we're done. 1383fa9e4066Sahrens */ 138417f17c2dSbonwick for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) 1385c717a561Smaybee if (dr->dr_txg <= txg) 1386c717a561Smaybee break; 13873b2aab18SMatthew Ahrens if (dr == NULL || dr->dr_txg < txg) 13883b2aab18SMatthew Ahrens return (B_FALSE); 1389c717a561Smaybee ASSERT(dr->dr_txg == txg); 1390b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 1391fa9e4066Sahrens 1392744947dcSTom Erickson DB_DNODE_ENTER(db); 1393744947dcSTom Erickson dn = DB_DNODE(db); 1394744947dcSTom Erickson 1395fa9e4066Sahrens dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); 1396fa9e4066Sahrens 1397fa9e4066Sahrens ASSERT(db->db.db_size != 0); 1398fa9e4066Sahrens 139969962b56SMatthew Ahrens /* 140069962b56SMatthew Ahrens * Any space we accounted for in dp_dirty_* will be cleaned up by 140169962b56SMatthew Ahrens * dsl_pool_sync(). This is relatively rare so the discrepancy 140269962b56SMatthew Ahrens * is not a big deal. 140369962b56SMatthew Ahrens */ 1404fa9e4066Sahrens 140517f17c2dSbonwick *drp = dr->dr_next; 1406c717a561Smaybee 14073f2366c2SGordon Ross /* 14083f2366c2SGordon Ross * Note that there are three places in dbuf_dirty() 14093f2366c2SGordon Ross * where this dirty record may be put on a list. 
14103f2366c2SGordon Ross * Make sure to do a list_remove corresponding to 14113f2366c2SGordon Ross * every one of those list_insert calls. 14123f2366c2SGordon Ross */ 1413c717a561Smaybee if (dr->dr_parent) { 1414c717a561Smaybee mutex_enter(&dr->dr_parent->dt.di.dr_mtx); 1415c717a561Smaybee list_remove(&dr->dr_parent->dt.di.dr_children, dr); 1416c717a561Smaybee mutex_exit(&dr->dr_parent->dt.di.dr_mtx); 14173f2366c2SGordon Ross } else if (db->db_blkid == DMU_SPILL_BLKID || 14183f2366c2SGordon Ross db->db_level+1 == dn->dn_nlevels) { 1419cdb0ab79Smaybee ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); 1420c717a561Smaybee mutex_enter(&dn->dn_mtx); 1421c717a561Smaybee list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); 1422c717a561Smaybee mutex_exit(&dn->dn_mtx); 1423c717a561Smaybee } 1424744947dcSTom Erickson DB_DNODE_EXIT(db); 1425c717a561Smaybee 14263b2aab18SMatthew Ahrens if (db->db_state != DB_NOFILL) { 14273b2aab18SMatthew Ahrens dbuf_unoverride(dr); 1428c717a561Smaybee 1429c717a561Smaybee ASSERT(db->db_buf != NULL); 14303b2aab18SMatthew Ahrens ASSERT(dr->dt.dl.dr_data != NULL); 14313b2aab18SMatthew Ahrens if (dr->dt.dl.dr_data != db->db_buf) 14323b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db)); 1433c717a561Smaybee } 1434d2b3cbbdSJorgen Lundman 1435d2b3cbbdSJorgen Lundman if (db->db_level != 0) { 1436d2b3cbbdSJorgen Lundman mutex_destroy(&dr->dt.di.dr_mtx); 1437d2b3cbbdSJorgen Lundman list_destroy(&dr->dt.di.dr_children); 1438d2b3cbbdSJorgen Lundman } 1439d2b3cbbdSJorgen Lundman 1440c717a561Smaybee kmem_free(dr, sizeof (dbuf_dirty_record_t)); 1441fa9e4066Sahrens 1442fa9e4066Sahrens ASSERT(db->db_dirtycnt > 0); 1443fa9e4066Sahrens db->db_dirtycnt -= 1; 1444fa9e4066Sahrens 1445c717a561Smaybee if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { 1446ea8dc4b6Seschrock arc_buf_t *buf = db->db_buf; 1447fa9e4066Sahrens 1448b24ab676SJeff Bonwick ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); 
1449*bc9014e6SJustin Gibbs dbuf_clear_data(db); 14503b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(buf, db)); 1451fa9e4066Sahrens dbuf_evict(db); 14523b2aab18SMatthew Ahrens return (B_TRUE); 1453fa9e4066Sahrens } 1454fa9e4066Sahrens 14553b2aab18SMatthew Ahrens return (B_FALSE); 1456fa9e4066Sahrens } 1457fa9e4066Sahrens 1458fa9e4066Sahrens void 145943466aaeSMax Grossman dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) 1460fa9e4066Sahrens { 146143466aaeSMax Grossman dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 14621ab7f2deSmaybee int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH; 1463fa9e4066Sahrens 1464fa9e4066Sahrens ASSERT(tx->tx_txg != 0); 1465fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 1466fa9e4066Sahrens 1467744947dcSTom Erickson DB_DNODE_ENTER(db); 1468744947dcSTom Erickson if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) 1469fa9e4066Sahrens rf |= DB_RF_HAVESTRUCT; 1470744947dcSTom Erickson DB_DNODE_EXIT(db); 1471ea8dc4b6Seschrock (void) dbuf_read(db, NULL, rf); 1472c717a561Smaybee (void) dbuf_dirty(db, tx); 1473fa9e4066Sahrens } 1474fa9e4066Sahrens 147582c9918fSTim Haley void 147682c9918fSTim Haley dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) 147782c9918fSTim Haley { 147882c9918fSTim Haley dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 147982c9918fSTim Haley 148082c9918fSTim Haley db->db_state = DB_NOFILL; 148182c9918fSTim Haley 148282c9918fSTim Haley dmu_buf_will_fill(db_fake, tx); 148382c9918fSTim Haley } 148482c9918fSTim Haley 1485fa9e4066Sahrens void 1486ea8dc4b6Seschrock dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) 1487fa9e4066Sahrens { 1488ea8dc4b6Seschrock dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 1489ea8dc4b6Seschrock 14900a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 1491fa9e4066Sahrens ASSERT(tx->tx_txg != 0); 1492fa9e4066Sahrens ASSERT(db->db_level == 0); 1493fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 1494fa9e4066Sahrens 1495ea8dc4b6Seschrock ASSERT(db->db.db_object 
!= DMU_META_DNODE_OBJECT || 1496fa9e4066Sahrens dmu_tx_private_ok(tx)); 1497fa9e4066Sahrens 1498fa9e4066Sahrens dbuf_noread(db); 1499c717a561Smaybee (void) dbuf_dirty(db, tx); 1500fa9e4066Sahrens } 1501fa9e4066Sahrens 1502fa9e4066Sahrens #pragma weak dmu_buf_fill_done = dbuf_fill_done 1503fa9e4066Sahrens /* ARGSUSED */ 1504fa9e4066Sahrens void 1505fa9e4066Sahrens dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx) 1506fa9e4066Sahrens { 1507fa9e4066Sahrens mutex_enter(&db->db_mtx); 15089c9dc39aSek DBUF_VERIFY(db); 1509fa9e4066Sahrens 1510fa9e4066Sahrens if (db->db_state == DB_FILL) { 1511c717a561Smaybee if (db->db_level == 0 && db->db_freed_in_flight) { 15120a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 1513fa9e4066Sahrens /* we were freed while filling */ 1514fa9e4066Sahrens /* XXX dbuf_undirty? */ 1515fa9e4066Sahrens bzero(db->db.db_data, db->db.db_size); 1516c717a561Smaybee db->db_freed_in_flight = FALSE; 1517fa9e4066Sahrens } 1518fa9e4066Sahrens db->db_state = DB_CACHED; 1519fa9e4066Sahrens cv_broadcast(&db->db_changed); 1520fa9e4066Sahrens } 1521fa9e4066Sahrens mutex_exit(&db->db_mtx); 1522fa9e4066Sahrens } 1523fa9e4066Sahrens 15245d7b4d43SMatthew Ahrens void 15255d7b4d43SMatthew Ahrens dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, 15265d7b4d43SMatthew Ahrens bp_embedded_type_t etype, enum zio_compress comp, 15275d7b4d43SMatthew Ahrens int uncompressed_size, int compressed_size, int byteorder, 15285d7b4d43SMatthew Ahrens dmu_tx_t *tx) 15295d7b4d43SMatthew Ahrens { 15305d7b4d43SMatthew Ahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; 15315d7b4d43SMatthew Ahrens struct dirty_leaf *dl; 15325d7b4d43SMatthew Ahrens dmu_object_type_t type; 15335d7b4d43SMatthew Ahrens 15345d7b4d43SMatthew Ahrens DB_DNODE_ENTER(db); 15355d7b4d43SMatthew Ahrens type = DB_DNODE(db)->dn_type; 15365d7b4d43SMatthew Ahrens DB_DNODE_EXIT(db); 15375d7b4d43SMatthew Ahrens 15385d7b4d43SMatthew Ahrens ASSERT0(db->db_level); 15395d7b4d43SMatthew Ahrens ASSERT(db->db_blkid 
!= DMU_BONUS_BLKID); 15405d7b4d43SMatthew Ahrens 15415d7b4d43SMatthew Ahrens dmu_buf_will_not_fill(dbuf, tx); 15425d7b4d43SMatthew Ahrens 15435d7b4d43SMatthew Ahrens ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); 15445d7b4d43SMatthew Ahrens dl = &db->db_last_dirty->dt.dl; 15455d7b4d43SMatthew Ahrens encode_embedded_bp_compressed(&dl->dr_overridden_by, 15465d7b4d43SMatthew Ahrens data, comp, uncompressed_size, compressed_size); 15475d7b4d43SMatthew Ahrens BPE_SET_ETYPE(&dl->dr_overridden_by, etype); 15485d7b4d43SMatthew Ahrens BP_SET_TYPE(&dl->dr_overridden_by, type); 15495d7b4d43SMatthew Ahrens BP_SET_LEVEL(&dl->dr_overridden_by, 0); 15505d7b4d43SMatthew Ahrens BP_SET_BYTEORDER(&dl->dr_overridden_by, byteorder); 15515d7b4d43SMatthew Ahrens 15525d7b4d43SMatthew Ahrens dl->dr_override_state = DR_OVERRIDDEN; 15535d7b4d43SMatthew Ahrens dl->dr_overridden_by.blk_birth = db->db_last_dirty->dr_txg; 15545d7b4d43SMatthew Ahrens } 15555d7b4d43SMatthew Ahrens 15562fdbea25SAleksandr Guzovskiy /* 15572fdbea25SAleksandr Guzovskiy * Directly assign a provided arc buf to a given dbuf if it's not referenced 15582fdbea25SAleksandr Guzovskiy * by anybody except our caller. Otherwise copy arcbuf's contents to dbuf. 
15592fdbea25SAleksandr Guzovskiy */ 15602fdbea25SAleksandr Guzovskiy void 15612fdbea25SAleksandr Guzovskiy dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) 15622fdbea25SAleksandr Guzovskiy { 15632fdbea25SAleksandr Guzovskiy ASSERT(!refcount_is_zero(&db->db_holds)); 15640a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 15652fdbea25SAleksandr Guzovskiy ASSERT(db->db_level == 0); 15662fdbea25SAleksandr Guzovskiy ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA); 15672fdbea25SAleksandr Guzovskiy ASSERT(buf != NULL); 15682fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(buf) == db->db.db_size); 15692fdbea25SAleksandr Guzovskiy ASSERT(tx->tx_txg != 0); 15702fdbea25SAleksandr Guzovskiy 15712fdbea25SAleksandr Guzovskiy arc_return_buf(buf, db); 15722fdbea25SAleksandr Guzovskiy ASSERT(arc_released(buf)); 15732fdbea25SAleksandr Guzovskiy 15742fdbea25SAleksandr Guzovskiy mutex_enter(&db->db_mtx); 15752fdbea25SAleksandr Guzovskiy 15762fdbea25SAleksandr Guzovskiy while (db->db_state == DB_READ || db->db_state == DB_FILL) 15772fdbea25SAleksandr Guzovskiy cv_wait(&db->db_changed, &db->db_mtx); 15782fdbea25SAleksandr Guzovskiy 15792fdbea25SAleksandr Guzovskiy ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); 15802fdbea25SAleksandr Guzovskiy 15812fdbea25SAleksandr Guzovskiy if (db->db_state == DB_CACHED && 15822fdbea25SAleksandr Guzovskiy refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { 15832fdbea25SAleksandr Guzovskiy mutex_exit(&db->db_mtx); 15842fdbea25SAleksandr Guzovskiy (void) dbuf_dirty(db, tx); 15852fdbea25SAleksandr Guzovskiy bcopy(buf->b_data, db->db.db_data, db->db.db_size); 15863b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(buf, db)); 1587c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_stat_wbuf_copied(); 15882fdbea25SAleksandr Guzovskiy return; 15892fdbea25SAleksandr Guzovskiy } 15902fdbea25SAleksandr Guzovskiy 1591c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 
xuio_stat_wbuf_nocopy(); 15922fdbea25SAleksandr Guzovskiy if (db->db_state == DB_CACHED) { 15932fdbea25SAleksandr Guzovskiy dbuf_dirty_record_t *dr = db->db_last_dirty; 15942fdbea25SAleksandr Guzovskiy 15952fdbea25SAleksandr Guzovskiy ASSERT(db->db_buf != NULL); 15962fdbea25SAleksandr Guzovskiy if (dr != NULL && dr->dr_txg == tx->tx_txg) { 15972fdbea25SAleksandr Guzovskiy ASSERT(dr->dt.dl.dr_data == db->db_buf); 15982fdbea25SAleksandr Guzovskiy if (!arc_released(db->db_buf)) { 15992fdbea25SAleksandr Guzovskiy ASSERT(dr->dt.dl.dr_override_state == 16002fdbea25SAleksandr Guzovskiy DR_OVERRIDDEN); 16012fdbea25SAleksandr Guzovskiy arc_release(db->db_buf, db); 16022fdbea25SAleksandr Guzovskiy } 16032fdbea25SAleksandr Guzovskiy dr->dt.dl.dr_data = buf; 16043b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(db->db_buf, db)); 16052fdbea25SAleksandr Guzovskiy } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) { 16062fdbea25SAleksandr Guzovskiy arc_release(db->db_buf, db); 16073b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(db->db_buf, db)); 16082fdbea25SAleksandr Guzovskiy } 16092fdbea25SAleksandr Guzovskiy db->db_buf = NULL; 16102fdbea25SAleksandr Guzovskiy } 16112fdbea25SAleksandr Guzovskiy ASSERT(db->db_buf == NULL); 16122fdbea25SAleksandr Guzovskiy dbuf_set_data(db, buf); 16132fdbea25SAleksandr Guzovskiy db->db_state = DB_FILL; 16142fdbea25SAleksandr Guzovskiy mutex_exit(&db->db_mtx); 16152fdbea25SAleksandr Guzovskiy (void) dbuf_dirty(db, tx); 161643466aaeSMax Grossman dmu_buf_fill_done(&db->db, tx); 16172fdbea25SAleksandr Guzovskiy } 16182fdbea25SAleksandr Guzovskiy 1619ea8dc4b6Seschrock /* 1620ea8dc4b6Seschrock * "Clear" the contents of this dbuf. This will mark the dbuf 162169962b56SMatthew Ahrens * EVICTING and clear *most* of its references. Unfortunately, 1622ea8dc4b6Seschrock * when we are not holding the dn_dbufs_mtx, we can't clear the 1623ea8dc4b6Seschrock * entry in the dn_dbufs list. 
We have to wait until dbuf_destroy() 1624ea8dc4b6Seschrock * in this case. For callers from the DMU we will usually see: 1625bbfa8ea8SMatthew Ahrens * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy() 1626ea8dc4b6Seschrock * For the arc callback, we will usually see: 1627744947dcSTom Erickson * dbuf_do_evict()->dbuf_clear();dbuf_destroy() 1628ea8dc4b6Seschrock * Sometimes, though, we will get a mix of these two: 1629bbfa8ea8SMatthew Ahrens * DMU: dbuf_clear()->arc_clear_callback() 1630ea8dc4b6Seschrock * ARC: dbuf_do_evict()->dbuf_destroy() 1631bbfa8ea8SMatthew Ahrens * 1632bbfa8ea8SMatthew Ahrens * This routine will dissociate the dbuf from the arc, by calling 1633bbfa8ea8SMatthew Ahrens * arc_clear_callback(), but will not evict the data from the ARC. 1634ea8dc4b6Seschrock */ 1635ea8dc4b6Seschrock void 1636fa9e4066Sahrens dbuf_clear(dmu_buf_impl_t *db) 1637fa9e4066Sahrens { 1638744947dcSTom Erickson dnode_t *dn; 1639ea8dc4b6Seschrock dmu_buf_impl_t *parent = db->db_parent; 1640744947dcSTom Erickson dmu_buf_impl_t *dndb; 1641bbfa8ea8SMatthew Ahrens boolean_t dbuf_gone = B_FALSE; 1642fa9e4066Sahrens 1643fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 1644fa9e4066Sahrens ASSERT(refcount_is_zero(&db->db_holds)); 1645fa9e4066Sahrens 1646ea8dc4b6Seschrock dbuf_evict_user(db); 1647ea8dc4b6Seschrock 1648fa9e4066Sahrens if (db->db_state == DB_CACHED) { 1649ea8dc4b6Seschrock ASSERT(db->db.db_data != NULL); 16500a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 1651ea8dc4b6Seschrock zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); 16525a98e54bSBrendan Gregg - Sun Microsystems arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); 16530e8c6158Smaybee } 1654fa9e4066Sahrens db->db.db_data = NULL; 1655fa9e4066Sahrens db->db_state = DB_UNCACHED; 1656fa9e4066Sahrens } 1657fa9e4066Sahrens 165882c9918fSTim Haley ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); 1659fa9e4066Sahrens ASSERT(db->db_data_pending == NULL); 1660fa9e4066Sahrens 
1661ea8dc4b6Seschrock db->db_state = DB_EVICTING; 1662ea8dc4b6Seschrock db->db_blkptr = NULL; 1663ea8dc4b6Seschrock 1664744947dcSTom Erickson DB_DNODE_ENTER(db); 1665744947dcSTom Erickson dn = DB_DNODE(db); 1666744947dcSTom Erickson dndb = dn->dn_dbuf; 16670a586ceaSMark Shellenbaum if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) { 16680f6d88adSAlex Reece avl_remove(&dn->dn_dbufs, db); 1669640c1670SJosef 'Jeff' Sipek atomic_dec_32(&dn->dn_dbufs_count); 1670744947dcSTom Erickson membar_producer(); 1671744947dcSTom Erickson DB_DNODE_EXIT(db); 1672744947dcSTom Erickson /* 1673744947dcSTom Erickson * Decrementing the dbuf count means that the hold corresponding 1674744947dcSTom Erickson * to the removed dbuf is no longer discounted in dnode_move(), 1675744947dcSTom Erickson * so the dnode cannot be moved until after we release the hold. 1676744947dcSTom Erickson * The membar_producer() ensures visibility of the decremented 1677744947dcSTom Erickson * value in dnode_move(), since DB_DNODE_EXIT doesn't actually 1678744947dcSTom Erickson * release any lock. 1679744947dcSTom Erickson */ 1680ea8dc4b6Seschrock dnode_rele(dn, db); 1681744947dcSTom Erickson db->db_dnode_handle = NULL; 1682744947dcSTom Erickson } else { 1683744947dcSTom Erickson DB_DNODE_EXIT(db); 1684ea8dc4b6Seschrock } 1685ea8dc4b6Seschrock 1686ea8dc4b6Seschrock if (db->db_buf) 1687bbfa8ea8SMatthew Ahrens dbuf_gone = arc_clear_callback(db->db_buf); 1688ea8dc4b6Seschrock 1689ea8dc4b6Seschrock if (!dbuf_gone) 1690ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 1691fa9e4066Sahrens 1692fa9e4066Sahrens /* 1693744947dcSTom Erickson * If this dbuf is referenced from an indirect dbuf, 1694fa9e4066Sahrens * decrement the ref count on the indirect dbuf. 
1695fa9e4066Sahrens */ 1696c543ec06Sahrens if (parent && parent != dndb) 1697ea8dc4b6Seschrock dbuf_rele(parent, db); 1698fa9e4066Sahrens } 1699fa9e4066Sahrens 1700fa9e4066Sahrens static int 1701fa9e4066Sahrens dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, 1702fa9e4066Sahrens dmu_buf_impl_t **parentp, blkptr_t **bpp) 1703fa9e4066Sahrens { 1704fa9e4066Sahrens int nlevels, epbs; 1705fa9e4066Sahrens 17060b69c2f0Sahrens *parentp = NULL; 17070b69c2f0Sahrens *bpp = NULL; 17080b69c2f0Sahrens 17090a586ceaSMark Shellenbaum ASSERT(blkid != DMU_BONUS_BLKID); 17100a586ceaSMark Shellenbaum 17110a586ceaSMark Shellenbaum if (blkid == DMU_SPILL_BLKID) { 17120a586ceaSMark Shellenbaum mutex_enter(&dn->dn_mtx); 171306e0070dSMark Shellenbaum if (dn->dn_have_spill && 171406e0070dSMark Shellenbaum (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) 17150a586ceaSMark Shellenbaum *bpp = &dn->dn_phys->dn_spill; 17160a586ceaSMark Shellenbaum else 17170a586ceaSMark Shellenbaum *bpp = NULL; 17180a586ceaSMark Shellenbaum dbuf_add_ref(dn->dn_dbuf, NULL); 17190a586ceaSMark Shellenbaum *parentp = dn->dn_dbuf; 17200a586ceaSMark Shellenbaum mutex_exit(&dn->dn_mtx); 17210a586ceaSMark Shellenbaum return (0); 17220a586ceaSMark Shellenbaum } 1723ea8dc4b6Seschrock 1724fa9e4066Sahrens if (dn->dn_phys->dn_nlevels == 0) 1725fa9e4066Sahrens nlevels = 1; 1726fa9e4066Sahrens else 1727fa9e4066Sahrens nlevels = dn->dn_phys->dn_nlevels; 1728fa9e4066Sahrens 1729fa9e4066Sahrens epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 1730fa9e4066Sahrens 1731fa9e4066Sahrens ASSERT3U(level * epbs, <, 64); 1732fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 1733ea8dc4b6Seschrock if (level >= nlevels || 1734fa9e4066Sahrens (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) { 1735fa9e4066Sahrens /* the buffer has no parent yet */ 1736be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1737fa9e4066Sahrens } else if (level < nlevels-1) { 1738fa9e4066Sahrens /* this block is referenced from an 
indirect block */ 1739fa9e4066Sahrens int err = dbuf_hold_impl(dn, level+1, 1740fa9e4066Sahrens blkid >> epbs, fail_sparse, NULL, parentp); 1741fa9e4066Sahrens if (err) 1742fa9e4066Sahrens return (err); 1743ea8dc4b6Seschrock err = dbuf_read(*parentp, NULL, 1744ea8dc4b6Seschrock (DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL)); 1745c543ec06Sahrens if (err) { 1746c543ec06Sahrens dbuf_rele(*parentp, NULL); 1747c543ec06Sahrens *parentp = NULL; 1748c543ec06Sahrens return (err); 1749ea8dc4b6Seschrock } 1750c543ec06Sahrens *bpp = ((blkptr_t *)(*parentp)->db.db_data) + 1751c543ec06Sahrens (blkid & ((1ULL << epbs) - 1)); 1752c543ec06Sahrens return (0); 1753fa9e4066Sahrens } else { 1754fa9e4066Sahrens /* the block is referenced from the dnode */ 1755fa9e4066Sahrens ASSERT3U(level, ==, nlevels-1); 1756fa9e4066Sahrens ASSERT(dn->dn_phys->dn_nblkptr == 0 || 1757fa9e4066Sahrens blkid < dn->dn_phys->dn_nblkptr); 1758c543ec06Sahrens if (dn->dn_dbuf) { 1759c543ec06Sahrens dbuf_add_ref(dn->dn_dbuf, NULL); 1760c543ec06Sahrens *parentp = dn->dn_dbuf; 1761c543ec06Sahrens } 1762fa9e4066Sahrens *bpp = &dn->dn_phys->dn_blkptr[blkid]; 1763fa9e4066Sahrens return (0); 1764fa9e4066Sahrens } 1765fa9e4066Sahrens } 1766fa9e4066Sahrens 1767fa9e4066Sahrens static dmu_buf_impl_t * 1768fa9e4066Sahrens dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, 1769fa9e4066Sahrens dmu_buf_impl_t *parent, blkptr_t *blkptr) 1770fa9e4066Sahrens { 1771503ad85cSMatthew Ahrens objset_t *os = dn->dn_objset; 1772fa9e4066Sahrens dmu_buf_impl_t *db, *odb; 1773fa9e4066Sahrens 1774fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 1775fa9e4066Sahrens ASSERT(dn->dn_type != DMU_OT_NONE); 1776fa9e4066Sahrens 1777fa9e4066Sahrens db = kmem_cache_alloc(dbuf_cache, KM_SLEEP); 1778fa9e4066Sahrens 1779fa9e4066Sahrens db->db_objset = os; 1780fa9e4066Sahrens db->db.db_object = dn->dn_object; 1781fa9e4066Sahrens db->db_level = level; 1782fa9e4066Sahrens db->db_blkid = blkid; 1783c717a561Smaybee db->db_last_dirty 
= NULL; 1784ea8dc4b6Seschrock db->db_dirtycnt = 0; 1785744947dcSTom Erickson db->db_dnode_handle = dn->dn_handle; 1786ea8dc4b6Seschrock db->db_parent = parent; 1787ea8dc4b6Seschrock db->db_blkptr = blkptr; 1788fa9e4066Sahrens 1789*bc9014e6SJustin Gibbs db->db_user = NULL; 1790c717a561Smaybee db->db_immediate_evict = 0; 1791c717a561Smaybee db->db_freed_in_flight = 0; 1792ea8dc4b6Seschrock 17930a586ceaSMark Shellenbaum if (blkid == DMU_BONUS_BLKID) { 1794ea8dc4b6Seschrock ASSERT3P(parent, ==, dn->dn_dbuf); 17951934e92fSmaybee db->db.db_size = DN_MAX_BONUSLEN - 17961934e92fSmaybee (dn->dn_nblkptr-1) * sizeof (blkptr_t); 17971934e92fSmaybee ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); 17980a586ceaSMark Shellenbaum db->db.db_offset = DMU_BONUS_BLKID; 1799ea8dc4b6Seschrock db->db_state = DB_UNCACHED; 1800ea8dc4b6Seschrock /* the bonus dbuf is not placed in the hash table */ 18015a98e54bSBrendan Gregg - Sun Microsystems arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); 1802ea8dc4b6Seschrock return (db); 18030a586ceaSMark Shellenbaum } else if (blkid == DMU_SPILL_BLKID) { 18040a586ceaSMark Shellenbaum db->db.db_size = (blkptr != NULL) ? 18050a586ceaSMark Shellenbaum BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE; 18060a586ceaSMark Shellenbaum db->db.db_offset = 0; 1807fa9e4066Sahrens } else { 1808fa9e4066Sahrens int blocksize = 180969962b56SMatthew Ahrens db->db_level ? 1 << dn->dn_indblkshift : dn->dn_datablksz; 1810fa9e4066Sahrens db->db.db_size = blocksize; 1811fa9e4066Sahrens db->db.db_offset = db->db_blkid * blocksize; 1812fa9e4066Sahrens } 1813fa9e4066Sahrens 1814fa9e4066Sahrens /* 1815fa9e4066Sahrens * Hold the dn_dbufs_mtx while we get the new dbuf 1816fa9e4066Sahrens * in the hash table *and* added to the dbufs list. 1817fa9e4066Sahrens * This prevents a possible deadlock with someone 1818fa9e4066Sahrens * trying to look up this dbuf before its added to the 1819fa9e4066Sahrens * dn_dbufs list. 
1820fa9e4066Sahrens */ 1821fa9e4066Sahrens mutex_enter(&dn->dn_dbufs_mtx); 1822ea8dc4b6Seschrock db->db_state = DB_EVICTING; 1823fa9e4066Sahrens if ((odb = dbuf_hash_insert(db)) != NULL) { 1824fa9e4066Sahrens /* someone else inserted it first */ 1825fa9e4066Sahrens kmem_cache_free(dbuf_cache, db); 1826fa9e4066Sahrens mutex_exit(&dn->dn_dbufs_mtx); 1827fa9e4066Sahrens return (odb); 1828fa9e4066Sahrens } 18290f6d88adSAlex Reece avl_add(&dn->dn_dbufs, db); 1830713d6c20SMatthew Ahrens if (db->db_level == 0 && db->db_blkid >= 1831713d6c20SMatthew Ahrens dn->dn_unlisted_l0_blkid) 1832713d6c20SMatthew Ahrens dn->dn_unlisted_l0_blkid = db->db_blkid + 1; 1833ea8dc4b6Seschrock db->db_state = DB_UNCACHED; 1834fa9e4066Sahrens mutex_exit(&dn->dn_dbufs_mtx); 18355a98e54bSBrendan Gregg - Sun Microsystems arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); 1836fa9e4066Sahrens 1837fa9e4066Sahrens if (parent && parent != dn->dn_dbuf) 1838fa9e4066Sahrens dbuf_add_ref(parent, db); 1839fa9e4066Sahrens 1840ea8dc4b6Seschrock ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || 1841ea8dc4b6Seschrock refcount_count(&dn->dn_holds) > 0); 1842fa9e4066Sahrens (void) refcount_add(&dn->dn_holds, db); 1843640c1670SJosef 'Jeff' Sipek atomic_inc_32(&dn->dn_dbufs_count); 1844fa9e4066Sahrens 1845fa9e4066Sahrens dprintf_dbuf(db, "db=%p\n", db); 1846fa9e4066Sahrens 1847fa9e4066Sahrens return (db); 1848fa9e4066Sahrens } 1849fa9e4066Sahrens 1850fa9e4066Sahrens static int 1851ea8dc4b6Seschrock dbuf_do_evict(void *private) 1852fa9e4066Sahrens { 1853bbfa8ea8SMatthew Ahrens dmu_buf_impl_t *db = private; 1854fa9e4066Sahrens 1855ea8dc4b6Seschrock if (!MUTEX_HELD(&db->db_mtx)) 1856ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 1857fa9e4066Sahrens 1858ea8dc4b6Seschrock ASSERT(refcount_is_zero(&db->db_holds)); 1859fa9e4066Sahrens 1860ea8dc4b6Seschrock if (db->db_state != DB_EVICTING) { 1861ea8dc4b6Seschrock ASSERT(db->db_state == DB_CACHED); 1862ea8dc4b6Seschrock DBUF_VERIFY(db); 1863ea8dc4b6Seschrock 
db->db_buf = NULL; 1864ea8dc4b6Seschrock dbuf_evict(db); 1865ea8dc4b6Seschrock } else { 1866ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 1867ea8dc4b6Seschrock dbuf_destroy(db); 1868fa9e4066Sahrens } 1869ea8dc4b6Seschrock return (0); 1870fa9e4066Sahrens } 1871fa9e4066Sahrens 1872fa9e4066Sahrens static void 1873fa9e4066Sahrens dbuf_destroy(dmu_buf_impl_t *db) 1874fa9e4066Sahrens { 1875fa9e4066Sahrens ASSERT(refcount_is_zero(&db->db_holds)); 1876fa9e4066Sahrens 18770a586ceaSMark Shellenbaum if (db->db_blkid != DMU_BONUS_BLKID) { 1878ea8dc4b6Seschrock /* 1879ea8dc4b6Seschrock * If this dbuf is still on the dn_dbufs list, 1880ea8dc4b6Seschrock * remove it from that list. 1881ea8dc4b6Seschrock */ 1882744947dcSTom Erickson if (db->db_dnode_handle != NULL) { 1883744947dcSTom Erickson dnode_t *dn; 18841934e92fSmaybee 1885744947dcSTom Erickson DB_DNODE_ENTER(db); 1886744947dcSTom Erickson dn = DB_DNODE(db); 18871934e92fSmaybee mutex_enter(&dn->dn_dbufs_mtx); 18880f6d88adSAlex Reece avl_remove(&dn->dn_dbufs, db); 1889640c1670SJosef 'Jeff' Sipek atomic_dec_32(&dn->dn_dbufs_count); 1890c543ec06Sahrens mutex_exit(&dn->dn_dbufs_mtx); 1891744947dcSTom Erickson DB_DNODE_EXIT(db); 1892744947dcSTom Erickson /* 1893744947dcSTom Erickson * Decrementing the dbuf count means that the hold 1894744947dcSTom Erickson * corresponding to the removed dbuf is no longer 1895744947dcSTom Erickson * discounted in dnode_move(), so the dnode cannot be 1896744947dcSTom Erickson * moved until after we release the hold. 
1897744947dcSTom Erickson */ 1898ea8dc4b6Seschrock dnode_rele(dn, db); 1899744947dcSTom Erickson db->db_dnode_handle = NULL; 1900ea8dc4b6Seschrock } 1901ea8dc4b6Seschrock dbuf_hash_remove(db); 1902ea8dc4b6Seschrock } 1903ea8dc4b6Seschrock db->db_parent = NULL; 1904ea8dc4b6Seschrock db->db_buf = NULL; 1905ea8dc4b6Seschrock 1906fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 1907fa9e4066Sahrens ASSERT(db->db_hash_next == NULL); 1908fa9e4066Sahrens ASSERT(db->db_blkptr == NULL); 1909fa9e4066Sahrens ASSERT(db->db_data_pending == NULL); 1910fa9e4066Sahrens 1911fa9e4066Sahrens kmem_cache_free(dbuf_cache, db); 19125a98e54bSBrendan Gregg - Sun Microsystems arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); 1913fa9e4066Sahrens } 1914fa9e4066Sahrens 1915fa9e4066Sahrens void 191669962b56SMatthew Ahrens dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio) 1917fa9e4066Sahrens { 191813506d1eSmaybee dmu_buf_impl_t *db = NULL; 1919fa9e4066Sahrens blkptr_t *bp = NULL; 1920fa9e4066Sahrens 19210a586ceaSMark Shellenbaum ASSERT(blkid != DMU_BONUS_BLKID); 1922fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 1923fa9e4066Sahrens 1924fa9e4066Sahrens if (dnode_block_freed(dn, blkid)) 1925fa9e4066Sahrens return; 1926fa9e4066Sahrens 1927fa9e4066Sahrens /* dbuf_find() returns with db_mtx held */ 1928fa9e4066Sahrens if (db = dbuf_find(dn, 0, blkid)) { 19299e9c486fSGeorge Wilson /* 19309e9c486fSGeorge Wilson * This dbuf is already in the cache. We assume that 19319e9c486fSGeorge Wilson * it is already CACHED, or else about to be either 19329e9c486fSGeorge Wilson * read or filled. 
19339e9c486fSGeorge Wilson */ 1934fa9e4066Sahrens mutex_exit(&db->db_mtx); 19359e9c486fSGeorge Wilson return; 1936fa9e4066Sahrens } 1937fa9e4066Sahrens 193813506d1eSmaybee if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { 19395d7b4d43SMatthew Ahrens if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { 1940b24ab676SJeff Bonwick dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; 19417adb730bSGeorge Wilson arc_flags_t aflags = 19427adb730bSGeorge Wilson ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; 19437802d7bfSMatthew Ahrens zbookmark_phys_t zb; 1944b24ab676SJeff Bonwick 1945b24ab676SJeff Bonwick SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, 1946b24ab676SJeff Bonwick dn->dn_object, 0, blkid); 1947ea8dc4b6Seschrock 19481b912ec7SGeorge Wilson (void) arc_read(NULL, dn->dn_objset->os_spa, 194969962b56SMatthew Ahrens bp, NULL, NULL, prio, 1950fa9e4066Sahrens ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, 195113506d1eSmaybee &aflags, &zb); 1952fa9e4066Sahrens } 195313506d1eSmaybee if (db) 195413506d1eSmaybee dbuf_rele(db, NULL); 1955fa9e4066Sahrens } 1956fa9e4066Sahrens } 1957fa9e4066Sahrens 1958fa9e4066Sahrens /* 1959fa9e4066Sahrens * Returns with db_holds incremented, and db_mtx not held. 1960fa9e4066Sahrens * Note: dn_struct_rwlock must be held. 
1961fa9e4066Sahrens */ 1962fa9e4066Sahrens int 1963fa9e4066Sahrens dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, 1964fa9e4066Sahrens void *tag, dmu_buf_impl_t **dbp) 1965fa9e4066Sahrens { 1966fa9e4066Sahrens dmu_buf_impl_t *db, *parent = NULL; 1967fa9e4066Sahrens 19680a586ceaSMark Shellenbaum ASSERT(blkid != DMU_BONUS_BLKID); 1969fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 1970fa9e4066Sahrens ASSERT3U(dn->dn_nlevels, >, level); 1971fa9e4066Sahrens 1972fa9e4066Sahrens *dbp = NULL; 1973ea8dc4b6Seschrock top: 1974fa9e4066Sahrens /* dbuf_find() returns with db_mtx held */ 1975fa9e4066Sahrens db = dbuf_find(dn, level, blkid); 1976fa9e4066Sahrens 1977fa9e4066Sahrens if (db == NULL) { 1978fa9e4066Sahrens blkptr_t *bp = NULL; 1979fa9e4066Sahrens int err; 1980fa9e4066Sahrens 1981c543ec06Sahrens ASSERT3P(parent, ==, NULL); 1982fa9e4066Sahrens err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp); 1983fa9e4066Sahrens if (fail_sparse) { 1984fa9e4066Sahrens if (err == 0 && bp && BP_IS_HOLE(bp)) 1985be6fd75aSMatthew Ahrens err = SET_ERROR(ENOENT); 1986fa9e4066Sahrens if (err) { 1987c543ec06Sahrens if (parent) 1988ea8dc4b6Seschrock dbuf_rele(parent, NULL); 1989fa9e4066Sahrens return (err); 1990fa9e4066Sahrens } 1991fa9e4066Sahrens } 1992ea8dc4b6Seschrock if (err && err != ENOENT) 1993ea8dc4b6Seschrock return (err); 1994fa9e4066Sahrens db = dbuf_create(dn, level, blkid, parent, bp); 1995fa9e4066Sahrens } 1996fa9e4066Sahrens 1997ea8dc4b6Seschrock if (db->db_buf && refcount_is_zero(&db->db_holds)) { 1998ea8dc4b6Seschrock arc_buf_add_ref(db->db_buf, db); 1999ea8dc4b6Seschrock if (db->db_buf->b_data == NULL) { 2000ea8dc4b6Seschrock dbuf_clear(db); 2001c543ec06Sahrens if (parent) { 2002c543ec06Sahrens dbuf_rele(parent, NULL); 2003c543ec06Sahrens parent = NULL; 2004c543ec06Sahrens } 2005ea8dc4b6Seschrock goto top; 2006ea8dc4b6Seschrock } 2007ea8dc4b6Seschrock ASSERT3P(db->db.db_data, ==, db->db_buf->b_data); 2008ea8dc4b6Seschrock } 
2009ea8dc4b6Seschrock 2010ea8dc4b6Seschrock ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf)); 2011ea8dc4b6Seschrock 2012fa9e4066Sahrens /* 2013c717a561Smaybee * If this buffer is currently syncing out, and we are are 2014c717a561Smaybee * still referencing it from db_data, we need to make a copy 2015c717a561Smaybee * of it in case we decide we want to dirty it again in this txg. 2016fa9e4066Sahrens */ 20170a586ceaSMark Shellenbaum if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && 2018ea8dc4b6Seschrock dn->dn_object != DMU_META_DNODE_OBJECT && 2019c717a561Smaybee db->db_state == DB_CACHED && db->db_data_pending) { 2020c717a561Smaybee dbuf_dirty_record_t *dr = db->db_data_pending; 2021fa9e4066Sahrens 2022c717a561Smaybee if (dr->dt.dl.dr_data == db->db_buf) { 2023c717a561Smaybee arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 2024c717a561Smaybee 2025c717a561Smaybee dbuf_set_data(db, 2026744947dcSTom Erickson arc_buf_alloc(dn->dn_objset->os_spa, 2027c717a561Smaybee db->db.db_size, db, type)); 2028c717a561Smaybee bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data, 2029c717a561Smaybee db->db.db_size); 2030c717a561Smaybee } 2031fa9e4066Sahrens } 2032fa9e4066Sahrens 2033ea8dc4b6Seschrock (void) refcount_add(&db->db_holds, tag); 20349c9dc39aSek DBUF_VERIFY(db); 2035fa9e4066Sahrens mutex_exit(&db->db_mtx); 2036fa9e4066Sahrens 2037fa9e4066Sahrens /* NOTE: we can't rele the parent until after we drop the db_mtx */ 2038c543ec06Sahrens if (parent) 2039ea8dc4b6Seschrock dbuf_rele(parent, NULL); 2040fa9e4066Sahrens 2041744947dcSTom Erickson ASSERT3P(DB_DNODE(db), ==, dn); 2042fa9e4066Sahrens ASSERT3U(db->db_blkid, ==, blkid); 2043fa9e4066Sahrens ASSERT3U(db->db_level, ==, level); 2044fa9e4066Sahrens *dbp = db; 2045fa9e4066Sahrens 2046fa9e4066Sahrens return (0); 2047fa9e4066Sahrens } 2048fa9e4066Sahrens 2049fa9e4066Sahrens dmu_buf_impl_t * 2050ea8dc4b6Seschrock dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag) 2051fa9e4066Sahrens { 2052fa9e4066Sahrens 
dmu_buf_impl_t *db; 2053ea8dc4b6Seschrock int err = dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db); 2054ea8dc4b6Seschrock return (err ? NULL : db); 2055fa9e4066Sahrens } 2056fa9e4066Sahrens 2057fa9e4066Sahrens dmu_buf_impl_t * 2058fa9e4066Sahrens dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag) 2059fa9e4066Sahrens { 2060fa9e4066Sahrens dmu_buf_impl_t *db; 2061ea8dc4b6Seschrock int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db); 2062ea8dc4b6Seschrock return (err ? NULL : db); 2063fa9e4066Sahrens } 2064fa9e4066Sahrens 20651934e92fSmaybee void 2066ea8dc4b6Seschrock dbuf_create_bonus(dnode_t *dn) 2067fa9e4066Sahrens { 2068ea8dc4b6Seschrock ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 2069ea8dc4b6Seschrock 2070ea8dc4b6Seschrock ASSERT(dn->dn_bonus == NULL); 20710a586ceaSMark Shellenbaum dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL); 20720a586ceaSMark Shellenbaum } 20730a586ceaSMark Shellenbaum 20740a586ceaSMark Shellenbaum int 20750a586ceaSMark Shellenbaum dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx) 20760a586ceaSMark Shellenbaum { 20770a586ceaSMark Shellenbaum dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 2078744947dcSTom Erickson dnode_t *dn; 2079744947dcSTom Erickson 20800a586ceaSMark Shellenbaum if (db->db_blkid != DMU_SPILL_BLKID) 2081be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTSUP)); 20820a586ceaSMark Shellenbaum if (blksz == 0) 20830a586ceaSMark Shellenbaum blksz = SPA_MINBLOCKSIZE; 2084b5152584SMatthew Ahrens ASSERT3U(blksz, <=, spa_maxblocksize(dmu_objset_spa(db->db_objset))); 2085b5152584SMatthew Ahrens blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE); 20860a586ceaSMark Shellenbaum 2087744947dcSTom Erickson DB_DNODE_ENTER(db); 2088744947dcSTom Erickson dn = DB_DNODE(db); 2089744947dcSTom Erickson rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 20900a586ceaSMark Shellenbaum dbuf_new_size(db, blksz, tx); 2091744947dcSTom Erickson rw_exit(&dn->dn_struct_rwlock); 2092744947dcSTom Erickson 
DB_DNODE_EXIT(db); 20930a586ceaSMark Shellenbaum 20940a586ceaSMark Shellenbaum return (0); 20950a586ceaSMark Shellenbaum } 20960a586ceaSMark Shellenbaum 20970a586ceaSMark Shellenbaum void 20980a586ceaSMark Shellenbaum dbuf_rm_spill(dnode_t *dn, dmu_tx_t *tx) 20990a586ceaSMark Shellenbaum { 21000a586ceaSMark Shellenbaum dbuf_free_range(dn, DMU_SPILL_BLKID, DMU_SPILL_BLKID, tx); 2101fa9e4066Sahrens } 2102fa9e4066Sahrens 2103ea8dc4b6Seschrock #pragma weak dmu_buf_add_ref = dbuf_add_ref 2104fa9e4066Sahrens void 2105fa9e4066Sahrens dbuf_add_ref(dmu_buf_impl_t *db, void *tag) 2106fa9e4066Sahrens { 2107ea8dc4b6Seschrock int64_t holds = refcount_add(&db->db_holds, tag); 2108ea8dc4b6Seschrock ASSERT(holds > 1); 2109fa9e4066Sahrens } 2110fa9e4066Sahrens 2111744947dcSTom Erickson /* 2112744947dcSTom Erickson * If you call dbuf_rele() you had better not be referencing the dnode handle 2113744947dcSTom Erickson * unless you have some other direct or indirect hold on the dnode. (An indirect 2114744947dcSTom Erickson * hold is a hold on one of the dnode's dbufs, including the bonus buffer.) 2115744947dcSTom Erickson * Without that, the dbuf_rele() could lead to a dnode_rele() followed by the 2116744947dcSTom Erickson * dnode's parent dbuf evicting its dnode handles. 2117744947dcSTom Erickson */ 2118fa9e4066Sahrens void 2119ea8dc4b6Seschrock dbuf_rele(dmu_buf_impl_t *db, void *tag) 2120b24ab676SJeff Bonwick { 2121b24ab676SJeff Bonwick mutex_enter(&db->db_mtx); 2122b24ab676SJeff Bonwick dbuf_rele_and_unlock(db, tag); 2123b24ab676SJeff Bonwick } 2124b24ab676SJeff Bonwick 212543466aaeSMax Grossman void 212643466aaeSMax Grossman dmu_buf_rele(dmu_buf_t *db, void *tag) 212743466aaeSMax Grossman { 212843466aaeSMax Grossman dbuf_rele((dmu_buf_impl_t *)db, tag); 212943466aaeSMax Grossman } 213043466aaeSMax Grossman 2131b24ab676SJeff Bonwick /* 2132b24ab676SJeff Bonwick * dbuf_rele() for an already-locked dbuf. 
This is necessary to allow 2133b24ab676SJeff Bonwick * db_dirtycnt and db_holds to be updated atomically. 2134b24ab676SJeff Bonwick */ 2135b24ab676SJeff Bonwick void 2136b24ab676SJeff Bonwick dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) 2137fa9e4066Sahrens { 2138fa9e4066Sahrens int64_t holds; 2139fa9e4066Sahrens 2140b24ab676SJeff Bonwick ASSERT(MUTEX_HELD(&db->db_mtx)); 21419c9dc39aSek DBUF_VERIFY(db); 2142fa9e4066Sahrens 2143744947dcSTom Erickson /* 2144744947dcSTom Erickson * Remove the reference to the dbuf before removing its hold on the 2145744947dcSTom Erickson * dnode so we can guarantee in dnode_move() that a referenced bonus 2146744947dcSTom Erickson * buffer has a corresponding dnode hold. 2147744947dcSTom Erickson */ 2148fa9e4066Sahrens holds = refcount_remove(&db->db_holds, tag); 2149ea8dc4b6Seschrock ASSERT(holds >= 0); 2150ea8dc4b6Seschrock 2151c717a561Smaybee /* 2152c717a561Smaybee * We can't freeze indirects if there is a possibility that they 2153c717a561Smaybee * may be modified in the current syncing context. 2154c717a561Smaybee */ 2155c717a561Smaybee if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0)) 21566b4acc8bSahrens arc_buf_freeze(db->db_buf); 21576b4acc8bSahrens 2158ea8dc4b6Seschrock if (holds == db->db_dirtycnt && 2159c717a561Smaybee db->db_level == 0 && db->db_immediate_evict) 2160ea8dc4b6Seschrock dbuf_evict_user(db); 2161fa9e4066Sahrens 2162fa9e4066Sahrens if (holds == 0) { 21630a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 2164ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 2165744947dcSTom Erickson 2166744947dcSTom Erickson /* 2167744947dcSTom Erickson * If the dnode moves here, we cannot cross this barrier 2168744947dcSTom Erickson * until the move completes. 
2169744947dcSTom Erickson */ 2170744947dcSTom Erickson DB_DNODE_ENTER(db); 2171640c1670SJosef 'Jeff' Sipek atomic_dec_32(&DB_DNODE(db)->dn_dbufs_count); 2172744947dcSTom Erickson DB_DNODE_EXIT(db); 2173744947dcSTom Erickson /* 2174744947dcSTom Erickson * The bonus buffer's dnode hold is no longer discounted 2175744947dcSTom Erickson * in dnode_move(). The dnode cannot move until after 2176744947dcSTom Erickson * the dnode_rele(). 2177744947dcSTom Erickson */ 2178744947dcSTom Erickson dnode_rele(DB_DNODE(db), db); 2179ea8dc4b6Seschrock } else if (db->db_buf == NULL) { 2180ea8dc4b6Seschrock /* 2181ea8dc4b6Seschrock * This is a special case: we never associated this 2182ea8dc4b6Seschrock * dbuf with any data allocated from the ARC. 2183ea8dc4b6Seschrock */ 218482c9918fSTim Haley ASSERT(db->db_state == DB_UNCACHED || 218582c9918fSTim Haley db->db_state == DB_NOFILL); 2186ea8dc4b6Seschrock dbuf_evict(db); 21876b4acc8bSahrens } else if (arc_released(db->db_buf)) { 2188ea8dc4b6Seschrock arc_buf_t *buf = db->db_buf; 2189ea8dc4b6Seschrock /* 2190ea8dc4b6Seschrock * This dbuf has anonymous data associated with it. 2191ea8dc4b6Seschrock */ 2192*bc9014e6SJustin Gibbs dbuf_clear_data(db); 21933b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(buf, db)); 2194ea8dc4b6Seschrock dbuf_evict(db); 2195ea8dc4b6Seschrock } else { 21963b2aab18SMatthew Ahrens VERIFY(!arc_buf_remove_ref(db->db_buf, db)); 21979253d63dSGeorge Wilson 21989253d63dSGeorge Wilson /* 21999253d63dSGeorge Wilson * A dbuf will be eligible for eviction if either the 22009253d63dSGeorge Wilson * 'primarycache' property is set or a duplicate 22019253d63dSGeorge Wilson * copy of this buffer is already cached in the arc. 22029253d63dSGeorge Wilson * 22039253d63dSGeorge Wilson * In the case of the 'primarycache' a buffer 22049253d63dSGeorge Wilson * is considered for eviction if it matches the 22059253d63dSGeorge Wilson * criteria set in the property. 
22069253d63dSGeorge Wilson * 22079253d63dSGeorge Wilson * To decide if our buffer is considered a 22089253d63dSGeorge Wilson * duplicate, we must call into the arc to determine 22099253d63dSGeorge Wilson * if multiple buffers are referencing the same 22109253d63dSGeorge Wilson * block on-disk. If so, then we simply evict 22119253d63dSGeorge Wilson * ourselves. 22129253d63dSGeorge Wilson */ 2213bbfa8ea8SMatthew Ahrens if (!DBUF_IS_CACHEABLE(db)) { 2214bbfa8ea8SMatthew Ahrens if (db->db_blkptr != NULL && 2215bbfa8ea8SMatthew Ahrens !BP_IS_HOLE(db->db_blkptr) && 2216bbfa8ea8SMatthew Ahrens !BP_IS_EMBEDDED(db->db_blkptr)) { 2217bbfa8ea8SMatthew Ahrens spa_t *spa = 2218bbfa8ea8SMatthew Ahrens dmu_objset_spa(db->db_objset); 2219bbfa8ea8SMatthew Ahrens blkptr_t bp = *db->db_blkptr; 2220bbfa8ea8SMatthew Ahrens dbuf_clear(db); 2221bbfa8ea8SMatthew Ahrens arc_freed(spa, &bp); 2222bbfa8ea8SMatthew Ahrens } else { 2223bbfa8ea8SMatthew Ahrens dbuf_clear(db); 2224bbfa8ea8SMatthew Ahrens } 2225*bc9014e6SJustin Gibbs } else if (db->db_objset->os_evicting || 2226*bc9014e6SJustin Gibbs arc_buf_eviction_needed(db->db_buf)) { 22273baa08fcSek dbuf_clear(db); 2228bbfa8ea8SMatthew Ahrens } else { 22293baa08fcSek mutex_exit(&db->db_mtx); 2230bbfa8ea8SMatthew Ahrens } 2231ea8dc4b6Seschrock } 2232fa9e4066Sahrens } else { 2233fa9e4066Sahrens mutex_exit(&db->db_mtx); 2234fa9e4066Sahrens } 2235fa9e4066Sahrens } 2236fa9e4066Sahrens 2237fa9e4066Sahrens #pragma weak dmu_buf_refcount = dbuf_refcount 2238fa9e4066Sahrens uint64_t 2239fa9e4066Sahrens dbuf_refcount(dmu_buf_impl_t *db) 2240fa9e4066Sahrens { 2241fa9e4066Sahrens return (refcount_count(&db->db_holds)); 2242fa9e4066Sahrens } 2243fa9e4066Sahrens 2244fa9e4066Sahrens void * 2245*bc9014e6SJustin Gibbs dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user, 2246*bc9014e6SJustin Gibbs dmu_buf_user_t *new_user) 2247fa9e4066Sahrens { 2248*bc9014e6SJustin Gibbs dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 2249*bc9014e6SJustin Gibbs 
2250*bc9014e6SJustin Gibbs mutex_enter(&db->db_mtx); 2251*bc9014e6SJustin Gibbs dbuf_verify_user(db, DBVU_NOT_EVICTING); 2252*bc9014e6SJustin Gibbs if (db->db_user == old_user) 2253*bc9014e6SJustin Gibbs db->db_user = new_user; 2254*bc9014e6SJustin Gibbs else 2255*bc9014e6SJustin Gibbs old_user = db->db_user; 2256*bc9014e6SJustin Gibbs dbuf_verify_user(db, DBVU_NOT_EVICTING); 2257*bc9014e6SJustin Gibbs mutex_exit(&db->db_mtx); 2258*bc9014e6SJustin Gibbs 2259*bc9014e6SJustin Gibbs return (old_user); 2260fa9e4066Sahrens } 2261fa9e4066Sahrens 2262fa9e4066Sahrens void * 2263*bc9014e6SJustin Gibbs dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user) 2264fa9e4066Sahrens { 2265*bc9014e6SJustin Gibbs return (dmu_buf_replace_user(db_fake, NULL, user)); 2266fa9e4066Sahrens } 2267fa9e4066Sahrens 2268fa9e4066Sahrens void * 2269*bc9014e6SJustin Gibbs dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user) 2270fa9e4066Sahrens { 2271fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 2272fa9e4066Sahrens 2273*bc9014e6SJustin Gibbs db->db_immediate_evict = TRUE; 2274*bc9014e6SJustin Gibbs return (dmu_buf_set_user(db_fake, user)); 2275*bc9014e6SJustin Gibbs } 2276fa9e4066Sahrens 2277*bc9014e6SJustin Gibbs void * 2278*bc9014e6SJustin Gibbs dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user) 2279*bc9014e6SJustin Gibbs { 2280*bc9014e6SJustin Gibbs return (dmu_buf_replace_user(db_fake, user, NULL)); 2281fa9e4066Sahrens } 2282fa9e4066Sahrens 2283fa9e4066Sahrens void * 2284fa9e4066Sahrens dmu_buf_get_user(dmu_buf_t *db_fake) 2285fa9e4066Sahrens { 2286fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 2287fa9e4066Sahrens 2288*bc9014e6SJustin Gibbs dbuf_verify_user(db, DBVU_NOT_EVICTING); 2289*bc9014e6SJustin Gibbs return (db->db_user); 2290*bc9014e6SJustin Gibbs } 2291*bc9014e6SJustin Gibbs 2292*bc9014e6SJustin Gibbs void 2293*bc9014e6SJustin Gibbs dmu_buf_user_evict_wait() 2294*bc9014e6SJustin Gibbs { 2295*bc9014e6SJustin Gibbs 
taskq_wait(dbu_evict_taskq); 2296fa9e4066Sahrens } 2297fa9e4066Sahrens 22983d692628SSanjeev Bagewadi boolean_t 22993d692628SSanjeev Bagewadi dmu_buf_freeable(dmu_buf_t *dbuf) 23003d692628SSanjeev Bagewadi { 23013d692628SSanjeev Bagewadi boolean_t res = B_FALSE; 23023d692628SSanjeev Bagewadi dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; 23033d692628SSanjeev Bagewadi 23043d692628SSanjeev Bagewadi if (db->db_blkptr) 23053d692628SSanjeev Bagewadi res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset, 2306c7cd2421SGeorge Wilson db->db_blkptr, db->db_blkptr->blk_birth); 23073d692628SSanjeev Bagewadi 23083d692628SSanjeev Bagewadi return (res); 23093d692628SSanjeev Bagewadi } 23103d692628SSanjeev Bagewadi 231180901aeaSGeorge Wilson blkptr_t * 231280901aeaSGeorge Wilson dmu_buf_get_blkptr(dmu_buf_t *db) 231380901aeaSGeorge Wilson { 231480901aeaSGeorge Wilson dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db; 231580901aeaSGeorge Wilson return (dbi->db_blkptr); 231680901aeaSGeorge Wilson } 231780901aeaSGeorge Wilson 2318c717a561Smaybee static void 2319c717a561Smaybee dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) 2320fa9e4066Sahrens { 2321c717a561Smaybee /* ASSERT(dmu_tx_is_syncing(tx) */ 2322c717a561Smaybee ASSERT(MUTEX_HELD(&db->db_mtx)); 2323c717a561Smaybee 2324c717a561Smaybee if (db->db_blkptr != NULL) 2325c717a561Smaybee return; 2326c717a561Smaybee 23270a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) { 23280a586ceaSMark Shellenbaum db->db_blkptr = &dn->dn_phys->dn_spill; 23290a586ceaSMark Shellenbaum BP_ZERO(db->db_blkptr); 23300a586ceaSMark Shellenbaum return; 23310a586ceaSMark Shellenbaum } 2332c717a561Smaybee if (db->db_level == dn->dn_phys->dn_nlevels-1) { 2333c717a561Smaybee /* 2334c717a561Smaybee * This buffer was allocated at a time when there was 2335c717a561Smaybee * no available blkptrs from the dnode, or it was 2336c717a561Smaybee * inappropriate to hook it in (i.e., nlevels mis-match). 
2337c717a561Smaybee */ 2338c717a561Smaybee ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr); 2339c717a561Smaybee ASSERT(db->db_parent == NULL); 2340c717a561Smaybee db->db_parent = dn->dn_dbuf; 2341c717a561Smaybee db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid]; 2342c717a561Smaybee DBUF_VERIFY(db); 2343c717a561Smaybee } else { 2344c717a561Smaybee dmu_buf_impl_t *parent = db->db_parent; 2345c717a561Smaybee int epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 2346c717a561Smaybee 2347c717a561Smaybee ASSERT(dn->dn_phys->dn_nlevels > 1); 2348c717a561Smaybee if (parent == NULL) { 2349c717a561Smaybee mutex_exit(&db->db_mtx); 2350c717a561Smaybee rw_enter(&dn->dn_struct_rwlock, RW_READER); 2351c717a561Smaybee (void) dbuf_hold_impl(dn, db->db_level+1, 2352c717a561Smaybee db->db_blkid >> epbs, FALSE, db, &parent); 2353c717a561Smaybee rw_exit(&dn->dn_struct_rwlock); 2354c717a561Smaybee mutex_enter(&db->db_mtx); 2355c717a561Smaybee db->db_parent = parent; 2356c717a561Smaybee } 2357c717a561Smaybee db->db_blkptr = (blkptr_t *)parent->db.db_data + 2358c717a561Smaybee (db->db_blkid & ((1ULL << epbs) - 1)); 2359c717a561Smaybee DBUF_VERIFY(db); 2360c717a561Smaybee } 2361c717a561Smaybee } 2362c717a561Smaybee 2363c717a561Smaybee static void 2364c717a561Smaybee dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) 2365c717a561Smaybee { 2366c717a561Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 2367744947dcSTom Erickson dnode_t *dn; 2368c717a561Smaybee zio_t *zio; 2369c717a561Smaybee 2370c717a561Smaybee ASSERT(dmu_tx_is_syncing(tx)); 2371c717a561Smaybee 2372c717a561Smaybee dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); 2373c717a561Smaybee 2374c717a561Smaybee mutex_enter(&db->db_mtx); 2375c717a561Smaybee 2376c717a561Smaybee ASSERT(db->db_level > 0); 2377c717a561Smaybee DBUF_VERIFY(db); 2378c717a561Smaybee 23793e30c24aSWill Andrews /* Read the block if it hasn't been read yet. 
*/ 2380c717a561Smaybee if (db->db_buf == NULL) { 2381c717a561Smaybee mutex_exit(&db->db_mtx); 2382c717a561Smaybee (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); 2383c717a561Smaybee mutex_enter(&db->db_mtx); 2384c717a561Smaybee } 2385c717a561Smaybee ASSERT3U(db->db_state, ==, DB_CACHED); 2386c717a561Smaybee ASSERT(db->db_buf != NULL); 2387c717a561Smaybee 2388744947dcSTom Erickson DB_DNODE_ENTER(db); 2389744947dcSTom Erickson dn = DB_DNODE(db); 23903e30c24aSWill Andrews /* Indirect block size must match what the dnode thinks it is. */ 2391744947dcSTom Erickson ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); 2392c717a561Smaybee dbuf_check_blkptr(dn, db); 2393744947dcSTom Erickson DB_DNODE_EXIT(db); 2394c717a561Smaybee 23953e30c24aSWill Andrews /* Provide the pending dirty record to child dbufs */ 2396c717a561Smaybee db->db_data_pending = dr; 2397c717a561Smaybee 2398af2c4821Smaybee mutex_exit(&db->db_mtx); 2399088f3894Sahrens dbuf_write(dr, db->db_buf, tx); 2400c717a561Smaybee 2401c717a561Smaybee zio = dr->dr_zio; 2402c717a561Smaybee mutex_enter(&dr->dt.di.dr_mtx); 2403c717a561Smaybee dbuf_sync_list(&dr->dt.di.dr_children, tx); 2404c717a561Smaybee ASSERT(list_head(&dr->dt.di.dr_children) == NULL); 2405c717a561Smaybee mutex_exit(&dr->dt.di.dr_mtx); 2406c717a561Smaybee zio_nowait(zio); 2407c717a561Smaybee } 2408c717a561Smaybee 2409c717a561Smaybee static void 2410c717a561Smaybee dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) 2411c717a561Smaybee { 2412c717a561Smaybee arc_buf_t **datap = &dr->dt.dl.dr_data; 2413c717a561Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 2414744947dcSTom Erickson dnode_t *dn; 2415744947dcSTom Erickson objset_t *os; 2416c717a561Smaybee uint64_t txg = tx->tx_txg; 2417fa9e4066Sahrens 2418fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 2419fa9e4066Sahrens 2420fa9e4066Sahrens dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); 2421fa9e4066Sahrens 2422fa9e4066Sahrens mutex_enter(&db->db_mtx); 2423fa9e4066Sahrens /* 
2424fa9e4066Sahrens * To be synced, we must be dirtied. But we 2425fa9e4066Sahrens * might have been freed after the dirty. 2426fa9e4066Sahrens */ 2427fa9e4066Sahrens if (db->db_state == DB_UNCACHED) { 2428fa9e4066Sahrens /* This buffer has been freed since it was dirtied */ 2429fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 2430fa9e4066Sahrens } else if (db->db_state == DB_FILL) { 2431fa9e4066Sahrens /* This buffer was freed and is now being re-filled */ 2432c717a561Smaybee ASSERT(db->db.db_data != dr->dt.dl.dr_data); 2433fa9e4066Sahrens } else { 243482c9918fSTim Haley ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL); 2435fa9e4066Sahrens } 24369c9dc39aSek DBUF_VERIFY(db); 2437fa9e4066Sahrens 2438744947dcSTom Erickson DB_DNODE_ENTER(db); 2439744947dcSTom Erickson dn = DB_DNODE(db); 2440744947dcSTom Erickson 24410a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) { 24420a586ceaSMark Shellenbaum mutex_enter(&dn->dn_mtx); 24430a586ceaSMark Shellenbaum dn->dn_phys->dn_flags |= DNODE_FLAG_SPILL_BLKPTR; 24440a586ceaSMark Shellenbaum mutex_exit(&dn->dn_mtx); 24450a586ceaSMark Shellenbaum } 24460a586ceaSMark Shellenbaum 2447fa9e4066Sahrens /* 2448c717a561Smaybee * If this is a bonus buffer, simply copy the bonus data into the 2449c717a561Smaybee * dnode. It will be written out when the dnode is synced (and it 2450c717a561Smaybee * will be synced, since it must have been dirty for dbuf_sync to 2451c717a561Smaybee * be called). 
2452fa9e4066Sahrens */ 24530a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 2454c717a561Smaybee dbuf_dirty_record_t **drp; 24551934e92fSmaybee 2456ea8dc4b6Seschrock ASSERT(*datap != NULL); 2457fb09f5aaSMadhav Suresh ASSERT0(db->db_level); 2458ea8dc4b6Seschrock ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); 2459ea8dc4b6Seschrock bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); 2460744947dcSTom Erickson DB_DNODE_EXIT(db); 2461744947dcSTom Erickson 24620e8c6158Smaybee if (*datap != db->db.db_data) { 2463ea8dc4b6Seschrock zio_buf_free(*datap, DN_MAX_BONUSLEN); 24645a98e54bSBrendan Gregg - Sun Microsystems arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); 24650e8c6158Smaybee } 2466ea8dc4b6Seschrock db->db_data_pending = NULL; 2467c717a561Smaybee drp = &db->db_last_dirty; 2468c717a561Smaybee while (*drp != dr) 2469c717a561Smaybee drp = &(*drp)->dr_next; 247017f17c2dSbonwick ASSERT(dr->dr_next == NULL); 2471b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 247217f17c2dSbonwick *drp = dr->dr_next; 2473c717a561Smaybee kmem_free(dr, sizeof (dbuf_dirty_record_t)); 2474ea8dc4b6Seschrock ASSERT(db->db_dirtycnt > 0); 2475ea8dc4b6Seschrock db->db_dirtycnt -= 1; 2476b24ab676SJeff Bonwick dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); 2477ea8dc4b6Seschrock return; 2478ea8dc4b6Seschrock } 2479ea8dc4b6Seschrock 2480744947dcSTom Erickson os = dn->dn_objset; 2481744947dcSTom Erickson 2482f82bfe17Sgw /* 2483f82bfe17Sgw * This function may have dropped the db_mtx lock allowing a dmu_sync 2484f82bfe17Sgw * operation to sneak in. As a result, we need to ensure that we 2485f82bfe17Sgw * don't check the dr_override_state until we have returned from 2486f82bfe17Sgw * dbuf_check_blkptr. 2487f82bfe17Sgw */ 2488f82bfe17Sgw dbuf_check_blkptr(dn, db); 2489f82bfe17Sgw 2490c717a561Smaybee /* 2491744947dcSTom Erickson * If this buffer is in the middle of an immediate write, 2492c717a561Smaybee * wait for the synchronous IO to complete. 
2493c717a561Smaybee */ 2494c717a561Smaybee while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { 2495c717a561Smaybee ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 2496c717a561Smaybee cv_wait(&db->db_changed, &db->db_mtx); 2497c717a561Smaybee ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); 2498c717a561Smaybee } 2499c5c6ffa0Smaybee 2500ab69d62fSMatthew Ahrens if (db->db_state != DB_NOFILL && 2501ab69d62fSMatthew Ahrens dn->dn_object != DMU_META_DNODE_OBJECT && 2502ab69d62fSMatthew Ahrens refcount_count(&db->db_holds) > 1 && 2503b24ab676SJeff Bonwick dr->dt.dl.dr_override_state != DR_OVERRIDDEN && 2504ab69d62fSMatthew Ahrens *datap == db->db_buf) { 2505ab69d62fSMatthew Ahrens /* 2506ab69d62fSMatthew Ahrens * If this buffer is currently "in use" (i.e., there 2507ab69d62fSMatthew Ahrens * are active holds and db_data still references it), 2508ab69d62fSMatthew Ahrens * then make a copy before we start the write so that 2509ab69d62fSMatthew Ahrens * any modifications from the open txg will not leak 2510ab69d62fSMatthew Ahrens * into this write. 2511ab69d62fSMatthew Ahrens * 2512ab69d62fSMatthew Ahrens * NOTE: this copy does not need to be made for 2513ab69d62fSMatthew Ahrens * objects only modified in the syncing context (e.g. 2514ab69d62fSMatthew Ahrens * DNONE_DNODE blocks). 
2515ab69d62fSMatthew Ahrens */ 2516ab69d62fSMatthew Ahrens int blksz = arc_buf_size(*datap); 2517ab69d62fSMatthew Ahrens arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 2518ab69d62fSMatthew Ahrens *datap = arc_buf_alloc(os->os_spa, blksz, db, type); 2519ab69d62fSMatthew Ahrens bcopy(db->db.db_data, (*datap)->b_data, blksz); 252082c9918fSTim Haley } 2521c717a561Smaybee db->db_data_pending = dr; 2522fa9e4066Sahrens 2523c717a561Smaybee mutex_exit(&db->db_mtx); 2524fa9e4066Sahrens 2525088f3894Sahrens dbuf_write(dr, *datap, tx); 2526fa9e4066Sahrens 2527c717a561Smaybee ASSERT(!list_link_active(&dr->dr_dirty_node)); 2528744947dcSTom Erickson if (dn->dn_object == DMU_META_DNODE_OBJECT) { 2529c717a561Smaybee list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr); 2530744947dcSTom Erickson DB_DNODE_EXIT(db); 2531744947dcSTom Erickson } else { 2532744947dcSTom Erickson /* 2533744947dcSTom Erickson * Although zio_nowait() does not "wait for an IO", it does 2534744947dcSTom Erickson * initiate the IO. If this is an empty write it seems plausible 2535744947dcSTom Erickson * that the IO could actually be completed before the nowait 2536744947dcSTom Erickson * returns. We need to DB_DNODE_EXIT() first in case 2537744947dcSTom Erickson * zio_nowait() invalidates the dbuf. 2538744947dcSTom Erickson */ 2539744947dcSTom Erickson DB_DNODE_EXIT(db); 2540c717a561Smaybee zio_nowait(dr->dr_zio); 2541744947dcSTom Erickson } 2542c717a561Smaybee } 254323b11526Smaybee 2544c717a561Smaybee void 2545c717a561Smaybee dbuf_sync_list(list_t *list, dmu_tx_t *tx) 2546c717a561Smaybee { 2547c717a561Smaybee dbuf_dirty_record_t *dr; 2548c717a561Smaybee 2549c717a561Smaybee while (dr = list_head(list)) { 2550c717a561Smaybee if (dr->dr_zio != NULL) { 2551c717a561Smaybee /* 2552c717a561Smaybee * If we find an already initialized zio then we 2553c717a561Smaybee * are processing the meta-dnode, and we have finished. 
2554c717a561Smaybee * The dbufs for all dnodes are put back on the list 2555c717a561Smaybee * during processing, so that we can zio_wait() 2556c717a561Smaybee * these IOs after initiating all child IOs. 2557c717a561Smaybee */ 2558c717a561Smaybee ASSERT3U(dr->dr_dbuf->db.db_object, ==, 2559c717a561Smaybee DMU_META_DNODE_OBJECT); 2560c717a561Smaybee break; 256123b11526Smaybee } 2562c717a561Smaybee list_remove(list, dr); 2563c717a561Smaybee if (dr->dr_dbuf->db_level > 0) 2564c717a561Smaybee dbuf_sync_indirect(dr, tx); 2565c717a561Smaybee else 2566c717a561Smaybee dbuf_sync_leaf(dr, tx); 256723b11526Smaybee } 2568c717a561Smaybee } 256923b11526Smaybee 2570fa9e4066Sahrens /* ARGSUSED */ 2571fa9e4066Sahrens static void 2572c717a561Smaybee dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) 2573fa9e4066Sahrens { 2574fa9e4066Sahrens dmu_buf_impl_t *db = vdb; 2575744947dcSTom Erickson dnode_t *dn; 2576e14bb325SJeff Bonwick blkptr_t *bp = zio->io_bp; 2577c717a561Smaybee blkptr_t *bp_orig = &zio->io_bp_orig; 2578b24ab676SJeff Bonwick spa_t *spa = zio->io_spa; 2579b24ab676SJeff Bonwick int64_t delta; 2580fa9e4066Sahrens uint64_t fill = 0; 2581b24ab676SJeff Bonwick int i; 2582fa9e4066Sahrens 25835d7b4d43SMatthew Ahrens ASSERT3P(db->db_blkptr, ==, bp); 2584e14bb325SJeff Bonwick 2585744947dcSTom Erickson DB_DNODE_ENTER(db); 2586744947dcSTom Erickson dn = DB_DNODE(db); 2587b24ab676SJeff Bonwick delta = bp_get_dsize_sync(spa, bp) - bp_get_dsize_sync(spa, bp_orig); 2588b24ab676SJeff Bonwick dnode_diduse_space(dn, delta - zio->io_prev_space_delta); 2589b24ab676SJeff Bonwick zio->io_prev_space_delta = delta; 2590fa9e4066Sahrens 259143466aaeSMax Grossman if (bp->blk_birth != 0) { 259243466aaeSMax Grossman ASSERT((db->db_blkid != DMU_SPILL_BLKID && 259343466aaeSMax Grossman BP_GET_TYPE(bp) == dn->dn_type) || 259443466aaeSMax Grossman (db->db_blkid == DMU_SPILL_BLKID && 25955d7b4d43SMatthew Ahrens BP_GET_TYPE(bp) == dn->dn_bonustype) || 25965d7b4d43SMatthew Ahrens BP_IS_EMBEDDED(bp)); 
		ASSERT(BP_GET_LEVEL(bp) == db->db_level);
	}

	mutex_enter(&db->db_mtx);

#ifdef ZFS_DEBUG
	/* A spill block must be flagged in the dnode and wired to dn_spill. */
	if (db->db_blkid == DMU_SPILL_BLKID) {
		ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
		ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
		    db->db_blkptr == &dn->dn_phys->dn_spill);
	}
#endif

	if (db->db_level == 0) {
		mutex_enter(&dn->dn_mtx);
		/*
		 * Writing a new highest-numbered (non-spill) block grows
		 * the dnode's on-disk maxblkid.
		 */
		if (db->db_blkid > dn->dn_phys->dn_maxblkid &&
		    db->db_blkid != DMU_SPILL_BLKID)
			dn->dn_phys->dn_maxblkid = db->db_blkid;
		mutex_exit(&dn->dn_mtx);

		if (dn->dn_type == DMU_OT_DNODE) {
			/*
			 * For a block of dnodes, "fill" counts how many of
			 * the contained dnodes are allocated.
			 */
			dnode_phys_t *dnp = db->db.db_data;
			for (i = db->db.db_size >> DNODE_SHIFT; i > 0;
			    i--, dnp++) {
				if (dnp->dn_type != DMU_OT_NONE)
					fill++;
			}
		} else {
			/* Ordinary data block: in use (1) or a hole (0). */
			if (BP_IS_HOLE(bp)) {
				fill = 0;
			} else {
				fill = 1;
			}
		}
	} else {
		/*
		 * Indirect block: sum the fill counts of all non-hole
		 * child block pointers.
		 */
		blkptr_t *ibp = db->db.db_data;
		ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
		for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) {
			if (BP_IS_HOLE(ibp))
				continue;
			fill += BP_GET_FILL(ibp);
		}
	}
	DB_DNODE_EXIT(db);

	/* Embedded BPs store data in the blkptr itself; no fill field. */
	if (!BP_IS_EMBEDDED(bp))
		bp->blk_fill = fill;

	mutex_exit(&db->db_mtx);
}

/*
 * The SPA will call this callback several times for each zio - once
 * for every physical child i/o (zio->io_phys_children times).  This
 * allows the DMU to monitor the progress of each logical i/o.  For example,
 * there may be 2 copies of an indirect block, or many fragments of a RAID-Z
 * block.  There may be a long delay before all copies/fragments are completed,
 * so this callback allows us to retire dirty space gradually, as the physical
 * i/os complete.
 */
/* ARGSUSED */
static void
dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg)
{
	dmu_buf_impl_t *db = arg;
	objset_t *os = db->db_objset;
	dsl_pool_t *dp = dmu_objset_pool(os);
	dbuf_dirty_record_t *dr;
	int delta = 0;

	dr = db->db_data_pending;
	ASSERT3U(dr->dr_txg, ==, zio->io_txg);

	/*
	 * The callback will be called io_phys_children times.  Retire one
	 * portion of our dirty space each time we are called.  Any rounding
	 * error will be cleaned up by dsl_pool_sync()'s call to
	 * dsl_pool_undirty_space().
	 */
	delta = dr->dr_accounted / zio->io_phys_children;
	dsl_pool_undirty_space(dp, delta, zio->io_txg);
}

/*
 * Final "done" callback for a dbuf write zio: performs block birth/death
 * accounting, unlinks and frees the dirty record being synced, releases
 * the write's buffers, and drops the hold taken for the sync.
 */
/* ARGSUSED */
static void
dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
{
	dmu_buf_impl_t *db = vdb;
	blkptr_t *bp_orig = &zio->io_bp_orig;
	blkptr_t *bp = db->db_blkptr;
	objset_t *os = db->db_objset;
	dmu_tx_t *tx = os->os_synctx;
	dbuf_dirty_record_t **drp, *dr;

	ASSERT0(zio->io_error);
	ASSERT(db->db_blkptr == bp);

	/*
	 * For nopwrites and rewrites we ensure that the bp matches our
	 * original and bypass all the accounting
	 */
	if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) {
		ASSERT(BP_EQUAL(bp, bp_orig));
	} else {
		/* Normal write: kill the old block and birth the new one. */
		dsl_dataset_t *ds = os->os_dsl_dataset;
		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
		dsl_dataset_block_born(ds, bp, tx);
	}

	mutex_enter(&db->db_mtx);

	DBUF_VERIFY(db);

	/* Locate and unlink the dirty record that this write just synced. */
	drp = &db->db_last_dirty;
	while ((dr = *drp) != db->db_data_pending)
		drp = &dr->dr_next;
	ASSERT(!list_link_active(&dr->dr_dirty_node));
	ASSERT(dr->dr_dbuf == db);
	ASSERT(dr->dr_next == NULL);
	*drp = dr->dr_next;

#ifdef ZFS_DEBUG
	/* Re-verify the spill block's wiring to the dnode in debug builds. */
	if (db->db_blkid == DMU_SPILL_BLKID) {
		dnode_t *dn;

		DB_DNODE_ENTER(db);
		dn = DB_DNODE(db);
		ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
		ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
		    db->db_blkptr == &dn->dn_phys->dn_spill);
		DB_DNODE_EXIT(db);
	}
#endif

	if (db->db_level == 0) {
		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
		ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
		if (db->db_state != DB_NOFILL) {
			/*
			 * Drop the dirty record's private copy of the data,
			 * or install the eviction callback on the shared
			 * ARC buffer if it hasn't been released.
			 */
			if (dr->dt.dl.dr_data != db->db_buf)
				VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
				    db));
			else if (!arc_released(db->db_buf))
				arc_set_callback(db->db_buf, dbuf_do_evict, db);
		}
	} else {
		dnode_t *dn;

		DB_DNODE_ENTER(db);
		dn = DB_DNODE(db);
		ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
		ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
		if (!BP_IS_HOLE(db->db_blkptr)) {
			int epbs =
			    dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
			ASSERT3U(db->db_blkid, <=,
			    dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
			ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
			    db->db.db_size);
			if (!arc_released(db->db_buf))
				arc_set_callback(db->db_buf, dbuf_do_evict, db);
		}
		DB_DNODE_EXIT(db);
		/* Indirect dirty records own a child list and its lock. */
		mutex_destroy(&dr->dt.di.dr_mtx);
		list_destroy(&dr->dt.di.dr_children);
	}
	kmem_free(dr, sizeof (dbuf_dirty_record_t));

	cv_broadcast(&db->db_changed);
	ASSERT(db->db_dirtycnt > 0);
	db->db_dirtycnt -= 1;
	db->db_data_pending = NULL;
	/* Drops both the sync's hold and db_mtx. */
	dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
}

/* NOFILL writes have no ARC buffer; forward to the common ready handler. */
static void
dbuf_write_nofill_ready(zio_t *zio)
{
	dbuf_write_ready(zio, NULL, zio->io_private);
}

/* NOFILL writes have no ARC buffer; forward to the common done handler. */
static void
dbuf_write_nofill_done(zio_t *zio)
{
	dbuf_write_done(zio, NULL, zio->io_private);
}

static void
dbuf_write_override_ready(zio_t *zio)
{
	dbuf_dirty_record_t *dr = zio->io_private;
	dmu_buf_impl_t *db = dr->dr_dbuf;

	dbuf_write_ready(zio, NULL, db);
}

static void
dbuf_write_override_done(zio_t *zio)
{
	dbuf_dirty_record_t *dr = zio->io_private;
	dmu_buf_impl_t *db = dr->dr_dbuf;
	blkptr_t *obp = &dr->dt.dl.dr_overridden_by;

	/*
	 * If the final BP differs from the override BP provided in open
	 * context, free the override block and re-release the ARC buffer.
	 */
	mutex_enter(&db->db_mtx);
	if (!BP_EQUAL(zio->io_bp, obp)) {
		if (!BP_IS_HOLE(obp))
			dsl_free(spa_get_dsl(zio->io_spa), zio->io_txg, obp);
		arc_release(dr->dt.dl.dr_data, db);
	}
	mutex_exit(&db->db_mtx);

	dbuf_write_done(zio, NULL, db);
}

/* Issue I/O to commit a dirty buffer to disk.
 * The new zio is created as a child of the parent's zio (the parent
 * indirect block's pending write, or the dnode's zio) and is stored in
 * dr->dr_zio.
 */
static void
dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db = dr->dr_dbuf;
	dnode_t *dn;
	objset_t *os;
	dmu_buf_impl_t *parent = db->db_parent;
	uint64_t txg = tx->tx_txg;
	zbookmark_phys_t zb;
	zio_prop_t zp;
	zio_t *zio;
	int wp_flag = 0;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	os = dn->dn_objset;

	if (db->db_state != DB_NOFILL) {
		if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) {
			/*
			 * Private object buffers are released here rather
			 * than in dbuf_dirty() since they are only modified
			 * in the syncing context and we don't want the
			 * overhead of making multiple copies of the data.
			 */
			if (BP_IS_HOLE(db->db_blkptr)) {
				arc_buf_thaw(data);
			} else {
				dbuf_release_bp(db);
			}
		}
	}

	if (parent != dn->dn_dbuf) {
		/* Our parent is an indirect block. */
		/* We have a dirty parent that has been scheduled for write. */
		ASSERT(parent && parent->db_data_pending);
		/* Our parent's buffer is one level closer to the dnode. */
		ASSERT(db->db_level == parent->db_level-1);
		/*
		 * We're about to modify our parent's db_data by modifying
		 * our block pointer, so the parent must be released.
		 */
		ASSERT(arc_released(parent->db_buf));
		zio = parent->db_data_pending->dr_zio;
	} else {
		/* Our parent is the dnode itself. */
		ASSERT((db->db_level == dn->dn_phys->dn_nlevels-1 &&
		    db->db_blkid != DMU_SPILL_BLKID) ||
		    (db->db_blkid == DMU_SPILL_BLKID && db->db_level == 0));
		if (db->db_blkid != DMU_SPILL_BLKID)
			ASSERT3P(db->db_blkptr, ==,
			    &dn->dn_phys->dn_blkptr[db->db_blkid]);
		zio = dn->dn_zio;
	}

	ASSERT(db->db_level == 0 || data == db->db_buf);
	ASSERT3U(db->db_blkptr->blk_birth, <=, txg);
	ASSERT(zio);

	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    db->db.db_object, db->db_level, db->db_blkid);

	/* Pick the write policy (checksum, compression, copies, etc.). */
	if (db->db_blkid == DMU_SPILL_BLKID)
		wp_flag = WP_SPILL;
	wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;

	dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
	DB_DNODE_EXIT(db);

	if (db->db_level == 0 &&
	    dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
		/*
		 * The BP for this block has been provided by open context
		 * (by dmu_sync() or dmu_buf_write_embedded()).
		 */
		void *contents = (data != NULL) ? data->b_data : NULL;

		dr->dr_zio = zio_write(zio, os->os_spa, txg,
		    db->db_blkptr, contents, db->db.db_size, &zp,
		    dbuf_write_override_ready, NULL, dbuf_write_override_done,
		    dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
		mutex_enter(&db->db_mtx);
		dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
		zio_write_override(dr->dr_zio, &dr->dt.dl.dr_overridden_by,
		    dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite);
		mutex_exit(&db->db_mtx);
	} else if (db->db_state == DB_NOFILL) {
		/* NOFILL: issue a data-less write (ZIO_FLAG_NODATA). */
		ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
		    zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
		dr->dr_zio = zio_write(zio, os->os_spa, txg,
		    db->db_blkptr, NULL, db->db.db_size, &zp,
		    dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
		    ZIO_PRIORITY_ASYNC_WRITE,
		    ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
	} else {
		/* Common case: write the ARC buffer through arc_write(). */
		ASSERT(arc_released(data));
		dr->dr_zio = arc_write(zio, os->os_spa, txg,
		    db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db),
		    DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
		    dbuf_write_physdone, dbuf_write_done, db,
		    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
	}
}