1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*f65e61c0Sahrens * Common Development and Distribution License (the "License"). 6*f65e61c0Sahrens * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22*f65e61c0Sahrens * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens 29fa9e4066Sahrens /* 30fa9e4066Sahrens * This file contains the top half of the zfs directory structure 31fa9e4066Sahrens * implementation. The bottom half is in zap_leaf.c. 32fa9e4066Sahrens * 33fa9e4066Sahrens * The zdir is an extendable hash data structure. There is a table of 34fa9e4066Sahrens * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are 35fa9e4066Sahrens * each a constant size and hold a variable number of directory entries. 36fa9e4066Sahrens * The buckets (aka "leaf nodes") are implemented in zap_leaf.c. 37fa9e4066Sahrens * 38fa9e4066Sahrens * The pointer table holds a power of 2 number of pointers. 39fa9e4066Sahrens * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to 40fa9e4066Sahrens * by the pointer at index i in the table holds entries whose hash value 41fa9e4066Sahrens * has a zd_prefix_len - bit prefix 42fa9e4066Sahrens */ 43fa9e4066Sahrens 44fa9e4066Sahrens #include <sys/spa.h> 45fa9e4066Sahrens #include <sys/dmu.h> 46fa9e4066Sahrens #include <sys/zfs_context.h> 47fa9e4066Sahrens #include <sys/zap.h> 48fa9e4066Sahrens #include <sys/zap_impl.h> 49fa9e4066Sahrens #include <sys/zap_leaf.h> 50fa9e4066Sahrens 51*f65e61c0Sahrens #define MIN_FREE(l) (ZAP_LEAF_NUMCHUNKS(l)*9/10) 52*f65e61c0Sahrens 53*f65e61c0Sahrens int fzap_default_block_shift = 14; /* 16k blocksize */ 54fa9e4066Sahrens 55fa9e4066Sahrens static void zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx); 56fa9e4066Sahrens static int zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx); 57fa9e4066Sahrens static zap_leaf_t *zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, 58fa9e4066Sahrens dmu_tx_t *tx, krw_t lt); 59fa9e4066Sahrens static void zap_leaf_pageout(dmu_buf_t *db, void *vl); 60fa9e4066Sahrens 61fa9e4066Sahrens 62fa9e4066Sahrens void 63fa9e4066Sahrens fzap_byteswap(void *vbuf, size_t size) 64fa9e4066Sahrens { 65fa9e4066Sahrens uint64_t block_type; 66fa9e4066Sahrens 67fa9e4066Sahrens block_type = *(uint64_t *)vbuf; 68fa9e4066Sahrens 69fa9e4066Sahrens switch (block_type) { 70fa9e4066Sahrens case ZBT_LEAF: 71fa9e4066Sahrens case BSWAP_64(ZBT_LEAF): 72*f65e61c0Sahrens zap_leaf_byteswap(vbuf, size); 73fa9e4066Sahrens return; 74fa9e4066Sahrens case ZBT_HEADER: 75fa9e4066Sahrens case BSWAP_64(ZBT_HEADER): 76fa9e4066Sahrens default: 77fa9e4066Sahrens /* it's a ptrtbl block */ 78*f65e61c0Sahrens byteswap_uint64_array(vbuf, size); 79fa9e4066Sahrens return; 80fa9e4066Sahrens } 81fa9e4066Sahrens } 82fa9e4066Sahrens 83fa9e4066Sahrens void 84fa9e4066Sahrens fzap_upgrade(zap_t *zap, dmu_tx_t *tx) 85fa9e4066Sahrens { 86fa9e4066Sahrens dmu_buf_t *db; 87fa9e4066Sahrens zap_leaf_t *l; 88fa9e4066Sahrens int i; 89fa9e4066Sahrens zap_phys_t *zp; 90fa9e4066Sahrens 91fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 92fa9e4066Sahrens zap->zap_ismicro = FALSE; 93fa9e4066Sahrens 94fa9e4066Sahrens (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, 95fa9e4066Sahrens &zap->zap_f.zap_phys, zap_pageout); 96fa9e4066Sahrens 97fa9e4066Sahrens mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 98*f65e61c0Sahrens zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1; 99fa9e4066Sahrens 100fa9e4066Sahrens zp = zap->zap_f.zap_phys; 101fa9e4066Sahrens /* 102fa9e4066Sahrens * explicitly zero it since it might be coming from an 103fa9e4066Sahrens * initialized microzap 104fa9e4066Sahrens */ 105*f65e61c0Sahrens bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size); 106fa9e4066Sahrens zp->zap_block_type = ZBT_HEADER; 107fa9e4066Sahrens zp->zap_magic = ZAP_MAGIC; 108fa9e4066Sahrens 109*f65e61c0Sahrens zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap); 110fa9e4066Sahrens 111fa9e4066Sahrens zp->zap_freeblk = 2; /* block 1 will be the first leaf */ 112fa9e4066Sahrens zp->zap_num_leafs = 1; 113fa9e4066Sahrens zp->zap_num_entries = 0; 114fa9e4066Sahrens zp->zap_salt = zap->zap_salt; 115fa9e4066Sahrens 116*f65e61c0Sahrens /* block 1 will be the first leaf */ 117*f65e61c0Sahrens for (i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++) 118*f65e61c0Sahrens ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1; 119fa9e4066Sahrens 120fa9e4066Sahrens /* 121fa9e4066Sahrens * set up block 1 - the first leaf 122fa9e4066Sahrens */ 123fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 124*f65e61c0Sahrens 1<<FZAP_BLOCK_SHIFT(zap)); 125fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 126fa9e4066Sahrens 127fa9e4066Sahrens l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); 128fa9e4066Sahrens l->l_dbuf = db; 129fa9e4066Sahrens l->l_phys = db->db_data; 130fa9e4066Sahrens 131fa9e4066Sahrens zap_leaf_init(l); 132fa9e4066Sahrens 133fa9e4066Sahrens kmem_free(l, sizeof (zap_leaf_t)); 134fa9e4066Sahrens dmu_buf_rele(db); 135fa9e4066Sahrens } 136fa9e4066Sahrens 137fa9e4066Sahrens static int 138fa9e4066Sahrens zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx) 139fa9e4066Sahrens { 140fa9e4066Sahrens if (RW_WRITE_HELD(&zap->zap_rwlock)) 141fa9e4066Sahrens return (1); 142fa9e4066Sahrens if (rw_tryupgrade(&zap->zap_rwlock)) { 143fa9e4066Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 144fa9e4066Sahrens return (1); 145fa9e4066Sahrens } 146fa9e4066Sahrens return (0); 147fa9e4066Sahrens } 148fa9e4066Sahrens 149fa9e4066Sahrens /* 150fa9e4066Sahrens * Generic routines for dealing with the pointer & cookie tables. 151fa9e4066Sahrens */ 152fa9e4066Sahrens 153fa9e4066Sahrens static void 154fa9e4066Sahrens zap_table_grow(zap_t *zap, zap_table_phys_t *tbl, 155fa9e4066Sahrens void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n), 156fa9e4066Sahrens dmu_tx_t *tx) 157fa9e4066Sahrens { 158fa9e4066Sahrens uint64_t b, newblk; 159fa9e4066Sahrens dmu_buf_t *db_old, *db_new; 160*f65e61c0Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 161*f65e61c0Sahrens int hepb = 1<<(bs-4); 162fa9e4066Sahrens /* hepb = half the number of entries in a block */ 163fa9e4066Sahrens 164fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 165fa9e4066Sahrens ASSERT(tbl->zt_blk != 0); 166fa9e4066Sahrens ASSERT(tbl->zt_numblks > 0); 167fa9e4066Sahrens 168fa9e4066Sahrens if (tbl->zt_nextblk != 0) { 169fa9e4066Sahrens newblk = tbl->zt_nextblk; 170fa9e4066Sahrens } else { 171fa9e4066Sahrens newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2, tx); 172fa9e4066Sahrens tbl->zt_nextblk = newblk; 173fa9e4066Sahrens ASSERT3U(tbl->zt_blks_copied, ==, 0); 174fa9e4066Sahrens dmu_prefetch(zap->zap_objset, zap->zap_object, 175*f65e61c0Sahrens tbl->zt_blk << bs, tbl->zt_numblks << bs); 176fa9e4066Sahrens } 177fa9e4066Sahrens 178fa9e4066Sahrens /* 179fa9e4066Sahrens * Copy the ptrtbl from the old to new location, leaving the odd 180fa9e4066Sahrens * entries blank as we go. 181fa9e4066Sahrens */ 182fa9e4066Sahrens 183fa9e4066Sahrens b = tbl->zt_blks_copied; 184fa9e4066Sahrens db_old = dmu_buf_hold(zap->zap_objset, zap->zap_object, 185*f65e61c0Sahrens (tbl->zt_blk + b) << bs); 186fa9e4066Sahrens dmu_buf_read(db_old); 187fa9e4066Sahrens 188fa9e4066Sahrens /* first half of entries in old[b] go to new[2*b+0] */ 189fa9e4066Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 190*f65e61c0Sahrens (newblk + 2*b+0) << bs); 191fa9e4066Sahrens dmu_buf_will_dirty(db_new, tx); 192fa9e4066Sahrens transfer_func(db_old->db_data, db_new->db_data, hepb); 193fa9e4066Sahrens dmu_buf_rele(db_new); 194fa9e4066Sahrens 195fa9e4066Sahrens /* second half of entries in old[b] go to new[2*b+1] */ 196fa9e4066Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 197*f65e61c0Sahrens (newblk + 2*b+1) << bs); 198fa9e4066Sahrens dmu_buf_will_dirty(db_new, tx); 199fa9e4066Sahrens transfer_func((uint64_t *)db_old->db_data + hepb, 200fa9e4066Sahrens db_new->db_data, hepb); 201fa9e4066Sahrens dmu_buf_rele(db_new); 202fa9e4066Sahrens 203fa9e4066Sahrens dmu_buf_rele(db_old); 204fa9e4066Sahrens 205fa9e4066Sahrens tbl->zt_blks_copied++; 206fa9e4066Sahrens 207fa9e4066Sahrens dprintf("copied block %llu of %llu\n", 208fa9e4066Sahrens tbl->zt_blks_copied, tbl->zt_numblks); 209fa9e4066Sahrens 210fa9e4066Sahrens if (tbl->zt_blks_copied == tbl->zt_numblks) { 211fa9e4066Sahrens dmu_free_range(zap->zap_objset, zap->zap_object, 212*f65e61c0Sahrens tbl->zt_blk << bs, tbl->zt_numblks << bs, tx); 213fa9e4066Sahrens 214fa9e4066Sahrens tbl->zt_blk = newblk; 215fa9e4066Sahrens tbl->zt_numblks *= 2; 216fa9e4066Sahrens tbl->zt_shift++; 217fa9e4066Sahrens tbl->zt_nextblk = 0; 218fa9e4066Sahrens tbl->zt_blks_copied = 0; 219fa9e4066Sahrens 220fa9e4066Sahrens dprintf("finished; numblocks now %llu (%lluk entries)\n", 221fa9e4066Sahrens tbl->zt_numblks, 1<<(tbl->zt_shift-10)); 222fa9e4066Sahrens } 223fa9e4066Sahrens } 224fa9e4066Sahrens 225fa9e4066Sahrens static uint64_t 226fa9e4066Sahrens zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val, 227fa9e4066Sahrens dmu_tx_t *tx) 228fa9e4066Sahrens { 229fa9e4066Sahrens uint64_t blk, off, oldval; 230fa9e4066Sahrens dmu_buf_t *db; 231*f65e61c0Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 232fa9e4066Sahrens 233fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 234fa9e4066Sahrens ASSERT(tbl->zt_blk != 0); 235fa9e4066Sahrens 236fa9e4066Sahrens dprintf("storing %llx at index %llx\n", val, idx); 237fa9e4066Sahrens 238*f65e61c0Sahrens blk = idx >> (bs-3); 239*f65e61c0Sahrens off = idx & ((1<<(bs-3))-1); 240fa9e4066Sahrens 241fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 242*f65e61c0Sahrens (tbl->zt_blk + blk) << bs); 243fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 244fa9e4066Sahrens oldval = ((uint64_t *)db->db_data)[off]; 245fa9e4066Sahrens ((uint64_t *)db->db_data)[off] = val; 246fa9e4066Sahrens dmu_buf_rele(db); 247fa9e4066Sahrens 248fa9e4066Sahrens if (tbl->zt_nextblk != 0) { 249fa9e4066Sahrens idx *= 2; 250*f65e61c0Sahrens blk = idx >> (bs-3); 251*f65e61c0Sahrens off = idx & ((1<<(bs-3))-1); 252fa9e4066Sahrens 253fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 254*f65e61c0Sahrens (tbl->zt_nextblk + blk) << bs); 255fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 256fa9e4066Sahrens ((uint64_t *)db->db_data)[off] = val; 257fa9e4066Sahrens ((uint64_t *)db->db_data)[off+1] = val; 258fa9e4066Sahrens dmu_buf_rele(db); 259fa9e4066Sahrens } 260fa9e4066Sahrens 261fa9e4066Sahrens return (oldval); 262fa9e4066Sahrens } 263fa9e4066Sahrens 264fa9e4066Sahrens static uint64_t 265fa9e4066Sahrens zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx) 266fa9e4066Sahrens { 267fa9e4066Sahrens uint64_t blk, off, val; 268fa9e4066Sahrens dmu_buf_t *db; 269*f65e61c0Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 270fa9e4066Sahrens 271fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 272fa9e4066Sahrens 273*f65e61c0Sahrens blk = idx >> (bs-3); 274*f65e61c0Sahrens off = idx & ((1<<(bs-3))-1); 275fa9e4066Sahrens 276fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 277*f65e61c0Sahrens (tbl->zt_blk + blk) << bs); 278fa9e4066Sahrens dmu_buf_read(db); 279fa9e4066Sahrens val = ((uint64_t *)db->db_data)[off]; 280fa9e4066Sahrens dmu_buf_rele(db); 281fa9e4066Sahrens return (val); 282fa9e4066Sahrens } 283fa9e4066Sahrens 284fa9e4066Sahrens /* 285fa9e4066Sahrens * Routines for growing the ptrtbl. 286fa9e4066Sahrens */ 287fa9e4066Sahrens 288fa9e4066Sahrens static void 289fa9e4066Sahrens zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n) 290fa9e4066Sahrens { 291fa9e4066Sahrens int i; 292fa9e4066Sahrens for (i = 0; i < n; i++) { 293fa9e4066Sahrens uint64_t lb = src[i]; 294fa9e4066Sahrens dst[2*i+0] = lb; 295fa9e4066Sahrens dst[2*i+1] = lb; 296fa9e4066Sahrens } 297fa9e4066Sahrens } 298fa9e4066Sahrens 299fa9e4066Sahrens static void 300fa9e4066Sahrens zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx) 301fa9e4066Sahrens { 302fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == 32) 303fa9e4066Sahrens return; 304fa9e4066Sahrens 305fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 306fa9e4066Sahrens /* 307*f65e61c0Sahrens * We are outgrowing the "embedded" ptrtbl (the one 308*f65e61c0Sahrens * stored in the header block). Give it its own entire 309*f65e61c0Sahrens * block, which will double the size of the ptrtbl. 310fa9e4066Sahrens */ 311fa9e4066Sahrens uint64_t newblk; 312fa9e4066Sahrens dmu_buf_t *db_new; 313fa9e4066Sahrens 314fa9e4066Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, 315*f65e61c0Sahrens ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); 316fa9e4066Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk, ==, 0); 317fa9e4066Sahrens 318fa9e4066Sahrens newblk = zap_allocate_blocks(zap, 1, tx); 319fa9e4066Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 320*f65e61c0Sahrens newblk << FZAP_BLOCK_SHIFT(zap)); 321fa9e4066Sahrens 322fa9e4066Sahrens dmu_buf_will_dirty(db_new, tx); 323*f65e61c0Sahrens zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), 324*f65e61c0Sahrens db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); 325fa9e4066Sahrens dmu_buf_rele(db_new); 326fa9e4066Sahrens 327fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk; 328fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1; 329fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++; 330fa9e4066Sahrens 331fa9e4066Sahrens ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, 332fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << 333*f65e61c0Sahrens (FZAP_BLOCK_SHIFT(zap)-3)); 334fa9e4066Sahrens } else { 335fa9e4066Sahrens zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 336fa9e4066Sahrens zap_ptrtbl_transfer, tx); 337fa9e4066Sahrens } 338fa9e4066Sahrens } 339fa9e4066Sahrens 340fa9e4066Sahrens static void 341fa9e4066Sahrens zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx) 342fa9e4066Sahrens { 343fa9e4066Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 344fa9e4066Sahrens mutex_enter(&zap->zap_f.zap_num_entries_mtx); 345fa9e4066Sahrens 346fa9e4066Sahrens ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta); 347fa9e4066Sahrens 348fa9e4066Sahrens zap->zap_f.zap_phys->zap_num_entries += delta; 349fa9e4066Sahrens 350fa9e4066Sahrens mutex_exit(&zap->zap_f.zap_num_entries_mtx); 351fa9e4066Sahrens } 352fa9e4066Sahrens 353fa9e4066Sahrens uint64_t 354fa9e4066Sahrens zap_allocate_blocks(zap_t *zap, int nblocks, dmu_tx_t *tx) 355fa9e4066Sahrens { 356fa9e4066Sahrens uint64_t newblk; 357fa9e4066Sahrens ASSERT(tx != NULL); 358fa9e4066Sahrens if (!RW_WRITE_HELD(&zap->zap_rwlock)) { 359fa9e4066Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 360fa9e4066Sahrens } 361fa9e4066Sahrens newblk = atomic_add_64_nv(&zap->zap_f.zap_phys->zap_freeblk, nblocks) - 362fa9e4066Sahrens nblocks; 363fa9e4066Sahrens return (newblk); 364fa9e4066Sahrens } 365fa9e4066Sahrens 366fa9e4066Sahrens 367fa9e4066Sahrens /* 368fa9e4066Sahrens * This function doesn't increment zap_num_leafs because it's used to 369fa9e4066Sahrens * allocate a leaf chain, which doesn't count against zap_num_leafs. 370fa9e4066Sahrens * The directory must be held exclusively for this tx. 371fa9e4066Sahrens */ 372fa9e4066Sahrens zap_leaf_t * 373fa9e4066Sahrens zap_create_leaf(zap_t *zap, dmu_tx_t *tx) 374fa9e4066Sahrens { 375fa9e4066Sahrens void *winner; 376fa9e4066Sahrens zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); 377fa9e4066Sahrens 378fa9e4066Sahrens ASSERT(tx != NULL); 379fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 380fa9e4066Sahrens /* hence we already dirtied zap->zap_dbuf */ 381fa9e4066Sahrens 382fa9e4066Sahrens rw_init(&l->l_rwlock, 0, 0, 0); 383fa9e4066Sahrens rw_enter(&l->l_rwlock, RW_WRITER); 384fa9e4066Sahrens l->l_blkid = zap_allocate_blocks(zap, 1, tx); 385fa9e4066Sahrens l->l_next = NULL; 386fa9e4066Sahrens l->l_dbuf = NULL; 387fa9e4066Sahrens l->l_phys = NULL; 388fa9e4066Sahrens 389fa9e4066Sahrens l->l_dbuf = dmu_buf_hold(zap->zap_objset, zap->zap_object, 390*f65e61c0Sahrens l->l_blkid << FZAP_BLOCK_SHIFT(zap)); 391fa9e4066Sahrens winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout); 392fa9e4066Sahrens ASSERT(winner == NULL); 393fa9e4066Sahrens dmu_buf_will_dirty(l->l_dbuf, tx); 394fa9e4066Sahrens 395fa9e4066Sahrens zap_leaf_init(l); 396fa9e4066Sahrens 397fa9e4066Sahrens return (l); 398fa9e4066Sahrens } 399fa9e4066Sahrens 400fa9e4066Sahrens /* ARGSUSED */ 401fa9e4066Sahrens void 402fa9e4066Sahrens zap_destroy_leaf(zap_t *zap, zap_leaf_t *l, dmu_tx_t *tx) 403fa9e4066Sahrens { 404fa9e4066Sahrens /* uint64_t offset = l->l_blkid << ZAP_BLOCK_SHIFT; */ 405fa9e4066Sahrens rw_exit(&l->l_rwlock); 406fa9e4066Sahrens dmu_buf_rele(l->l_dbuf); 407fa9e4066Sahrens /* XXX there are still holds on this block, so we can't free it? */ 408fa9e4066Sahrens /* dmu_free_range(zap->zap_objset, zap->zap_object, */ 409fa9e4066Sahrens /* offset, 1<<ZAP_BLOCK_SHIFT, tx); */ 410fa9e4066Sahrens } 411fa9e4066Sahrens 412fa9e4066Sahrens int 413fa9e4066Sahrens fzap_count(zap_t *zap, uint64_t *count) 414fa9e4066Sahrens { 415fa9e4066Sahrens ASSERT(!zap->zap_ismicro); 416fa9e4066Sahrens mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */ 417fa9e4066Sahrens *count = zap->zap_f.zap_phys->zap_num_entries; 418fa9e4066Sahrens mutex_exit(&zap->zap_f.zap_num_entries_mtx); 419fa9e4066Sahrens return (0); 420fa9e4066Sahrens } 421fa9e4066Sahrens 422fa9e4066Sahrens /* 423fa9e4066Sahrens * Routines for obtaining zap_leaf_t's 424fa9e4066Sahrens */ 425fa9e4066Sahrens 42687e5029aSahrens void 427fa9e4066Sahrens zap_put_leaf(zap_leaf_t *l) 428fa9e4066Sahrens { 429fa9e4066Sahrens zap_leaf_t *nl = l->l_next; 430fa9e4066Sahrens while (nl) { 431fa9e4066Sahrens zap_leaf_t *nnl = nl->l_next; 432fa9e4066Sahrens rw_exit(&nl->l_rwlock); 433fa9e4066Sahrens dmu_buf_rele(nl->l_dbuf); 434fa9e4066Sahrens nl = nnl; 435fa9e4066Sahrens } 436fa9e4066Sahrens rw_exit(&l->l_rwlock); 437fa9e4066Sahrens dmu_buf_rele(l->l_dbuf); 438fa9e4066Sahrens } 439fa9e4066Sahrens 440fa9e4066Sahrens _NOTE(ARGSUSED(0)) 441fa9e4066Sahrens static void 442fa9e4066Sahrens zap_leaf_pageout(dmu_buf_t *db, void *vl) 443fa9e4066Sahrens { 444fa9e4066Sahrens zap_leaf_t *l = vl; 445fa9e4066Sahrens 446fa9e4066Sahrens rw_destroy(&l->l_rwlock); 447fa9e4066Sahrens kmem_free(l, sizeof (zap_leaf_t)); 448fa9e4066Sahrens } 449fa9e4066Sahrens 450fa9e4066Sahrens static zap_leaf_t * 451fa9e4066Sahrens zap_open_leaf(uint64_t blkid, dmu_buf_t *db) 452fa9e4066Sahrens { 453fa9e4066Sahrens zap_leaf_t *l, *winner; 454fa9e4066Sahrens 455fa9e4066Sahrens ASSERT(blkid != 0); 456fa9e4066Sahrens 457fa9e4066Sahrens l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); 458fa9e4066Sahrens rw_init(&l->l_rwlock, 0, 0, 0); 459fa9e4066Sahrens rw_enter(&l->l_rwlock, RW_WRITER); 460fa9e4066Sahrens l->l_blkid = blkid; 461*f65e61c0Sahrens l->l_bs = highbit(db->db_size)-1; 462fa9e4066Sahrens l->l_next = NULL; 463fa9e4066Sahrens l->l_dbuf = db; 464fa9e4066Sahrens l->l_phys = NULL; 465fa9e4066Sahrens 466fa9e4066Sahrens winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout); 467fa9e4066Sahrens 468fa9e4066Sahrens rw_exit(&l->l_rwlock); 469fa9e4066Sahrens if (winner != NULL) { 470fa9e4066Sahrens /* someone else set it first */ 471fa9e4066Sahrens zap_leaf_pageout(NULL, l); 472fa9e4066Sahrens l = winner; 473fa9e4066Sahrens } 474fa9e4066Sahrens 475*f65e61c0Sahrens /* 476*f65e61c0Sahrens * There should be more hash entries than there can be 477*f65e61c0Sahrens * chunks to put in the hash table 478*f65e61c0Sahrens */ 479*f65e61c0Sahrens ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3); 480*f65e61c0Sahrens 481*f65e61c0Sahrens /* The chunks should begin at the end of the hash table */ 482*f65e61c0Sahrens ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==, 483*f65e61c0Sahrens &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]); 484*f65e61c0Sahrens 485*f65e61c0Sahrens /* The chunks should end at the end of the block */ 486*f65e61c0Sahrens ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) - 487*f65e61c0Sahrens (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size); 488*f65e61c0Sahrens 489fa9e4066Sahrens return (l); 490fa9e4066Sahrens } 491fa9e4066Sahrens 492fa9e4066Sahrens static zap_leaf_t * 493fa9e4066Sahrens zap_get_leaf_byblk_impl(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt) 494fa9e4066Sahrens { 495fa9e4066Sahrens dmu_buf_t *db; 496fa9e4066Sahrens zap_leaf_t *l; 497*f65e61c0Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 498fa9e4066Sahrens 499fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 500fa9e4066Sahrens 501*f65e61c0Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, blkid << bs); 502fa9e4066Sahrens 503fa9e4066Sahrens ASSERT3U(db->db_object, ==, zap->zap_object); 504*f65e61c0Sahrens ASSERT3U(db->db_offset, ==, blkid << bs); 505*f65e61c0Sahrens ASSERT3U(db->db_size, ==, 1 << bs); 506fa9e4066Sahrens ASSERT(blkid != 0); 507fa9e4066Sahrens 508fa9e4066Sahrens dmu_buf_read(db); 509fa9e4066Sahrens l = dmu_buf_get_user(db); 510fa9e4066Sahrens 511fa9e4066Sahrens if (l == NULL) 512fa9e4066Sahrens l = zap_open_leaf(blkid, db); 513fa9e4066Sahrens 514fa9e4066Sahrens rw_enter(&l->l_rwlock, lt); 515fa9e4066Sahrens /* 516fa9e4066Sahrens * Must lock before dirtying, otherwise l->l_phys could change, 517fa9e4066Sahrens * causing ASSERT below to fail. 518fa9e4066Sahrens */ 519fa9e4066Sahrens if (lt == RW_WRITER) 520fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 521fa9e4066Sahrens ASSERT3U(l->l_blkid, ==, blkid); 522fa9e4066Sahrens ASSERT3P(l->l_dbuf, ==, db); 523fa9e4066Sahrens ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data); 524fa9e4066Sahrens ASSERT3U(l->lh_block_type, ==, ZBT_LEAF); 525fa9e4066Sahrens ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 526fa9e4066Sahrens 527fa9e4066Sahrens return (l); 528fa9e4066Sahrens } 529fa9e4066Sahrens 530fa9e4066Sahrens static zap_leaf_t * 531fa9e4066Sahrens zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt) 532fa9e4066Sahrens { 533fa9e4066Sahrens zap_leaf_t *l, *nl; 534fa9e4066Sahrens 535fa9e4066Sahrens l = zap_get_leaf_byblk_impl(zap, blkid, tx, lt); 536fa9e4066Sahrens 537fa9e4066Sahrens nl = l; 538fa9e4066Sahrens while (nl->lh_next != 0) { 539fa9e4066Sahrens zap_leaf_t *nnl; 540fa9e4066Sahrens nnl = zap_get_leaf_byblk_impl(zap, nl->lh_next, tx, lt); 541fa9e4066Sahrens nl->l_next = nnl; 542fa9e4066Sahrens nl = nnl; 543fa9e4066Sahrens } 544fa9e4066Sahrens 545fa9e4066Sahrens return (l); 546fa9e4066Sahrens } 547fa9e4066Sahrens 548fa9e4066Sahrens static uint64_t 549fa9e4066Sahrens zap_idx_to_blk(zap_t *zap, uint64_t idx) 550fa9e4066Sahrens { 551fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 552fa9e4066Sahrens 553fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 554fa9e4066Sahrens ASSERT3U(idx, <, 555fa9e4066Sahrens (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift)); 556*f65e61c0Sahrens return (ZAP_EMBEDDED_PTRTBL_ENT(zap, idx)); 557fa9e4066Sahrens } else { 558fa9e4066Sahrens return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 559fa9e4066Sahrens idx)); 560fa9e4066Sahrens } 561fa9e4066Sahrens } 562fa9e4066Sahrens 563fa9e4066Sahrens static void 564fa9e4066Sahrens zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx) 565fa9e4066Sahrens { 566fa9e4066Sahrens ASSERT(tx != NULL); 567fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 568fa9e4066Sahrens 569fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) { 570*f65e61c0Sahrens ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk; 571fa9e4066Sahrens } else { 572fa9e4066Sahrens (void) zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 573fa9e4066Sahrens idx, blk, tx); 574fa9e4066Sahrens } 575fa9e4066Sahrens } 576fa9e4066Sahrens 577fa9e4066Sahrens static zap_leaf_t * 578fa9e4066Sahrens zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt) 579fa9e4066Sahrens { 580fa9e4066Sahrens uint64_t idx; 581fa9e4066Sahrens zap_leaf_t *l; 582fa9e4066Sahrens 583fa9e4066Sahrens ASSERT(zap->zap_dbuf == NULL || 584fa9e4066Sahrens zap->zap_f.zap_phys == zap->zap_dbuf->db_data); 585fa9e4066Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC); 586fa9e4066Sahrens idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 587fa9e4066Sahrens l = zap_get_leaf_byblk(zap, zap_idx_to_blk(zap, idx), tx, lt); 588fa9e4066Sahrens 589fa9e4066Sahrens ASSERT3U(ZAP_HASH_IDX(h, l->lh_prefix_len), ==, l->lh_prefix); 590fa9e4066Sahrens 591fa9e4066Sahrens return (l); 592fa9e4066Sahrens } 593fa9e4066Sahrens 594fa9e4066Sahrens 595fa9e4066Sahrens static zap_leaf_t * 596fa9e4066Sahrens zap_expand_leaf(zap_t *zap, zap_leaf_t *l, uint64_t hash, dmu_tx_t *tx) 597fa9e4066Sahrens { 598fa9e4066Sahrens zap_leaf_t *nl; 599fa9e4066Sahrens int prefix_diff, i, err; 600fa9e4066Sahrens uint64_t sibling; 601fa9e4066Sahrens 602fa9e4066Sahrens ASSERT3U(l->lh_prefix_len, <=, 603fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 604fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 605fa9e4066Sahrens 606fa9e4066Sahrens ASSERT3U(ZAP_HASH_IDX(hash, l->lh_prefix_len), ==, l->lh_prefix); 607fa9e4066Sahrens 608fa9e4066Sahrens if (zap_tryupgradedir(zap, tx) == 0) { 609fa9e4066Sahrens /* failed to upgrade */ 610fa9e4066Sahrens int old_prefix_len = l->lh_prefix_len; 611fa9e4066Sahrens objset_t *os = zap->zap_objset; 612fa9e4066Sahrens uint64_t object = zap->zap_object; 613fa9e4066Sahrens 614fa9e4066Sahrens zap_put_leaf(l); 615fa9e4066Sahrens zap_unlockdir(zap); 616fa9e4066Sahrens err = zap_lockdir(os, object, tx, RW_WRITER, FALSE, &zap); 617fa9e4066Sahrens ASSERT3U(err, ==, 0); 618fa9e4066Sahrens ASSERT(!zap->zap_ismicro); 619fa9e4066Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 620fa9e4066Sahrens 621fa9e4066Sahrens if (l->lh_prefix_len != old_prefix_len) 622fa9e4066Sahrens /* it split while our locks were down */ 623fa9e4066Sahrens return (l); 624fa9e4066Sahrens } 625fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 626fa9e4066Sahrens 627fa9e4066Sahrens if (l->lh_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { 628fa9e4066Sahrens /* There's only one pointer to us. Chain on another leaf blk. */ 629fa9e4066Sahrens (void) zap_leaf_chainmore(l, zap_create_leaf(zap, tx)); 630fa9e4066Sahrens dprintf("chaining leaf %x/%d\n", l->lh_prefix, 631fa9e4066Sahrens l->lh_prefix_len); 632fa9e4066Sahrens return (l); 633fa9e4066Sahrens } 634fa9e4066Sahrens 635fa9e4066Sahrens ASSERT3U(ZAP_HASH_IDX(hash, l->lh_prefix_len), ==, l->lh_prefix); 636fa9e4066Sahrens 637fa9e4066Sahrens /* There's more than one pointer to us. Split this leaf. */ 638fa9e4066Sahrens nl = zap_leaf_split(zap, l, tx); 639fa9e4066Sahrens 640fa9e4066Sahrens /* set sibling pointers */ 641fa9e4066Sahrens prefix_diff = 642fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - l->lh_prefix_len; 643fa9e4066Sahrens sibling = (ZAP_HASH_IDX(hash, l->lh_prefix_len) | 1) << prefix_diff; 644fa9e4066Sahrens for (i = 0; i < (1ULL<<prefix_diff); i++) { 645fa9e4066Sahrens ASSERT3U(zap_idx_to_blk(zap, sibling+i), ==, l->l_blkid); 646fa9e4066Sahrens zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx); 647fa9e4066Sahrens /* dprintf("set %d to %u %x\n", sibling+i, nl->l_blkid, nl); */ 648fa9e4066Sahrens } 649fa9e4066Sahrens 650fa9e4066Sahrens zap->zap_f.zap_phys->zap_num_leafs++; 651fa9e4066Sahrens 652fa9e4066Sahrens if (hash & (1ULL << (64 - l->lh_prefix_len))) { 653fa9e4066Sahrens /* we want the sibling */ 654fa9e4066Sahrens zap_put_leaf(l); 655fa9e4066Sahrens l = nl; 656fa9e4066Sahrens } else { 657fa9e4066Sahrens zap_put_leaf(nl); 658fa9e4066Sahrens } 659fa9e4066Sahrens 660fa9e4066Sahrens return (l); 661fa9e4066Sahrens } 662fa9e4066Sahrens 663fa9e4066Sahrens static void 664*f65e61c0Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap_t *zap, zap_leaf_t *l, dmu_tx_t *tx) 665fa9e4066Sahrens { 666fa9e4066Sahrens int shift, err; 667fa9e4066Sahrens 668fa9e4066Sahrens again: 669fa9e4066Sahrens shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 670fa9e4066Sahrens 671fa9e4066Sahrens if (l->lh_prefix_len == shift && 672*f65e61c0Sahrens (l->l_next != NULL || l->lh_nfree < MIN_FREE(l))) { 673fa9e4066Sahrens /* this leaf will soon make us grow the pointer table */ 674fa9e4066Sahrens 675fa9e4066Sahrens if (zap_tryupgradedir(zap, tx) == 0) { 676fa9e4066Sahrens objset_t *os = zap->zap_objset; 677fa9e4066Sahrens uint64_t zapobj = zap->zap_object; 678fa9e4066Sahrens uint64_t blkid = l->l_blkid; 679fa9e4066Sahrens 680fa9e4066Sahrens zap_put_leaf(l); 681fa9e4066Sahrens zap_unlockdir(zap); 682fa9e4066Sahrens err = zap_lockdir(os, zapobj, tx, 683fa9e4066Sahrens RW_WRITER, FALSE, &zap); 684fa9e4066Sahrens ASSERT3U(err, ==, 0); 685fa9e4066Sahrens l = zap_get_leaf_byblk(zap, blkid, tx, RW_READER); 686fa9e4066Sahrens goto again; 687fa9e4066Sahrens } 688fa9e4066Sahrens 689fa9e4066Sahrens zap_put_leaf(l); 690fa9e4066Sahrens zap_grow_ptrtbl(zap, tx); 691fa9e4066Sahrens } else { 692fa9e4066Sahrens zap_put_leaf(l); 693fa9e4066Sahrens } 694fa9e4066Sahrens } 695fa9e4066Sahrens 696fa9e4066Sahrens 697fa9e4066Sahrens static int 698fa9e4066Sahrens fzap_checksize(uint64_t integer_size, uint64_t num_integers) 699fa9e4066Sahrens { 700fa9e4066Sahrens /* Only integer sizes supported by C */ 701fa9e4066Sahrens switch (integer_size) { 702fa9e4066Sahrens case 1: 703fa9e4066Sahrens case 2: 704fa9e4066Sahrens case 4: 705fa9e4066Sahrens case 8: 706fa9e4066Sahrens break; 707fa9e4066Sahrens default: 708fa9e4066Sahrens return (EINVAL); 709fa9e4066Sahrens } 710fa9e4066Sahrens 711fa9e4066Sahrens /* Make sure we won't overflow */ 712fa9e4066Sahrens if (integer_size * num_integers < num_integers) 713fa9e4066Sahrens return (EINVAL); 714*f65e61c0Sahrens if (integer_size * num_integers > (1<<fzap_default_block_shift)) 715fa9e4066Sahrens return (EINVAL); 716fa9e4066Sahrens 717fa9e4066Sahrens return (0); 718fa9e4066Sahrens } 719fa9e4066Sahrens 720fa9e4066Sahrens /* 721fa9e4066Sahrens * Routines for maniplulating attributes. 722fa9e4066Sahrens */ 723fa9e4066Sahrens int 724fa9e4066Sahrens fzap_lookup(zap_t *zap, const char *name, 725fa9e4066Sahrens uint64_t integer_size, uint64_t num_integers, void *buf) 726fa9e4066Sahrens { 727fa9e4066Sahrens zap_leaf_t *l; 728fa9e4066Sahrens int err; 729fa9e4066Sahrens uint64_t hash; 730fa9e4066Sahrens zap_entry_handle_t zeh; 731fa9e4066Sahrens 732fa9e4066Sahrens err = fzap_checksize(integer_size, num_integers); 733fa9e4066Sahrens if (err != 0) 734fa9e4066Sahrens return (err); 735fa9e4066Sahrens 736fa9e4066Sahrens hash = zap_hash(zap, name); 737fa9e4066Sahrens l = zap_deref_leaf(zap, hash, NULL, RW_READER); 738fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 739fa9e4066Sahrens if (err != 0) 740fa9e4066Sahrens goto out; 741fa9e4066Sahrens err = zap_entry_read(&zeh, integer_size, num_integers, buf); 742fa9e4066Sahrens out: 743fa9e4066Sahrens zap_put_leaf(l); 744fa9e4066Sahrens return (err); 745fa9e4066Sahrens } 746fa9e4066Sahrens 747fa9e4066Sahrens int 748fa9e4066Sahrens fzap_add_cd(zap_t *zap, const char *name, 749fa9e4066Sahrens uint64_t integer_size, uint64_t num_integers, 750fa9e4066Sahrens const void *val, uint32_t cd, dmu_tx_t *tx, zap_leaf_t **lp) 751fa9e4066Sahrens { 752fa9e4066Sahrens zap_leaf_t *l; 753fa9e4066Sahrens uint64_t hash; 754fa9e4066Sahrens int err; 755fa9e4066Sahrens zap_entry_handle_t zeh; 756fa9e4066Sahrens 757fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 758fa9e4066Sahrens ASSERT(!zap->zap_ismicro); 759fa9e4066Sahrens ASSERT(fzap_checksize(integer_size, num_integers) == 0); 760fa9e4066Sahrens 761fa9e4066Sahrens hash = zap_hash(zap, name); 762fa9e4066Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 763fa9e4066Sahrens retry: 764fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 765fa9e4066Sahrens if (err == 0) { 766fa9e4066Sahrens err = EEXIST; 767fa9e4066Sahrens goto out; 768fa9e4066Sahrens } 769fa9e4066Sahrens ASSERT(err == ENOENT); 770fa9e4066Sahrens 771fa9e4066Sahrens /* XXX If this leaf is chained, split it if we can. */ 772fa9e4066Sahrens err = zap_entry_create(l, name, hash, cd, 773fa9e4066Sahrens integer_size, num_integers, val, &zeh); 774fa9e4066Sahrens 775fa9e4066Sahrens if (err == 0) { 776fa9e4066Sahrens zap_increment_num_entries(zap, 1, tx); 777fa9e4066Sahrens } else if (err == EAGAIN) { 778fa9e4066Sahrens l = zap_expand_leaf(zap, l, hash, tx); 779fa9e4066Sahrens goto retry; 780fa9e4066Sahrens } 781fa9e4066Sahrens 782fa9e4066Sahrens out: 783fa9e4066Sahrens if (lp) 784fa9e4066Sahrens *lp = l; 785fa9e4066Sahrens else 786fa9e4066Sahrens zap_put_leaf(l); 787fa9e4066Sahrens return (err); 788fa9e4066Sahrens } 789fa9e4066Sahrens 790fa9e4066Sahrens int 791fa9e4066Sahrens fzap_add(zap_t *zap, const char *name, 792fa9e4066Sahrens uint64_t integer_size, uint64_t num_integers, 793fa9e4066Sahrens const void *val, dmu_tx_t *tx) 794fa9e4066Sahrens { 795fa9e4066Sahrens int err; 796fa9e4066Sahrens zap_leaf_t *l; 797fa9e4066Sahrens 798fa9e4066Sahrens err = fzap_checksize(integer_size, num_integers); 799fa9e4066Sahrens if (err != 0) 800fa9e4066Sahrens return (err); 801fa9e4066Sahrens 802fa9e4066Sahrens err = fzap_add_cd(zap, name, integer_size, num_integers, 803fa9e4066Sahrens val, ZAP_MAXCD, tx, &l); 804fa9e4066Sahrens 805fa9e4066Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap, l, tx); 806fa9e4066Sahrens return (err); 807fa9e4066Sahrens } 808fa9e4066Sahrens 809fa9e4066Sahrens int 810fa9e4066Sahrens fzap_update(zap_t *zap, const char *name, 811fa9e4066Sahrens int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 812fa9e4066Sahrens { 813fa9e4066Sahrens zap_leaf_t *l; 814fa9e4066Sahrens uint64_t hash; 815fa9e4066Sahrens int err, create; 816fa9e4066Sahrens zap_entry_handle_t zeh; 817fa9e4066Sahrens 818fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 819fa9e4066Sahrens err = fzap_checksize(integer_size, num_integers); 820fa9e4066Sahrens if (err != 0) 821fa9e4066Sahrens return (err); 822fa9e4066Sahrens 823fa9e4066Sahrens hash = zap_hash(zap, name); 824fa9e4066Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 825fa9e4066Sahrens retry: 826fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 827fa9e4066Sahrens create = (err == ENOENT); 828fa9e4066Sahrens ASSERT(err == 0 || err == ENOENT); 829fa9e4066Sahrens 830fa9e4066Sahrens /* XXX If this leaf is chained, split it if we can. */ 831fa9e4066Sahrens 832fa9e4066Sahrens if (create) { 833fa9e4066Sahrens err = zap_entry_create(l, name, hash, ZAP_MAXCD, 834fa9e4066Sahrens integer_size, num_integers, val, &zeh); 835fa9e4066Sahrens if (err == 0) 836fa9e4066Sahrens zap_increment_num_entries(zap, 1, tx); 837fa9e4066Sahrens } else { 838fa9e4066Sahrens err = zap_entry_update(&zeh, integer_size, num_integers, val); 839fa9e4066Sahrens } 840fa9e4066Sahrens 841fa9e4066Sahrens if (err == EAGAIN) { 842fa9e4066Sahrens l = zap_expand_leaf(zap, l, hash, tx); 843fa9e4066Sahrens goto retry; 844fa9e4066Sahrens } 845fa9e4066Sahrens 846fa9e4066Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap, l, tx); 847fa9e4066Sahrens return (err); 848fa9e4066Sahrens } 849fa9e4066Sahrens 850fa9e4066Sahrens int 851fa9e4066Sahrens fzap_length(zap_t *zap, const char *name, 852fa9e4066Sahrens uint64_t *integer_size, uint64_t *num_integers) 853fa9e4066Sahrens { 854fa9e4066Sahrens zap_leaf_t *l; 855fa9e4066Sahrens int err; 856fa9e4066Sahrens uint64_t hash; 857fa9e4066Sahrens zap_entry_handle_t zeh; 858fa9e4066Sahrens 859fa9e4066Sahrens hash = zap_hash(zap, name); 860fa9e4066Sahrens l = zap_deref_leaf(zap, hash, NULL, RW_READER); 861fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 862fa9e4066Sahrens if (err != 0) 863fa9e4066Sahrens goto out; 864fa9e4066Sahrens 865fa9e4066Sahrens if (integer_size) 866fa9e4066Sahrens *integer_size = zeh.zeh_integer_size; 867fa9e4066Sahrens if (num_integers) 868fa9e4066Sahrens *num_integers = zeh.zeh_num_integers; 869fa9e4066Sahrens out: 870fa9e4066Sahrens zap_put_leaf(l); 871fa9e4066Sahrens return (err); 872fa9e4066Sahrens } 873fa9e4066Sahrens 874fa9e4066Sahrens int 875fa9e4066Sahrens fzap_remove(zap_t *zap, const char *name, dmu_tx_t *tx) 876fa9e4066Sahrens { 877fa9e4066Sahrens zap_leaf_t *l; 878fa9e4066Sahrens uint64_t hash; 879fa9e4066Sahrens int err; 880fa9e4066Sahrens zap_entry_handle_t zeh; 881fa9e4066Sahrens 882fa9e4066Sahrens hash = zap_hash(zap, name); 883fa9e4066Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 884fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 885fa9e4066Sahrens if (err == 0) { 886fa9e4066Sahrens zap_entry_remove(&zeh); 887fa9e4066Sahrens zap_increment_num_entries(zap, -1, tx); 888fa9e4066Sahrens } 889fa9e4066Sahrens zap_put_leaf(l); 890fa9e4066Sahrens dprintf("fzap_remove: ds=%p obj=%llu name=%s err=%d\n", 891fa9e4066Sahrens zap->zap_objset, zap->zap_object, name, err); 892fa9e4066Sahrens return (err); 893fa9e4066Sahrens } 894fa9e4066Sahrens 895fa9e4066Sahrens int 896fa9e4066Sahrens zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, char *name) 897fa9e4066Sahrens { 898fa9e4066Sahrens zap_cursor_t zc; 899fa9e4066Sahrens zap_attribute_t *za; 900fa9e4066Sahrens int err; 901fa9e4066Sahrens 902fa9e4066Sahrens za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 903fa9e4066Sahrens for (zap_cursor_init(&zc, os, zapobj); 904fa9e4066Sahrens (err = zap_cursor_retrieve(&zc, za)) == 0; 905fa9e4066Sahrens zap_cursor_advance(&zc)) { 906fa9e4066Sahrens if (za->za_first_integer == value) { 907fa9e4066Sahrens (void) strcpy(name, za->za_name); 908fa9e4066Sahrens break; 909fa9e4066Sahrens } 910fa9e4066Sahrens } 91187e5029aSahrens zap_cursor_fini(&zc); 912fa9e4066Sahrens kmem_free(za, sizeof (zap_attribute_t)); 913fa9e4066Sahrens return (err); 914fa9e4066Sahrens } 915fa9e4066Sahrens 916fa9e4066Sahrens 917fa9e4066Sahrens /* 918fa9e4066Sahrens * Routines for iterating over the attributes. 919fa9e4066Sahrens */ 920fa9e4066Sahrens 921fa9e4066Sahrens int 922fa9e4066Sahrens fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) 923fa9e4066Sahrens { 924fa9e4066Sahrens int err = ENOENT; 925fa9e4066Sahrens zap_entry_handle_t zeh; 926fa9e4066Sahrens zap_leaf_t *l; 927fa9e4066Sahrens 928fa9e4066Sahrens /* retrieve the next entry at or after zc_hash/zc_cd */ 929fa9e4066Sahrens /* if no entry, return ENOENT */ 930fa9e4066Sahrens 93187e5029aSahrens if (zc->zc_leaf && 93287e5029aSahrens (ZAP_HASH_IDX(zc->zc_hash, zc->zc_leaf->lh_prefix_len) != 93387e5029aSahrens zc->zc_leaf->lh_prefix)) { 93487e5029aSahrens rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 93587e5029aSahrens zap_put_leaf(zc->zc_leaf); 93687e5029aSahrens zc->zc_leaf = NULL; 93787e5029aSahrens } 93887e5029aSahrens 939fa9e4066Sahrens again: 94087e5029aSahrens if (zc->zc_leaf == NULL) { 94187e5029aSahrens zc->zc_leaf = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER); 94287e5029aSahrens } else { 94387e5029aSahrens rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 94487e5029aSahrens } 94587e5029aSahrens l = zc->zc_leaf; 94687e5029aSahrens 947fa9e4066Sahrens err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh); 948fa9e4066Sahrens 949fa9e4066Sahrens if (err == ENOENT) { 950fa9e4066Sahrens uint64_t nocare = (1ULL << (64 - l->lh_prefix_len)) - 1; 951fa9e4066Sahrens zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1; 952fa9e4066Sahrens zc->zc_cd = 0; 953fa9e4066Sahrens if (l->lh_prefix_len == 0 || zc->zc_hash == 0) { 954fa9e4066Sahrens zc->zc_hash = -1ULL; 955fa9e4066Sahrens } else { 95687e5029aSahrens zap_put_leaf(zc->zc_leaf); 95787e5029aSahrens zc->zc_leaf = NULL; 958fa9e4066Sahrens goto again; 959fa9e4066Sahrens } 960fa9e4066Sahrens } 961fa9e4066Sahrens 962fa9e4066Sahrens if (err == 0) { 963fa9e4066Sahrens zc->zc_hash = zeh.zeh_hash; 964fa9e4066Sahrens zc->zc_cd = zeh.zeh_cd; 965fa9e4066Sahrens za->za_integer_length = zeh.zeh_integer_size; 966fa9e4066Sahrens za->za_num_integers = zeh.zeh_num_integers; 967fa9e4066Sahrens if (zeh.zeh_num_integers == 0) { 968fa9e4066Sahrens za->za_first_integer = 0; 969fa9e4066Sahrens } else { 970fa9e4066Sahrens err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer); 971fa9e4066Sahrens ASSERT(err == 0 || err == EOVERFLOW); 972fa9e4066Sahrens } 973fa9e4066Sahrens err = zap_entry_read_name(&zeh, 974fa9e4066Sahrens sizeof (za->za_name), za->za_name); 975fa9e4066Sahrens ASSERT(err == 0); 976fa9e4066Sahrens } 97787e5029aSahrens rw_exit(&zc->zc_leaf->l_rwlock); 978fa9e4066Sahrens return (err); 979fa9e4066Sahrens } 980fa9e4066Sahrens 981fa9e4066Sahrens 982fa9e4066Sahrens static void 983fa9e4066Sahrens zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs) 984fa9e4066Sahrens { 985fa9e4066Sahrens int i; 986fa9e4066Sahrens uint64_t lastblk = 0; 987fa9e4066Sahrens 988fa9e4066Sahrens /* 989fa9e4066Sahrens * NB: if a leaf has more pointers than an entire ptrtbl block 990fa9e4066Sahrens * can hold, then it'll be accounted for more than once, since 991fa9e4066Sahrens * we won't have lastblk. 992fa9e4066Sahrens */ 993fa9e4066Sahrens for (i = 0; i < len; i++) { 994fa9e4066Sahrens zap_leaf_t *l; 995fa9e4066Sahrens 996fa9e4066Sahrens if (tbl[i] == lastblk) 997fa9e4066Sahrens continue; 998fa9e4066Sahrens lastblk = tbl[i]; 999fa9e4066Sahrens 1000fa9e4066Sahrens l = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER); 1001fa9e4066Sahrens 1002fa9e4066Sahrens zap_stats_leaf(zap, l, zs); 1003fa9e4066Sahrens zap_put_leaf(l); 1004fa9e4066Sahrens } 1005fa9e4066Sahrens } 1006fa9e4066Sahrens 1007fa9e4066Sahrens void 1008fa9e4066Sahrens fzap_get_stats(zap_t *zap, zap_stats_t *zs) 1009fa9e4066Sahrens { 1010*f65e61c0Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 1011fa9e4066Sahrens zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 1012*f65e61c0Sahrens zs->zs_blocksize = 1ULL << bs; 1013fa9e4066Sahrens zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs; 1014fa9e4066Sahrens zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries; 1015fa9e4066Sahrens zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk; 1016fa9e4066Sahrens 1017fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 1018fa9e4066Sahrens /* the ptrtbl is entirely in the header block. */ 1019*f65e61c0Sahrens zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), 1020*f65e61c0Sahrens 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs); 1021fa9e4066Sahrens } else { 1022fa9e4066Sahrens int b; 1023fa9e4066Sahrens 1024fa9e4066Sahrens dmu_prefetch(zap->zap_objset, zap->zap_object, 1025*f65e61c0Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs, 1026*f65e61c0Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs); 1027fa9e4066Sahrens 1028fa9e4066Sahrens for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; 1029fa9e4066Sahrens b++) { 1030fa9e4066Sahrens dmu_buf_t *db; 1031fa9e4066Sahrens 1032fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 1033*f65e61c0Sahrens (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs); 1034fa9e4066Sahrens dmu_buf_read(db); 1035*f65e61c0Sahrens zap_stats_ptrtbl(zap, db->db_data, 1<<(bs-3), zs); 1036fa9e4066Sahrens dmu_buf_rele(db); 1037fa9e4066Sahrens } 1038fa9e4066Sahrens } 1039fa9e4066Sahrens } 1040