1*fa9e4066Sahrens /* 2*fa9e4066Sahrens * CDDL HEADER START 3*fa9e4066Sahrens * 4*fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*fa9e4066Sahrens * Common Development and Distribution License, Version 1.0 only 6*fa9e4066Sahrens * (the "License"). You may not use this file except in compliance 7*fa9e4066Sahrens * with the License. 8*fa9e4066Sahrens * 9*fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 11*fa9e4066Sahrens * See the License for the specific language governing permissions 12*fa9e4066Sahrens * and limitations under the License. 13*fa9e4066Sahrens * 14*fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*fa9e4066Sahrens * 20*fa9e4066Sahrens * CDDL HEADER END 21*fa9e4066Sahrens */ 22*fa9e4066Sahrens /* 23*fa9e4066Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*fa9e4066Sahrens * Use is subject to license terms. 25*fa9e4066Sahrens */ 26*fa9e4066Sahrens 27*fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*fa9e4066Sahrens 29*fa9e4066Sahrens 30*fa9e4066Sahrens /* 31*fa9e4066Sahrens * This file contains the top half of the zfs directory structure 32*fa9e4066Sahrens * implementation. The bottom half is in zap_leaf.c. 33*fa9e4066Sahrens * 34*fa9e4066Sahrens * The zdir is an extendable hash data structure. There is a table of 35*fa9e4066Sahrens * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are 36*fa9e4066Sahrens * each a constant size and hold a variable number of directory entries. 37*fa9e4066Sahrens * The buckets (aka "leaf nodes") are implemented in zap_leaf.c. 38*fa9e4066Sahrens * 39*fa9e4066Sahrens * The pointer table holds a power of 2 number of pointers. 40*fa9e4066Sahrens * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to 41*fa9e4066Sahrens * by the pointer at index i in the table holds entries whose hash value 42*fa9e4066Sahrens * has a zd_prefix_len - bit prefix 43*fa9e4066Sahrens */ 44*fa9e4066Sahrens 45*fa9e4066Sahrens #include <sys/spa.h> 46*fa9e4066Sahrens #include <sys/dmu.h> 47*fa9e4066Sahrens #include <sys/zfs_context.h> 48*fa9e4066Sahrens #include <sys/zap.h> 49*fa9e4066Sahrens #include <sys/zap_impl.h> 50*fa9e4066Sahrens #include <sys/zap_leaf.h> 51*fa9e4066Sahrens 52*fa9e4066Sahrens #define MIN_FREE (ZAP_LEAF_NUMCHUNKS*9/10) 53*fa9e4066Sahrens 54*fa9e4066Sahrens static void zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx); 55*fa9e4066Sahrens static int zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx); 56*fa9e4066Sahrens static zap_leaf_t *zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, 57*fa9e4066Sahrens dmu_tx_t *tx, krw_t lt); 58*fa9e4066Sahrens static void zap_put_leaf(zap_leaf_t *l); 59*fa9e4066Sahrens static void zap_leaf_pageout(dmu_buf_t *db, void *vl); 60*fa9e4066Sahrens 61*fa9e4066Sahrens 62*fa9e4066Sahrens void 63*fa9e4066Sahrens fzap_byteswap(void *vbuf, size_t size) 64*fa9e4066Sahrens { 65*fa9e4066Sahrens uint64_t block_type; 66*fa9e4066Sahrens 67*fa9e4066Sahrens ASSERT(size == (1<<ZAP_BLOCK_SHIFT)); 68*fa9e4066Sahrens block_type = *(uint64_t *)vbuf; 69*fa9e4066Sahrens 70*fa9e4066Sahrens switch (block_type) { 71*fa9e4066Sahrens case ZBT_LEAF: 72*fa9e4066Sahrens case BSWAP_64(ZBT_LEAF): 73*fa9e4066Sahrens zap_leaf_byteswap(vbuf); 74*fa9e4066Sahrens return; 75*fa9e4066Sahrens case ZBT_HEADER: 76*fa9e4066Sahrens case BSWAP_64(ZBT_HEADER): 77*fa9e4066Sahrens default: 78*fa9e4066Sahrens /* it's a ptrtbl block */ 79*fa9e4066Sahrens byteswap_uint64_array(vbuf, 1<<ZAP_BLOCK_SHIFT); 80*fa9e4066Sahrens return; 81*fa9e4066Sahrens } 82*fa9e4066Sahrens } 83*fa9e4066Sahrens 84*fa9e4066Sahrens void 85*fa9e4066Sahrens fzap_upgrade(zap_t *zap, dmu_tx_t *tx) 86*fa9e4066Sahrens { 87*fa9e4066Sahrens dmu_buf_t *db; 88*fa9e4066Sahrens zap_leaf_t *l; 89*fa9e4066Sahrens int i; 90*fa9e4066Sahrens zap_phys_t *zp; 91*fa9e4066Sahrens 92*fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 93*fa9e4066Sahrens zap->zap_ismicro = FALSE; 94*fa9e4066Sahrens 95*fa9e4066Sahrens (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, 96*fa9e4066Sahrens &zap->zap_f.zap_phys, zap_pageout); 97*fa9e4066Sahrens 98*fa9e4066Sahrens mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 99*fa9e4066Sahrens 100*fa9e4066Sahrens zp = zap->zap_f.zap_phys; 101*fa9e4066Sahrens /* 102*fa9e4066Sahrens * explicitly zero it since it might be coming from an 103*fa9e4066Sahrens * initialized microzap 104*fa9e4066Sahrens */ 105*fa9e4066Sahrens ASSERT3U(sizeof (zap_phys_t), ==, zap->zap_dbuf->db_size); 106*fa9e4066Sahrens bzero(zp, sizeof (zap_phys_t)); 107*fa9e4066Sahrens zp->zap_block_type = ZBT_HEADER; 108*fa9e4066Sahrens zp->zap_magic = ZAP_MAGIC; 109*fa9e4066Sahrens 110*fa9e4066Sahrens zp->zap_ptrtbl.zt_shift = ZAP_PTRTBL_MIN_SHIFT; 111*fa9e4066Sahrens 112*fa9e4066Sahrens zp->zap_freeblk = 2; /* block 1 will be the first leaf */ 113*fa9e4066Sahrens zp->zap_num_leafs = 1; 114*fa9e4066Sahrens zp->zap_num_entries = 0; 115*fa9e4066Sahrens zp->zap_salt = zap->zap_salt; 116*fa9e4066Sahrens 117*fa9e4066Sahrens for (i = 0; i < (1<<ZAP_PTRTBL_MIN_SHIFT); i++) 118*fa9e4066Sahrens zp->zap_leafs[i] = 1; /* block 1 will be the first leaf */ 119*fa9e4066Sahrens 120*fa9e4066Sahrens /* 121*fa9e4066Sahrens * set up block 1 - the first leaf 122*fa9e4066Sahrens */ 123*fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 124*fa9e4066Sahrens 1<<ZAP_BLOCK_SHIFT); 125*fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 126*fa9e4066Sahrens 127*fa9e4066Sahrens l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); 128*fa9e4066Sahrens l->l_dbuf = db; 129*fa9e4066Sahrens l->l_phys = db->db_data; 130*fa9e4066Sahrens 131*fa9e4066Sahrens zap_leaf_init(l); 132*fa9e4066Sahrens 133*fa9e4066Sahrens kmem_free(l, sizeof (zap_leaf_t)); 134*fa9e4066Sahrens dmu_buf_rele(db); 135*fa9e4066Sahrens } 136*fa9e4066Sahrens 137*fa9e4066Sahrens static int 138*fa9e4066Sahrens zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx) 139*fa9e4066Sahrens { 140*fa9e4066Sahrens if (RW_WRITE_HELD(&zap->zap_rwlock)) 141*fa9e4066Sahrens return (1); 142*fa9e4066Sahrens if (rw_tryupgrade(&zap->zap_rwlock)) { 143*fa9e4066Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 144*fa9e4066Sahrens return (1); 145*fa9e4066Sahrens } 146*fa9e4066Sahrens return (0); 147*fa9e4066Sahrens } 148*fa9e4066Sahrens 149*fa9e4066Sahrens /* 150*fa9e4066Sahrens * Generic routines for dealing with the pointer & cookie tables. 151*fa9e4066Sahrens */ 152*fa9e4066Sahrens 153*fa9e4066Sahrens static void 154*fa9e4066Sahrens zap_table_grow(zap_t *zap, zap_table_phys_t *tbl, 155*fa9e4066Sahrens void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n), 156*fa9e4066Sahrens dmu_tx_t *tx) 157*fa9e4066Sahrens { 158*fa9e4066Sahrens uint64_t b, newblk; 159*fa9e4066Sahrens dmu_buf_t *db_old, *db_new; 160*fa9e4066Sahrens int hepb = 1<<(ZAP_BLOCK_SHIFT-4); 161*fa9e4066Sahrens /* hepb = half the number of entries in a block */ 162*fa9e4066Sahrens 163*fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 164*fa9e4066Sahrens ASSERT(tbl->zt_blk != 0); 165*fa9e4066Sahrens ASSERT(tbl->zt_numblks > 0); 166*fa9e4066Sahrens 167*fa9e4066Sahrens if (tbl->zt_nextblk != 0) { 168*fa9e4066Sahrens newblk = tbl->zt_nextblk; 169*fa9e4066Sahrens } else { 170*fa9e4066Sahrens newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2, tx); 171*fa9e4066Sahrens tbl->zt_nextblk = newblk; 172*fa9e4066Sahrens ASSERT3U(tbl->zt_blks_copied, ==, 0); 173*fa9e4066Sahrens dmu_prefetch(zap->zap_objset, zap->zap_object, 174*fa9e4066Sahrens tbl->zt_blk << ZAP_BLOCK_SHIFT, tbl->zt_numblks << 175*fa9e4066Sahrens ZAP_BLOCK_SHIFT); 176*fa9e4066Sahrens } 177*fa9e4066Sahrens 178*fa9e4066Sahrens /* 179*fa9e4066Sahrens * Copy the ptrtbl from the old to new location, leaving the odd 180*fa9e4066Sahrens * entries blank as we go. 181*fa9e4066Sahrens */ 182*fa9e4066Sahrens 183*fa9e4066Sahrens b = tbl->zt_blks_copied; 184*fa9e4066Sahrens db_old = dmu_buf_hold(zap->zap_objset, zap->zap_object, 185*fa9e4066Sahrens (tbl->zt_blk + b) << ZAP_BLOCK_SHIFT); 186*fa9e4066Sahrens dmu_buf_read(db_old); 187*fa9e4066Sahrens 188*fa9e4066Sahrens /* first half of entries in old[b] go to new[2*b+0] */ 189*fa9e4066Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 190*fa9e4066Sahrens (newblk + 2*b+0) << ZAP_BLOCK_SHIFT); 191*fa9e4066Sahrens dmu_buf_will_dirty(db_new, tx); 192*fa9e4066Sahrens transfer_func(db_old->db_data, db_new->db_data, hepb); 193*fa9e4066Sahrens dmu_buf_rele(db_new); 194*fa9e4066Sahrens 195*fa9e4066Sahrens /* second half of entries in old[b] go to new[2*b+1] */ 196*fa9e4066Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 197*fa9e4066Sahrens (newblk + 2*b+1) << ZAP_BLOCK_SHIFT); 198*fa9e4066Sahrens dmu_buf_will_dirty(db_new, tx); 199*fa9e4066Sahrens transfer_func((uint64_t *)db_old->db_data + hepb, 200*fa9e4066Sahrens db_new->db_data, hepb); 201*fa9e4066Sahrens dmu_buf_rele(db_new); 202*fa9e4066Sahrens 203*fa9e4066Sahrens dmu_buf_rele(db_old); 204*fa9e4066Sahrens 205*fa9e4066Sahrens tbl->zt_blks_copied++; 206*fa9e4066Sahrens 207*fa9e4066Sahrens dprintf("copied block %llu of %llu\n", 208*fa9e4066Sahrens tbl->zt_blks_copied, tbl->zt_numblks); 209*fa9e4066Sahrens 210*fa9e4066Sahrens if (tbl->zt_blks_copied == tbl->zt_numblks) { 211*fa9e4066Sahrens dmu_free_range(zap->zap_objset, zap->zap_object, 212*fa9e4066Sahrens tbl->zt_blk << ZAP_BLOCK_SHIFT, 213*fa9e4066Sahrens tbl->zt_numblks << ZAP_BLOCK_SHIFT, tx); 214*fa9e4066Sahrens 215*fa9e4066Sahrens tbl->zt_blk = newblk; 216*fa9e4066Sahrens tbl->zt_numblks *= 2; 217*fa9e4066Sahrens tbl->zt_shift++; 218*fa9e4066Sahrens tbl->zt_nextblk = 0; 219*fa9e4066Sahrens tbl->zt_blks_copied = 0; 220*fa9e4066Sahrens 221*fa9e4066Sahrens dprintf("finished; numblocks now %llu (%lluk entries)\n", 222*fa9e4066Sahrens tbl->zt_numblks, 1<<(tbl->zt_shift-10)); 223*fa9e4066Sahrens } 224*fa9e4066Sahrens } 225*fa9e4066Sahrens 226*fa9e4066Sahrens static uint64_t 227*fa9e4066Sahrens zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val, 228*fa9e4066Sahrens dmu_tx_t *tx) 229*fa9e4066Sahrens { 230*fa9e4066Sahrens uint64_t blk, off, oldval; 231*fa9e4066Sahrens dmu_buf_t *db; 232*fa9e4066Sahrens 233*fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 234*fa9e4066Sahrens ASSERT(tbl->zt_blk != 0); 235*fa9e4066Sahrens 236*fa9e4066Sahrens dprintf("storing %llx at index %llx\n", val, idx); 237*fa9e4066Sahrens 238*fa9e4066Sahrens blk = idx >> (ZAP_BLOCK_SHIFT-3); 239*fa9e4066Sahrens off = idx & ((1<<(ZAP_BLOCK_SHIFT-3))-1); 240*fa9e4066Sahrens 241*fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 242*fa9e4066Sahrens (tbl->zt_blk + blk) << ZAP_BLOCK_SHIFT); 243*fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 244*fa9e4066Sahrens oldval = ((uint64_t *)db->db_data)[off]; 245*fa9e4066Sahrens ((uint64_t *)db->db_data)[off] = val; 246*fa9e4066Sahrens dmu_buf_rele(db); 247*fa9e4066Sahrens 248*fa9e4066Sahrens if (tbl->zt_nextblk != 0) { 249*fa9e4066Sahrens idx *= 2; 250*fa9e4066Sahrens blk = idx >> (ZAP_BLOCK_SHIFT-3); 251*fa9e4066Sahrens off = idx & ((1<<(ZAP_BLOCK_SHIFT-3))-1); 252*fa9e4066Sahrens 253*fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 254*fa9e4066Sahrens (tbl->zt_nextblk + blk) << ZAP_BLOCK_SHIFT); 255*fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 256*fa9e4066Sahrens ((uint64_t *)db->db_data)[off] = val; 257*fa9e4066Sahrens ((uint64_t *)db->db_data)[off+1] = val; 258*fa9e4066Sahrens dmu_buf_rele(db); 259*fa9e4066Sahrens } 260*fa9e4066Sahrens 261*fa9e4066Sahrens return (oldval); 262*fa9e4066Sahrens } 263*fa9e4066Sahrens 264*fa9e4066Sahrens static uint64_t 265*fa9e4066Sahrens zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx) 266*fa9e4066Sahrens { 267*fa9e4066Sahrens uint64_t blk, off, val; 268*fa9e4066Sahrens dmu_buf_t *db; 269*fa9e4066Sahrens 270*fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 271*fa9e4066Sahrens 272*fa9e4066Sahrens blk = idx >> (ZAP_BLOCK_SHIFT-3); 273*fa9e4066Sahrens off = idx & ((1<<(ZAP_BLOCK_SHIFT-3))-1); 274*fa9e4066Sahrens 275*fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 276*fa9e4066Sahrens (tbl->zt_blk + blk) << ZAP_BLOCK_SHIFT); 277*fa9e4066Sahrens dmu_buf_read(db); 278*fa9e4066Sahrens val = ((uint64_t *)db->db_data)[off]; 279*fa9e4066Sahrens dmu_buf_rele(db); 280*fa9e4066Sahrens return (val); 281*fa9e4066Sahrens } 282*fa9e4066Sahrens 283*fa9e4066Sahrens /* 284*fa9e4066Sahrens * Routines for growing the ptrtbl. 285*fa9e4066Sahrens */ 286*fa9e4066Sahrens 287*fa9e4066Sahrens static void 288*fa9e4066Sahrens zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n) 289*fa9e4066Sahrens { 290*fa9e4066Sahrens int i; 291*fa9e4066Sahrens for (i = 0; i < n; i++) { 292*fa9e4066Sahrens uint64_t lb = src[i]; 293*fa9e4066Sahrens dst[2*i+0] = lb; 294*fa9e4066Sahrens dst[2*i+1] = lb; 295*fa9e4066Sahrens } 296*fa9e4066Sahrens } 297*fa9e4066Sahrens 298*fa9e4066Sahrens static void 299*fa9e4066Sahrens zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx) 300*fa9e4066Sahrens { 301*fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == 32) 302*fa9e4066Sahrens return; 303*fa9e4066Sahrens 304*fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 305*fa9e4066Sahrens /* 306*fa9e4066Sahrens * The ptrtbl can no longer be contained in the 307*fa9e4066Sahrens * header block. Give it its own entire block, which 308*fa9e4066Sahrens * will quadruple the size of the ptrtbl. 309*fa9e4066Sahrens */ 310*fa9e4066Sahrens uint64_t newblk; 311*fa9e4066Sahrens dmu_buf_t *db_new; 312*fa9e4066Sahrens 313*fa9e4066Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, 314*fa9e4066Sahrens ZAP_PTRTBL_MIN_SHIFT); 315*fa9e4066Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk, ==, 0); 316*fa9e4066Sahrens 317*fa9e4066Sahrens newblk = zap_allocate_blocks(zap, 1, tx); 318*fa9e4066Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 319*fa9e4066Sahrens newblk << ZAP_BLOCK_SHIFT); 320*fa9e4066Sahrens 321*fa9e4066Sahrens dmu_buf_will_dirty(db_new, tx); 322*fa9e4066Sahrens zap_ptrtbl_transfer(zap->zap_f.zap_phys->zap_leafs, 323*fa9e4066Sahrens db_new->db_data, 1 << ZAP_PTRTBL_MIN_SHIFT); 324*fa9e4066Sahrens dmu_buf_rele(db_new); 325*fa9e4066Sahrens 326*fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk; 327*fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1; 328*fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++; 329*fa9e4066Sahrens 330*fa9e4066Sahrens ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, 331*fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << 332*fa9e4066Sahrens (ZAP_BLOCK_SHIFT-3)); 333*fa9e4066Sahrens } else { 334*fa9e4066Sahrens zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 335*fa9e4066Sahrens zap_ptrtbl_transfer, tx); 336*fa9e4066Sahrens } 337*fa9e4066Sahrens } 338*fa9e4066Sahrens 339*fa9e4066Sahrens static void 340*fa9e4066Sahrens zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx) 341*fa9e4066Sahrens { 342*fa9e4066Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 343*fa9e4066Sahrens mutex_enter(&zap->zap_f.zap_num_entries_mtx); 344*fa9e4066Sahrens 345*fa9e4066Sahrens ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta); 346*fa9e4066Sahrens 347*fa9e4066Sahrens zap->zap_f.zap_phys->zap_num_entries += delta; 348*fa9e4066Sahrens 349*fa9e4066Sahrens mutex_exit(&zap->zap_f.zap_num_entries_mtx); 350*fa9e4066Sahrens } 351*fa9e4066Sahrens 352*fa9e4066Sahrens uint64_t 353*fa9e4066Sahrens zap_allocate_blocks(zap_t *zap, int nblocks, dmu_tx_t *tx) 354*fa9e4066Sahrens { 355*fa9e4066Sahrens uint64_t newblk; 356*fa9e4066Sahrens ASSERT(tx != NULL); 357*fa9e4066Sahrens if (!RW_WRITE_HELD(&zap->zap_rwlock)) { 358*fa9e4066Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 359*fa9e4066Sahrens } 360*fa9e4066Sahrens newblk = atomic_add_64_nv(&zap->zap_f.zap_phys->zap_freeblk, nblocks) - 361*fa9e4066Sahrens nblocks; 362*fa9e4066Sahrens return (newblk); 363*fa9e4066Sahrens } 364*fa9e4066Sahrens 365*fa9e4066Sahrens 366*fa9e4066Sahrens /* 367*fa9e4066Sahrens * This function doesn't increment zap_num_leafs because it's used to 368*fa9e4066Sahrens * allocate a leaf chain, which doesn't count against zap_num_leafs. 369*fa9e4066Sahrens * The directory must be held exclusively for this tx. 370*fa9e4066Sahrens */ 371*fa9e4066Sahrens zap_leaf_t * 372*fa9e4066Sahrens zap_create_leaf(zap_t *zap, dmu_tx_t *tx) 373*fa9e4066Sahrens { 374*fa9e4066Sahrens void *winner; 375*fa9e4066Sahrens zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); 376*fa9e4066Sahrens 377*fa9e4066Sahrens ASSERT(tx != NULL); 378*fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 379*fa9e4066Sahrens /* hence we already dirtied zap->zap_dbuf */ 380*fa9e4066Sahrens 381*fa9e4066Sahrens rw_init(&l->l_rwlock, 0, 0, 0); 382*fa9e4066Sahrens rw_enter(&l->l_rwlock, RW_WRITER); 383*fa9e4066Sahrens l->l_blkid = zap_allocate_blocks(zap, 1, tx); 384*fa9e4066Sahrens l->l_next = NULL; 385*fa9e4066Sahrens l->l_dbuf = NULL; 386*fa9e4066Sahrens l->l_phys = NULL; 387*fa9e4066Sahrens 388*fa9e4066Sahrens l->l_dbuf = dmu_buf_hold(zap->zap_objset, zap->zap_object, 389*fa9e4066Sahrens l->l_blkid << ZAP_BLOCK_SHIFT); 390*fa9e4066Sahrens winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout); 391*fa9e4066Sahrens ASSERT(winner == NULL); 392*fa9e4066Sahrens dmu_buf_will_dirty(l->l_dbuf, tx); 393*fa9e4066Sahrens 394*fa9e4066Sahrens zap_leaf_init(l); 395*fa9e4066Sahrens 396*fa9e4066Sahrens return (l); 397*fa9e4066Sahrens } 398*fa9e4066Sahrens 399*fa9e4066Sahrens /* ARGSUSED */ 400*fa9e4066Sahrens void 401*fa9e4066Sahrens zap_destroy_leaf(zap_t *zap, zap_leaf_t *l, dmu_tx_t *tx) 402*fa9e4066Sahrens { 403*fa9e4066Sahrens /* uint64_t offset = l->l_blkid << ZAP_BLOCK_SHIFT; */ 404*fa9e4066Sahrens rw_exit(&l->l_rwlock); 405*fa9e4066Sahrens dmu_buf_rele(l->l_dbuf); 406*fa9e4066Sahrens /* XXX there are still holds on this block, so we can't free it? */ 407*fa9e4066Sahrens /* dmu_free_range(zap->zap_objset, zap->zap_object, */ 408*fa9e4066Sahrens /* offset, 1<<ZAP_BLOCK_SHIFT, tx); */ 409*fa9e4066Sahrens } 410*fa9e4066Sahrens 411*fa9e4066Sahrens int 412*fa9e4066Sahrens fzap_count(zap_t *zap, uint64_t *count) 413*fa9e4066Sahrens { 414*fa9e4066Sahrens ASSERT(!zap->zap_ismicro); 415*fa9e4066Sahrens mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */ 416*fa9e4066Sahrens *count = zap->zap_f.zap_phys->zap_num_entries; 417*fa9e4066Sahrens mutex_exit(&zap->zap_f.zap_num_entries_mtx); 418*fa9e4066Sahrens return (0); 419*fa9e4066Sahrens } 420*fa9e4066Sahrens 421*fa9e4066Sahrens /* 422*fa9e4066Sahrens * Routines for obtaining zap_leaf_t's 423*fa9e4066Sahrens */ 424*fa9e4066Sahrens 425*fa9e4066Sahrens static void 426*fa9e4066Sahrens zap_put_leaf(zap_leaf_t *l) 427*fa9e4066Sahrens { 428*fa9e4066Sahrens zap_leaf_t *nl = l->l_next; 429*fa9e4066Sahrens while (nl) { 430*fa9e4066Sahrens zap_leaf_t *nnl = nl->l_next; 431*fa9e4066Sahrens rw_exit(&nl->l_rwlock); 432*fa9e4066Sahrens dmu_buf_rele(nl->l_dbuf); 433*fa9e4066Sahrens nl = nnl; 434*fa9e4066Sahrens } 435*fa9e4066Sahrens rw_exit(&l->l_rwlock); 436*fa9e4066Sahrens dmu_buf_rele(l->l_dbuf); 437*fa9e4066Sahrens } 438*fa9e4066Sahrens 439*fa9e4066Sahrens _NOTE(ARGSUSED(0)) 440*fa9e4066Sahrens static void 441*fa9e4066Sahrens zap_leaf_pageout(dmu_buf_t *db, void *vl) 442*fa9e4066Sahrens { 443*fa9e4066Sahrens zap_leaf_t *l = vl; 444*fa9e4066Sahrens 445*fa9e4066Sahrens rw_destroy(&l->l_rwlock); 446*fa9e4066Sahrens kmem_free(l, sizeof (zap_leaf_t)); 447*fa9e4066Sahrens } 448*fa9e4066Sahrens 449*fa9e4066Sahrens static zap_leaf_t * 450*fa9e4066Sahrens zap_open_leaf(uint64_t blkid, dmu_buf_t *db) 451*fa9e4066Sahrens { 452*fa9e4066Sahrens zap_leaf_t *l, *winner; 453*fa9e4066Sahrens 454*fa9e4066Sahrens ASSERT(blkid != 0); 455*fa9e4066Sahrens 456*fa9e4066Sahrens l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); 457*fa9e4066Sahrens rw_init(&l->l_rwlock, 0, 0, 0); 458*fa9e4066Sahrens rw_enter(&l->l_rwlock, RW_WRITER); 459*fa9e4066Sahrens l->l_blkid = blkid; 460*fa9e4066Sahrens l->l_next = NULL; 461*fa9e4066Sahrens l->l_dbuf = db; 462*fa9e4066Sahrens l->l_phys = NULL; 463*fa9e4066Sahrens 464*fa9e4066Sahrens winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout); 465*fa9e4066Sahrens 466*fa9e4066Sahrens rw_exit(&l->l_rwlock); 467*fa9e4066Sahrens if (winner != NULL) { 468*fa9e4066Sahrens /* someone else set it first */ 469*fa9e4066Sahrens zap_leaf_pageout(NULL, l); 470*fa9e4066Sahrens l = winner; 471*fa9e4066Sahrens } 472*fa9e4066Sahrens 473*fa9e4066Sahrens return (l); 474*fa9e4066Sahrens } 475*fa9e4066Sahrens 476*fa9e4066Sahrens static zap_leaf_t * 477*fa9e4066Sahrens zap_get_leaf_byblk_impl(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt) 478*fa9e4066Sahrens { 479*fa9e4066Sahrens dmu_buf_t *db; 480*fa9e4066Sahrens zap_leaf_t *l; 481*fa9e4066Sahrens 482*fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 483*fa9e4066Sahrens 484*fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 485*fa9e4066Sahrens blkid << ZAP_BLOCK_SHIFT); 486*fa9e4066Sahrens 487*fa9e4066Sahrens ASSERT3U(db->db_object, ==, zap->zap_object); 488*fa9e4066Sahrens ASSERT3U(db->db_offset, ==, blkid << ZAP_BLOCK_SHIFT); 489*fa9e4066Sahrens ASSERT3U(db->db_size, ==, 1 << ZAP_BLOCK_SHIFT); 490*fa9e4066Sahrens ASSERT(blkid != 0); 491*fa9e4066Sahrens 492*fa9e4066Sahrens dmu_buf_read(db); 493*fa9e4066Sahrens l = dmu_buf_get_user(db); 494*fa9e4066Sahrens 495*fa9e4066Sahrens if (l == NULL) 496*fa9e4066Sahrens l = zap_open_leaf(blkid, db); 497*fa9e4066Sahrens 498*fa9e4066Sahrens rw_enter(&l->l_rwlock, lt); 499*fa9e4066Sahrens /* 500*fa9e4066Sahrens * Must lock before dirtying, otherwise l->l_phys could change, 501*fa9e4066Sahrens * causing ASSERT below to fail. 502*fa9e4066Sahrens */ 503*fa9e4066Sahrens if (lt == RW_WRITER) 504*fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 505*fa9e4066Sahrens ASSERT3U(l->l_blkid, ==, blkid); 506*fa9e4066Sahrens ASSERT3P(l->l_dbuf, ==, db); 507*fa9e4066Sahrens ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data); 508*fa9e4066Sahrens ASSERT3U(l->lh_block_type, ==, ZBT_LEAF); 509*fa9e4066Sahrens ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 510*fa9e4066Sahrens 511*fa9e4066Sahrens return (l); 512*fa9e4066Sahrens } 513*fa9e4066Sahrens 514*fa9e4066Sahrens static zap_leaf_t * 515*fa9e4066Sahrens zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt) 516*fa9e4066Sahrens { 517*fa9e4066Sahrens zap_leaf_t *l, *nl; 518*fa9e4066Sahrens 519*fa9e4066Sahrens l = zap_get_leaf_byblk_impl(zap, blkid, tx, lt); 520*fa9e4066Sahrens 521*fa9e4066Sahrens nl = l; 522*fa9e4066Sahrens while (nl->lh_next != 0) { 523*fa9e4066Sahrens zap_leaf_t *nnl; 524*fa9e4066Sahrens nnl = zap_get_leaf_byblk_impl(zap, nl->lh_next, tx, lt); 525*fa9e4066Sahrens nl->l_next = nnl; 526*fa9e4066Sahrens nl = nnl; 527*fa9e4066Sahrens } 528*fa9e4066Sahrens 529*fa9e4066Sahrens return (l); 530*fa9e4066Sahrens } 531*fa9e4066Sahrens 532*fa9e4066Sahrens static uint64_t 533*fa9e4066Sahrens zap_idx_to_blk(zap_t *zap, uint64_t idx) 534*fa9e4066Sahrens { 535*fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 536*fa9e4066Sahrens 537*fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 538*fa9e4066Sahrens ASSERT3U(idx, <, 539*fa9e4066Sahrens (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift)); 540*fa9e4066Sahrens return (zap->zap_f.zap_phys->zap_leafs[idx]); 541*fa9e4066Sahrens } else { 542*fa9e4066Sahrens return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 543*fa9e4066Sahrens idx)); 544*fa9e4066Sahrens } 545*fa9e4066Sahrens } 546*fa9e4066Sahrens 547*fa9e4066Sahrens static void 548*fa9e4066Sahrens zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx) 549*fa9e4066Sahrens { 550*fa9e4066Sahrens ASSERT(tx != NULL); 551*fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 552*fa9e4066Sahrens 553*fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) { 554*fa9e4066Sahrens zap->zap_f.zap_phys->zap_leafs[idx] = blk; 555*fa9e4066Sahrens } else { 556*fa9e4066Sahrens (void) zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 557*fa9e4066Sahrens idx, blk, tx); 558*fa9e4066Sahrens } 559*fa9e4066Sahrens } 560*fa9e4066Sahrens 561*fa9e4066Sahrens static zap_leaf_t * 562*fa9e4066Sahrens zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt) 563*fa9e4066Sahrens { 564*fa9e4066Sahrens uint64_t idx; 565*fa9e4066Sahrens zap_leaf_t *l; 566*fa9e4066Sahrens 567*fa9e4066Sahrens ASSERT(zap->zap_dbuf == NULL || 568*fa9e4066Sahrens zap->zap_f.zap_phys == zap->zap_dbuf->db_data); 569*fa9e4066Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC); 570*fa9e4066Sahrens idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 571*fa9e4066Sahrens l = zap_get_leaf_byblk(zap, zap_idx_to_blk(zap, idx), tx, lt); 572*fa9e4066Sahrens 573*fa9e4066Sahrens ASSERT3U(ZAP_HASH_IDX(h, l->lh_prefix_len), ==, l->lh_prefix); 574*fa9e4066Sahrens 575*fa9e4066Sahrens return (l); 576*fa9e4066Sahrens } 577*fa9e4066Sahrens 578*fa9e4066Sahrens 579*fa9e4066Sahrens static zap_leaf_t * 580*fa9e4066Sahrens zap_expand_leaf(zap_t *zap, zap_leaf_t *l, uint64_t hash, dmu_tx_t *tx) 581*fa9e4066Sahrens { 582*fa9e4066Sahrens zap_leaf_t *nl; 583*fa9e4066Sahrens int prefix_diff, i, err; 584*fa9e4066Sahrens uint64_t sibling; 585*fa9e4066Sahrens 586*fa9e4066Sahrens ASSERT3U(l->lh_prefix_len, <=, 587*fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 588*fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 589*fa9e4066Sahrens 590*fa9e4066Sahrens ASSERT3U(ZAP_HASH_IDX(hash, l->lh_prefix_len), ==, l->lh_prefix); 591*fa9e4066Sahrens 592*fa9e4066Sahrens if (zap_tryupgradedir(zap, tx) == 0) { 593*fa9e4066Sahrens /* failed to upgrade */ 594*fa9e4066Sahrens int old_prefix_len = l->lh_prefix_len; 595*fa9e4066Sahrens objset_t *os = zap->zap_objset; 596*fa9e4066Sahrens uint64_t object = zap->zap_object; 597*fa9e4066Sahrens 598*fa9e4066Sahrens zap_put_leaf(l); 599*fa9e4066Sahrens zap_unlockdir(zap); 600*fa9e4066Sahrens err = zap_lockdir(os, object, tx, RW_WRITER, FALSE, &zap); 601*fa9e4066Sahrens ASSERT3U(err, ==, 0); 602*fa9e4066Sahrens ASSERT(!zap->zap_ismicro); 603*fa9e4066Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 604*fa9e4066Sahrens 605*fa9e4066Sahrens if (l->lh_prefix_len != old_prefix_len) 606*fa9e4066Sahrens /* it split while our locks were down */ 607*fa9e4066Sahrens return (l); 608*fa9e4066Sahrens } 609*fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 610*fa9e4066Sahrens 611*fa9e4066Sahrens if (l->lh_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { 612*fa9e4066Sahrens /* There's only one pointer to us. Chain on another leaf blk. */ 613*fa9e4066Sahrens (void) zap_leaf_chainmore(l, zap_create_leaf(zap, tx)); 614*fa9e4066Sahrens dprintf("chaining leaf %x/%d\n", l->lh_prefix, 615*fa9e4066Sahrens l->lh_prefix_len); 616*fa9e4066Sahrens return (l); 617*fa9e4066Sahrens } 618*fa9e4066Sahrens 619*fa9e4066Sahrens ASSERT3U(ZAP_HASH_IDX(hash, l->lh_prefix_len), ==, l->lh_prefix); 620*fa9e4066Sahrens 621*fa9e4066Sahrens /* There's more than one pointer to us. Split this leaf. */ 622*fa9e4066Sahrens nl = zap_leaf_split(zap, l, tx); 623*fa9e4066Sahrens 624*fa9e4066Sahrens /* set sibling pointers */ 625*fa9e4066Sahrens prefix_diff = 626*fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - l->lh_prefix_len; 627*fa9e4066Sahrens sibling = (ZAP_HASH_IDX(hash, l->lh_prefix_len) | 1) << prefix_diff; 628*fa9e4066Sahrens for (i = 0; i < (1ULL<<prefix_diff); i++) { 629*fa9e4066Sahrens ASSERT3U(zap_idx_to_blk(zap, sibling+i), ==, l->l_blkid); 630*fa9e4066Sahrens zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx); 631*fa9e4066Sahrens /* dprintf("set %d to %u %x\n", sibling+i, nl->l_blkid, nl); */ 632*fa9e4066Sahrens } 633*fa9e4066Sahrens 634*fa9e4066Sahrens zap->zap_f.zap_phys->zap_num_leafs++; 635*fa9e4066Sahrens 636*fa9e4066Sahrens if (hash & (1ULL << (64 - l->lh_prefix_len))) { 637*fa9e4066Sahrens /* we want the sibling */ 638*fa9e4066Sahrens zap_put_leaf(l); 639*fa9e4066Sahrens l = nl; 640*fa9e4066Sahrens } else { 641*fa9e4066Sahrens zap_put_leaf(nl); 642*fa9e4066Sahrens } 643*fa9e4066Sahrens 644*fa9e4066Sahrens return (l); 645*fa9e4066Sahrens } 646*fa9e4066Sahrens 647*fa9e4066Sahrens static void 648*fa9e4066Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap_t *zap, 649*fa9e4066Sahrens zap_leaf_t *l, dmu_tx_t *tx) 650*fa9e4066Sahrens { 651*fa9e4066Sahrens int shift, err; 652*fa9e4066Sahrens 653*fa9e4066Sahrens again: 654*fa9e4066Sahrens shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 655*fa9e4066Sahrens 656*fa9e4066Sahrens if (l->lh_prefix_len == shift && 657*fa9e4066Sahrens (l->l_next != NULL || l->lh_nfree < MIN_FREE)) { 658*fa9e4066Sahrens /* this leaf will soon make us grow the pointer table */ 659*fa9e4066Sahrens 660*fa9e4066Sahrens if (zap_tryupgradedir(zap, tx) == 0) { 661*fa9e4066Sahrens objset_t *os = zap->zap_objset; 662*fa9e4066Sahrens uint64_t zapobj = zap->zap_object; 663*fa9e4066Sahrens uint64_t blkid = l->l_blkid; 664*fa9e4066Sahrens 665*fa9e4066Sahrens zap_put_leaf(l); 666*fa9e4066Sahrens zap_unlockdir(zap); 667*fa9e4066Sahrens err = zap_lockdir(os, zapobj, tx, 668*fa9e4066Sahrens RW_WRITER, FALSE, &zap); 669*fa9e4066Sahrens ASSERT3U(err, ==, 0); 670*fa9e4066Sahrens l = zap_get_leaf_byblk(zap, blkid, tx, RW_READER); 671*fa9e4066Sahrens goto again; 672*fa9e4066Sahrens } 673*fa9e4066Sahrens 674*fa9e4066Sahrens zap_put_leaf(l); 675*fa9e4066Sahrens zap_grow_ptrtbl(zap, tx); 676*fa9e4066Sahrens } else { 677*fa9e4066Sahrens zap_put_leaf(l); 678*fa9e4066Sahrens } 679*fa9e4066Sahrens } 680*fa9e4066Sahrens 681*fa9e4066Sahrens 682*fa9e4066Sahrens static int 683*fa9e4066Sahrens fzap_checksize(uint64_t integer_size, uint64_t num_integers) 684*fa9e4066Sahrens { 685*fa9e4066Sahrens /* Only integer sizes supported by C */ 686*fa9e4066Sahrens switch (integer_size) { 687*fa9e4066Sahrens case 1: 688*fa9e4066Sahrens case 2: 689*fa9e4066Sahrens case 4: 690*fa9e4066Sahrens case 8: 691*fa9e4066Sahrens break; 692*fa9e4066Sahrens default: 693*fa9e4066Sahrens return (EINVAL); 694*fa9e4066Sahrens } 695*fa9e4066Sahrens 696*fa9e4066Sahrens /* Make sure we won't overflow */ 697*fa9e4066Sahrens if (integer_size * num_integers < num_integers) 698*fa9e4066Sahrens return (EINVAL); 699*fa9e4066Sahrens if (integer_size * num_integers > DMU_MAX_ACCESS) 700*fa9e4066Sahrens return (EINVAL); 701*fa9e4066Sahrens 702*fa9e4066Sahrens return (0); 703*fa9e4066Sahrens } 704*fa9e4066Sahrens 705*fa9e4066Sahrens /* 706*fa9e4066Sahrens * Routines for maniplulating attributes. 707*fa9e4066Sahrens */ 708*fa9e4066Sahrens int 709*fa9e4066Sahrens fzap_lookup(zap_t *zap, const char *name, 710*fa9e4066Sahrens uint64_t integer_size, uint64_t num_integers, void *buf) 711*fa9e4066Sahrens { 712*fa9e4066Sahrens zap_leaf_t *l; 713*fa9e4066Sahrens int err; 714*fa9e4066Sahrens uint64_t hash; 715*fa9e4066Sahrens zap_entry_handle_t zeh; 716*fa9e4066Sahrens 717*fa9e4066Sahrens err = fzap_checksize(integer_size, num_integers); 718*fa9e4066Sahrens if (err != 0) 719*fa9e4066Sahrens return (err); 720*fa9e4066Sahrens 721*fa9e4066Sahrens hash = zap_hash(zap, name); 722*fa9e4066Sahrens l = zap_deref_leaf(zap, hash, NULL, RW_READER); 723*fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 724*fa9e4066Sahrens if (err != 0) 725*fa9e4066Sahrens goto out; 726*fa9e4066Sahrens err = zap_entry_read(&zeh, integer_size, num_integers, buf); 727*fa9e4066Sahrens out: 728*fa9e4066Sahrens zap_put_leaf(l); 729*fa9e4066Sahrens return (err); 730*fa9e4066Sahrens } 731*fa9e4066Sahrens 732*fa9e4066Sahrens int 733*fa9e4066Sahrens fzap_add_cd(zap_t *zap, const char *name, 734*fa9e4066Sahrens uint64_t integer_size, uint64_t num_integers, 735*fa9e4066Sahrens const void *val, uint32_t cd, dmu_tx_t *tx, zap_leaf_t **lp) 736*fa9e4066Sahrens { 737*fa9e4066Sahrens zap_leaf_t *l; 738*fa9e4066Sahrens uint64_t hash; 739*fa9e4066Sahrens int err; 740*fa9e4066Sahrens zap_entry_handle_t zeh; 741*fa9e4066Sahrens 742*fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 743*fa9e4066Sahrens ASSERT(!zap->zap_ismicro); 744*fa9e4066Sahrens ASSERT(fzap_checksize(integer_size, num_integers) == 0); 745*fa9e4066Sahrens 746*fa9e4066Sahrens hash = zap_hash(zap, name); 747*fa9e4066Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 748*fa9e4066Sahrens retry: 749*fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 750*fa9e4066Sahrens if (err == 0) { 751*fa9e4066Sahrens err = EEXIST; 752*fa9e4066Sahrens goto out; 753*fa9e4066Sahrens } 754*fa9e4066Sahrens ASSERT(err == ENOENT); 755*fa9e4066Sahrens 756*fa9e4066Sahrens /* XXX If this leaf is chained, split it if we can. */ 757*fa9e4066Sahrens err = zap_entry_create(l, name, hash, cd, 758*fa9e4066Sahrens integer_size, num_integers, val, &zeh); 759*fa9e4066Sahrens 760*fa9e4066Sahrens if (err == 0) { 761*fa9e4066Sahrens zap_increment_num_entries(zap, 1, tx); 762*fa9e4066Sahrens } else if (err == EAGAIN) { 763*fa9e4066Sahrens l = zap_expand_leaf(zap, l, hash, tx); 764*fa9e4066Sahrens goto retry; 765*fa9e4066Sahrens } 766*fa9e4066Sahrens 767*fa9e4066Sahrens out: 768*fa9e4066Sahrens if (lp) 769*fa9e4066Sahrens *lp = l; 770*fa9e4066Sahrens else 771*fa9e4066Sahrens zap_put_leaf(l); 772*fa9e4066Sahrens return (err); 773*fa9e4066Sahrens } 774*fa9e4066Sahrens 775*fa9e4066Sahrens int 776*fa9e4066Sahrens fzap_add(zap_t *zap, const char *name, 777*fa9e4066Sahrens uint64_t integer_size, uint64_t num_integers, 778*fa9e4066Sahrens const void *val, dmu_tx_t *tx) 779*fa9e4066Sahrens { 780*fa9e4066Sahrens int err; 781*fa9e4066Sahrens zap_leaf_t *l; 782*fa9e4066Sahrens 783*fa9e4066Sahrens err = fzap_checksize(integer_size, num_integers); 784*fa9e4066Sahrens if (err != 0) 785*fa9e4066Sahrens return (err); 786*fa9e4066Sahrens 787*fa9e4066Sahrens err = fzap_add_cd(zap, name, integer_size, num_integers, 788*fa9e4066Sahrens val, ZAP_MAXCD, tx, &l); 789*fa9e4066Sahrens 790*fa9e4066Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap, l, tx); 791*fa9e4066Sahrens return (err); 792*fa9e4066Sahrens } 793*fa9e4066Sahrens 794*fa9e4066Sahrens int 795*fa9e4066Sahrens fzap_update(zap_t *zap, const char *name, 796*fa9e4066Sahrens int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 797*fa9e4066Sahrens { 798*fa9e4066Sahrens zap_leaf_t *l; 799*fa9e4066Sahrens uint64_t hash; 800*fa9e4066Sahrens int err, create; 801*fa9e4066Sahrens zap_entry_handle_t zeh; 802*fa9e4066Sahrens 803*fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 804*fa9e4066Sahrens err = fzap_checksize(integer_size, num_integers); 805*fa9e4066Sahrens if (err != 0) 806*fa9e4066Sahrens return (err); 807*fa9e4066Sahrens 808*fa9e4066Sahrens hash = zap_hash(zap, name); 809*fa9e4066Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 810*fa9e4066Sahrens retry: 811*fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 812*fa9e4066Sahrens create = (err == ENOENT); 813*fa9e4066Sahrens ASSERT(err == 0 || err == ENOENT); 814*fa9e4066Sahrens 815*fa9e4066Sahrens /* XXX If this leaf is chained, split it if we can. */ 816*fa9e4066Sahrens 817*fa9e4066Sahrens if (create) { 818*fa9e4066Sahrens err = zap_entry_create(l, name, hash, ZAP_MAXCD, 819*fa9e4066Sahrens integer_size, num_integers, val, &zeh); 820*fa9e4066Sahrens if (err == 0) 821*fa9e4066Sahrens zap_increment_num_entries(zap, 1, tx); 822*fa9e4066Sahrens } else { 823*fa9e4066Sahrens err = zap_entry_update(&zeh, integer_size, num_integers, val); 824*fa9e4066Sahrens } 825*fa9e4066Sahrens 826*fa9e4066Sahrens if (err == EAGAIN) { 827*fa9e4066Sahrens l = zap_expand_leaf(zap, l, hash, tx); 828*fa9e4066Sahrens goto retry; 829*fa9e4066Sahrens } 830*fa9e4066Sahrens 831*fa9e4066Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap, l, tx); 832*fa9e4066Sahrens return (err); 833*fa9e4066Sahrens } 834*fa9e4066Sahrens 835*fa9e4066Sahrens int 836*fa9e4066Sahrens fzap_length(zap_t *zap, const char *name, 837*fa9e4066Sahrens uint64_t *integer_size, uint64_t *num_integers) 838*fa9e4066Sahrens { 839*fa9e4066Sahrens zap_leaf_t *l; 840*fa9e4066Sahrens int err; 841*fa9e4066Sahrens uint64_t hash; 842*fa9e4066Sahrens zap_entry_handle_t zeh; 843*fa9e4066Sahrens 844*fa9e4066Sahrens hash = zap_hash(zap, name); 845*fa9e4066Sahrens l = zap_deref_leaf(zap, hash, NULL, RW_READER); 846*fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 847*fa9e4066Sahrens if (err != 0) 848*fa9e4066Sahrens goto out; 849*fa9e4066Sahrens 850*fa9e4066Sahrens if (integer_size) 851*fa9e4066Sahrens *integer_size = zeh.zeh_integer_size; 852*fa9e4066Sahrens if (num_integers) 853*fa9e4066Sahrens *num_integers = zeh.zeh_num_integers; 854*fa9e4066Sahrens out: 855*fa9e4066Sahrens zap_put_leaf(l); 856*fa9e4066Sahrens return (err); 857*fa9e4066Sahrens } 858*fa9e4066Sahrens 859*fa9e4066Sahrens int 860*fa9e4066Sahrens fzap_remove(zap_t *zap, const char *name, dmu_tx_t *tx) 861*fa9e4066Sahrens { 862*fa9e4066Sahrens zap_leaf_t *l; 863*fa9e4066Sahrens uint64_t hash; 864*fa9e4066Sahrens int err; 865*fa9e4066Sahrens zap_entry_handle_t zeh; 866*fa9e4066Sahrens 867*fa9e4066Sahrens hash = zap_hash(zap, name); 868*fa9e4066Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 869*fa9e4066Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 870*fa9e4066Sahrens if (err == 0) { 871*fa9e4066Sahrens zap_entry_remove(&zeh); 872*fa9e4066Sahrens zap_increment_num_entries(zap, -1, tx); 873*fa9e4066Sahrens } 874*fa9e4066Sahrens zap_put_leaf(l); 875*fa9e4066Sahrens dprintf("fzap_remove: ds=%p obj=%llu name=%s err=%d\n", 876*fa9e4066Sahrens zap->zap_objset, zap->zap_object, name, err); 877*fa9e4066Sahrens return (err); 878*fa9e4066Sahrens } 879*fa9e4066Sahrens 880*fa9e4066Sahrens int 881*fa9e4066Sahrens zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, char *name) 882*fa9e4066Sahrens { 883*fa9e4066Sahrens zap_cursor_t zc; 884*fa9e4066Sahrens zap_attribute_t *za; 885*fa9e4066Sahrens int err; 886*fa9e4066Sahrens 887*fa9e4066Sahrens za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 888*fa9e4066Sahrens for (zap_cursor_init(&zc, os, zapobj); 889*fa9e4066Sahrens (err = zap_cursor_retrieve(&zc, za)) == 0; 890*fa9e4066Sahrens zap_cursor_advance(&zc)) { 891*fa9e4066Sahrens if (za->za_first_integer == value) { 892*fa9e4066Sahrens (void) strcpy(name, za->za_name); 893*fa9e4066Sahrens break; 894*fa9e4066Sahrens } 895*fa9e4066Sahrens } 896*fa9e4066Sahrens kmem_free(za, sizeof (zap_attribute_t)); 897*fa9e4066Sahrens return (err); 898*fa9e4066Sahrens } 899*fa9e4066Sahrens 900*fa9e4066Sahrens 901*fa9e4066Sahrens /* 902*fa9e4066Sahrens * Routines for iterating over the attributes. 903*fa9e4066Sahrens */ 904*fa9e4066Sahrens 905*fa9e4066Sahrens int 906*fa9e4066Sahrens fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) 907*fa9e4066Sahrens { 908*fa9e4066Sahrens int err = ENOENT; 909*fa9e4066Sahrens zap_entry_handle_t zeh; 910*fa9e4066Sahrens zap_leaf_t *l; 911*fa9e4066Sahrens 912*fa9e4066Sahrens /* retrieve the next entry at or after zc_hash/zc_cd */ 913*fa9e4066Sahrens /* if no entry, return ENOENT */ 914*fa9e4066Sahrens 915*fa9e4066Sahrens again: 916*fa9e4066Sahrens l = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER); 917*fa9e4066Sahrens err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh); 918*fa9e4066Sahrens 919*fa9e4066Sahrens if (err == ENOENT) { 920*fa9e4066Sahrens uint64_t nocare = (1ULL << (64 - l->lh_prefix_len)) - 1; 921*fa9e4066Sahrens zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1; 922*fa9e4066Sahrens zc->zc_cd = 0; 923*fa9e4066Sahrens if (l->lh_prefix_len == 0 || zc->zc_hash == 0) { 924*fa9e4066Sahrens zc->zc_hash = -1ULL; 925*fa9e4066Sahrens } else { 926*fa9e4066Sahrens zap_put_leaf(l); 927*fa9e4066Sahrens goto again; 928*fa9e4066Sahrens } 929*fa9e4066Sahrens } 930*fa9e4066Sahrens 931*fa9e4066Sahrens if (err == 0) { 932*fa9e4066Sahrens zc->zc_hash = zeh.zeh_hash; 933*fa9e4066Sahrens zc->zc_cd = zeh.zeh_cd; 934*fa9e4066Sahrens za->za_integer_length = zeh.zeh_integer_size; 935*fa9e4066Sahrens za->za_num_integers = zeh.zeh_num_integers; 936*fa9e4066Sahrens if (zeh.zeh_num_integers == 0) { 937*fa9e4066Sahrens za->za_first_integer = 0; 938*fa9e4066Sahrens } else { 939*fa9e4066Sahrens err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer); 940*fa9e4066Sahrens ASSERT(err == 0 || err == EOVERFLOW); 941*fa9e4066Sahrens } 942*fa9e4066Sahrens err = zap_entry_read_name(&zeh, 943*fa9e4066Sahrens sizeof (za->za_name), za->za_name); 944*fa9e4066Sahrens ASSERT(err == 0); 945*fa9e4066Sahrens } 946*fa9e4066Sahrens zap_put_leaf(l); 947*fa9e4066Sahrens return (err); 948*fa9e4066Sahrens } 949*fa9e4066Sahrens 950*fa9e4066Sahrens 951*fa9e4066Sahrens static void 952*fa9e4066Sahrens zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs) 953*fa9e4066Sahrens { 954*fa9e4066Sahrens int i; 955*fa9e4066Sahrens uint64_t lastblk = 0; 956*fa9e4066Sahrens 957*fa9e4066Sahrens /* 958*fa9e4066Sahrens * NB: if a leaf has more pointers than an entire ptrtbl block 959*fa9e4066Sahrens * can hold, then it'll be accounted for more than once, since 960*fa9e4066Sahrens * we won't have lastblk. 961*fa9e4066Sahrens */ 962*fa9e4066Sahrens for (i = 0; i < len; i++) { 963*fa9e4066Sahrens zap_leaf_t *l; 964*fa9e4066Sahrens 965*fa9e4066Sahrens if (tbl[i] == lastblk) 966*fa9e4066Sahrens continue; 967*fa9e4066Sahrens lastblk = tbl[i]; 968*fa9e4066Sahrens 969*fa9e4066Sahrens l = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER); 970*fa9e4066Sahrens 971*fa9e4066Sahrens zap_stats_leaf(zap, l, zs); 972*fa9e4066Sahrens zap_put_leaf(l); 973*fa9e4066Sahrens } 974*fa9e4066Sahrens } 975*fa9e4066Sahrens 976*fa9e4066Sahrens void 977*fa9e4066Sahrens fzap_get_stats(zap_t *zap, zap_stats_t *zs) 978*fa9e4066Sahrens { 979*fa9e4066Sahrens zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 980*fa9e4066Sahrens zs->zs_blocksize = 1ULL << ZAP_BLOCK_SHIFT; 981*fa9e4066Sahrens zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs; 982*fa9e4066Sahrens zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries; 983*fa9e4066Sahrens zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk; 984*fa9e4066Sahrens 985*fa9e4066Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 986*fa9e4066Sahrens /* the ptrtbl is entirely in the header block. */ 987*fa9e4066Sahrens zap_stats_ptrtbl(zap, zap->zap_f.zap_phys->zap_leafs, 988*fa9e4066Sahrens 1 << ZAP_PTRTBL_MIN_SHIFT, zs); 989*fa9e4066Sahrens } else { 990*fa9e4066Sahrens int b; 991*fa9e4066Sahrens 992*fa9e4066Sahrens dmu_prefetch(zap->zap_objset, zap->zap_object, 993*fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << ZAP_BLOCK_SHIFT, 994*fa9e4066Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << 995*fa9e4066Sahrens ZAP_BLOCK_SHIFT); 996*fa9e4066Sahrens 997*fa9e4066Sahrens for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; 998*fa9e4066Sahrens b++) { 999*fa9e4066Sahrens dmu_buf_t *db; 1000*fa9e4066Sahrens 1001*fa9e4066Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 1002*fa9e4066Sahrens (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << 1003*fa9e4066Sahrens ZAP_BLOCK_SHIFT); 1004*fa9e4066Sahrens dmu_buf_read(db); 1005*fa9e4066Sahrens zap_stats_ptrtbl(zap, db->db_data, 1006*fa9e4066Sahrens 1<<(ZAP_BLOCK_SHIFT-3), zs); 1007*fa9e4066Sahrens dmu_buf_rele(db); 1008*fa9e4066Sahrens } 1009*fa9e4066Sahrens } 1010*fa9e4066Sahrens } 1011