1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * The 512-byte leaf is broken into 32 16-byte chunks. 31 * chunk number n means l_chunk[n], even though the header precedes it. 32 * the names are stored null-terminated. 33 */ 34 35 #include <sys/zfs_context.h> 36 #include <sys/zap.h> 37 #include <sys/zap_impl.h> 38 #include <sys/zap_leaf.h> 39 #include <sys/spa.h> 40 #include <sys/dmu.h> 41 42 #define CHAIN_END 0xffff /* end of the chunk chain */ 43 44 /* somewhat arbitrary, could go up to around 100k ... */ 45 #define MAX_ARRAY_BYTES (8<<10) 46 47 #define NCHUNKS(bytes) (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES) 48 49 /* 50 * XXX This will >> by a negative number when 51 * lh_prefix_len > 64-ZAP_LEAF_HASH_SHIFT. 
52 */ 53 #define LEAF_HASH(l, h) \ 54 ((ZAP_LEAF_HASH_NUMENTRIES-1) & \ 55 ((h) >> (64 - ZAP_LEAF_HASH_SHIFT-(l)->lh_prefix_len))) 56 57 #define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) 58 59 /* #define MEMCHECK */ 60 61 62 static void 63 zap_memset(void *a, int c, size_t n) 64 { 65 char *cp = a; 66 char *cpend = cp + n; 67 68 while (cp < cpend) 69 *cp++ = c; 70 } 71 72 static void 73 stv(int len, void *addr, uint64_t value) 74 { 75 switch (len) { 76 case 1: 77 *(uint8_t *)addr = value; 78 return; 79 case 2: 80 *(uint16_t *)addr = value; 81 return; 82 case 4: 83 *(uint32_t *)addr = value; 84 return; 85 case 8: 86 *(uint64_t *)addr = value; 87 return; 88 } 89 ASSERT(!"bad int len"); 90 } 91 92 static uint64_t 93 ldv(int len, const void *addr) 94 { 95 switch (len) { 96 case 1: 97 return (*(uint8_t *)addr); 98 case 2: 99 return (*(uint16_t *)addr); 100 case 4: 101 return (*(uint32_t *)addr); 102 case 8: 103 return (*(uint64_t *)addr); 104 } 105 ASSERT(!"bad int len"); 106 return (0xFEEDFACEDEADBEEF); 107 } 108 109 void 110 zap_leaf_byteswap(zap_leaf_phys_t *buf) 111 { 112 int i; 113 114 buf->l_hdr.lhr_block_type = BSWAP_64(buf->l_hdr.lhr_block_type); 115 buf->l_hdr.lhr_next = BSWAP_64(buf->l_hdr.lhr_next); 116 buf->l_hdr.lhr_prefix = BSWAP_64(buf->l_hdr.lhr_prefix); 117 buf->l_hdr.lhr_magic = BSWAP_32(buf->l_hdr.lhr_magic); 118 buf->l_hdr.lhr_nfree = BSWAP_16(buf->l_hdr.lhr_nfree); 119 buf->l_hdr.lhr_nentries = BSWAP_16(buf->l_hdr.lhr_nentries); 120 buf->l_hdr.lhr_prefix_len = BSWAP_16(buf->l_hdr.lhr_prefix_len); 121 buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); 122 123 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES; i++) 124 buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); 125 126 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 127 struct zap_leaf_entry *le; 128 129 switch (buf->l_chunk[i].l_free.lf_type) { 130 case ZAP_LEAF_ENTRY: 131 le = &buf->l_chunk[i].l_entry; 132 133 le->le_type = BSWAP_8(le->le_type); 134 le->le_int_size = 
BSWAP_8(le->le_int_size); 135 le->le_next = BSWAP_16(le->le_next); 136 le->le_name_chunk = BSWAP_16(le->le_name_chunk); 137 le->le_name_length = BSWAP_16(le->le_name_length); 138 le->le_value_chunk = BSWAP_16(le->le_value_chunk); 139 le->le_value_length = BSWAP_16(le->le_value_length); 140 le->le_cd = BSWAP_32(le->le_cd); 141 le->le_hash = BSWAP_64(le->le_hash); 142 break; 143 case ZAP_LEAF_FREE: 144 buf->l_chunk[i].l_free.lf_type = 145 BSWAP_8(buf->l_chunk[i].l_free.lf_type); 146 buf->l_chunk[i].l_free.lf_next = 147 BSWAP_16(buf->l_chunk[i].l_free.lf_next); 148 break; 149 case ZAP_LEAF_ARRAY: 150 /* zap_leaf_array */ 151 buf->l_chunk[i].l_array.la_type = 152 BSWAP_8(buf->l_chunk[i].l_array.la_type); 153 buf->l_chunk[i].l_array.la_next = 154 BSWAP_16(buf->l_chunk[i].l_array.la_next); 155 /* la_array doesn't need swapping */ 156 break; 157 default: 158 ASSERT(!"bad leaf type"); 159 } 160 } 161 } 162 163 void 164 zap_leaf_init(zap_leaf_t *l) 165 { 166 int i; 167 168 ASSERT3U(sizeof (zap_leaf_phys_t), ==, l->l_dbuf->db_size); 169 zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header)); 170 zap_memset(&l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash)); 171 for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) { 172 l->l_phys->l_chunk[i].l_free.lf_type = ZAP_LEAF_FREE; 173 l->l_phys->l_chunk[i].l_free.lf_next = i+1; 174 } 175 l->l_phys->l_chunk[ZAP_LEAF_NUMCHUNKS-1].l_free.lf_next = CHAIN_END; 176 l->lh_block_type = ZBT_LEAF; 177 l->lh_magic = ZAP_LEAF_MAGIC; 178 l->lh_nfree = ZAP_LEAF_NUMCHUNKS; 179 } 180 181 zap_leaf_t * 182 zap_leaf_chainmore(zap_leaf_t *l, zap_leaf_t *nl) 183 { 184 nl->lh_prefix = l->lh_prefix; 185 nl->lh_prefix_len = l->lh_prefix_len; 186 nl->l_next = l->l_next; 187 l->l_next = nl; 188 nl->lh_next = l->lh_next; 189 l->lh_next = nl->l_blkid; 190 return (nl); 191 } 192 193 /* 194 * Routines which manipulate leaf chunks (l_chunk[]). 
195 */ 196 197 static uint16_t 198 zap_leaf_chunk_alloc(zap_leaf_t *l) 199 { 200 int chunk; 201 202 ASSERT(l->lh_nfree > 0); 203 204 chunk = l->l_phys->l_hdr.lh_freelist; 205 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 206 ASSERT3U(l->l_phys->l_chunk[chunk].l_free.lf_type, ==, ZAP_LEAF_FREE); 207 208 l->l_phys->l_hdr.lh_freelist = l->l_phys->l_chunk[chunk].l_free.lf_next; 209 210 #ifdef MEMCHECK 211 zap_memset(&l->l_phys->l_chunk[chunk], 0xa1, 212 sizeof (l->l_phys->l_chunk[chunk])); 213 #endif 214 215 l->lh_nfree--; 216 217 return (chunk); 218 } 219 220 static void 221 zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) 222 { 223 struct zap_leaf_free *zlf = &l->l_phys->l_chunk[chunk].l_free; 224 ASSERT3U(l->lh_nfree, <, ZAP_LEAF_NUMCHUNKS); 225 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 226 ASSERT(zlf->lf_type != ZAP_LEAF_FREE); 227 228 #ifdef MEMCHECK 229 zap_memset(&l->l_phys->l_chunk[chunk], 0xf4, 230 sizeof (l->l_phys->l_chunk[chunk])); 231 #endif 232 233 zlf->lf_type = ZAP_LEAF_FREE; 234 zlf->lf_next = l->l_phys->l_hdr.lh_freelist; 235 bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ 236 l->l_phys->l_hdr.lh_freelist = chunk; 237 238 l->lh_nfree++; 239 } 240 241 242 /* 243 * Routines which manipulate leaf arrays (zap_leaf_array type chunks). 
244 */ 245 246 static uint16_t 247 zap_leaf_array_create(const zap_entry_handle_t *zeh, const char *buf, 248 int integer_size, int num_integers) 249 { 250 uint16_t chunk_head; 251 uint16_t *chunkp = &chunk_head; 252 int byten = 0; 253 uint64_t value; 254 int shift = (integer_size-1)*8; 255 int len = num_integers; 256 zap_leaf_t *l = zeh->zeh_found_leaf; 257 258 ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); 259 260 while (len > 0) { 261 uint16_t chunk = zap_leaf_chunk_alloc(l); 262 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 263 int i; 264 265 la->la_type = ZAP_LEAF_ARRAY; 266 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { 267 if (byten == 0) 268 value = ldv(integer_size, buf); 269 la->la_array[i] = (value & (0xff << shift)) >> shift; 270 value <<= 8; 271 if (++byten == integer_size) { 272 byten = 0; 273 buf += integer_size; 274 if (--len == 0) 275 break; 276 } 277 } 278 279 *chunkp = chunk; 280 chunkp = &la->la_next; 281 } 282 *chunkp = CHAIN_END; 283 284 return (chunk_head); 285 } 286 287 static void 288 zap_leaf_array_free(zap_entry_handle_t *zeh, uint16_t *chunkp) 289 { 290 uint16_t chunk = *chunkp; 291 zap_leaf_t *l = zeh->zeh_found_leaf; 292 293 *chunkp = CHAIN_END; 294 295 while (chunk != CHAIN_END) { 296 int nextchunk = l->l_phys->l_chunk[chunk].l_array.la_next; 297 ASSERT3U(l->l_phys->l_chunk[chunk].l_array.la_type, ==, 298 ZAP_LEAF_ARRAY); 299 zap_leaf_chunk_free(l, chunk); 300 chunk = nextchunk; 301 } 302 } 303 304 /* array_len and buf_len are in integers, not bytes */ 305 static void 306 zap_leaf_array_read(const zap_entry_handle_t *zeh, uint16_t chunk, 307 int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, 308 char *buf) 309 { 310 int len = MIN(array_len, buf_len); 311 int byten = 0; 312 uint64_t value = 0; 313 zap_leaf_t *l = zeh->zeh_found_leaf; 314 315 ASSERT3U(array_int_len, <=, buf_int_len); 316 317 /* Fast path for one 8-byte integer */ 318 if (array_int_len == 8 && buf_int_len == 8 && len == 1) { 
319 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 320 uint64_t *buf64 = (uint64_t *)buf; 321 uint64_t val = *(uint64_t *)la->la_array; 322 *buf64 = BE_64(val); 323 return; 324 } 325 326 /* Fast path for an array of 1-byte integers (eg. the entry name) */ 327 if (array_int_len == 1 && buf_int_len == 1 && 328 buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { 329 while (chunk != CHAIN_END) { 330 struct zap_leaf_array *la = 331 &l->l_phys->l_chunk[chunk].l_array; 332 bcopy(la->la_array, buf, ZAP_LEAF_ARRAY_BYTES); 333 buf += ZAP_LEAF_ARRAY_BYTES; 334 chunk = la->la_next; 335 } 336 return; 337 } 338 339 while (len > 0) { 340 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 341 int i; 342 343 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 344 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { 345 value = (value << 8) | la->la_array[i]; 346 byten++; 347 if (byten == array_int_len) { 348 stv(buf_int_len, buf, value); 349 byten = 0; 350 len--; 351 if (len == 0) 352 return; 353 buf += buf_int_len; 354 } 355 } 356 chunk = la->la_next; 357 } 358 } 359 360 /* 361 * Only to be used on 8-bit arrays. 362 * array_len is actual len in bytes (not encoded le_value_length). 363 * buf is null-terminated. 364 */ 365 static int 366 zap_leaf_array_equal(const zap_entry_handle_t *zeh, int chunk, 367 int array_len, const char *buf) 368 { 369 int bseen = 0; 370 zap_leaf_t *l = zeh->zeh_found_leaf; 371 372 while (bseen < array_len) { 373 struct zap_leaf_array *la = &l->l_phys->l_chunk[chunk].l_array; 374 int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); 375 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS); 376 if (bcmp(la->la_array, buf + bseen, toread)) 377 break; 378 chunk = la->la_next; 379 bseen += toread; 380 } 381 return (bseen == array_len); 382 } 383 384 /* 385 * Routines which manipulate leaf entries. 
 */

/*
 * Look up an entry by name and hash h in leaf l (and any chained
 * leaves).  On success, fill in zeh and return 0; otherwise ENOENT.
 */
int
zap_leaf_lookup(zap_leaf_t *l,
    const char *name, uint64_t h, zap_entry_handle_t *zeh)
{
	uint16_t *chunkp;
	struct zap_leaf_entry *le;

	zeh->zeh_head_leaf = l;

again:
	ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC);

	/* walk the hash-bucket chain for h in this leaf */
	for (chunkp = LEAF_HASH_ENTPTR(l, h);
	    *chunkp != CHAIN_END; chunkp = &le->le_next) {
		uint16_t chunk = *chunkp;
		le = &l->l_phys->l_chunk[chunk].l_entry;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS);
		ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY);

		if (le->le_hash != h)
			continue;

		/* set zeh_found_leaf first: array_equal reads it */
		zeh->zeh_found_leaf = l;
		if (zap_leaf_array_equal(zeh, le->le_name_chunk,
		    le->le_name_length, name)) {
			zeh->zeh_num_integers = le->le_value_length;
			zeh->zeh_integer_size = le->le_int_size;
			zeh->zeh_cd = le->le_cd;
			zeh->zeh_hash = le->le_hash;
			zeh->zeh_chunkp = chunkp;
			zeh->zeh_found_leaf = l;
			return (0);
		}
	}

	/* not in this leaf; try the next leaf in the chain, if any */
	if (l->l_next) {
		l = l->l_next;
		goto again;
	}

	return (ENOENT);
}

/* Return (h1,cd1 >= h2,cd2) */
#define HCD_GTEQ(h1, cd1, h2, cd2) \
	((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))

/*
 * Find the entry with the smallest (hash, cd) that is >= (h, cd),
 * searching l and its chained leaves.  On success fill in zeh (using
 * zeh_fakechunk, since no stable chain pointer exists) and return 0;
 * ENOENT if no such entry.
 */
int
zap_leaf_lookup_closest(zap_leaf_t *l,
    uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
{
	uint16_t chunk;
	uint64_t besth = -1ULL;
	uint32_t bestcd = ZAP_MAXCD;
	uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES-1;
	uint16_t lh;
	struct zap_leaf_entry *le;

	zeh->zeh_head_leaf = l;

again:
	ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC);

	/*
	 * Scan buckets from h's bucket upward; once a candidate is found
	 * in bucket bestlh, later buckets cannot hold a smaller match.
	 */
	for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
		for (chunk = l->l_phys->l_hash[lh];
		    chunk != CHAIN_END; chunk = le->le_next) {
			le = &l->l_phys->l_chunk[chunk].l_entry;

			ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS);
			ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY);

			/* entry >= (h,cd) and <= best-so-far? */
			if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
			    HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
				ASSERT3U(bestlh, >=, lh);
				bestlh = lh;
				besth = le->le_hash;
				bestcd = le->le_cd;

				zeh->zeh_num_integers = le->le_value_length;
				zeh->zeh_integer_size = le->le_int_size;
				zeh->zeh_cd = le->le_cd;
				zeh->zeh_hash = le->le_hash;
				zeh->zeh_fakechunk = chunk;
				zeh->zeh_chunkp = &zeh->zeh_fakechunk;
				zeh->zeh_found_leaf = l;
			}
		}
	}

	/* also consider chained leaves; they cover the same hash range */
	if (l->l_next) {
		l = l->l_next;
		goto again;
	}

	/* bestcd still ZAP_MAXCD means no candidate was ever accepted */
	return (bestcd == ZAP_MAXCD ? ENOENT : 0);
}

/*
 * Read the value of the entry referenced by zeh into buf as
 * num_integers integers of integer_size bytes.  Returns EINVAL if the
 * stored integers are wider than requested, EOVERFLOW if the value was
 * truncated to fit buf (buf is still filled in that case).
 */
int
zap_entry_read(const zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, void *buf)
{
	struct zap_leaf_entry *le;

	le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry;
	ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY);

	/* can widen integers on read, but never narrow them */
	if (le->le_int_size > integer_size)
		return (EINVAL);

	zap_leaf_array_read(zeh, le->le_value_chunk, le->le_int_size,
	    le->le_value_length, integer_size, num_integers, buf);

	if (zeh->zeh_num_integers > num_integers)
		return (EOVERFLOW);
	return (0);

}

/*
 * Copy the entry's name (null-terminated) into buf.  Returns EOVERFLOW
 * if the name did not fit in buflen bytes.
 */
int
zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf)
{
	struct zap_leaf_entry *le;

	le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry;
	ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY);

	zap_leaf_array_read(zeh, le->le_name_chunk, 1,
	    le->le_name_length, 1, buflen, buf);
	if (le->le_name_length > buflen)
		return (EOVERFLOW);
	return (0);
}

/*
 * Replace the value of the entry referenced by zeh.  Returns EAGAIN if
 * this leaf lacks the additional chunks needed for the new value.
 */
int
zap_entry_update(zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, const void *buf)
{
	int delta_chunks;
	struct zap_leaf_entry *le;
	le = &zeh->zeh_found_leaf->l_phys->l_chunk[*zeh->zeh_chunkp].l_entry;

	/* chunks needed for the new value minus those the old one frees */
	delta_chunks = NCHUNKS(num_integers * integer_size) -
	    NCHUNKS(le->le_value_length * le->le_int_size);

	if (zeh->zeh_found_leaf->lh_nfree < delta_chunks)
		return (EAGAIN);

	/*
	 * We should search other chained leaves (via
	 * zap_entry_remove,create?) otherwise returning EAGAIN will
	 * just send us into an infinite loop if we have to chain
	 * another leaf block, rather than being able to split this
	 * block.
	 */

	zap_leaf_array_free(zeh, &le->le_value_chunk);
	le->le_value_chunk =
	    zap_leaf_array_create(zeh, buf, integer_size, num_integers);
	/* oversize values encode length as MAX_ARRAY_BYTES+1 sentinel */
	le->le_value_length = (num_integers*integer_size > MAX_ARRAY_BYTES) ?
	    (MAX_ARRAY_BYTES + 1) : (num_integers);
	le->le_int_size = integer_size;
	return (0);
}

/*
 * Remove the entry referenced by zeh from its leaf, freeing its name
 * and value chunk chains and unlinking it from the hash chain.
 */
void
zap_entry_remove(zap_entry_handle_t *zeh)
{
	uint16_t entry_chunk;
	struct zap_leaf_entry *le;
	zap_leaf_t *l = zeh->zeh_found_leaf;

	/* a fakechunk handle (from lookup_closest) cannot be unlinked */
	ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);

	entry_chunk = *zeh->zeh_chunkp;
	le = &l->l_phys->l_chunk[entry_chunk].l_entry;
	ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY);

	zap_leaf_array_free(zeh, &le->le_name_chunk);
	zap_leaf_array_free(zeh, &le->le_value_chunk);

	/* splice this entry out of its hash chain */
	*zeh->zeh_chunkp = le->le_next;
	zap_leaf_chunk_free(l, entry_chunk);

	l->lh_nentries--;
}

/*
 * Create a new entry with the given name, hash h, and collision
 * differentiator cd in leaf l's chain.  cd == ZAP_MAXCD means "pick an
 * unused cd for this hash".  Returns 0 and fills in zeh on success;
 * ENAMETOOLONG, E2BIG, ENOSPC (no free cd), or EAGAIN (no leaf in the
 * chain has room) on failure.
 */
int
zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
    uint8_t integer_size, uint64_t num_integers, const void *buf,
    zap_entry_handle_t *zeh)
{
	uint16_t chunk;
	uint16_t *chunkp;
	struct zap_leaf_entry *le;
	uint64_t namelen, valuelen;
	int numchunks;

	valuelen = integer_size * num_integers;
	namelen = strlen(name) + 1;	/* include the terminating NUL */
	ASSERT(namelen >= 2);

	zeh->zeh_head_leaf = l;

	if (namelen > MAXNAMELEN)
		return (ENAMETOOLONG);
	/* find the first leaf in the chain that has sufficient free space */
	numchunks = 1 + NCHUNKS(namelen) + NCHUNKS(valuelen);
	if (numchunks > ZAP_LEAF_NUMCHUNKS)
		return (E2BIG);

	if (cd == ZAP_MAXCD) {
		/*
		 * Pick the lowest cd not already used by an entry with
		 * this hash anywhere in the leaf chain.
		 */
		for (cd = 0; cd < ZAP_MAXCD; cd++) {
			zap_leaf_t *ll;
			for (ll = l; ll; ll = ll->l_next) {
				for (chunk = *LEAF_HASH_ENTPTR(ll, h);
				    chunk != CHAIN_END; chunk = le->le_next) {
					le = &ll->l_phys->l_chunk
					    [chunk].l_entry;
					if (le->le_hash == h &&
					    le->le_cd == cd) {
						break;
					}
				}
				/*
				 * if this cd is in use, no need to
				 * check more chained leafs
				 */
				if (chunk != CHAIN_END)
					break;
			}
			/* If this cd is not in use, we are good. */
			if (chunk == CHAIN_END)
				break;
		}
		/* If we tried all the cd's, we lose. */
		if (cd == ZAP_MAXCD)
			return (ENOSPC);
	}

	/* find a leaf in the chain with room for all the chunks at once */
	for (; l; l = l->l_next)
		if (l->lh_nfree >= numchunks)
			break;
	if (l == NULL)
		return (EAGAIN);

	zeh->zeh_found_leaf = l;

	/* make the entry */
	chunk = zap_leaf_chunk_alloc(l);
	le = &l->l_phys->l_chunk[chunk].l_entry;
	le->le_type = ZAP_LEAF_ENTRY;
	le->le_name_chunk = zap_leaf_array_create(zeh, name, 1, namelen);
	le->le_name_length = namelen;
	le->le_value_chunk =
	    zap_leaf_array_create(zeh, buf, integer_size, num_integers);
	/* oversize values encode length as MAX_ARRAY_BYTES+1 sentinel */
	le->le_value_length = (num_integers*integer_size > MAX_ARRAY_BYTES) ?
	    (MAX_ARRAY_BYTES + 1) : (num_integers);
	le->le_int_size = integer_size;
	le->le_hash = h;
	le->le_cd = cd;

	/* link it into the hash chain */
	chunkp = LEAF_HASH_ENTPTR(l, h);
	le->le_next = *chunkp;
	*chunkp = chunk;

	l->lh_nentries++;

	zeh->zeh_num_integers = num_integers;
	zeh->zeh_integer_size = le->le_int_size;
	zeh->zeh_cd = le->le_cd;
	zeh->zeh_hash = le->le_hash;
	zeh->zeh_chunkp = chunkp;

	return (0);
}

/*
 * Routines for transferring entries between leafs.
 */

/*
 * Re-link entry chunk 'entry' onto the head of the hash chain that its
 * le_hash maps to in leaf l.
 */
static void
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
{
	struct zap_leaf_entry *le = &l->l_phys->l_chunk[entry].l_entry;
	uint16_t *ptr = LEAF_HASH_ENTPTR(l, le->le_hash);
	le->le_next = *ptr;
	*ptr = entry;
}

/*
 * Rebuild leaf l's hash table from scratch by scanning every chunk and
 * re-linking each ZAP_LEAF_ENTRY chunk into its bucket.
 */
static void
zap_leaf_rehash_entries(zap_leaf_t *l)
{
	int i;

	if (l->lh_nentries == 0)
		return;

	/* break existing hash chains */
	zap_memset(l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash));

	for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) {
		struct zap_leaf_entry *le = &l->l_phys->l_chunk[i].l_entry;
		if (le->le_type != ZAP_LEAF_ENTRY)
			continue;
		zap_leaf_rehash_entry(l, i);
	}
}

/*
 * Move an array chunk chain from leaf l to leaf nl, freeing the source
 * chunks as we go.  Returns the head chunk index in nl.
 */
static uint16_t
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
	uint16_t new_chunk;
	uint16_t *nchunkp = &new_chunk;

	while (chunk != CHAIN_END) {
		uint16_t nchunk = zap_leaf_chunk_alloc(nl);
		struct zap_leaf_array *nla =
		    &nl->l_phys->l_chunk[nchunk].l_array;
		struct zap_leaf_array *la =
		    &l->l_phys->l_chunk[chunk].l_array;
		int nextchunk = la->la_next;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS);
		ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS);

		/* copy the whole chunk; la_next is patched below */
		*nla = *la;

		zap_leaf_chunk_free(l, chunk);
		chunk = nextchunk;
		*nchunkp = nchunk;
		nchunkp = &nla->la_next;
	}
	*nchunkp = CHAIN_END;
	return (new_chunk);
}

/*
 * Move entry chunk 'entry' (and its name/value chains) from leaf l to
 * the destination chain headed by nhl, chaining a new leaf onto nhl if
 * no leaf in that chain has enough free chunks.
 */
static void
zap_leaf_transfer_entry(zap_t *zap, zap_leaf_t *l, int entry, zap_leaf_t *nhl,
    dmu_tx_t *tx)
{
	zap_leaf_t *nl;
	struct zap_leaf_entry *le, *nle;
	uint16_t chunk, nchunks;

	le = &l->l_phys->l_chunk[entry].l_entry;
	ASSERT3U(le->le_type, ==, ZAP_LEAF_ENTRY);

	/* find a leaf in the destination leaf chain with enough free space */
	nchunks = 1 + NCHUNKS(le->le_name_length) +
	    NCHUNKS(le->le_value_length * le->le_int_size);
	for (nl = nhl; nl; nl = nl->l_next)
		if (nl->lh_nfree >= nchunks)
			break;
	if (nl == NULL) {
		nl = zap_leaf_chainmore(nhl, zap_create_leaf(zap, tx));
		dprintf("transfer_entry: chaining leaf %x/%d\n",
		    nl->lh_prefix, nl->lh_prefix_len);
	}

	chunk = zap_leaf_chunk_alloc(nl);
	nle = &nl->l_phys->l_chunk[chunk].l_entry;
	*nle = *le;

	zap_leaf_rehash_entry(nl, chunk);

	nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
	nle->le_value_chunk =
	    zap_leaf_transfer_array(l, le->le_value_chunk, nl);

	zap_leaf_chunk_free(l, entry);

	l->lh_nentries--;
	nl->lh_nentries++;
}

/*
 * Transfer entries whose hash bit 'bit' is 1 to nl1, and 0 to nl0.
 * Ignore leaf chaining in source (l), but chain in destinations.
 * We'll re-chain all the entries in l as we go along.
 */
static void
zap_leaf_transfer_entries(zap_t *zap, zap_leaf_t *l,
    zap_leaf_t *nl0, zap_leaf_t *nl1, int bit, dmu_tx_t *tx)
{
	int i;

	ASSERT(bit < 64 && bit >= 0);
	/* break existing hash chains */
	zap_memset(l->l_phys->l_hash, CHAIN_END, sizeof (l->l_phys->l_hash));

	/* rebuild destination hash tables (skip aliases of l/each other) */
	if (nl0 != l)
		zap_leaf_rehash_entries(nl0);
	if (nl1 != nl0)
		zap_leaf_rehash_entries(nl1);

	for (i = 0; i < ZAP_LEAF_NUMCHUNKS; i++) {
		struct zap_leaf_entry *le = &l->l_phys->l_chunk[i].l_entry;
		if (le->le_type != ZAP_LEAF_ENTRY)
			continue;

		/*
		 * We could find entries via hashtable instead. That
		 * would be O(hashents+numents) rather than
		 * O(numblks+numents), but this accesses memory more
		 * sequentially, and when we're called, the block is
		 * usually pretty full.
		 */

		if (le->le_hash & (1ULL << bit)) {
			zap_leaf_transfer_entry(zap, l, i, nl1, tx);
		} else {
			/* staying in l: just re-link into l's hash table */
			if (nl0 == l)
				zap_leaf_rehash_entry(l, i);
			else
				zap_leaf_transfer_entry(zap, l, i, nl0, tx);
		}
	}

}

/*
 * nl will contain the entries whose hash prefix ends in 1
 * handles leaf chaining
 */
zap_leaf_t *
zap_leaf_split(zap_t *zap, zap_leaf_t *hl, dmu_tx_t *tx)
{
	zap_leaf_t *l = hl;
	/* the hash bit that newly distinguishes hl (0) from nl (1) */
	int bit = 64 - 1 - hl->lh_prefix_len;
	zap_leaf_t *nl = zap_create_leaf(zap, tx);

	/* set new prefix and prefix_len */
	hl->lh_prefix <<= 1;
	hl->lh_prefix_len++;
	nl->lh_prefix = hl->lh_prefix | 1;
	nl->lh_prefix_len = hl->lh_prefix_len;

	/* transfer odd entries from first leaf in hl chain to nl */
	zap_leaf_transfer_entries(zap, hl, hl, nl, bit, tx);

	/* take rest of chain off hl */
	l = hl->l_next;
	hl->l_next = NULL;
	hl->lh_next = 0;

	/* transfer even entries from hl chain back to hl, odd entries to nl */
	while (l) {
		zap_leaf_t *next = l->l_next;
		zap_leaf_transfer_entries(zap, l, hl, nl, bit, tx);
		zap_destroy_leaf(zap, l, tx);
		l = next;
	}

	return (nl);
}

/*
 * Accumulate histogram statistics for leaf l (and its chained leaves)
 * into zs.
 */
void
zap_stats_leaf(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
{
	int n, nchained = 0;

	/* how many pointer-table entries point at this leaf (as 2^n) */
	n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - l->lh_prefix_len;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_leafs_with_2n_pointers[n]++;

	do {
		int i;

		n = l->lh_nentries/5;
		n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
		zs->zs_blocks_with_n5_entries[n]++;

		/* fullness in tenths, based on free chunks vs block size */
		n = ((1<<ZAP_BLOCK_SHIFT) -
		    l->lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
		    (1<<ZAP_BLOCK_SHIFT);
		n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
		zs->zs_blocks_n_tenths_full[n]++;

		for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES; i++) {
			int nentries = 0;
			int chunk = l->l_phys->l_hash[i];

			while (chunk != CHAIN_END) {
				struct zap_leaf_entry *le =
				    &l->l_phys->l_chunk[chunk].l_entry;

				n = 1 + NCHUNKS(le->le_name_length) +
				    NCHUNKS(le->le_value_length *
					le->le_int_size);
				n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
				zs->zs_entries_using_n_chunks[n]++;

				chunk = le->le_next;
				nentries++;
			}

			n = nentries;
			n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
			zs->zs_buckets_with_n_entries[n]++;
		}

		nchained++;
		l = l->l_next;
	} while (l);

	n = nchained-1;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_leafs_with_n_chained[n]++;
}