1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2011, 2017 by Delphix. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 * Copyright 2017 Nexenta Systems, Inc. 27 */ 28 29 #include <stdio.h> 30 #include <unistd.h> 31 #include <stdio_ext.h> 32 #include <stdlib.h> 33 #include <ctype.h> 34 #include <sys/zfs_context.h> 35 #include <sys/spa.h> 36 #include <sys/spa_impl.h> 37 #include <sys/dmu.h> 38 #include <sys/zap.h> 39 #include <sys/fs/zfs.h> 40 #include <sys/zfs_znode.h> 41 #include <sys/zfs_sa.h> 42 #include <sys/sa.h> 43 #include <sys/sa_impl.h> 44 #include <sys/vdev.h> 45 #include <sys/vdev_impl.h> 46 #include <sys/metaslab_impl.h> 47 #include <sys/dmu_objset.h> 48 #include <sys/dsl_dir.h> 49 #include <sys/dsl_dataset.h> 50 #include <sys/dsl_pool.h> 51 #include <sys/dbuf.h> 52 #include <sys/zil.h> 53 #include <sys/zil_impl.h> 54 #include <sys/stat.h> 55 #include <sys/resource.h> 56 #include <sys/dmu_traverse.h> 57 #include <sys/zio_checksum.h> 58 #include <sys/zio_compress.h> 59 #include <sys/zfs_fuid.h> 60 #include <sys/arc.h> 61 #include <sys/ddt.h> 62 #include <sys/zfeature.h> 63 #include <sys/abd.h> 64 #include <sys/blkptr.h> 65 #include <zfs_comutil.h> 66 #include <libcmdutils.h> 67 #undef verify 68 #include <libzfs.h> 69 70 #include "zdb.h" 71 72 #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ 73 zio_compress_table[(idx)].ci_name : "UNKNOWN") 74 #define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \ 75 zio_checksum_table[(idx)].ci_name : "UNKNOWN") 76 #define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \ 77 dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? \ 78 dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN") 79 #define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \ 80 (idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \ 81 DMU_OT_ZAP_OTHER : \ 82 (idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \ 83 DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES) 84 85 #ifndef lint 86 extern int reference_tracking_enable; 87 extern boolean_t zfs_recover; 88 extern uint64_t zfs_arc_max, zfs_arc_meta_limit; 89 extern int zfs_vdev_async_read_max_active; 90 #else 91 int reference_tracking_enable; 92 boolean_t zfs_recover; 93 uint64_t zfs_arc_max, zfs_arc_meta_limit; 94 int zfs_vdev_async_read_max_active; 95 #endif 96 97 static const char cmdname[] = "zdb"; 98 uint8_t dump_opt[256]; 99 100 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); 101 102 uint64_t *zopt_object = NULL; 103 static unsigned zopt_objects = 0; 104 libzfs_handle_t *g_zfs; 105 uint64_t max_inflight = 1000; 106 107 static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *); 108 109 /* 110 * These libumem hooks provide a reasonable set of defaults for the allocator's 111 * debugging facilities. 112 */ 113 const char * 114 _umem_debug_init() 115 { 116 return ("default,verbose"); /* $UMEM_DEBUG setting */ 117 } 118 119 const char * 120 _umem_logging_init(void) 121 { 122 return ("fail,contents"); /* $UMEM_LOGGING setting */ 123 } 124 125 static void 126 usage(void) 127 { 128 (void) fprintf(stderr, 129 "Usage:\t%s [-AbcdDFGhiLMPsvX] [-e [-V] [-p <path> ...]] " 130 "[-I <inflight I/Os>]\n" 131 "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n" 132 "\t\t[<poolname> [<object> ...]]\n" 133 "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> " 134 "[<object> ...]\n" 135 "\t%s -C [-A] [-U <cache>]\n" 136 "\t%s -l [-Aqu] <device>\n" 137 "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] " 138 "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n" 139 "\t%s -O <dataset> <path>\n" 140 "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n" 141 "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n" 142 "\t%s -E [-A] word0:word1:...:word15\n" 143 "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] " 144 "<poolname>\n\n", 145 cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, 146 cmdname, cmdname); 147 148 (void) fprintf(stderr, " Dataset name must include at least one " 149 "separator character '/' or '@'\n"); 150 (void) fprintf(stderr, " If dataset name is specified, only that " 151 "dataset is dumped\n"); 152 (void) fprintf(stderr, " If object numbers are specified, only " 153 "those objects are dumped\n\n"); 154 (void) fprintf(stderr, " Options to control amount of output:\n"); 155 (void) fprintf(stderr, " -b block statistics\n"); 156 (void) fprintf(stderr, " -c checksum all metadata (twice for " 157 "all data) blocks\n"); 158 (void) fprintf(stderr, " -C config (or cachefile if alone)\n"); 159 (void) fprintf(stderr, " -d dataset(s)\n"); 160 (void) fprintf(stderr, " -D dedup statistics\n"); 161 (void) fprintf(stderr, " -E decode and display block from an " 162 "embedded block pointer\n"); 163 (void) fprintf(stderr, " -h pool history\n"); 164 (void) fprintf(stderr, " -i intent logs\n"); 165 (void) fprintf(stderr, " -l read label contents\n"); 166 (void) fprintf(stderr, " -L disable leak tracking (do not " 167 "load spacemaps)\n"); 168 (void) fprintf(stderr, " -m metaslabs\n"); 169 (void) fprintf(stderr, " -M metaslab groups\n"); 170 (void) fprintf(stderr, " -O perform object lookups by path\n"); 171 (void) fprintf(stderr, " -R read and display block from a " 172 "device\n"); 173 (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); 174 (void) fprintf(stderr, " -S simulate dedup to measure effect\n"); 175 (void) fprintf(stderr, " -v verbose (applies to all " 176 "others)\n\n"); 177 (void) fprintf(stderr, " Below options are intended for use " 178 "with other options:\n"); 179 (void) fprintf(stderr, " -A ignore assertions (-A), enable " 180 "panic recovery (-AA) or both (-AAA)\n"); 181 (void) fprintf(stderr, " -e pool is exported/destroyed/" 182 "has altroot/not in a cachefile\n"); 183 (void) fprintf(stderr, " -F attempt automatic rewind within " 184 "safe range of transaction groups\n"); 185 (void) fprintf(stderr, " -G dump zfs_dbgmsg buffer before " 186 "exiting\n"); 187 (void) fprintf(stderr, " -I <number of inflight I/Os> -- " 188 "specify the maximum number of " 189 "checksumming I/Os [default is 200]\n"); 190 (void) fprintf(stderr, " -o <variable>=<value> set global " 191 "variable to an unsigned 32-bit integer value\n"); 192 (void) fprintf(stderr, " -p <path> -- use one or more with " 193 "-e to specify path to vdev dir\n"); 194 (void) fprintf(stderr, " -P print numbers in parseable form\n"); 195 (void) fprintf(stderr, " -q don't print label contents\n"); 196 (void) fprintf(stderr, " -t <txg> -- highest txg to use when " 197 "searching for uberblocks\n"); 198 (void) fprintf(stderr, " -u uberblock\n"); 199 (void) fprintf(stderr, " -U <cachefile_path> -- use alternate " 200 "cachefile\n"); 201 (void) fprintf(stderr, " -V do verbatim import\n"); 202 (void) fprintf(stderr, " -x <dumpdir> -- " 203 "dump all read blocks into specified directory\n"); 204 (void) fprintf(stderr, " -X attempt extreme rewind (does not " 205 "work with dataset)\n\n"); 206 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " 207 "to make only that option verbose\n"); 208 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); 209 exit(1); 210 } 211 212 static void 213 dump_debug_buffer() 214 { 215 if (dump_opt['G']) { 216 (void) printf("\n"); 217 zfs_dbgmsg_print("zdb"); 218 } 219 } 220 221 /* 222 * Called for usage errors that are discovered after a call to spa_open(), 223 * dmu_bonus_hold(), or pool_match(). abort() is called for other errors. 224 */ 225 226 static void 227 fatal(const char *fmt, ...) 228 { 229 va_list ap; 230 231 va_start(ap, fmt); 232 (void) fprintf(stderr, "%s: ", cmdname); 233 (void) vfprintf(stderr, fmt, ap); 234 va_end(ap); 235 (void) fprintf(stderr, "\n"); 236 237 dump_debug_buffer(); 238 239 exit(1); 240 } 241 242 /* ARGSUSED */ 243 static void 244 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) 245 { 246 nvlist_t *nv; 247 size_t nvsize = *(uint64_t *)data; 248 char *packed = umem_alloc(nvsize, UMEM_NOFAIL); 249 250 VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); 251 252 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); 253 254 umem_free(packed, nvsize); 255 256 dump_nvlist(nv, 8); 257 258 nvlist_free(nv); 259 } 260 261 /* ARGSUSED */ 262 static void 263 dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size) 264 { 265 spa_history_phys_t *shp = data; 266 267 if (shp == NULL) 268 return; 269 270 (void) printf("\t\tpool_create_len = %llu\n", 271 (u_longlong_t)shp->sh_pool_create_len); 272 (void) printf("\t\tphys_max_off = %llu\n", 273 (u_longlong_t)shp->sh_phys_max_off); 274 (void) printf("\t\tbof = %llu\n", 275 (u_longlong_t)shp->sh_bof); 276 (void) printf("\t\teof = %llu\n", 277 (u_longlong_t)shp->sh_eof); 278 (void) printf("\t\trecords_lost = %llu\n", 279 (u_longlong_t)shp->sh_records_lost); 280 } 281 282 static void 283 zdb_nicenum(uint64_t num, char *buf, size_t buflen) 284 { 285 if (dump_opt['P']) 286 (void) snprintf(buf, buflen, "%llu", (longlong_t)num); 287 else 288 nicenum(num, buf, sizeof (buf)); 289 } 290 291 static const char histo_stars[] = "****************************************"; 292 static const uint64_t histo_width = sizeof (histo_stars) - 1; 293 294 static void 295 dump_histogram(const uint64_t *histo, int size, int offset) 296 { 297 int i; 298 int minidx = size - 1; 299 int maxidx = 0; 300 uint64_t max = 0; 301 302 for (i = 0; i < size; i++) { 303 if (histo[i] > max) 304 max = histo[i]; 305 if (histo[i] > 0 && i > maxidx) 306 maxidx = i; 307 if (histo[i] > 0 && i < minidx) 308 minidx = i; 309 } 310 311 if (max < histo_width) 312 max = histo_width; 313 314 for (i = minidx; i <= maxidx; i++) { 315 (void) printf("\t\t\t%3u: %6llu %s\n", 316 i + offset, (u_longlong_t)histo[i], 317 &histo_stars[(max - histo[i]) * histo_width / max]); 318 } 319 } 320 321 static void 322 dump_zap_stats(objset_t *os, uint64_t object) 323 { 324 int error; 325 zap_stats_t zs; 326 327 error = zap_get_stats(os, object, &zs); 328 if (error) 329 return; 330 331 if (zs.zs_ptrtbl_len == 0) { 332 ASSERT(zs.zs_num_blocks == 1); 333 (void) printf("\tmicrozap: %llu bytes, %llu entries\n", 334 (u_longlong_t)zs.zs_blocksize, 335 (u_longlong_t)zs.zs_num_entries); 336 return; 337 } 338 339 (void) printf("\tFat ZAP stats:\n"); 340 341 (void) printf("\t\tPointer table:\n"); 342 (void) printf("\t\t\t%llu elements\n", 343 (u_longlong_t)zs.zs_ptrtbl_len); 344 (void) printf("\t\t\tzt_blk: %llu\n", 345 (u_longlong_t)zs.zs_ptrtbl_zt_blk); 346 (void) printf("\t\t\tzt_numblks: %llu\n", 347 (u_longlong_t)zs.zs_ptrtbl_zt_numblks); 348 (void) printf("\t\t\tzt_shift: %llu\n", 349 (u_longlong_t)zs.zs_ptrtbl_zt_shift); 350 (void) printf("\t\t\tzt_blks_copied: %llu\n", 351 (u_longlong_t)zs.zs_ptrtbl_blks_copied); 352 (void) printf("\t\t\tzt_nextblk: %llu\n", 353 (u_longlong_t)zs.zs_ptrtbl_nextblk); 354 355 (void) printf("\t\tZAP entries: %llu\n", 356 (u_longlong_t)zs.zs_num_entries); 357 (void) printf("\t\tLeaf blocks: %llu\n", 358 (u_longlong_t)zs.zs_num_leafs); 359 (void) printf("\t\tTotal blocks: %llu\n", 360 (u_longlong_t)zs.zs_num_blocks); 361 (void) printf("\t\tzap_block_type: 0x%llx\n", 362 (u_longlong_t)zs.zs_block_type); 363 (void) printf("\t\tzap_magic: 0x%llx\n", 364 (u_longlong_t)zs.zs_magic); 365 (void) printf("\t\tzap_salt: 0x%llx\n", 366 (u_longlong_t)zs.zs_salt); 367 368 (void) printf("\t\tLeafs with 2^n pointers:\n"); 369 dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0); 370 371 (void) printf("\t\tBlocks with n*5 entries:\n"); 372 dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0); 373 374 (void) printf("\t\tBlocks n/10 full:\n"); 375 dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0); 376 377 (void) printf("\t\tEntries with n chunks:\n"); 378 dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0); 379 380 (void) printf("\t\tBuckets with n entries:\n"); 381 dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0); 382 } 383 384 /*ARGSUSED*/ 385 static void 386 dump_none(objset_t *os, uint64_t object, void *data, size_t size) 387 { 388 } 389 390 /*ARGSUSED*/ 391 static void 392 dump_unknown(objset_t *os, uint64_t object, void *data, size_t size) 393 { 394 (void) printf("\tUNKNOWN OBJECT TYPE\n"); 395 } 396 397 /*ARGSUSED*/ 398 static void 399 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) 400 { 401 } 402 403 /*ARGSUSED*/ 404 static void 405 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) 406 { 407 } 408 409 /*ARGSUSED*/ 410 static void 411 dump_zap(objset_t *os, uint64_t object, void *data, size_t size) 412 { 413 zap_cursor_t zc; 414 zap_attribute_t attr; 415 void *prop; 416 unsigned i; 417 418 dump_zap_stats(os, object); 419 (void) printf("\n"); 420 421 for (zap_cursor_init(&zc, os, object); 422 zap_cursor_retrieve(&zc, &attr) == 0; 423 zap_cursor_advance(&zc)) { 424 (void) printf("\t\t%s = ", attr.za_name); 425 if (attr.za_num_integers == 0) { 426 (void) printf("\n"); 427 continue; 428 } 429 prop = umem_zalloc(attr.za_num_integers * 430 attr.za_integer_length, UMEM_NOFAIL); 431 (void) zap_lookup(os, object, attr.za_name, 432 attr.za_integer_length, attr.za_num_integers, prop); 433 if (attr.za_integer_length == 1) { 434 (void) printf("%s", (char *)prop); 435 } else { 436 for (i = 0; i < attr.za_num_integers; i++) { 437 switch (attr.za_integer_length) { 438 case 2: 439 (void) printf("%u ", 440 ((uint16_t *)prop)[i]); 441 break; 442 case 4: 443 (void) printf("%u ", 444 ((uint32_t *)prop)[i]); 445 break; 446 case 8: 447 (void) printf("%lld ", 448 (u_longlong_t)((int64_t *)prop)[i]); 449 break; 450 } 451 } 452 } 453 (void) printf("\n"); 454 umem_free(prop, attr.za_num_integers * attr.za_integer_length); 455 } 456 zap_cursor_fini(&zc); 457 } 458 459 static void 460 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size) 461 { 462 bpobj_phys_t *bpop = data; 463 char bytes[32], comp[32], uncomp[32]; 464 465 /* make sure the output won't get truncated */ 466 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); 467 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); 468 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ); 469 470 if (bpop == NULL) 471 return; 472 473 zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes)); 474 zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp)); 475 zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp)); 476 477 (void) printf("\t\tnum_blkptrs = %llu\n", 478 (u_longlong_t)bpop->bpo_num_blkptrs); 479 (void) printf("\t\tbytes = %s\n", bytes); 480 if (size >= BPOBJ_SIZE_V1) { 481 (void) printf("\t\tcomp = %s\n", comp); 482 (void) printf("\t\tuncomp = %s\n", uncomp); 483 } 484 if (size >= sizeof (*bpop)) { 485 (void) printf("\t\tsubobjs = %llu\n", 486 (u_longlong_t)bpop->bpo_subobjs); 487 (void) printf("\t\tnum_subobjs = %llu\n", 488 (u_longlong_t)bpop->bpo_num_subobjs); 489 } 490 491 if (dump_opt['d'] < 5) 492 return; 493 494 for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) { 495 char blkbuf[BP_SPRINTF_LEN]; 496 blkptr_t bp; 497 498 int err = dmu_read(os, object, 499 i * sizeof (bp), sizeof (bp), &bp, 0); 500 if (err != 0) { 501 (void) printf("got error %u from dmu_read\n", err); 502 break; 503 } 504 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp); 505 (void) printf("\t%s\n", blkbuf); 506 } 507 } 508 509 /* ARGSUSED */ 510 static void 511 dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size) 512 { 513 dmu_object_info_t doi; 514 515 VERIFY0(dmu_object_info(os, object, &doi)); 516 uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP); 517 518 int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0); 519 if (err != 0) { 520 (void) printf("got error %u from dmu_read\n", err); 521 kmem_free(subobjs, doi.doi_max_offset); 522 return; 523 } 524 525 int64_t last_nonzero = -1; 526 for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) { 527 if (subobjs[i] != 0) 528 last_nonzero = i; 529 } 530 531 for (int64_t i = 0; i <= last_nonzero; i++) { 532 (void) printf("\t%llu\n", (longlong_t)subobjs[i]); 533 } 534 kmem_free(subobjs, doi.doi_max_offset); 535 } 536 537 /*ARGSUSED*/ 538 static void 539 dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size) 540 { 541 dump_zap_stats(os, object); 542 /* contents are printed elsewhere, properly decoded */ 543 } 544 545 /*ARGSUSED*/ 546 static void 547 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size) 548 { 549 zap_cursor_t zc; 550 zap_attribute_t attr; 551 552 dump_zap_stats(os, object); 553 (void) printf("\n"); 554 555 for (zap_cursor_init(&zc, os, object); 556 zap_cursor_retrieve(&zc, &attr) == 0; 557 zap_cursor_advance(&zc)) { 558 (void) printf("\t\t%s = ", attr.za_name); 559 if (attr.za_num_integers == 0) { 560 (void) printf("\n"); 561 continue; 562 } 563 (void) printf(" %llx : [%d:%d:%d]\n", 564 (u_longlong_t)attr.za_first_integer, 565 (int)ATTR_LENGTH(attr.za_first_integer), 566 (int)ATTR_BSWAP(attr.za_first_integer), 567 (int)ATTR_NUM(attr.za_first_integer)); 568 } 569 zap_cursor_fini(&zc); 570 } 571 572 /*ARGSUSED*/ 573 static void 574 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size) 575 { 576 zap_cursor_t zc; 577 zap_attribute_t attr; 578 uint16_t *layout_attrs; 579 unsigned i; 580 581 dump_zap_stats(os, object); 582 (void) printf("\n"); 583 584 for (zap_cursor_init(&zc, os, object); 585 zap_cursor_retrieve(&zc, &attr) == 0; 586 zap_cursor_advance(&zc)) { 587 (void) printf("\t\t%s = [", attr.za_name); 588 if (attr.za_num_integers == 0) { 589 (void) printf("\n"); 590 continue; 591 } 592 593 VERIFY(attr.za_integer_length == 2); 594 layout_attrs = umem_zalloc(attr.za_num_integers * 595 attr.za_integer_length, UMEM_NOFAIL); 596 597 VERIFY(zap_lookup(os, object, attr.za_name, 598 attr.za_integer_length, 599 attr.za_num_integers, layout_attrs) == 0); 600 601 for (i = 0; i != attr.za_num_integers; i++) 602 (void) printf(" %d ", (int)layout_attrs[i]); 603 (void) printf("]\n"); 604 umem_free(layout_attrs, 605 attr.za_num_integers * attr.za_integer_length); 606 } 607 zap_cursor_fini(&zc); 608 } 609 610 /*ARGSUSED*/ 611 static void 612 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size) 613 { 614 zap_cursor_t zc; 615 zap_attribute_t attr; 616 const char *typenames[] = { 617 /* 0 */ "not specified", 618 /* 1 */ "FIFO", 619 /* 2 */ "Character Device", 620 /* 3 */ "3 (invalid)", 621 /* 4 */ "Directory", 622 /* 5 */ "5 (invalid)", 623 /* 6 */ "Block Device", 624 /* 7 */ "7 (invalid)", 625 /* 8 */ "Regular File", 626 /* 9 */ "9 (invalid)", 627 /* 10 */ "Symbolic Link", 628 /* 11 */ "11 (invalid)", 629 /* 12 */ "Socket", 630 /* 13 */ "Door", 631 /* 14 */ "Event Port", 632 /* 15 */ "15 (invalid)", 633 }; 634 635 dump_zap_stats(os, object); 636 (void) printf("\n"); 637 638 for (zap_cursor_init(&zc, os, object); 639 zap_cursor_retrieve(&zc, &attr) == 0; 640 zap_cursor_advance(&zc)) { 641 (void) printf("\t\t%s = %lld (type: %s)\n", 642 attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer), 643 typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]); 644 } 645 zap_cursor_fini(&zc); 646 } 647 648 static int 649 get_dtl_refcount(vdev_t *vd) 650 { 651 int refcount = 0; 652 653 if (vd->vdev_ops->vdev_op_leaf) { 654 space_map_t *sm = vd->vdev_dtl_sm; 655 656 if (sm != NULL && 657 sm->sm_dbuf->db_size == sizeof (space_map_phys_t)) 658 return (1); 659 return (0); 660 } 661 662 for (unsigned c = 0; c < vd->vdev_children; c++) 663 refcount += get_dtl_refcount(vd->vdev_child[c]); 664 return (refcount); 665 } 666 667 static int 668 get_metaslab_refcount(vdev_t *vd) 669 { 670 int refcount = 0; 671 672 if (vd->vdev_top == vd) { 673 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 674 space_map_t *sm = vd->vdev_ms[m]->ms_sm; 675 676 if (sm != NULL && 677 sm->sm_dbuf->db_size == sizeof (space_map_phys_t)) 678 refcount++; 679 } 680 } 681 for (unsigned c = 0; c < vd->vdev_children; c++) 682 refcount += get_metaslab_refcount(vd->vdev_child[c]); 683 684 return (refcount); 685 } 686 687 static int 688 get_obsolete_refcount(vdev_t *vd) 689 { 690 int refcount = 0; 691 692 uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd); 693 if (vd->vdev_top == vd && obsolete_sm_obj != 0) { 694 dmu_object_info_t doi; 695 VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset, 696 obsolete_sm_obj, &doi)); 697 if (doi.doi_bonus_size == sizeof (space_map_phys_t)) { 698 refcount++; 699 } 700 } else { 701 ASSERT3P(vd->vdev_obsolete_sm, ==, NULL); 702 ASSERT3U(obsolete_sm_obj, ==, 0); 703 } 704 for (unsigned c = 0; c < vd->vdev_children; c++) { 705 refcount += get_obsolete_refcount(vd->vdev_child[c]); 706 } 707 708 return (refcount); 709 } 710 711 static int 712 get_prev_obsolete_spacemap_refcount(spa_t *spa) 713 { 714 uint64_t prev_obj = 715 spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object; 716 if (prev_obj != 0) { 717 dmu_object_info_t doi; 718 VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi)); 719 if (doi.doi_bonus_size == sizeof (space_map_phys_t)) { 720 return (1); 721 } 722 } 723 return (0); 724 } 725 726 static int 727 verify_spacemap_refcounts(spa_t *spa) 728 { 729 uint64_t expected_refcount = 0; 730 uint64_t actual_refcount; 731 732 (void) feature_get_refcount(spa, 733 &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM], 734 &expected_refcount); 735 actual_refcount = get_dtl_refcount(spa->spa_root_vdev); 736 actual_refcount += get_metaslab_refcount(spa->spa_root_vdev); 737 actual_refcount += get_obsolete_refcount(spa->spa_root_vdev); 738 actual_refcount += get_prev_obsolete_spacemap_refcount(spa); 739 740 if (expected_refcount != actual_refcount) { 741 (void) printf("space map refcount mismatch: expected %lld != " 742 "actual %lld\n", 743 (longlong_t)expected_refcount, 744 (longlong_t)actual_refcount); 745 return (2); 746 } 747 return (0); 748 } 749 750 static void 751 dump_spacemap(objset_t *os, space_map_t *sm) 752 { 753 uint64_t alloc, offset, entry; 754 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID", 755 "INVALID", "INVALID", "INVALID", "INVALID" }; 756 757 if (sm == NULL) 758 return; 759 760 (void) printf("space map object %llu:\n", 761 (longlong_t)sm->sm_phys->smp_object); 762 (void) printf(" smp_objsize = 0x%llx\n", 763 (longlong_t)sm->sm_phys->smp_objsize); 764 (void) printf(" smp_alloc = 0x%llx\n", 765 (longlong_t)sm->sm_phys->smp_alloc); 766 767 /* 768 * Print out the freelist entries in both encoded and decoded form. 769 */ 770 alloc = 0; 771 for (offset = 0; offset < space_map_length(sm); 772 offset += sizeof (entry)) { 773 uint8_t mapshift = sm->sm_shift; 774 775 VERIFY0(dmu_read(os, space_map_object(sm), offset, 776 sizeof (entry), &entry, DMU_READ_PREFETCH)); 777 if (SM_DEBUG_DECODE(entry)) { 778 779 (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n", 780 (u_longlong_t)(offset / sizeof (entry)), 781 ddata[SM_DEBUG_ACTION_DECODE(entry)], 782 (u_longlong_t)SM_DEBUG_TXG_DECODE(entry), 783 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry)); 784 } else { 785 (void) printf("\t [%6llu] %c range:" 786 " %010llx-%010llx size: %06llx\n", 787 (u_longlong_t)(offset / sizeof (entry)), 788 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', 789 (u_longlong_t)((SM_OFFSET_DECODE(entry) << 790 mapshift) + sm->sm_start), 791 (u_longlong_t)((SM_OFFSET_DECODE(entry) << 792 mapshift) + sm->sm_start + 793 (SM_RUN_DECODE(entry) << mapshift)), 794 (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift)); 795 if (SM_TYPE_DECODE(entry) == SM_ALLOC) 796 alloc += SM_RUN_DECODE(entry) << mapshift; 797 else 798 alloc -= SM_RUN_DECODE(entry) << mapshift; 799 } 800 } 801 if (alloc != space_map_allocated(sm)) { 802 (void) printf("space_map_object alloc (%llu) INCONSISTENT " 803 "with space map summary (%llu)\n", 804 (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc); 805 } 806 } 807 808 static void 809 dump_metaslab_stats(metaslab_t *msp) 810 { 811 char maxbuf[32]; 812 range_tree_t *rt = msp->ms_tree; 813 avl_tree_t *t = &msp->ms_size_tree; 814 int free_pct = range_tree_space(rt) * 100 / msp->ms_size; 815 816 /* max sure nicenum has enough space */ 817 CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ); 818 819 zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf)); 820 821 (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n", 822 "segments", avl_numnodes(t), "maxsize", maxbuf, 823 "freepct", free_pct); 824 (void) printf("\tIn-memory histogram:\n"); 825 dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 826 } 827 828 static void 829 dump_metaslab(metaslab_t *msp) 830 { 831 vdev_t *vd = msp->ms_group->mg_vd; 832 spa_t *spa = vd->vdev_spa; 833 space_map_t *sm = msp->ms_sm; 834 char freebuf[32]; 835 836 zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf, 837 sizeof (freebuf)); 838 839 (void) printf( 840 "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n", 841 (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, 842 (u_longlong_t)space_map_object(sm), freebuf); 843 844 if (dump_opt['m'] > 2 && !dump_opt['L']) { 845 mutex_enter(&msp->ms_lock); 846 metaslab_load_wait(msp); 847 if (!msp->ms_loaded) { 848 VERIFY0(metaslab_load(msp)); 849 range_tree_stat_verify(msp->ms_tree); 850 } 851 dump_metaslab_stats(msp); 852 metaslab_unload(msp); 853 mutex_exit(&msp->ms_lock); 854 } 855 856 if (dump_opt['m'] > 1 && sm != NULL && 857 spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) { 858 /* 859 * The space map histogram represents free space in chunks 860 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift). 861 */ 862 (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n", 863 (u_longlong_t)msp->ms_fragmentation); 864 dump_histogram(sm->sm_phys->smp_histogram, 865 SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); 866 } 867 868 if (dump_opt['d'] > 5 || dump_opt['m'] > 3) { 869 ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift)); 870 871 dump_spacemap(spa->spa_meta_objset, msp->ms_sm); 872 } 873 } 874 875 static void 876 print_vdev_metaslab_header(vdev_t *vd) 877 { 878 (void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s %-10s\n", 879 (u_longlong_t)vd->vdev_id, 880 "metaslabs", (u_longlong_t)vd->vdev_ms_count, 881 "offset", "spacemap", "free"); 882 (void) printf("\t%15s %19s %15s %10s\n", 883 "---------------", "-------------------", 884 "---------------", "-------------"); 885 } 886 887 static void 888 dump_metaslab_groups(spa_t *spa) 889 { 890 vdev_t *rvd = spa->spa_root_vdev; 891 metaslab_class_t *mc = spa_normal_class(spa); 892 uint64_t fragmentation; 893 894 metaslab_class_histogram_verify(mc); 895 896 for (unsigned c = 0; c < rvd->vdev_children; c++) { 897 vdev_t *tvd = rvd->vdev_child[c]; 898 metaslab_group_t *mg = tvd->vdev_mg; 899 900 if (mg->mg_class != mc) 901 continue; 902 903 metaslab_group_histogram_verify(mg); 904 mg->mg_fragmentation = metaslab_group_fragmentation(mg); 905 906 (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t" 907 "fragmentation", 908 (u_longlong_t)tvd->vdev_id, 909 (u_longlong_t)tvd->vdev_ms_count); 910 if (mg->mg_fragmentation == ZFS_FRAG_INVALID) { 911 (void) printf("%3s\n", "-"); 912 } else { 913 (void) printf("%3llu%%\n", 914 (u_longlong_t)mg->mg_fragmentation); 915 } 916 dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 917 } 918 919 (void) printf("\tpool %s\tfragmentation", spa_name(spa)); 920 fragmentation = metaslab_class_fragmentation(mc); 921 if (fragmentation == ZFS_FRAG_INVALID) 922 (void) printf("\t%3s\n", "-"); 923 else 924 (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); 925 dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 926 } 927 928 static void 929 print_vdev_indirect(vdev_t *vd) 930 { 931 vdev_indirect_config_t *vic = &vd->vdev_indirect_config; 932 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 933 vdev_indirect_births_t *vib = vd->vdev_indirect_births; 934 935 if (vim == NULL) { 936 ASSERT3P(vib, ==, NULL); 937 return; 938 } 939 940 ASSERT3U(vdev_indirect_mapping_object(vim), ==, 941 vic->vic_mapping_object); 942 ASSERT3U(vdev_indirect_births_object(vib), ==, 943 vic->vic_births_object); 944 945 (void) printf("indirect births obj %llu:\n", 946 (longlong_t)vic->vic_births_object); 947 (void) printf(" vib_count = %llu\n", 948 (longlong_t)vdev_indirect_births_count(vib)); 949 for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) { 950 vdev_indirect_birth_entry_phys_t *cur_vibe = 951 &vib->vib_entries[i]; 952 (void) printf("\toffset %llx -> txg %llu\n", 953 (longlong_t)cur_vibe->vibe_offset, 954 (longlong_t)cur_vibe->vibe_phys_birth_txg); 955 } 956 (void) printf("\n"); 957 958 (void) printf("indirect mapping obj %llu:\n", 959 (longlong_t)vic->vic_mapping_object); 960 (void) printf(" vim_max_offset = 0x%llx\n", 961 (longlong_t)vdev_indirect_mapping_max_offset(vim)); 962 (void) printf(" vim_bytes_mapped = 0x%llx\n", 963 (longlong_t)vdev_indirect_mapping_bytes_mapped(vim)); 964 (void) printf(" vim_count = %llu\n", 965 (longlong_t)vdev_indirect_mapping_num_entries(vim)); 966 967 if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3) 968 return; 969 970 uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim); 971 972 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) { 973 vdev_indirect_mapping_entry_phys_t *vimep = 974 &vim->vim_entries[i]; 975 (void) printf("\t<%llx:%llx:%llx> -> " 976 "<%llx:%llx:%llx> (%x obsolete)\n", 977 (longlong_t)vd->vdev_id, 978 (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep), 979 (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), 980 (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst), 981 (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst), 982 (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), 983 counts[i]); 984 } 985 (void) printf("\n"); 986 987 uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd); 988 if (obsolete_sm_object != 0) { 989 objset_t *mos = vd->vdev_spa->spa_meta_objset; 990 (void) printf("obsolete space map object %llu:\n", 991 (u_longlong_t)obsolete_sm_object); 992 ASSERT(vd->vdev_obsolete_sm != NULL); 993 ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==, 994 obsolete_sm_object); 995 dump_spacemap(mos, vd->vdev_obsolete_sm); 996 (void) printf("\n"); 997 } 998 } 999 1000 static void 1001 dump_metaslabs(spa_t *spa) 1002 { 1003 vdev_t *vd, *rvd = spa->spa_root_vdev; 1004 uint64_t m, c = 0, children = rvd->vdev_children; 1005 1006 (void) printf("\nMetaslabs:\n"); 1007 1008 if (!dump_opt['d'] && zopt_objects > 0) { 1009 c = zopt_object[0]; 1010 1011 if (c >= children) 1012 (void) fatal("bad vdev id: %llu", (u_longlong_t)c); 1013 1014 if (zopt_objects > 1) { 1015 vd = rvd->vdev_child[c]; 1016 print_vdev_metaslab_header(vd); 1017 1018 for (m = 1; m < zopt_objects; m++) { 1019 if (zopt_object[m] < vd->vdev_ms_count) 1020 dump_metaslab( 1021 vd->vdev_ms[zopt_object[m]]); 1022 else 1023 (void) fprintf(stderr, "bad metaslab " 1024 "number %llu\n", 1025 (u_longlong_t)zopt_object[m]); 1026 } 1027 (void) printf("\n"); 1028 return; 1029 } 1030 children = c + 1; 1031 } 1032 for (; c < children; c++) { 1033 vd = rvd->vdev_child[c]; 1034 print_vdev_metaslab_header(vd); 1035 1036 print_vdev_indirect(vd); 1037 1038 for (m = 0; m < vd->vdev_ms_count; m++) 1039 dump_metaslab(vd->vdev_ms[m]); 1040 (void) printf("\n"); 1041 } 1042 } 1043 1044 static void 1045 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) 1046 { 1047 const ddt_phys_t *ddp = dde->dde_phys; 1048 const ddt_key_t *ddk = &dde->dde_key; 1049 const char *types[4] = { "ditto", "single", "double", "triple" }; 1050 char blkbuf[BP_SPRINTF_LEN]; 1051 blkptr_t blk; 1052 1053 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 1054 if (ddp->ddp_phys_birth == 0) 1055 continue; 1056 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 1057 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); 1058 (void) printf("index %llx refcnt %llu %s %s\n", 1059 (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, 1060 types[p], blkbuf); 1061 } 1062 } 1063 1064 static void 1065 dump_dedup_ratio(const ddt_stat_t *dds) 1066 { 1067 double rL, rP, rD, D, dedup, compress, copies; 1068 1069 if (dds->dds_blocks == 0) 1070 return; 1071 1072 rL = (double)dds->dds_ref_lsize; 1073 rP = (double)dds->dds_ref_psize; 1074 rD = (double)dds->dds_ref_dsize; 1075 D = (double)dds->dds_dsize; 1076 1077 dedup = rD / D; 1078 compress = rL / rP; 1079 copies = rD / rP; 1080 1081 (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, " 1082 "dedup * compress / copies = %.2f\n\n", 1083 dedup, compress, copies, dedup * compress / copies); 1084 } 1085 1086 static void 1087 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 1088 { 1089 char name[DDT_NAMELEN]; 1090 ddt_entry_t dde; 1091 uint64_t walk = 0; 1092 dmu_object_info_t doi; 1093 uint64_t count, dspace, mspace; 1094 int error; 1095 1096 error = ddt_object_info(ddt, type, class, &doi); 1097 1098 if (error == ENOENT) 1099 return; 1100 ASSERT(error == 0); 1101 1102 if ((count = ddt_object_count(ddt, type, class)) == 0) 1103 return; 1104 1105 dspace = doi.doi_physical_blocks_512 << 9; 1106 mspace = doi.doi_fill_count * doi.doi_data_block_size; 1107 1108 ddt_object_name(ddt, type, class, name); 1109 1110 (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n", 1111 name, 1112 (u_longlong_t)count, 1113 (u_longlong_t)(dspace / count), 1114 (u_longlong_t)(mspace / count)); 1115 1116 if (dump_opt['D'] < 3) 1117 return; 1118 1119 zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]); 1120 1121 if (dump_opt['D'] < 4) 1122 return; 1123 1124 if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE) 1125 return; 1126 1127 (void) printf("%s contents:\n\n", name); 1128 1129 while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0) 1130 dump_dde(ddt, &dde, walk); 1131 1132 ASSERT(error == ENOENT); 1133 1134 (void) printf("\n"); 1135 } 1136 1137 static void 1138 dump_all_ddts(spa_t *spa) 1139 { 1140 ddt_histogram_t ddh_total; 1141 ddt_stat_t dds_total; 1142 1143 bzero(&ddh_total, sizeof (ddh_total)); 1144 bzero(&dds_total, sizeof (dds_total)); 1145 1146 for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 1147 ddt_t *ddt = spa->spa_ddt[c]; 1148 for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 1149 for (enum ddt_class class = 0; class < DDT_CLASSES; 1150 class++) { 1151 dump_ddt(ddt, type, class); 1152 } 1153 } 1154 } 1155 1156 ddt_get_dedup_stats(spa, &dds_total); 1157 1158 if (dds_total.dds_blocks == 0) { 1159 (void) printf("All DDTs are empty\n"); 1160 return; 1161 } 1162 1163 (void) printf("\n"); 1164 1165 if (dump_opt['D'] > 1) { 1166 (void) printf("DDT histogram (aggregated over all DDTs):\n"); 1167 ddt_get_dedup_histogram(spa, &ddh_total); 1168 zpool_dump_ddt(&dds_total, &ddh_total); 1169 } 1170 1171 dump_dedup_ratio(&dds_total); 1172 } 1173 1174 static void 1175 dump_dtl_seg(void *arg, uint64_t start, uint64_t size) 1176 { 1177 char *prefix = arg; 1178 1179 (void) printf("%s [%llu,%llu) length %llu\n", 1180 prefix, 1181 (u_longlong_t)start, 1182 (u_longlong_t)(start + size), 1183 (u_longlong_t)(size)); 1184 } 1185 1186 static void 1187 dump_dtl(vdev_t *vd, int indent) 1188 { 1189 spa_t *spa = vd->vdev_spa; 1190 boolean_t required; 1191 const char *name[DTL_TYPES] = { "missing", "partial", "scrub", 1192 "outage" }; 1193 char prefix[256]; 1194 1195 spa_vdev_state_enter(spa, SCL_NONE); 1196 required = vdev_dtl_required(vd); 1197 (void) spa_vdev_state_exit(spa, NULL, 0); 1198 1199 if (indent == 0) 1200 (void) printf("\nDirty time logs:\n\n"); 1201 1202 (void) printf("\t%*s%s [%s]\n", indent, "", 1203 vd->vdev_path ? vd->vdev_path : 1204 vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa), 1205 required ? "DTL-required" : "DTL-expendable"); 1206 1207 for (int t = 0; t < DTL_TYPES; t++) { 1208 range_tree_t *rt = vd->vdev_dtl[t]; 1209 if (range_tree_space(rt) == 0) 1210 continue; 1211 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", 1212 indent + 2, "", name[t]); 1213 range_tree_walk(rt, dump_dtl_seg, prefix); 1214 if (dump_opt['d'] > 5 && vd->vdev_children == 0) 1215 dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm); 1216 } 1217 1218 for (unsigned c = 0; c < vd->vdev_children; c++) 1219 dump_dtl(vd->vdev_child[c], indent + 4); 1220 } 1221 1222 static void 1223 dump_history(spa_t *spa) 1224 { 1225 nvlist_t **events = NULL; 1226 uint64_t resid, len, off = 0; 1227 uint_t num = 0; 1228 int error; 1229 time_t tsec; 1230 struct tm t; 1231 char tbuf[30]; 1232 char internalstr[MAXPATHLEN]; 1233 1234 char *buf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 1235 do { 1236 len = SPA_MAXBLOCKSIZE; 1237 1238 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) { 1239 (void) fprintf(stderr, "Unable to read history: " 1240 "error %d\n", error); 1241 umem_free(buf, SPA_MAXBLOCKSIZE); 1242 return; 1243 } 1244 1245 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0) 1246 break; 1247 1248 off -= resid; 1249 } while (len != 0); 1250 umem_free(buf, SPA_MAXBLOCKSIZE); 1251 1252 (void) printf("\nHistory:\n"); 1253 for (unsigned i = 0; i < num; i++) { 1254 uint64_t time, txg, ievent; 1255 char *cmd, *intstr; 1256 boolean_t printed = B_FALSE; 1257 1258 if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME, 1259 &time) != 0) 1260 goto next; 1261 if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD, 1262 &cmd) != 0) { 1263 if (nvlist_lookup_uint64(events[i], 1264 ZPOOL_HIST_INT_EVENT, &ievent) != 0) 1265 goto next; 1266 verify(nvlist_lookup_uint64(events[i], 1267 ZPOOL_HIST_TXG, &txg) == 0); 1268 verify(nvlist_lookup_string(events[i], 1269 ZPOOL_HIST_INT_STR, &intstr) == 0); 1270 if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) 1271 goto next; 1272 1273 (void) snprintf(internalstr, 1274 sizeof (internalstr), 1275 "[internal %s txg:%ju] %s", 1276 zfs_history_event_names[ievent], (uintmax_t)txg, 1277 intstr); 1278 cmd = internalstr; 1279 } 1280 tsec = time; 1281 (void) localtime_r(&tsec, &t); 1282 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); 1283 (void) printf("%s %s\n", tbuf, cmd); 1284 printed = B_TRUE; 1285 1286 next: 1287 if (dump_opt['h'] > 1) { 1288 if (!printed) 1289 (void) printf("unrecognized record:\n"); 1290 dump_nvlist(events[i], 2); 1291 } 1292 } 1293 } 1294 1295 /*ARGSUSED*/ 1296 static void 1297 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) 1298 { 1299 } 1300 1301 static uint64_t 1302 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, 1303 const zbookmark_phys_t *zb) 1304 { 1305 if (dnp == NULL) { 1306 ASSERT(zb->zb_level < 0); 1307 if (zb->zb_object == 0) 1308 return (zb->zb_blkid); 1309 return (zb->zb_blkid * BP_GET_LSIZE(bp)); 1310 } 1311 1312 ASSERT(zb->zb_level >= 0); 1313 1314 return ((zb->zb_blkid << 1315 (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * 1316 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 1317 } 1318 1319 static void 1320 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) 1321 { 1322 const dva_t *dva = bp->blk_dva; 1323 int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; 1324 1325 if (dump_opt['b'] >= 6) { 1326 snprintf_blkptr(blkbuf, buflen, bp); 1327 return; 1328 } 1329 1330 if (BP_IS_EMBEDDED(bp)) { 1331 (void) sprintf(blkbuf, 1332 "EMBEDDED et=%u %llxL/%llxP B=%llu", 1333 (int)BPE_GET_ETYPE(bp), 1334 (u_longlong_t)BPE_GET_LSIZE(bp), 1335 (u_longlong_t)BPE_GET_PSIZE(bp), 1336 (u_longlong_t)bp->blk_birth); 1337 return; 1338 } 1339 1340 blkbuf[0] = '\0'; 1341 for (int i = 0; i < ndvas; i++) 1342 (void) snprintf(blkbuf + strlen(blkbuf), 1343 buflen - strlen(blkbuf), "%llu:%llx:%llx ", 1344 (u_longlong_t)DVA_GET_VDEV(&dva[i]), 1345 (u_longlong_t)DVA_GET_OFFSET(&dva[i]), 1346 (u_longlong_t)DVA_GET_ASIZE(&dva[i])); 1347 1348 if (BP_IS_HOLE(bp)) { 1349 (void) snprintf(blkbuf + strlen(blkbuf), 1350 buflen - strlen(blkbuf), 1351 "%llxL B=%llu", 1352 (u_longlong_t)BP_GET_LSIZE(bp), 1353 (u_longlong_t)bp->blk_birth); 1354 } else { 1355 (void) snprintf(blkbuf + strlen(blkbuf), 1356 buflen - strlen(blkbuf), 1357 "%llxL/%llxP F=%llu B=%llu/%llu", 1358 (u_longlong_t)BP_GET_LSIZE(bp), 1359 (u_longlong_t)BP_GET_PSIZE(bp), 1360 (u_longlong_t)BP_GET_FILL(bp), 1361 (u_longlong_t)bp->blk_birth, 1362 (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); 1363 } 1364 } 1365 1366 static void 1367 print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb, 1368 const dnode_phys_t *dnp) 1369 { 1370 char blkbuf[BP_SPRINTF_LEN]; 1371 int l; 1372 1373 if (!BP_IS_EMBEDDED(bp)) { 1374 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); 1375 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); 1376 } 1377 1378 (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); 1379 1380 ASSERT(zb->zb_level >= 0); 1381 1382 for (l = dnp->dn_nlevels - 1; l >= -1; l--) { 1383 if (l == zb->zb_level) { 1384 (void) printf("L%llx", (u_longlong_t)zb->zb_level); 1385 } else { 1386 (void) printf(" "); 1387 } 1388 } 1389 1390 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); 1391 (void) printf("%s\n", blkbuf); 1392 } 1393 1394 static int 1395 visit_indirect(spa_t *spa, const dnode_phys_t *dnp, 1396 blkptr_t *bp, const zbookmark_phys_t *zb) 1397 { 1398 int err = 0; 1399 1400 if (bp->blk_birth == 0) 1401 return (0); 1402 1403 print_indirect(bp, zb, dnp); 1404 1405 if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { 1406 arc_flags_t flags = ARC_FLAG_WAIT; 1407 int i; 1408 blkptr_t *cbp; 1409 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; 1410 arc_buf_t *buf; 1411 uint64_t fill = 0; 1412 1413 err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, 1414 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 1415 if (err) 1416 return (err); 1417 ASSERT(buf->b_data); 1418 1419 /* recursively visit blocks below this */ 1420 cbp = buf->b_data; 1421 for (i = 0; i < epb; i++, cbp++) { 1422 zbookmark_phys_t czb; 1423 1424 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, 1425 zb->zb_level - 1, 1426 zb->zb_blkid * epb + i); 1427 err = visit_indirect(spa, dnp, cbp, &czb); 1428 if (err) 1429 break; 1430 fill += BP_GET_FILL(cbp); 1431 } 1432 if (!err) 1433 ASSERT3U(fill, ==, BP_GET_FILL(bp)); 1434 arc_buf_destroy(buf, &buf); 1435 } 1436 1437 return (err); 1438 } 1439 1440 /*ARGSUSED*/ 1441 static void 1442 dump_indirect(dnode_t *dn) 1443 { 1444 dnode_phys_t *dnp = dn->dn_phys; 1445 int j; 1446 zbookmark_phys_t czb; 1447 1448 (void) printf("Indirect blocks:\n"); 1449 1450 SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset), 1451 dn->dn_object, dnp->dn_nlevels - 1, 0); 1452 for (j = 0; j < dnp->dn_nblkptr; j++) { 1453 czb.zb_blkid = j; 1454 (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp, 1455 &dnp->dn_blkptr[j], &czb); 1456 } 1457 1458 (void) printf("\n"); 1459 } 1460 1461 /*ARGSUSED*/ 1462 static void 1463 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) 1464 { 1465 dsl_dir_phys_t *dd = data; 1466 time_t crtime; 1467 char nice[32]; 1468 1469 /* make sure nicenum has enough space */ 1470 CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ); 1471 1472 if (dd == NULL) 1473 return; 1474 1475 ASSERT3U(size, >=, sizeof (dsl_dir_phys_t)); 1476 1477 crtime = dd->dd_creation_time; 1478 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 1479 (void) printf("\t\thead_dataset_obj = %llu\n", 1480 (u_longlong_t)dd->dd_head_dataset_obj); 1481 (void) printf("\t\tparent_dir_obj = %llu\n", 1482 (u_longlong_t)dd->dd_parent_obj); 1483 (void) printf("\t\torigin_obj = %llu\n", 1484 (u_longlong_t)dd->dd_origin_obj); 1485 (void) printf("\t\tchild_dir_zapobj = %llu\n", 1486 (u_longlong_t)dd->dd_child_dir_zapobj); 1487 zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice)); 1488 (void) printf("\t\tused_bytes = %s\n", nice); 1489 zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice)); 1490 (void) printf("\t\tcompressed_bytes = %s\n", nice); 1491 zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice)); 1492 (void) printf("\t\tuncompressed_bytes = %s\n", nice); 1493 zdb_nicenum(dd->dd_quota, nice, sizeof (nice)); 1494 (void) printf("\t\tquota = %s\n", nice); 1495 zdb_nicenum(dd->dd_reserved, nice, sizeof (nice)); 1496 (void) printf("\t\treserved = %s\n", nice); 1497 (void) printf("\t\tprops_zapobj = %llu\n", 1498 (u_longlong_t)dd->dd_props_zapobj); 1499 (void) printf("\t\tdeleg_zapobj = %llu\n", 1500 (u_longlong_t)dd->dd_deleg_zapobj); 1501 (void) printf("\t\tflags = %llx\n", 1502 (u_longlong_t)dd->dd_flags); 1503 1504 #define DO(which) \ 1505 zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \ 1506 sizeof (nice)); \ 1507 (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice) 1508 DO(HEAD); 1509 DO(SNAP); 1510 DO(CHILD); 1511 DO(CHILD_RSRV); 1512 DO(REFRSRV); 1513 #undef DO 1514 } 1515 1516 /*ARGSUSED*/ 1517 static void 1518 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) 1519 { 1520 dsl_dataset_phys_t *ds = data; 1521 time_t crtime; 1522 char used[32], compressed[32], uncompressed[32], unique[32]; 1523 char blkbuf[BP_SPRINTF_LEN]; 1524 1525 /* make sure nicenum has enough space */ 1526 CTASSERT(sizeof (used) >= NN_NUMBUF_SZ); 1527 CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ); 1528 CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ); 1529 CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ); 1530 1531 if (ds == NULL) 1532 return; 1533 1534 ASSERT(size == sizeof (*ds)); 1535 crtime = ds->ds_creation_time; 1536 zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used)); 1537 zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed)); 1538 zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed, 1539 sizeof (uncompressed)); 1540 zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique)); 1541 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp); 1542 1543 (void) printf("\t\tdir_obj = %llu\n", 1544 (u_longlong_t)ds->ds_dir_obj); 1545 (void) printf("\t\tprev_snap_obj = %llu\n", 1546 (u_longlong_t)ds->ds_prev_snap_obj); 1547 (void) printf("\t\tprev_snap_txg = %llu\n", 1548 (u_longlong_t)ds->ds_prev_snap_txg); 1549 (void) printf("\t\tnext_snap_obj = %llu\n", 1550 (u_longlong_t)ds->ds_next_snap_obj); 1551 (void) printf("\t\tsnapnames_zapobj = %llu\n", 1552 (u_longlong_t)ds->ds_snapnames_zapobj); 1553 (void) printf("\t\tnum_children = %llu\n", 1554 (u_longlong_t)ds->ds_num_children); 1555 (void) printf("\t\tuserrefs_obj = %llu\n", 1556 (u_longlong_t)ds->ds_userrefs_obj); 1557 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 1558 (void) printf("\t\tcreation_txg = %llu\n", 1559 (u_longlong_t)ds->ds_creation_txg); 1560 (void) printf("\t\tdeadlist_obj = %llu\n", 1561 (u_longlong_t)ds->ds_deadlist_obj); 1562 (void) printf("\t\tused_bytes = %s\n", used); 1563 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 1564 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 1565 (void) printf("\t\tunique = %s\n", unique); 1566 (void) printf("\t\tfsid_guid = %llu\n", 1567 (u_longlong_t)ds->ds_fsid_guid); 1568 (void) printf("\t\tguid = %llu\n", 1569 (u_longlong_t)ds->ds_guid); 1570 (void) printf("\t\tflags = %llx\n", 1571 (u_longlong_t)ds->ds_flags); 1572 (void) printf("\t\tnext_clones_obj = %llu\n", 1573 (u_longlong_t)ds->ds_next_clones_obj); 1574 (void) printf("\t\tprops_obj = %llu\n", 1575 (u_longlong_t)ds->ds_props_obj); 1576 (void) printf("\t\tbp = %s\n", blkbuf); 1577 } 1578 1579 /* ARGSUSED */ 1580 static int 1581 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 1582 { 1583 char blkbuf[BP_SPRINTF_LEN]; 1584 1585 if (bp->blk_birth != 0) { 1586 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 1587 (void) printf("\t%s\n", blkbuf); 1588 } 1589 return (0); 1590 } 1591 1592 static void 1593 dump_bptree(objset_t *os, uint64_t obj, const char *name) 1594 { 1595 char bytes[32]; 1596 bptree_phys_t *bt; 1597 dmu_buf_t *db; 1598 1599 /* make sure nicenum has enough space */ 1600 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); 1601 1602 if (dump_opt['d'] < 3) 1603 return; 1604 1605 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); 1606 bt = db->db_data; 1607 zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes)); 1608 (void) printf("\n %s: %llu datasets, %s\n", 1609 name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes); 1610 dmu_buf_rele(db, FTAG); 1611 1612 if (dump_opt['d'] < 5) 1613 return; 1614 1615 (void) printf("\n"); 1616 1617 (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL); 1618 } 1619 1620 /* ARGSUSED */ 1621 static int 1622 dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 1623 { 1624 char blkbuf[BP_SPRINTF_LEN]; 1625 1626 ASSERT(bp->blk_birth != 0); 1627 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); 1628 (void) printf("\t%s\n", blkbuf); 1629 return (0); 1630 } 1631 1632 static void 1633 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent) 1634 { 1635 char bytes[32]; 1636 char comp[32]; 1637 char uncomp[32]; 1638 1639 /* make sure nicenum has enough space */ 1640 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); 1641 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); 1642 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ); 1643 1644 if (dump_opt['d'] < 3) 1645 return; 1646 1647 zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes)); 1648 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { 1649 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp)); 1650 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp)); 1651 (void) printf(" %*s: object %llu, %llu local blkptrs, " 1652 "%llu subobjs in object %llu, %s (%s/%s comp)\n", 1653 indent * 8, name, 1654 (u_longlong_t)bpo->bpo_object, 1655 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 1656 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, 1657 (u_longlong_t)bpo->bpo_phys->bpo_subobjs, 1658 bytes, comp, uncomp); 1659 1660 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { 1661 uint64_t subobj; 1662 bpobj_t subbpo; 1663 int error; 1664 VERIFY0(dmu_read(bpo->bpo_os, 1665 bpo->bpo_phys->bpo_subobjs, 1666 i * sizeof (subobj), sizeof (subobj), &subobj, 0)); 1667 error = bpobj_open(&subbpo, bpo->bpo_os, subobj); 1668 if (error != 0) { 1669 (void) printf("ERROR %u while trying to open " 1670 "subobj id %llu\n", 1671 error, (u_longlong_t)subobj); 1672 continue; 1673 } 1674 dump_full_bpobj(&subbpo, "subobj", indent + 1); 1675 bpobj_close(&subbpo); 1676 } 1677 } else { 1678 (void) printf(" %*s: object %llu, %llu blkptrs, %s\n", 1679 indent * 8, name, 1680 (u_longlong_t)bpo->bpo_object, 1681 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 1682 bytes); 1683 } 1684 1685 if (dump_opt['d'] < 5) 1686 return; 1687 1688 1689 if (indent == 0) { 1690 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL); 1691 (void) printf("\n"); 1692 } 1693 } 1694 1695 static void 1696 dump_deadlist(dsl_deadlist_t *dl) 1697 { 1698 dsl_deadlist_entry_t *dle; 1699 uint64_t unused; 1700 char bytes[32]; 1701 char comp[32]; 1702 char uncomp[32]; 1703 1704 /* make sure nicenum has enough space */ 1705 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); 1706 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); 1707 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ); 1708 1709 if (dump_opt['d'] < 3) 1710 return; 1711 1712 if (dl->dl_oldfmt) { 1713 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0); 1714 return; 1715 } 1716 1717 zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes)); 1718 zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp)); 1719 zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp)); 1720 (void) printf("\n Deadlist: %s (%s/%s comp)\n", 1721 bytes, comp, uncomp); 1722 1723 if (dump_opt['d'] < 4) 1724 return; 1725 1726 (void) printf("\n"); 1727 1728 /* force the tree to be loaded */ 1729 dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused); 1730 1731 for (dle = avl_first(&dl->dl_tree); dle; 1732 dle = AVL_NEXT(&dl->dl_tree, dle)) { 1733 if (dump_opt['d'] >= 5) { 1734 char buf[128]; 1735 (void) snprintf(buf, sizeof (buf), 1736 "mintxg %llu -> obj %llu", 1737 (longlong_t)dle->dle_mintxg, 1738 (longlong_t)dle->dle_bpobj.bpo_object); 1739 1740 dump_full_bpobj(&dle->dle_bpobj, buf, 0); 1741 } else { 1742 (void) printf("mintxg %llu -> obj %llu\n", 1743 (longlong_t)dle->dle_mintxg, 1744 (longlong_t)dle->dle_bpobj.bpo_object); 1745 1746 } 1747 } 1748 } 1749 1750 static avl_tree_t idx_tree; 1751 static avl_tree_t domain_tree; 1752 static boolean_t fuid_table_loaded; 1753 static objset_t *sa_os = NULL; 1754 static sa_attr_type_t *sa_attr_table = NULL; 1755 1756 static int 1757 open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) 1758 { 1759 int err; 1760 uint64_t sa_attrs = 0; 1761 uint64_t version = 0; 1762 1763 VERIFY3P(sa_os, ==, NULL); 1764 err = dmu_objset_own(path, type, B_TRUE, tag, osp); 1765 if (err != 0) { 1766 (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path, 1767 strerror(err)); 1768 return (err); 1769 } 1770 1771 if (dmu_objset_type(*osp) == DMU_OST_ZFS) { 1772 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1773 8, 1, &version); 1774 if (version >= ZPL_VERSION_SA) { 1775 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 1776 8, 1, &sa_attrs); 1777 } 1778 err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END, 1779 &sa_attr_table); 1780 if (err != 0) { 1781 (void) fprintf(stderr, "sa_setup failed: %s\n", 1782 strerror(err)); 1783 dmu_objset_disown(*osp, tag); 1784 *osp = NULL; 1785 } 1786 } 1787 sa_os = *osp; 1788 1789 return (0); 1790 } 1791 1792 static void 1793 close_objset(objset_t *os, void *tag) 1794 { 1795 VERIFY3P(os, ==, sa_os); 1796 if (os->os_sa != NULL) 1797 sa_tear_down(os); 1798 dmu_objset_disown(os, tag); 1799 sa_attr_table = NULL; 1800 sa_os = NULL; 1801 } 1802 1803 static void 1804 fuid_table_destroy() 1805 { 1806 if (fuid_table_loaded) { 1807 zfs_fuid_table_destroy(&idx_tree, &domain_tree); 1808 fuid_table_loaded = B_FALSE; 1809 } 1810 } 1811 1812 /* 1813 * print uid or gid information. 1814 * For normal POSIX id just the id is printed in decimal format. 1815 * For CIFS files with FUID the fuid is printed in hex followed by 1816 * the domain-rid string. 1817 */ 1818 static void 1819 print_idstr(uint64_t id, const char *id_type) 1820 { 1821 if (FUID_INDEX(id)) { 1822 char *domain; 1823 1824 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id)); 1825 (void) printf("\t%s %llx [%s-%d]\n", id_type, 1826 (u_longlong_t)id, domain, (int)FUID_RID(id)); 1827 } else { 1828 (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id); 1829 } 1830 1831 } 1832 1833 static void 1834 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) 1835 { 1836 uint32_t uid_idx, gid_idx; 1837 1838 uid_idx = FUID_INDEX(uid); 1839 gid_idx = FUID_INDEX(gid); 1840 1841 /* Load domain table, if not already loaded */ 1842 if (!fuid_table_loaded && (uid_idx || gid_idx)) { 1843 uint64_t fuid_obj; 1844 1845 /* first find the fuid object. It lives in the master node */ 1846 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 1847 8, 1, &fuid_obj) == 0); 1848 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); 1849 (void) zfs_fuid_table_load(os, fuid_obj, 1850 &idx_tree, &domain_tree); 1851 fuid_table_loaded = B_TRUE; 1852 } 1853 1854 print_idstr(uid, "uid"); 1855 print_idstr(gid, "gid"); 1856 } 1857 1858 /*ARGSUSED*/ 1859 static void 1860 dump_znode(objset_t *os, uint64_t object, void *data, size_t size) 1861 { 1862 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ 1863 sa_handle_t *hdl; 1864 uint64_t xattr, rdev, gen; 1865 uint64_t uid, gid, mode, fsize, parent, links; 1866 uint64_t pflags; 1867 uint64_t acctm[2], modtm[2], chgtm[2], crtm[2]; 1868 time_t z_crtime, z_atime, z_mtime, z_ctime; 1869 sa_bulk_attr_t bulk[12]; 1870 int idx = 0; 1871 int error; 1872 1873 VERIFY3P(os, ==, sa_os); 1874 if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) { 1875 (void) printf("Failed to get handle for SA znode\n"); 1876 return; 1877 } 1878 1879 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8); 1880 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8); 1881 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL, 1882 &links, 8); 1883 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8); 1884 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL, 1885 &mode, 8); 1886 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT], 1887 NULL, &parent, 8); 1888 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL, 1889 &fsize, 8); 1890 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL, 1891 acctm, 16); 1892 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL, 1893 modtm, 16); 1894 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL, 1895 crtm, 16); 1896 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL, 1897 chgtm, 16); 1898 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL, 1899 &pflags, 8); 1900 1901 if (sa_bulk_lookup(hdl, bulk, idx)) { 1902 (void) sa_handle_destroy(hdl); 1903 return; 1904 } 1905 1906 z_crtime = (time_t)crtm[0]; 1907 z_atime = (time_t)acctm[0]; 1908 z_mtime = (time_t)modtm[0]; 1909 z_ctime = (time_t)chgtm[0]; 1910 1911 if (dump_opt['d'] > 4) { 1912 error = zfs_obj_to_path(os, object, path, sizeof (path)); 1913 if (error != 0) { 1914 (void) snprintf(path, sizeof (path), 1915 "\?\?\?<object#%llu>", (u_longlong_t)object); 1916 } 1917 (void) printf("\tpath %s\n", path); 1918 } 1919 dump_uidgid(os, uid, gid); 1920 (void) printf("\tatime %s", ctime(&z_atime)); 1921 (void) printf("\tmtime %s", ctime(&z_mtime)); 1922 (void) printf("\tctime %s", ctime(&z_ctime)); 1923 (void) printf("\tcrtime %s", ctime(&z_crtime)); 1924 (void) printf("\tgen %llu\n", (u_longlong_t)gen); 1925 (void) printf("\tmode %llo\n", (u_longlong_t)mode); 1926 (void) printf("\tsize %llu\n", (u_longlong_t)fsize); 1927 (void) printf("\tparent %llu\n", (u_longlong_t)parent); 1928 (void) printf("\tlinks %llu\n", (u_longlong_t)links); 1929 (void) printf("\tpflags %llx\n", (u_longlong_t)pflags); 1930 if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr, 1931 sizeof (uint64_t)) == 0) 1932 (void) printf("\txattr %llu\n", (u_longlong_t)xattr); 1933 if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev, 1934 sizeof (uint64_t)) == 0) 1935 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); 1936 sa_handle_destroy(hdl); 1937 } 1938 1939 /*ARGSUSED*/ 1940 static void 1941 dump_acl(objset_t *os, uint64_t object, void *data, size_t size) 1942 { 1943 } 1944 1945 /*ARGSUSED*/ 1946 static void 1947 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) 1948 { 1949 } 1950 1951 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { 1952 dump_none, /* unallocated */ 1953 dump_zap, /* object directory */ 1954 dump_uint64, /* object array */ 1955 dump_none, /* packed nvlist */ 1956 dump_packed_nvlist, /* packed nvlist size */ 1957 dump_none, /* bpobj */ 1958 dump_bpobj, /* bpobj header */ 1959 dump_none, /* SPA space map header */ 1960 dump_none, /* SPA space map */ 1961 dump_none, /* ZIL intent log */ 1962 dump_dnode, /* DMU dnode */ 1963 dump_dmu_objset, /* DMU objset */ 1964 dump_dsl_dir, /* DSL directory */ 1965 dump_zap, /* DSL directory child map */ 1966 dump_zap, /* DSL dataset snap map */ 1967 dump_zap, /* DSL props */ 1968 dump_dsl_dataset, /* DSL dataset */ 1969 dump_znode, /* ZFS znode */ 1970 dump_acl, /* ZFS V0 ACL */ 1971 dump_uint8, /* ZFS plain file */ 1972 dump_zpldir, /* ZFS directory */ 1973 dump_zap, /* ZFS master node */ 1974 dump_zap, /* ZFS delete queue */ 1975 dump_uint8, /* zvol object */ 1976 dump_zap, /* zvol prop */ 1977 dump_uint8, /* other uint8[] */ 1978 dump_uint64, /* other uint64[] */ 1979 dump_zap, /* other ZAP */ 1980 dump_zap, /* persistent error log */ 1981 dump_uint8, /* SPA history */ 1982 dump_history_offsets, /* SPA history offsets */ 1983 dump_zap, /* Pool properties */ 1984 dump_zap, /* DSL permissions */ 1985 dump_acl, /* ZFS ACL */ 1986 dump_uint8, /* ZFS SYSACL */ 1987 dump_none, /* FUID nvlist */ 1988 dump_packed_nvlist, /* FUID nvlist size */ 1989 dump_zap, /* DSL dataset next clones */ 1990 dump_zap, /* DSL scrub queue */ 1991 dump_zap, /* ZFS user/group used */ 1992 dump_zap, /* ZFS user/group quota */ 1993 dump_zap, /* snapshot refcount tags */ 1994 dump_ddt_zap, /* DDT ZAP object */ 1995 dump_zap, /* DDT statistics */ 1996 dump_znode, /* SA object */ 1997 dump_zap, /* SA Master Node */ 1998 dump_sa_attrs, /* SA attribute registration */ 1999 dump_sa_layouts, /* SA attribute layouts */ 2000 dump_zap, /* DSL scrub translations */ 2001 dump_none, /* fake dedup BP */ 2002 dump_zap, /* deadlist */ 2003 dump_none, /* deadlist hdr */ 2004 dump_zap, /* dsl clones */ 2005 dump_bpobj_subobjs, /* bpobj subobjs */ 2006 dump_unknown, /* Unknown type, must be last */ 2007 }; 2008 2009 static void 2010 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) 2011 { 2012 dmu_buf_t *db = NULL; 2013 dmu_object_info_t doi; 2014 dnode_t *dn; 2015 void *bonus = NULL; 2016 size_t bsize = 0; 2017 char iblk[32], dblk[32], lsize[32], asize[32], fill[32]; 2018 char bonus_size[32]; 2019 char aux[50]; 2020 int error; 2021 2022 /* make sure nicenum has enough space */ 2023 CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ); 2024 CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ); 2025 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ); 2026 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ); 2027 CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ); 2028 2029 if (*print_header) { 2030 (void) printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n", 2031 "Object", "lvl", "iblk", "dblk", "dsize", "lsize", 2032 "%full", "type"); 2033 *print_header = 0; 2034 } 2035 2036 if (object == 0) { 2037 dn = DMU_META_DNODE(os); 2038 } else { 2039 error = dmu_bonus_hold(os, object, FTAG, &db); 2040 if (error) 2041 fatal("dmu_bonus_hold(%llu) failed, errno %u", 2042 object, error); 2043 bonus = db->db_data; 2044 bsize = db->db_size; 2045 dn = DB_DNODE((dmu_buf_impl_t *)db); 2046 } 2047 dmu_object_info_from_dnode(dn, &doi); 2048 2049 zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk)); 2050 zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk)); 2051 zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize)); 2052 zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize)); 2053 zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size)); 2054 (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count * 2055 doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) / 2056 doi.doi_max_offset); 2057 2058 aux[0] = '\0'; 2059 2060 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) { 2061 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", 2062 ZDB_CHECKSUM_NAME(doi.doi_checksum)); 2063 } 2064 2065 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) { 2066 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", 2067 ZDB_COMPRESS_NAME(doi.doi_compress)); 2068 } 2069 2070 (void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n", 2071 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, 2072 asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux); 2073 2074 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 2075 (void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n", 2076 "", "", "", "", "", bonus_size, "bonus", 2077 ZDB_OT_NAME(doi.doi_bonus_type)); 2078 } 2079 2080 if (verbosity >= 4) { 2081 (void) printf("\tdnode flags: %s%s%s\n", 2082 (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? 2083 "USED_BYTES " : "", 2084 (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? 2085 "USERUSED_ACCOUNTED " : "", 2086 (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? 2087 "SPILL_BLKPTR" : ""); 2088 (void) printf("\tdnode maxblkid: %llu\n", 2089 (longlong_t)dn->dn_phys->dn_maxblkid); 2090 2091 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object, 2092 bonus, bsize); 2093 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0); 2094 *print_header = 1; 2095 } 2096 2097 if (verbosity >= 5) 2098 dump_indirect(dn); 2099 2100 if (verbosity >= 5) { 2101 /* 2102 * Report the list of segments that comprise the object. 2103 */ 2104 uint64_t start = 0; 2105 uint64_t end; 2106 uint64_t blkfill = 1; 2107 int minlvl = 1; 2108 2109 if (dn->dn_type == DMU_OT_DNODE) { 2110 minlvl = 0; 2111 blkfill = DNODES_PER_BLOCK; 2112 } 2113 2114 for (;;) { 2115 char segsize[32]; 2116 /* make sure nicenum has enough space */ 2117 CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ); 2118 error = dnode_next_offset(dn, 2119 0, &start, minlvl, blkfill, 0); 2120 if (error) 2121 break; 2122 end = start; 2123 error = dnode_next_offset(dn, 2124 DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); 2125 zdb_nicenum(end - start, segsize, sizeof (segsize)); 2126 (void) printf("\t\tsegment [%016llx, %016llx)" 2127 " size %5s\n", (u_longlong_t)start, 2128 (u_longlong_t)end, segsize); 2129 if (error) 2130 break; 2131 start = end; 2132 } 2133 } 2134 2135 if (db != NULL) 2136 dmu_buf_rele(db, FTAG); 2137 } 2138 2139 static const char *objset_types[DMU_OST_NUMTYPES] = { 2140 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 2141 2142 static void 2143 dump_dir(objset_t *os) 2144 { 2145 dmu_objset_stats_t dds; 2146 uint64_t object, object_count; 2147 uint64_t refdbytes, usedobjs, scratch; 2148 char numbuf[32]; 2149 char blkbuf[BP_SPRINTF_LEN + 20]; 2150 char osname[ZFS_MAX_DATASET_NAME_LEN]; 2151 const char *type = "UNKNOWN"; 2152 int verbosity = dump_opt['d']; 2153 int print_header = 1; 2154 unsigned i; 2155 int error; 2156 2157 /* make sure nicenum has enough space */ 2158 CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ); 2159 2160 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 2161 dmu_objset_fast_stat(os, &dds); 2162 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 2163 2164 if (dds.dds_type < DMU_OST_NUMTYPES) 2165 type = objset_types[dds.dds_type]; 2166 2167 if (dds.dds_type == DMU_OST_META) { 2168 dds.dds_creation_txg = TXG_INITIAL; 2169 usedobjs = BP_GET_FILL(os->os_rootbp); 2170 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)-> 2171 dd_used_bytes; 2172 } else { 2173 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); 2174 } 2175 2176 ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); 2177 2178 zdb_nicenum(refdbytes, numbuf, sizeof (numbuf)); 2179 2180 if (verbosity >= 4) { 2181 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); 2182 (void) snprintf_blkptr(blkbuf + strlen(blkbuf), 2183 sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); 2184 } else { 2185 blkbuf[0] = '\0'; 2186 } 2187 2188 dmu_objset_name(os, osname); 2189 2190 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, " 2191 "%s, %llu objects%s\n", 2192 osname, type, (u_longlong_t)dmu_objset_id(os), 2193 (u_longlong_t)dds.dds_creation_txg, 2194 numbuf, (u_longlong_t)usedobjs, blkbuf); 2195 2196 if (zopt_objects != 0) { 2197 for (i = 0; i < zopt_objects; i++) 2198 dump_object(os, zopt_object[i], verbosity, 2199 &print_header); 2200 (void) printf("\n"); 2201 return; 2202 } 2203 2204 if (dump_opt['i'] != 0 || verbosity >= 2) 2205 dump_intent_log(dmu_objset_zil(os)); 2206 2207 if (dmu_objset_ds(os) != NULL) { 2208 dsl_dataset_t *ds = dmu_objset_ds(os); 2209 dump_deadlist(&ds->ds_deadlist); 2210 2211 if (dsl_dataset_remap_deadlist_exists(ds)) { 2212 (void) printf("ds_remap_deadlist:\n"); 2213 dump_deadlist(&ds->ds_remap_deadlist); 2214 } 2215 } 2216 2217 if (verbosity < 2) 2218 return; 2219 2220 if (BP_IS_HOLE(os->os_rootbp)) 2221 return; 2222 2223 dump_object(os, 0, verbosity, &print_header); 2224 object_count = 0; 2225 if (DMU_USERUSED_DNODE(os) != NULL && 2226 DMU_USERUSED_DNODE(os)->dn_type != 0) { 2227 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header); 2228 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header); 2229 } 2230 2231 object = 0; 2232 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { 2233 dump_object(os, object, verbosity, &print_header); 2234 object_count++; 2235 } 2236 2237 ASSERT3U(object_count, ==, usedobjs); 2238 2239 (void) printf("\n"); 2240 2241 if (error != ESRCH) { 2242 (void) fprintf(stderr, "dmu_object_next() = %d\n", error); 2243 abort(); 2244 } 2245 } 2246 2247 static void 2248 dump_uberblock(uberblock_t *ub, const char *header, const char *footer) 2249 { 2250 time_t timestamp = ub->ub_timestamp; 2251 2252 (void) printf("%s", header ? header : ""); 2253 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); 2254 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); 2255 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); 2256 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); 2257 (void) printf("\ttimestamp = %llu UTC = %s", 2258 (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp))); 2259 if (dump_opt['u'] >= 3) { 2260 char blkbuf[BP_SPRINTF_LEN]; 2261 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); 2262 (void) printf("\trootbp = %s\n", blkbuf); 2263 } 2264 (void) printf("%s", footer ? footer : ""); 2265 } 2266 2267 static void 2268 dump_config(spa_t *spa) 2269 { 2270 dmu_buf_t *db; 2271 size_t nvsize = 0; 2272 int error = 0; 2273 2274 2275 error = dmu_bonus_hold(spa->spa_meta_objset, 2276 spa->spa_config_object, FTAG, &db); 2277 2278 if (error == 0) { 2279 nvsize = *(uint64_t *)db->db_data; 2280 dmu_buf_rele(db, FTAG); 2281 2282 (void) printf("\nMOS Configuration:\n"); 2283 dump_packed_nvlist(spa->spa_meta_objset, 2284 spa->spa_config_object, (void *)&nvsize, 1); 2285 } else { 2286 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d", 2287 (u_longlong_t)spa->spa_config_object, error); 2288 } 2289 } 2290 2291 static void 2292 dump_cachefile(const char *cachefile) 2293 { 2294 int fd; 2295 struct stat64 statbuf; 2296 char *buf; 2297 nvlist_t *config; 2298 2299 if ((fd = open64(cachefile, O_RDONLY)) < 0) { 2300 (void) printf("cannot open '%s': %s\n", cachefile, 2301 strerror(errno)); 2302 exit(1); 2303 } 2304 2305 if (fstat64(fd, &statbuf) != 0) { 2306 (void) printf("failed to stat '%s': %s\n", cachefile, 2307 strerror(errno)); 2308 exit(1); 2309 } 2310 2311 if ((buf = malloc(statbuf.st_size)) == NULL) { 2312 (void) fprintf(stderr, "failed to allocate %llu bytes\n", 2313 (u_longlong_t)statbuf.st_size); 2314 exit(1); 2315 } 2316 2317 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { 2318 (void) fprintf(stderr, "failed to read %llu bytes\n", 2319 (u_longlong_t)statbuf.st_size); 2320 exit(1); 2321 } 2322 2323 (void) close(fd); 2324 2325 if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { 2326 (void) fprintf(stderr, "failed to unpack nvlist\n"); 2327 exit(1); 2328 } 2329 2330 free(buf); 2331 2332 dump_nvlist(config, 0); 2333 2334 nvlist_free(config); 2335 } 2336 2337 #define ZDB_MAX_UB_HEADER_SIZE 32 2338 2339 static void 2340 dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift) 2341 { 2342 vdev_t vd; 2343 vdev_t *vdp = &vd; 2344 char header[ZDB_MAX_UB_HEADER_SIZE]; 2345 2346 vd.vdev_ashift = ashift; 2347 vdp->vdev_top = vdp; 2348 2349 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) { 2350 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i); 2351 uberblock_t *ub = (void *)((char *)lbl + uoff); 2352 2353 if (uberblock_verify(ub)) 2354 continue; 2355 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, 2356 "Uberblock[%d]\n", i); 2357 dump_uberblock(ub, header, ""); 2358 } 2359 } 2360 2361 static char curpath[PATH_MAX]; 2362 2363 /* 2364 * Iterate through the path components, recursively passing 2365 * current one's obj and remaining path until we find the obj 2366 * for the last one. 2367 */ 2368 static int 2369 dump_path_impl(objset_t *os, uint64_t obj, char *name) 2370 { 2371 int err; 2372 int header = 1; 2373 uint64_t child_obj; 2374 char *s; 2375 dmu_buf_t *db; 2376 dmu_object_info_t doi; 2377 2378 if ((s = strchr(name, '/')) != NULL) 2379 *s = '\0'; 2380 err = zap_lookup(os, obj, name, 8, 1, &child_obj); 2381 2382 (void) strlcat(curpath, name, sizeof (curpath)); 2383 2384 if (err != 0) { 2385 (void) fprintf(stderr, "failed to lookup %s: %s\n", 2386 curpath, strerror(err)); 2387 return (err); 2388 } 2389 2390 child_obj = ZFS_DIRENT_OBJ(child_obj); 2391 err = sa_buf_hold(os, child_obj, FTAG, &db); 2392 if (err != 0) { 2393 (void) fprintf(stderr, 2394 "failed to get SA dbuf for obj %llu: %s\n", 2395 (u_longlong_t)child_obj, strerror(err)); 2396 return (EINVAL); 2397 } 2398 dmu_object_info_from_db(db, &doi); 2399 sa_buf_rele(db, FTAG); 2400 2401 if (doi.doi_bonus_type != DMU_OT_SA && 2402 doi.doi_bonus_type != DMU_OT_ZNODE) { 2403 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n", 2404 doi.doi_bonus_type, (u_longlong_t)child_obj); 2405 return (EINVAL); 2406 } 2407 2408 if (dump_opt['v'] > 6) { 2409 (void) printf("obj=%llu %s type=%d bonustype=%d\n", 2410 (u_longlong_t)child_obj, curpath, doi.doi_type, 2411 doi.doi_bonus_type); 2412 } 2413 2414 (void) strlcat(curpath, "/", sizeof (curpath)); 2415 2416 switch (doi.doi_type) { 2417 case DMU_OT_DIRECTORY_CONTENTS: 2418 if (s != NULL && *(s + 1) != '\0') 2419 return (dump_path_impl(os, child_obj, s + 1)); 2420 /*FALLTHROUGH*/ 2421 case DMU_OT_PLAIN_FILE_CONTENTS: 2422 dump_object(os, child_obj, dump_opt['v'], &header); 2423 return (0); 2424 default: 2425 (void) fprintf(stderr, "object %llu has non-file/directory " 2426 "type %d\n", (u_longlong_t)obj, doi.doi_type); 2427 break; 2428 } 2429 2430 return (EINVAL); 2431 } 2432 2433 /* 2434 * Dump the blocks for the object specified by path inside the dataset. 2435 */ 2436 static int 2437 dump_path(char *ds, char *path) 2438 { 2439 int err; 2440 objset_t *os; 2441 uint64_t root_obj; 2442 2443 err = open_objset(ds, DMU_OST_ZFS, FTAG, &os); 2444 if (err != 0) 2445 return (err); 2446 2447 err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj); 2448 if (err != 0) { 2449 (void) fprintf(stderr, "can't lookup root znode: %s\n", 2450 strerror(err)); 2451 dmu_objset_disown(os, FTAG); 2452 return (EINVAL); 2453 } 2454 2455 (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds); 2456 2457 err = dump_path_impl(os, root_obj, path); 2458 2459 close_objset(os, FTAG); 2460 return (err); 2461 } 2462 2463 static int 2464 dump_label(const char *dev) 2465 { 2466 int fd; 2467 vdev_label_t label; 2468 char path[MAXPATHLEN]; 2469 char *buf = label.vl_vdev_phys.vp_nvlist; 2470 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); 2471 struct stat64 statbuf; 2472 uint64_t psize, ashift; 2473 boolean_t label_found = B_FALSE; 2474 2475 (void) strlcpy(path, dev, sizeof (path)); 2476 if (dev[0] == '/') { 2477 if (strncmp(dev, ZFS_DISK_ROOTD, 2478 strlen(ZFS_DISK_ROOTD)) == 0) { 2479 (void) snprintf(path, sizeof (path), "%s%s", 2480 ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD)); 2481 } 2482 } else if (stat64(path, &statbuf) != 0) { 2483 char *s; 2484 2485 (void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD, 2486 dev); 2487 if (((s = strrchr(dev, 's')) == NULL && 2488 (s = strchr(dev, 'p')) == NULL) || 2489 !isdigit(*(s + 1))) 2490 (void) strlcat(path, "s0", sizeof (path)); 2491 } 2492 2493 if ((fd = open64(path, O_RDONLY)) < 0) { 2494 (void) fprintf(stderr, "cannot open '%s': %s\n", path, 2495 strerror(errno)); 2496 exit(1); 2497 } 2498 2499 if (fstat64(fd, &statbuf) != 0) { 2500 (void) fprintf(stderr, "failed to stat '%s': %s\n", path, 2501 strerror(errno)); 2502 (void) close(fd); 2503 exit(1); 2504 } 2505 2506 if (S_ISBLK(statbuf.st_mode)) { 2507 (void) fprintf(stderr, 2508 "cannot use '%s': character device required\n", path); 2509 (void) close(fd); 2510 exit(1); 2511 } 2512 2513 psize = statbuf.st_size; 2514 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); 2515 2516 for (int l = 0; l < VDEV_LABELS; l++) { 2517 nvlist_t *config = NULL; 2518 2519 if (!dump_opt['q']) { 2520 (void) printf("------------------------------------\n"); 2521 (void) printf("LABEL %d\n", l); 2522 (void) printf("------------------------------------\n"); 2523 } 2524 2525 if (pread64(fd, &label, sizeof (label), 2526 vdev_label_offset(psize, l, 0)) != sizeof (label)) { 2527 if (!dump_opt['q']) 2528 (void) printf("failed to read label %d\n", l); 2529 continue; 2530 } 2531 2532 if (nvlist_unpack(buf, buflen, &config, 0) != 0) { 2533 if (!dump_opt['q']) 2534 (void) printf("failed to unpack label %d\n", l); 2535 ashift = SPA_MINBLOCKSHIFT; 2536 } else { 2537 nvlist_t *vdev_tree = NULL; 2538 2539 if (!dump_opt['q']) 2540 dump_nvlist(config, 4); 2541 if ((nvlist_lookup_nvlist(config, 2542 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) || 2543 (nvlist_lookup_uint64(vdev_tree, 2544 ZPOOL_CONFIG_ASHIFT, &ashift) != 0)) 2545 ashift = SPA_MINBLOCKSHIFT; 2546 nvlist_free(config); 2547 label_found = B_TRUE; 2548 } 2549 if (dump_opt['u']) 2550 dump_label_uberblocks(&label, ashift); 2551 } 2552 2553 (void) close(fd); 2554 2555 return (label_found ? 0 : 2); 2556 } 2557 2558 static uint64_t dataset_feature_count[SPA_FEATURES]; 2559 static uint64_t remap_deadlist_count = 0; 2560 2561 /*ARGSUSED*/ 2562 static int 2563 dump_one_dir(const char *dsname, void *arg) 2564 { 2565 int error; 2566 objset_t *os; 2567 2568 error = open_objset(dsname, DMU_OST_ANY, FTAG, &os); 2569 if (error != 0) 2570 return (0); 2571 2572 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 2573 if (!dmu_objset_ds(os)->ds_feature_inuse[f]) 2574 continue; 2575 ASSERT(spa_feature_table[f].fi_flags & 2576 ZFEATURE_FLAG_PER_DATASET); 2577 dataset_feature_count[f]++; 2578 } 2579 2580 if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) { 2581 remap_deadlist_count++; 2582 } 2583 2584 dump_dir(os); 2585 close_objset(os, FTAG); 2586 fuid_table_destroy(); 2587 return (0); 2588 } 2589 2590 /* 2591 * Block statistics. 2592 */ 2593 #define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2) 2594 typedef struct zdb_blkstats { 2595 uint64_t zb_asize; 2596 uint64_t zb_lsize; 2597 uint64_t zb_psize; 2598 uint64_t zb_count; 2599 uint64_t zb_gangs; 2600 uint64_t zb_ditto_samevdev; 2601 uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; 2602 } zdb_blkstats_t; 2603 2604 /* 2605 * Extended object types to report deferred frees and dedup auto-ditto blocks. 2606 */ 2607 #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0) 2608 #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1) 2609 #define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2) 2610 #define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3) 2611 2612 static const char *zdb_ot_extname[] = { 2613 "deferred free", 2614 "dedup ditto", 2615 "other", 2616 "Total", 2617 }; 2618 2619 #define ZB_TOTAL DN_MAX_LEVELS 2620 2621 typedef struct zdb_cb { 2622 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; 2623 uint64_t zcb_removing_size; 2624 uint64_t zcb_dedup_asize; 2625 uint64_t zcb_dedup_blocks; 2626 uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; 2627 uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] 2628 [BPE_PAYLOAD_SIZE]; 2629 uint64_t zcb_start; 2630 hrtime_t zcb_lastprint; 2631 uint64_t zcb_totalasize; 2632 uint64_t zcb_errors[256]; 2633 int zcb_readfails; 2634 int zcb_haderrors; 2635 spa_t *zcb_spa; 2636 uint32_t **zcb_vd_obsolete_counts; 2637 } zdb_cb_t; 2638 2639 static void 2640 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, 2641 dmu_object_type_t type) 2642 { 2643 uint64_t refcnt = 0; 2644 2645 ASSERT(type < ZDB_OT_TOTAL); 2646 2647 if (zilog && zil_bp_tree_add(zilog, bp) != 0) 2648 return; 2649 2650 for (int i = 0; i < 4; i++) { 2651 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; 2652 int t = (i & 1) ? type : ZDB_OT_TOTAL; 2653 int equal; 2654 zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; 2655 2656 zb->zb_asize += BP_GET_ASIZE(bp); 2657 zb->zb_lsize += BP_GET_LSIZE(bp); 2658 zb->zb_psize += BP_GET_PSIZE(bp); 2659 zb->zb_count++; 2660 2661 /* 2662 * The histogram is only big enough to record blocks up to 2663 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last, 2664 * "other", bucket. 2665 */ 2666 unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT; 2667 idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1); 2668 zb->zb_psize_histogram[idx]++; 2669 2670 zb->zb_gangs += BP_COUNT_GANG(bp); 2671 2672 switch (BP_GET_NDVAS(bp)) { 2673 case 2: 2674 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 2675 DVA_GET_VDEV(&bp->blk_dva[1])) 2676 zb->zb_ditto_samevdev++; 2677 break; 2678 case 3: 2679 equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == 2680 DVA_GET_VDEV(&bp->blk_dva[1])) + 2681 (DVA_GET_VDEV(&bp->blk_dva[0]) == 2682 DVA_GET_VDEV(&bp->blk_dva[2])) + 2683 (DVA_GET_VDEV(&bp->blk_dva[1]) == 2684 DVA_GET_VDEV(&bp->blk_dva[2])); 2685 if (equal != 0) 2686 zb->zb_ditto_samevdev++; 2687 break; 2688 } 2689 2690 } 2691 2692 if (BP_IS_EMBEDDED(bp)) { 2693 zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; 2694 zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] 2695 [BPE_GET_PSIZE(bp)]++; 2696 return; 2697 } 2698 2699 if (dump_opt['L']) 2700 return; 2701 2702 if (BP_GET_DEDUP(bp)) { 2703 ddt_t *ddt; 2704 ddt_entry_t *dde; 2705 2706 ddt = ddt_select(zcb->zcb_spa, bp); 2707 ddt_enter(ddt); 2708 dde = ddt_lookup(ddt, bp, B_FALSE); 2709 2710 if (dde == NULL) { 2711 refcnt = 0; 2712 } else { 2713 ddt_phys_t *ddp = ddt_phys_select(dde, bp); 2714 ddt_phys_decref(ddp); 2715 refcnt = ddp->ddp_refcnt; 2716 if (ddt_phys_total_refcnt(dde) == 0) 2717 ddt_remove(ddt, dde); 2718 } 2719 ddt_exit(ddt); 2720 } 2721 2722 VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa, 2723 refcnt ? 0 : spa_first_txg(zcb->zcb_spa), 2724 bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); 2725 } 2726 2727 static void 2728 zdb_blkptr_done(zio_t *zio) 2729 { 2730 spa_t *spa = zio->io_spa; 2731 blkptr_t *bp = zio->io_bp; 2732 int ioerr = zio->io_error; 2733 zdb_cb_t *zcb = zio->io_private; 2734 zbookmark_phys_t *zb = &zio->io_bookmark; 2735 2736 abd_free(zio->io_abd); 2737 2738 mutex_enter(&spa->spa_scrub_lock); 2739 spa->spa_scrub_inflight--; 2740 cv_broadcast(&spa->spa_scrub_io_cv); 2741 2742 if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 2743 char blkbuf[BP_SPRINTF_LEN]; 2744 2745 zcb->zcb_haderrors = 1; 2746 zcb->zcb_errors[ioerr]++; 2747 2748 if (dump_opt['b'] >= 2) 2749 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 2750 else 2751 blkbuf[0] = '\0'; 2752 2753 (void) printf("zdb_blkptr_cb: " 2754 "Got error %d reading " 2755 "<%llu, %llu, %lld, %llx> %s -- skipping\n", 2756 ioerr, 2757 (u_longlong_t)zb->zb_objset, 2758 (u_longlong_t)zb->zb_object, 2759 (u_longlong_t)zb->zb_level, 2760 (u_longlong_t)zb->zb_blkid, 2761 blkbuf); 2762 } 2763 mutex_exit(&spa->spa_scrub_lock); 2764 } 2765 2766 static int 2767 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 2768 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 2769 { 2770 zdb_cb_t *zcb = arg; 2771 dmu_object_type_t type; 2772 boolean_t is_metadata; 2773 2774 if (bp == NULL) 2775 return (0); 2776 2777 if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { 2778 char blkbuf[BP_SPRINTF_LEN]; 2779 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 2780 (void) printf("objset %llu object %llu " 2781 "level %lld offset 0x%llx %s\n", 2782 (u_longlong_t)zb->zb_objset, 2783 (u_longlong_t)zb->zb_object, 2784 (longlong_t)zb->zb_level, 2785 (u_longlong_t)blkid2offset(dnp, bp, zb), 2786 blkbuf); 2787 } 2788 2789 if (BP_IS_HOLE(bp)) 2790 return (0); 2791 2792 type = BP_GET_TYPE(bp); 2793 2794 zdb_count_block(zcb, zilog, bp, 2795 (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type); 2796 2797 is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); 2798 2799 if (!BP_IS_EMBEDDED(bp) && 2800 (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { 2801 size_t size = BP_GET_PSIZE(bp); 2802 abd_t *abd = abd_alloc(size, B_FALSE); 2803 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; 2804 2805 /* If it's an intent log block, failure is expected. */ 2806 if (zb->zb_level == ZB_ZIL_LEVEL) 2807 flags |= ZIO_FLAG_SPECULATIVE; 2808 2809 mutex_enter(&spa->spa_scrub_lock); 2810 while (spa->spa_scrub_inflight > max_inflight) 2811 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 2812 spa->spa_scrub_inflight++; 2813 mutex_exit(&spa->spa_scrub_lock); 2814 2815 zio_nowait(zio_read(NULL, spa, bp, abd, size, 2816 zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); 2817 } 2818 2819 zcb->zcb_readfails = 0; 2820 2821 /* only call gethrtime() every 100 blocks */ 2822 static int iters; 2823 if (++iters > 100) 2824 iters = 0; 2825 else 2826 return (0); 2827 2828 if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) { 2829 uint64_t now = gethrtime(); 2830 char buf[10]; 2831 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; 2832 int kb_per_sec = 2833 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); 2834 int sec_remaining = 2835 (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; 2836 2837 /* make sure nicenum has enough space */ 2838 CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ); 2839 2840 zfs_nicenum(bytes, buf, sizeof (buf)); 2841 (void) fprintf(stderr, 2842 "\r%5s completed (%4dMB/s) " 2843 "estimated time remaining: %uhr %02umin %02usec ", 2844 buf, kb_per_sec / 1024, 2845 sec_remaining / 60 / 60, 2846 sec_remaining / 60 % 60, 2847 sec_remaining % 60); 2848 2849 zcb->zcb_lastprint = now; 2850 } 2851 2852 return (0); 2853 } 2854 2855 static void 2856 zdb_leak(void *arg, uint64_t start, uint64_t size) 2857 { 2858 vdev_t *vd = arg; 2859 2860 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 2861 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); 2862 } 2863 2864 static metaslab_ops_t zdb_metaslab_ops = { 2865 NULL /* alloc */ 2866 }; 2867 2868 static void 2869 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb) 2870 { 2871 ddt_bookmark_t ddb; 2872 ddt_entry_t dde; 2873 int error; 2874 2875 bzero(&ddb, sizeof (ddb)); 2876 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) { 2877 blkptr_t blk; 2878 ddt_phys_t *ddp = dde.dde_phys; 2879 2880 if (ddb.ddb_class == DDT_CLASS_UNIQUE) 2881 return; 2882 2883 ASSERT(ddt_phys_total_refcnt(&dde) > 1); 2884 2885 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 2886 if (ddp->ddp_phys_birth == 0) 2887 continue; 2888 ddt_bp_create(ddb.ddb_checksum, 2889 &dde.dde_key, ddp, &blk); 2890 if (p == DDT_PHYS_DITTO) { 2891 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO); 2892 } else { 2893 zcb->zcb_dedup_asize += 2894 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1); 2895 zcb->zcb_dedup_blocks++; 2896 } 2897 } 2898 if (!dump_opt['L']) { 2899 ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum]; 2900 ddt_enter(ddt); 2901 VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL); 2902 ddt_exit(ddt); 2903 } 2904 } 2905 2906 ASSERT(error == ENOENT); 2907 } 2908 2909 /* ARGSUSED */ 2910 static void 2911 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset, 2912 uint64_t size, void *arg) 2913 { 2914 /* 2915 * This callback was called through a remap from 2916 * a device being removed. Therefore, the vdev that 2917 * this callback is applied to is a concrete 2918 * vdev. 2919 */ 2920 ASSERT(vdev_is_concrete(vd)); 2921 2922 VERIFY0(metaslab_claim_impl(vd, offset, size, 2923 spa_first_txg(vd->vdev_spa))); 2924 } 2925 2926 static void 2927 claim_segment_cb(void *arg, uint64_t offset, uint64_t size) 2928 { 2929 vdev_t *vd = arg; 2930 2931 vdev_indirect_ops.vdev_op_remap(vd, offset, size, 2932 claim_segment_impl_cb, NULL); 2933 } 2934 2935 /* 2936 * After accounting for all allocated blocks that are directly referenced, 2937 * we might have missed a reference to a block from a partially complete 2938 * (and thus unused) indirect mapping object. We perform a secondary pass 2939 * through the metaslabs we have already mapped and claim the destination 2940 * blocks. 2941 */ 2942 static void 2943 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) 2944 { 2945 if (spa->spa_vdev_removal == NULL) 2946 return; 2947 2948 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 2949 2950 spa_vdev_removal_t *svr = spa->spa_vdev_removal; 2951 vdev_t *vd = svr->svr_vdev; 2952 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 2953 2954 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { 2955 metaslab_t *msp = vd->vdev_ms[msi]; 2956 2957 if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim)) 2958 break; 2959 2960 ASSERT0(range_tree_space(svr->svr_allocd_segs)); 2961 2962 if (msp->ms_sm != NULL) { 2963 VERIFY0(space_map_load(msp->ms_sm, 2964 svr->svr_allocd_segs, SM_ALLOC)); 2965 2966 /* 2967 * Clear everything past what has been synced, 2968 * because we have not allocated mappings for it yet. 2969 */ 2970 range_tree_clear(svr->svr_allocd_segs, 2971 vdev_indirect_mapping_max_offset(vim), 2972 msp->ms_sm->sm_start + msp->ms_sm->sm_size - 2973 vdev_indirect_mapping_max_offset(vim)); 2974 } 2975 2976 zcb->zcb_removing_size += 2977 range_tree_space(svr->svr_allocd_segs); 2978 range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd); 2979 } 2980 2981 spa_config_exit(spa, SCL_CONFIG, FTAG); 2982 } 2983 2984 /* 2985 * vm_idxp is an in-out parameter which (for indirect vdevs) is the 2986 * index in vim_entries that has the first entry in this metaslab. On 2987 * return, it will be set to the first entry after this metaslab. 2988 */ 2989 static void 2990 zdb_leak_init_ms(metaslab_t *msp, uint64_t *vim_idxp) 2991 { 2992 metaslab_group_t *mg = msp->ms_group; 2993 vdev_t *vd = mg->mg_vd; 2994 vdev_t *rvd = vd->vdev_spa->spa_root_vdev; 2995 2996 mutex_enter(&msp->ms_lock); 2997 metaslab_unload(msp); 2998 2999 /* 3000 * We don't want to spend the CPU manipulating the size-ordered 3001 * tree, so clear the range_tree ops. 3002 */ 3003 msp->ms_tree->rt_ops = NULL; 3004 3005 (void) fprintf(stderr, 3006 "\rloading vdev %llu of %llu, metaslab %llu of %llu ...", 3007 (longlong_t)vd->vdev_id, 3008 (longlong_t)rvd->vdev_children, 3009 (longlong_t)msp->ms_id, 3010 (longlong_t)vd->vdev_ms_count); 3011 3012 /* 3013 * For leak detection, we overload the metaslab ms_tree to 3014 * contain allocated segments instead of free segments. As a 3015 * result, we can't use the normal metaslab_load/unload 3016 * interfaces. 3017 */ 3018 if (vd->vdev_ops == &vdev_indirect_ops) { 3019 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3020 for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim); 3021 (*vim_idxp)++) { 3022 vdev_indirect_mapping_entry_phys_t *vimep = 3023 &vim->vim_entries[*vim_idxp]; 3024 uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 3025 uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst); 3026 ASSERT3U(ent_offset, >=, msp->ms_start); 3027 if (ent_offset >= msp->ms_start + msp->ms_size) 3028 break; 3029 3030 /* 3031 * Mappings do not cross metaslab boundaries, 3032 * because we create them by walking the metaslabs. 3033 */ 3034 ASSERT3U(ent_offset + ent_len, <=, 3035 msp->ms_start + msp->ms_size); 3036 range_tree_add(msp->ms_tree, ent_offset, ent_len); 3037 } 3038 } else if (msp->ms_sm != NULL) { 3039 VERIFY0(space_map_load(msp->ms_sm, msp->ms_tree, SM_ALLOC)); 3040 } 3041 3042 if (!msp->ms_loaded) { 3043 msp->ms_loaded = B_TRUE; 3044 } 3045 mutex_exit(&msp->ms_lock); 3046 } 3047 3048 /* ARGSUSED */ 3049 static int 3050 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 3051 { 3052 zdb_cb_t *zcb = arg; 3053 spa_t *spa = zcb->zcb_spa; 3054 vdev_t *vd; 3055 const dva_t *dva = &bp->blk_dva[0]; 3056 3057 ASSERT(!dump_opt['L']); 3058 ASSERT3U(BP_GET_NDVAS(bp), ==, 1); 3059 3060 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 3061 vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva)); 3062 ASSERT3P(vd, !=, NULL); 3063 spa_config_exit(spa, SCL_VDEV, FTAG); 3064 3065 ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0); 3066 ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL); 3067 3068 vdev_indirect_mapping_increment_obsolete_count( 3069 vd->vdev_indirect_mapping, 3070 DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva), 3071 zcb->zcb_vd_obsolete_counts[vd->vdev_id]); 3072 3073 return (0); 3074 } 3075 3076 static uint32_t * 3077 zdb_load_obsolete_counts(vdev_t *vd) 3078 { 3079 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3080 spa_t *spa = vd->vdev_spa; 3081 spa_condensing_indirect_phys_t *scip = 3082 &spa->spa_condensing_indirect_phys; 3083 uint32_t *counts; 3084 3085 EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL); 3086 counts = vdev_indirect_mapping_load_obsolete_counts(vim); 3087 if (vd->vdev_obsolete_sm != NULL) { 3088 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, 3089 vd->vdev_obsolete_sm); 3090 } 3091 if (scip->scip_vdev == vd->vdev_id && 3092 scip->scip_prev_obsolete_sm_object != 0) { 3093 space_map_t *prev_obsolete_sm = NULL; 3094 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset, 3095 scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0)); 3096 space_map_update(prev_obsolete_sm); 3097 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, 3098 prev_obsolete_sm); 3099 space_map_close(prev_obsolete_sm); 3100 } 3101 return (counts); 3102 } 3103 3104 static void 3105 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) 3106 { 3107 zcb->zcb_spa = spa; 3108 3109 if (!dump_opt['L']) { 3110 dsl_pool_t *dp = spa->spa_dsl_pool; 3111 vdev_t *rvd = spa->spa_root_vdev; 3112 3113 /* 3114 * We are going to be changing the meaning of the metaslab's 3115 * ms_tree. Ensure that the allocator doesn't try to 3116 * use the tree. 3117 */ 3118 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; 3119 spa->spa_log_class->mc_ops = &zdb_metaslab_ops; 3120 3121 zcb->zcb_vd_obsolete_counts = 3122 umem_zalloc(rvd->vdev_children * sizeof (uint32_t *), 3123 UMEM_NOFAIL); 3124 3125 3126 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 3127 vdev_t *vd = rvd->vdev_child[c]; 3128 uint64_t vim_idx = 0; 3129 3130 ASSERT3U(c, ==, vd->vdev_id); 3131 3132 /* 3133 * Note: we don't check for mapping leaks on 3134 * removing vdevs because their ms_tree's are 3135 * used to look for leaks in allocated space. 3136 */ 3137 if (vd->vdev_ops == &vdev_indirect_ops) { 3138 zcb->zcb_vd_obsolete_counts[c] = 3139 zdb_load_obsolete_counts(vd); 3140 3141 /* 3142 * Normally, indirect vdevs don't have any 3143 * metaslabs. We want to set them up for 3144 * zio_claim(). 3145 */ 3146 VERIFY0(vdev_metaslab_init(vd, 0)); 3147 } 3148 3149 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 3150 zdb_leak_init_ms(vd->vdev_ms[m], &vim_idx); 3151 } 3152 if (vd->vdev_ops == &vdev_indirect_ops) { 3153 ASSERT3U(vim_idx, ==, 3154 vdev_indirect_mapping_num_entries( 3155 vd->vdev_indirect_mapping)); 3156 } 3157 } 3158 (void) fprintf(stderr, "\n"); 3159 3160 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { 3161 ASSERT(spa_feature_is_enabled(spa, 3162 SPA_FEATURE_DEVICE_REMOVAL)); 3163 (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj, 3164 increment_indirect_mapping_cb, zcb, NULL); 3165 } 3166 } 3167 3168 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3169 3170 zdb_ddt_leak_init(spa, zcb); 3171 3172 spa_config_exit(spa, SCL_CONFIG, FTAG); 3173 } 3174 3175 static boolean_t 3176 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb) 3177 { 3178 boolean_t leaks = B_FALSE; 3179 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3180 uint64_t total_leaked = 0; 3181 3182 ASSERT(vim != NULL); 3183 3184 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) { 3185 vdev_indirect_mapping_entry_phys_t *vimep = 3186 &vim->vim_entries[i]; 3187 uint64_t obsolete_bytes = 0; 3188 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 3189 metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 3190 3191 /* 3192 * This is not very efficient but it's easy to 3193 * verify correctness. 3194 */ 3195 for (uint64_t inner_offset = 0; 3196 inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst); 3197 inner_offset += 1 << vd->vdev_ashift) { 3198 if (range_tree_contains(msp->ms_tree, 3199 offset + inner_offset, 1 << vd->vdev_ashift)) { 3200 obsolete_bytes += 1 << vd->vdev_ashift; 3201 } 3202 } 3203 3204 int64_t bytes_leaked = obsolete_bytes - 3205 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]; 3206 ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=, 3207 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]); 3208 if (bytes_leaked != 0 && 3209 (vdev_obsolete_counts_are_precise(vd) || 3210 dump_opt['d'] >= 5)) { 3211 (void) printf("obsolete indirect mapping count " 3212 "mismatch on %llu:%llx:%llx : %llx bytes leaked\n", 3213 (u_longlong_t)vd->vdev_id, 3214 (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep), 3215 (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), 3216 (u_longlong_t)bytes_leaked); 3217 } 3218 total_leaked += ABS(bytes_leaked); 3219 } 3220 3221 if (!vdev_obsolete_counts_are_precise(vd) && total_leaked > 0) { 3222 int pct_leaked = total_leaked * 100 / 3223 vdev_indirect_mapping_bytes_mapped(vim); 3224 (void) printf("cannot verify obsolete indirect mapping " 3225 "counts of vdev %llu because precise feature was not " 3226 "enabled when it was removed: %d%% (%llx bytes) of mapping" 3227 "unreferenced\n", 3228 (u_longlong_t)vd->vdev_id, pct_leaked, 3229 (u_longlong_t)total_leaked); 3230 } else if (total_leaked > 0) { 3231 (void) printf("obsolete indirect mapping count mismatch " 3232 "for vdev %llu -- %llx total bytes mismatched\n", 3233 (u_longlong_t)vd->vdev_id, 3234 (u_longlong_t)total_leaked); 3235 leaks |= B_TRUE; 3236 } 3237 3238 vdev_indirect_mapping_free_obsolete_counts(vim, 3239 zcb->zcb_vd_obsolete_counts[vd->vdev_id]); 3240 zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL; 3241 3242 return (leaks); 3243 } 3244 3245 static boolean_t 3246 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb) 3247 { 3248 boolean_t leaks = B_FALSE; 3249 if (!dump_opt['L']) { 3250 vdev_t *rvd = spa->spa_root_vdev; 3251 for (unsigned c = 0; c < rvd->vdev_children; c++) { 3252 vdev_t *vd = rvd->vdev_child[c]; 3253 metaslab_group_t *mg = vd->vdev_mg; 3254 3255 if (zcb->zcb_vd_obsolete_counts[c] != NULL) { 3256 leaks |= zdb_check_for_obsolete_leaks(vd, zcb); 3257 } 3258 3259 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 3260 metaslab_t *msp = vd->vdev_ms[m]; 3261 ASSERT3P(mg, ==, msp->ms_group); 3262 3263 /* 3264 * The ms_tree has been overloaded to 3265 * contain allocated segments. Now that we 3266 * finished traversing all blocks, any 3267 * block that remains in the ms_tree 3268 * represents an allocated block that we 3269 * did not claim during the traversal. 3270 * Claimed blocks would have been removed 3271 * from the ms_tree. For indirect vdevs, 3272 * space remaining in the tree represents 3273 * parts of the mapping that are not 3274 * referenced, which is not a bug. 3275 */ 3276 if (vd->vdev_ops == &vdev_indirect_ops) { 3277 range_tree_vacate(msp->ms_tree, 3278 NULL, NULL); 3279 } else { 3280 range_tree_vacate(msp->ms_tree, 3281 zdb_leak, vd); 3282 } 3283 3284 if (msp->ms_loaded) { 3285 msp->ms_loaded = B_FALSE; 3286 } 3287 } 3288 } 3289 3290 umem_free(zcb->zcb_vd_obsolete_counts, 3291 rvd->vdev_children * sizeof (uint32_t *)); 3292 zcb->zcb_vd_obsolete_counts = NULL; 3293 } 3294 return (leaks); 3295 } 3296 3297 /* ARGSUSED */ 3298 static int 3299 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 3300 { 3301 zdb_cb_t *zcb = arg; 3302 3303 if (dump_opt['b'] >= 5) { 3304 char blkbuf[BP_SPRINTF_LEN]; 3305 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 3306 (void) printf("[%s] %s\n", 3307 "deferred free", blkbuf); 3308 } 3309 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED); 3310 return (0); 3311 } 3312 3313 static int 3314 dump_block_stats(spa_t *spa) 3315 { 3316 zdb_cb_t zcb; 3317 zdb_blkstats_t *zb, *tzb; 3318 uint64_t norm_alloc, norm_space, total_alloc, total_found; 3319 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; 3320 boolean_t leaks = B_FALSE; 3321 3322 bzero(&zcb, sizeof (zcb)); 3323 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", 3324 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", 3325 (dump_opt['c'] == 1) ? "metadata " : "", 3326 dump_opt['c'] ? "checksums " : "", 3327 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "", 3328 !dump_opt['L'] ? "nothing leaked " : ""); 3329 3330 /* 3331 * Load all space maps as SM_ALLOC maps, then traverse the pool 3332 * claiming each block we discover. If the pool is perfectly 3333 * consistent, the space maps will be empty when we're done. 3334 * Anything left over is a leak; any block we can't claim (because 3335 * it's not part of any space map) is a double allocation, 3336 * reference to a freed block, or an unclaimed log block. 3337 */ 3338 zdb_leak_init(spa, &zcb); 3339 3340 /* 3341 * If there's a deferred-free bplist, process that first. 3342 */ 3343 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, 3344 count_block_cb, &zcb, NULL); 3345 3346 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 3347 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, 3348 count_block_cb, &zcb, NULL); 3349 } 3350 3351 zdb_claim_removing(spa, &zcb); 3352 3353 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { 3354 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, 3355 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, 3356 &zcb, NULL)); 3357 } 3358 3359 if (dump_opt['c'] > 1) 3360 flags |= TRAVERSE_PREFETCH_DATA; 3361 3362 zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); 3363 zcb.zcb_start = zcb.zcb_lastprint = gethrtime(); 3364 zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); 3365 3366 /* 3367 * If we've traversed the data blocks then we need to wait for those 3368 * I/Os to complete. We leverage "The Godfather" zio to wait on 3369 * all async I/Os to complete. 3370 */ 3371 if (dump_opt['c']) { 3372 for (int i = 0; i < max_ncpus; i++) { 3373 (void) zio_wait(spa->spa_async_zio_root[i]); 3374 spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, 3375 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | 3376 ZIO_FLAG_GODFATHER); 3377 } 3378 } 3379 3380 if (zcb.zcb_haderrors) { 3381 (void) printf("\nError counts:\n\n"); 3382 (void) printf("\t%5s %s\n", "errno", "count"); 3383 for (int e = 0; e < 256; e++) { 3384 if (zcb.zcb_errors[e] != 0) { 3385 (void) printf("\t%5d %llu\n", 3386 e, (u_longlong_t)zcb.zcb_errors[e]); 3387 } 3388 } 3389 } 3390 3391 /* 3392 * Report any leaked segments. 3393 */ 3394 leaks |= zdb_leak_fini(spa, &zcb); 3395 3396 tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL]; 3397 3398 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 3399 norm_space = metaslab_class_get_space(spa_normal_class(spa)); 3400 3401 total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa)); 3402 total_found = tzb->zb_asize - zcb.zcb_dedup_asize + 3403 zcb.zcb_removing_size; 3404 3405 if (total_found == total_alloc) { 3406 if (!dump_opt['L']) 3407 (void) printf("\n\tNo leaks (block sum matches space" 3408 " maps exactly)\n"); 3409 } else { 3410 (void) printf("block traversal size %llu != alloc %llu " 3411 "(%s %lld)\n", 3412 (u_longlong_t)total_found, 3413 (u_longlong_t)total_alloc, 3414 (dump_opt['L']) ? "unreachable" : "leaked", 3415 (longlong_t)(total_alloc - total_found)); 3416 leaks = B_TRUE; 3417 } 3418 3419 if (tzb->zb_count == 0) 3420 return (2); 3421 3422 (void) printf("\n"); 3423 (void) printf("\tbp count: %10llu\n", 3424 (u_longlong_t)tzb->zb_count); 3425 (void) printf("\tganged count: %10llu\n", 3426 (longlong_t)tzb->zb_gangs); 3427 (void) printf("\tbp logical: %10llu avg: %6llu\n", 3428 (u_longlong_t)tzb->zb_lsize, 3429 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 3430 (void) printf("\tbp physical: %10llu avg:" 3431 " %6llu compression: %6.2f\n", 3432 (u_longlong_t)tzb->zb_psize, 3433 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 3434 (double)tzb->zb_lsize / tzb->zb_psize); 3435 (void) printf("\tbp allocated: %10llu avg:" 3436 " %6llu compression: %6.2f\n", 3437 (u_longlong_t)tzb->zb_asize, 3438 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 3439 (double)tzb->zb_lsize / tzb->zb_asize); 3440 (void) printf("\tbp deduped: %10llu ref>1:" 3441 " %6llu deduplication: %6.2f\n", 3442 (u_longlong_t)zcb.zcb_dedup_asize, 3443 (u_longlong_t)zcb.zcb_dedup_blocks, 3444 (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0); 3445 (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n", 3446 (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); 3447 3448 for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { 3449 if (zcb.zcb_embedded_blocks[i] == 0) 3450 continue; 3451 (void) printf("\n"); 3452 (void) printf("\tadditional, non-pointer bps of type %u: " 3453 "%10llu\n", 3454 i, (u_longlong_t)zcb.zcb_embedded_blocks[i]); 3455 3456 if (dump_opt['b'] >= 3) { 3457 (void) printf("\t number of (compressed) bytes: " 3458 "number of bps\n"); 3459 dump_histogram(zcb.zcb_embedded_histogram[i], 3460 sizeof (zcb.zcb_embedded_histogram[i]) / 3461 sizeof (zcb.zcb_embedded_histogram[i][0]), 0); 3462 } 3463 } 3464 3465 if (tzb->zb_ditto_samevdev != 0) { 3466 (void) printf("\tDittoed blocks on same vdev: %llu\n", 3467 (longlong_t)tzb->zb_ditto_samevdev); 3468 } 3469 3470 for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) { 3471 vdev_t *vd = spa->spa_root_vdev->vdev_child[v]; 3472 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3473 3474 if (vim == NULL) { 3475 continue; 3476 } 3477 3478 char mem[32]; 3479 zdb_nicenum(vdev_indirect_mapping_num_entries(vim), 3480 mem, vdev_indirect_mapping_size(vim)); 3481 3482 (void) printf("\tindirect vdev id %llu has %llu segments " 3483 "(%s in memory)\n", 3484 (longlong_t)vd->vdev_id, 3485 (longlong_t)vdev_indirect_mapping_num_entries(vim), mem); 3486 } 3487 3488 if (dump_opt['b'] >= 2) { 3489 int l, t, level; 3490 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 3491 "\t avg\t comp\t%%Total\tType\n"); 3492 3493 for (t = 0; t <= ZDB_OT_TOTAL; t++) { 3494 char csize[32], lsize[32], psize[32], asize[32]; 3495 char avg[32], gang[32]; 3496 const char *typename; 3497 3498 /* make sure nicenum has enough space */ 3499 CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ); 3500 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ); 3501 CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ); 3502 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ); 3503 CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ); 3504 CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ); 3505 3506 if (t < DMU_OT_NUMTYPES) 3507 typename = dmu_ot[t].ot_name; 3508 else 3509 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES]; 3510 3511 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { 3512 (void) printf("%6s\t%5s\t%5s\t%5s" 3513 "\t%5s\t%5s\t%6s\t%s\n", 3514 "-", 3515 "-", 3516 "-", 3517 "-", 3518 "-", 3519 "-", 3520 "-", 3521 typename); 3522 continue; 3523 } 3524 3525 for (l = ZB_TOTAL - 1; l >= -1; l--) { 3526 level = (l == -1 ? ZB_TOTAL : l); 3527 zb = &zcb.zcb_type[level][t]; 3528 3529 if (zb->zb_asize == 0) 3530 continue; 3531 3532 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 3533 continue; 3534 3535 if (level == 0 && zb->zb_asize == 3536 zcb.zcb_type[ZB_TOTAL][t].zb_asize) 3537 continue; 3538 3539 zdb_nicenum(zb->zb_count, csize, 3540 sizeof (csize)); 3541 zdb_nicenum(zb->zb_lsize, lsize, 3542 sizeof (lsize)); 3543 zdb_nicenum(zb->zb_psize, psize, 3544 sizeof (psize)); 3545 zdb_nicenum(zb->zb_asize, asize, 3546 sizeof (asize)); 3547 zdb_nicenum(zb->zb_asize / zb->zb_count, avg, 3548 sizeof (avg)); 3549 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang)); 3550 3551 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 3552 "\t%5.2f\t%6.2f\t", 3553 csize, lsize, psize, asize, avg, 3554 (double)zb->zb_lsize / zb->zb_psize, 3555 100.0 * zb->zb_asize / tzb->zb_asize); 3556 3557 if (level == ZB_TOTAL) 3558 (void) printf("%s\n", typename); 3559 else 3560 (void) printf(" L%d %s\n", 3561 level, typename); 3562 3563 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) { 3564 (void) printf("\t number of ganged " 3565 "blocks: %s\n", gang); 3566 } 3567 3568 if (dump_opt['b'] >= 4) { 3569 (void) printf("psize " 3570 "(in 512-byte sectors): " 3571 "number of blocks\n"); 3572 dump_histogram(zb->zb_psize_histogram, 3573 PSIZE_HISTO_SIZE, 0); 3574 } 3575 } 3576 } 3577 } 3578 3579 (void) printf("\n"); 3580 3581 if (leaks) 3582 return (2); 3583 3584 if (zcb.zcb_haderrors) 3585 return (3); 3586 3587 return (0); 3588 } 3589 3590 typedef struct zdb_ddt_entry { 3591 ddt_key_t zdde_key; 3592 uint64_t zdde_ref_blocks; 3593 uint64_t zdde_ref_lsize; 3594 uint64_t zdde_ref_psize; 3595 uint64_t zdde_ref_dsize; 3596 avl_node_t zdde_node; 3597 } zdb_ddt_entry_t; 3598 3599 /* ARGSUSED */ 3600 static int 3601 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 3602 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 3603 { 3604 avl_tree_t *t = arg; 3605 avl_index_t where; 3606 zdb_ddt_entry_t *zdde, zdde_search; 3607 3608 if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) 3609 return (0); 3610 3611 if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { 3612 (void) printf("traversing objset %llu, %llu objects, " 3613 "%lu blocks so far\n", 3614 (u_longlong_t)zb->zb_objset, 3615 (u_longlong_t)BP_GET_FILL(bp), 3616 avl_numnodes(t)); 3617 } 3618 3619 if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || 3620 BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) 3621 return (0); 3622 3623 ddt_key_fill(&zdde_search.zdde_key, bp); 3624 3625 zdde = avl_find(t, &zdde_search, &where); 3626 3627 if (zdde == NULL) { 3628 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL); 3629 zdde->zdde_key = zdde_search.zdde_key; 3630 avl_insert(t, zdde, where); 3631 } 3632 3633 zdde->zdde_ref_blocks += 1; 3634 zdde->zdde_ref_lsize += BP_GET_LSIZE(bp); 3635 zdde->zdde_ref_psize += BP_GET_PSIZE(bp); 3636 zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp); 3637 3638 return (0); 3639 } 3640 3641 static void 3642 dump_simulated_ddt(spa_t *spa) 3643 { 3644 avl_tree_t t; 3645 void *cookie = NULL; 3646 zdb_ddt_entry_t *zdde; 3647 ddt_histogram_t ddh_total; 3648 ddt_stat_t dds_total; 3649 3650 bzero(&ddh_total, sizeof (ddh_total)); 3651 bzero(&dds_total, sizeof (dds_total)); 3652 avl_create(&t, ddt_entry_compare, 3653 sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node)); 3654 3655 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3656 3657 (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, 3658 zdb_ddt_add_cb, &t); 3659 3660 spa_config_exit(spa, SCL_CONFIG, FTAG); 3661 3662 while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) { 3663 ddt_stat_t dds; 3664 uint64_t refcnt = zdde->zdde_ref_blocks; 3665 ASSERT(refcnt != 0); 3666 3667 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt; 3668 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt; 3669 dds.dds_psize = zdde->zdde_ref_psize / refcnt; 3670 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt; 3671 3672 dds.dds_ref_blocks = zdde->zdde_ref_blocks; 3673 dds.dds_ref_lsize = zdde->zdde_ref_lsize; 3674 dds.dds_ref_psize = zdde->zdde_ref_psize; 3675 dds.dds_ref_dsize = zdde->zdde_ref_dsize; 3676 3677 ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1], 3678 &dds, 0); 3679 3680 umem_free(zdde, sizeof (*zdde)); 3681 } 3682 3683 avl_destroy(&t); 3684 3685 ddt_histogram_stat(&dds_total, &ddh_total); 3686 3687 (void) printf("Simulated DDT histogram:\n"); 3688 3689 zpool_dump_ddt(&dds_total, &ddh_total); 3690 3691 dump_dedup_ratio(&dds_total); 3692 } 3693 3694 static int 3695 verify_device_removal_feature_counts(spa_t *spa) 3696 { 3697 uint64_t dr_feature_refcount = 0; 3698 uint64_t oc_feature_refcount = 0; 3699 uint64_t indirect_vdev_count = 0; 3700 uint64_t precise_vdev_count = 0; 3701 uint64_t obsolete_counts_object_count = 0; 3702 uint64_t obsolete_sm_count = 0; 3703 uint64_t obsolete_counts_count = 0; 3704 uint64_t scip_count = 0; 3705 uint64_t obsolete_bpobj_count = 0; 3706 int ret = 0; 3707 3708 spa_condensing_indirect_phys_t *scip = 3709 &spa->spa_condensing_indirect_phys; 3710 if (scip->scip_next_mapping_object != 0) { 3711 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev]; 3712 ASSERT(scip->scip_prev_obsolete_sm_object != 0); 3713 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); 3714 3715 (void) printf("Condensing indirect vdev %llu: new mapping " 3716 "object %llu, prev obsolete sm %llu\n", 3717 (u_longlong_t)scip->scip_vdev, 3718 (u_longlong_t)scip->scip_next_mapping_object, 3719 (u_longlong_t)scip->scip_prev_obsolete_sm_object); 3720 if (scip->scip_prev_obsolete_sm_object != 0) { 3721 space_map_t *prev_obsolete_sm = NULL; 3722 VERIFY0(space_map_open(&prev_obsolete_sm, 3723 spa->spa_meta_objset, 3724 scip->scip_prev_obsolete_sm_object, 3725 0, vd->vdev_asize, 0)); 3726 space_map_update(prev_obsolete_sm); 3727 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm); 3728 (void) printf("\n"); 3729 space_map_close(prev_obsolete_sm); 3730 } 3731 3732 scip_count += 2; 3733 } 3734 3735 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) { 3736 vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; 3737 vdev_indirect_config_t *vic = &vd->vdev_indirect_config; 3738 3739 if (vic->vic_mapping_object != 0) { 3740 ASSERT(vd->vdev_ops == &vdev_indirect_ops || 3741 vd->vdev_removing); 3742 indirect_vdev_count++; 3743 3744 if (vd->vdev_indirect_mapping->vim_havecounts) { 3745 obsolete_counts_count++; 3746 } 3747 } 3748 if (vdev_obsolete_counts_are_precise(vd)) { 3749 ASSERT(vic->vic_mapping_object != 0); 3750 precise_vdev_count++; 3751 } 3752 if (vdev_obsolete_sm_object(vd) != 0) { 3753 ASSERT(vic->vic_mapping_object != 0); 3754 obsolete_sm_count++; 3755 } 3756 } 3757 3758 (void) feature_get_refcount(spa, 3759 &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL], 3760 &dr_feature_refcount); 3761 (void) feature_get_refcount(spa, 3762 &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS], 3763 &oc_feature_refcount); 3764 3765 if (dr_feature_refcount != indirect_vdev_count) { 3766 ret = 1; 3767 (void) printf("Number of indirect vdevs (%llu) " \ 3768 "does not match feature count (%llu)\n", 3769 (u_longlong_t)indirect_vdev_count, 3770 (u_longlong_t)dr_feature_refcount); 3771 } else { 3772 (void) printf("Verified device_removal feature refcount " \ 3773 "of %llu is correct\n", 3774 (u_longlong_t)dr_feature_refcount); 3775 } 3776 3777 if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT, 3778 DMU_POOL_OBSOLETE_BPOBJ) == 0) { 3779 obsolete_bpobj_count++; 3780 } 3781 3782 3783 obsolete_counts_object_count = precise_vdev_count; 3784 obsolete_counts_object_count += obsolete_sm_count; 3785 obsolete_counts_object_count += obsolete_counts_count; 3786 obsolete_counts_object_count += scip_count; 3787 obsolete_counts_object_count += obsolete_bpobj_count; 3788 obsolete_counts_object_count += remap_deadlist_count; 3789 3790 if (oc_feature_refcount != obsolete_counts_object_count) { 3791 ret = 1; 3792 (void) printf("Number of obsolete counts objects (%llu) " \ 3793 "does not match feature count (%llu)\n", 3794 (u_longlong_t)obsolete_counts_object_count, 3795 (u_longlong_t)oc_feature_refcount); 3796 (void) printf("pv:%llu os:%llu oc:%llu sc:%llu " 3797 "ob:%llu rd:%llu\n", 3798 (u_longlong_t)precise_vdev_count, 3799 (u_longlong_t)obsolete_sm_count, 3800 (u_longlong_t)obsolete_counts_count, 3801 (u_longlong_t)scip_count, 3802 (u_longlong_t)obsolete_bpobj_count, 3803 (u_longlong_t)remap_deadlist_count); 3804 } else { 3805 (void) printf("Verified indirect_refcount feature refcount " \ 3806 "of %llu is correct\n", 3807 (u_longlong_t)oc_feature_refcount); 3808 } 3809 return (ret); 3810 } 3811 3812 static void 3813 dump_zpool(spa_t *spa) 3814 { 3815 dsl_pool_t *dp = spa_get_dsl(spa); 3816 int rc = 0; 3817 3818 if (dump_opt['S']) { 3819 dump_simulated_ddt(spa); 3820 return; 3821 } 3822 3823 if (!dump_opt['e'] && dump_opt['C'] > 1) { 3824 (void) printf("\nCached configuration:\n"); 3825 dump_nvlist(spa->spa_config, 8); 3826 } 3827 3828 if (dump_opt['C']) 3829 dump_config(spa); 3830 3831 if (dump_opt['u']) 3832 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n"); 3833 3834 if (dump_opt['D']) 3835 dump_all_ddts(spa); 3836 3837 if (dump_opt['d'] > 2 || dump_opt['m']) 3838 dump_metaslabs(spa); 3839 if (dump_opt['M']) 3840 dump_metaslab_groups(spa); 3841 3842 if (dump_opt['d'] || dump_opt['i']) { 3843 dump_dir(dp->dp_meta_objset); 3844 if (dump_opt['d'] >= 3) { 3845 dsl_pool_t *dp = spa->spa_dsl_pool; 3846 dump_full_bpobj(&spa->spa_deferred_bpobj, 3847 "Deferred frees", 0); 3848 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 3849 dump_full_bpobj(&dp->dp_free_bpobj, 3850 "Pool snapshot frees", 0); 3851 } 3852 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { 3853 ASSERT(spa_feature_is_enabled(spa, 3854 SPA_FEATURE_DEVICE_REMOVAL)); 3855 dump_full_bpobj(&dp->dp_obsolete_bpobj, 3856 "Pool obsolete blocks", 0); 3857 } 3858 3859 if (spa_feature_is_active(spa, 3860 SPA_FEATURE_ASYNC_DESTROY)) { 3861 dump_bptree(spa->spa_meta_objset, 3862 dp->dp_bptree_obj, 3863 "Pool dataset frees"); 3864 } 3865 dump_dtl(spa->spa_root_vdev, 0); 3866 } 3867 (void) dmu_objset_find(spa_name(spa), dump_one_dir, 3868 NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 3869 3870 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 3871 uint64_t refcount; 3872 3873 if (!(spa_feature_table[f].fi_flags & 3874 ZFEATURE_FLAG_PER_DATASET) || 3875 !spa_feature_is_enabled(spa, f)) { 3876 ASSERT0(dataset_feature_count[f]); 3877 continue; 3878 } 3879 (void) feature_get_refcount(spa, 3880 &spa_feature_table[f], &refcount); 3881 if (dataset_feature_count[f] != refcount) { 3882 (void) printf("%s feature refcount mismatch: " 3883 "%lld datasets != %lld refcount\n", 3884 spa_feature_table[f].fi_uname, 3885 (longlong_t)dataset_feature_count[f], 3886 (longlong_t)refcount); 3887 rc = 2; 3888 } else { 3889 (void) printf("Verified %s feature refcount " 3890 "of %llu is correct\n", 3891 spa_feature_table[f].fi_uname, 3892 (longlong_t)refcount); 3893 } 3894 } 3895 3896 if (rc == 0) { 3897 rc = verify_device_removal_feature_counts(spa); 3898 } 3899 } 3900 if (rc == 0 && (dump_opt['b'] || dump_opt['c'])) 3901 rc = dump_block_stats(spa); 3902 3903 if (rc == 0) 3904 rc = verify_spacemap_refcounts(spa); 3905 3906 if (dump_opt['s']) 3907 show_pool_stats(spa); 3908 3909 if (dump_opt['h']) 3910 dump_history(spa); 3911 3912 if (rc != 0) { 3913 dump_debug_buffer(); 3914 exit(rc); 3915 } 3916 } 3917 3918 #define ZDB_FLAG_CHECKSUM 0x0001 3919 #define ZDB_FLAG_DECOMPRESS 0x0002 3920 #define ZDB_FLAG_BSWAP 0x0004 3921 #define ZDB_FLAG_GBH 0x0008 3922 #define ZDB_FLAG_INDIRECT 0x0010 3923 #define ZDB_FLAG_PHYS 0x0020 3924 #define ZDB_FLAG_RAW 0x0040 3925 #define ZDB_FLAG_PRINT_BLKPTR 0x0080 3926 3927 static int flagbits[256]; 3928 3929 static void 3930 zdb_print_blkptr(blkptr_t *bp, int flags) 3931 { 3932 char blkbuf[BP_SPRINTF_LEN]; 3933 3934 if (flags & ZDB_FLAG_BSWAP) 3935 byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); 3936 3937 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 3938 (void) printf("%s\n", blkbuf); 3939 } 3940 3941 static void 3942 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags) 3943 { 3944 int i; 3945 3946 for (i = 0; i < nbps; i++) 3947 zdb_print_blkptr(&bp[i], flags); 3948 } 3949 3950 static void 3951 zdb_dump_gbh(void *buf, int flags) 3952 { 3953 zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags); 3954 } 3955 3956 static void 3957 zdb_dump_block_raw(void *buf, uint64_t size, int flags) 3958 { 3959 if (flags & ZDB_FLAG_BSWAP) 3960 byteswap_uint64_array(buf, size); 3961 (void) write(1, buf, size); 3962 } 3963 3964 static void 3965 zdb_dump_block(char *label, void *buf, uint64_t size, int flags) 3966 { 3967 uint64_t *d = (uint64_t *)buf; 3968 unsigned nwords = size / sizeof (uint64_t); 3969 int do_bswap = !!(flags & ZDB_FLAG_BSWAP); 3970 unsigned i, j; 3971 const char *hdr; 3972 char *c; 3973 3974 3975 if (do_bswap) 3976 hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8"; 3977 else 3978 hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f"; 3979 3980 (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr); 3981 3982 for (i = 0; i < nwords; i += 2) { 3983 (void) printf("%06llx: %016llx %016llx ", 3984 (u_longlong_t)(i * sizeof (uint64_t)), 3985 (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]), 3986 (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1])); 3987 3988 c = (char *)&d[i]; 3989 for (j = 0; j < 2 * sizeof (uint64_t); j++) 3990 (void) printf("%c", isprint(c[j]) ? c[j] : '.'); 3991 (void) printf("\n"); 3992 } 3993 } 3994 3995 /* 3996 * There are two acceptable formats: 3997 * leaf_name - For example: c1t0d0 or /tmp/ztest.0a 3998 * child[.child]* - For example: 0.1.1 3999 * 4000 * The second form can be used to specify arbitrary vdevs anywhere 4001 * in the heirarchy. For example, in a pool with a mirror of 4002 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 . 4003 */ 4004 static vdev_t * 4005 zdb_vdev_lookup(vdev_t *vdev, const char *path) 4006 { 4007 char *s, *p, *q; 4008 unsigned i; 4009 4010 if (vdev == NULL) 4011 return (NULL); 4012 4013 /* First, assume the x.x.x.x format */ 4014 i = strtoul(path, &s, 10); 4015 if (s == path || (s && *s != '.' && *s != '\0')) 4016 goto name; 4017 if (i >= vdev->vdev_children) 4018 return (NULL); 4019 4020 vdev = vdev->vdev_child[i]; 4021 if (*s == '\0') 4022 return (vdev); 4023 return (zdb_vdev_lookup(vdev, s+1)); 4024 4025 name: 4026 for (i = 0; i < vdev->vdev_children; i++) { 4027 vdev_t *vc = vdev->vdev_child[i]; 4028 4029 if (vc->vdev_path == NULL) { 4030 vc = zdb_vdev_lookup(vc, path); 4031 if (vc == NULL) 4032 continue; 4033 else 4034 return (vc); 4035 } 4036 4037 p = strrchr(vc->vdev_path, '/'); 4038 p = p ? p + 1 : vc->vdev_path; 4039 q = &vc->vdev_path[strlen(vc->vdev_path) - 2]; 4040 4041 if (strcmp(vc->vdev_path, path) == 0) 4042 return (vc); 4043 if (strcmp(p, path) == 0) 4044 return (vc); 4045 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0) 4046 return (vc); 4047 } 4048 4049 return (NULL); 4050 } 4051 4052 /* ARGSUSED */ 4053 static int 4054 random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused) 4055 { 4056 return (random_get_pseudo_bytes(buf, len)); 4057 } 4058 4059 /* 4060 * Read a block from a pool and print it out. The syntax of the 4061 * block descriptor is: 4062 * 4063 * pool:vdev_specifier:offset:size[:flags] 4064 * 4065 * pool - The name of the pool you wish to read from 4066 * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) 4067 * offset - offset, in hex, in bytes 4068 * size - Amount of data to read, in hex, in bytes 4069 * flags - A string of characters specifying options 4070 * b: Decode a blkptr at given offset within block 4071 * *c: Calculate and display checksums 4072 * d: Decompress data before dumping 4073 * e: Byteswap data before dumping 4074 * g: Display data as a gang block header 4075 * i: Display as an indirect block 4076 * p: Do I/O to physical offset 4077 * r: Dump raw data to stdout 4078 * 4079 * * = not yet implemented 4080 */ 4081 static void 4082 zdb_read_block(char *thing, spa_t *spa) 4083 { 4084 blkptr_t blk, *bp = &blk; 4085 dva_t *dva = bp->blk_dva; 4086 int flags = 0; 4087 uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; 4088 zio_t *zio; 4089 vdev_t *vd; 4090 abd_t *pabd; 4091 void *lbuf, *buf; 4092 const char *s, *vdev; 4093 char *p, *dup, *flagstr; 4094 int i, error; 4095 4096 dup = strdup(thing); 4097 s = strtok(dup, ":"); 4098 vdev = s ? s : ""; 4099 s = strtok(NULL, ":"); 4100 offset = strtoull(s ? s : "", NULL, 16); 4101 s = strtok(NULL, ":"); 4102 size = strtoull(s ? s : "", NULL, 16); 4103 s = strtok(NULL, ":"); 4104 if (s) 4105 flagstr = strdup(s); 4106 else 4107 flagstr = strdup(""); 4108 4109 s = NULL; 4110 if (size == 0) 4111 s = "size must not be zero"; 4112 if (!IS_P2ALIGNED(size, DEV_BSIZE)) 4113 s = "size must be a multiple of sector size"; 4114 if (!IS_P2ALIGNED(offset, DEV_BSIZE)) 4115 s = "offset must be a multiple of sector size"; 4116 if (s) { 4117 (void) printf("Invalid block specifier: %s - %s\n", thing, s); 4118 free(dup); 4119 return; 4120 } 4121 4122 for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) { 4123 for (i = 0; flagstr[i]; i++) { 4124 int bit = flagbits[(uchar_t)flagstr[i]]; 4125 4126 if (bit == 0) { 4127 (void) printf("***Invalid flag: %c\n", 4128 flagstr[i]); 4129 continue; 4130 } 4131 flags |= bit; 4132 4133 /* If it's not something with an argument, keep going */ 4134 if ((bit & (ZDB_FLAG_CHECKSUM | 4135 ZDB_FLAG_PRINT_BLKPTR)) == 0) 4136 continue; 4137 4138 p = &flagstr[i + 1]; 4139 if (bit == ZDB_FLAG_PRINT_BLKPTR) 4140 blkptr_offset = strtoull(p, &p, 16); 4141 if (*p != ':' && *p != '\0') { 4142 (void) printf("***Invalid flag arg: '%s'\n", s); 4143 free(dup); 4144 return; 4145 } 4146 } 4147 } 4148 free(flagstr); 4149 4150 vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev); 4151 if (vd == NULL) { 4152 (void) printf("***Invalid vdev: %s\n", vdev); 4153 free(dup); 4154 return; 4155 } else { 4156 if (vd->vdev_path) 4157 (void) fprintf(stderr, "Found vdev: %s\n", 4158 vd->vdev_path); 4159 else 4160 (void) fprintf(stderr, "Found vdev type: %s\n", 4161 vd->vdev_ops->vdev_op_type); 4162 } 4163 4164 psize = size; 4165 lsize = size; 4166 4167 pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE); 4168 lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 4169 4170 BP_ZERO(bp); 4171 4172 DVA_SET_VDEV(&dva[0], vd->vdev_id); 4173 DVA_SET_OFFSET(&dva[0], offset); 4174 DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH)); 4175 DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize)); 4176 4177 BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL); 4178 4179 BP_SET_LSIZE(bp, lsize); 4180 BP_SET_PSIZE(bp, psize); 4181 BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF); 4182 BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF); 4183 BP_SET_TYPE(bp, DMU_OT_NONE); 4184 BP_SET_LEVEL(bp, 0); 4185 BP_SET_DEDUP(bp, 0); 4186 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 4187 4188 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4189 zio = zio_root(spa, NULL, NULL, 0); 4190 4191 if (vd == vd->vdev_top) { 4192 /* 4193 * Treat this as a normal block read. 4194 */ 4195 zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL, 4196 ZIO_PRIORITY_SYNC_READ, 4197 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL)); 4198 } else { 4199 /* 4200 * Treat this as a vdev child I/O. 4201 */ 4202 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd, 4203 psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, 4204 ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE | 4205 ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | 4206 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL, 4207 NULL, NULL)); 4208 } 4209 4210 error = zio_wait(zio); 4211 spa_config_exit(spa, SCL_STATE, FTAG); 4212 4213 if (error) { 4214 (void) printf("Read of %s failed, error: %d\n", thing, error); 4215 goto out; 4216 } 4217 4218 if (flags & ZDB_FLAG_DECOMPRESS) { 4219 /* 4220 * We don't know how the data was compressed, so just try 4221 * every decompress function at every inflated blocksize. 4222 */ 4223 enum zio_compress c; 4224 void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 4225 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 4226 4227 abd_copy_to_buf(pbuf2, pabd, psize); 4228 4229 VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize, 4230 random_get_pseudo_bytes_cb, NULL)); 4231 4232 VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize, 4233 SPA_MAXBLOCKSIZE - psize)); 4234 4235 for (lsize = SPA_MAXBLOCKSIZE; lsize > psize; 4236 lsize -= SPA_MINBLOCKSIZE) { 4237 for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) { 4238 if (zio_decompress_data(c, pabd, 4239 lbuf, psize, lsize) == 0 && 4240 zio_decompress_data_buf(c, pbuf2, 4241 lbuf2, psize, lsize) == 0 && 4242 bcmp(lbuf, lbuf2, lsize) == 0) 4243 break; 4244 } 4245 if (c != ZIO_COMPRESS_FUNCTIONS) 4246 break; 4247 lsize -= SPA_MINBLOCKSIZE; 4248 } 4249 4250 umem_free(pbuf2, SPA_MAXBLOCKSIZE); 4251 umem_free(lbuf2, SPA_MAXBLOCKSIZE); 4252 4253 if (lsize <= psize) { 4254 (void) printf("Decompress of %s failed\n", thing); 4255 goto out; 4256 } 4257 buf = lbuf; 4258 size = lsize; 4259 } else { 4260 buf = abd_to_buf(pabd); 4261 size = psize; 4262 } 4263 4264 if (flags & ZDB_FLAG_PRINT_BLKPTR) 4265 zdb_print_blkptr((blkptr_t *)(void *) 4266 ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); 4267 else if (flags & ZDB_FLAG_RAW) 4268 zdb_dump_block_raw(buf, size, flags); 4269 else if (flags & ZDB_FLAG_INDIRECT) 4270 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t), 4271 flags); 4272 else if (flags & ZDB_FLAG_GBH) 4273 zdb_dump_gbh(buf, flags); 4274 else 4275 zdb_dump_block(thing, buf, size, flags); 4276 4277 out: 4278 abd_free(pabd); 4279 umem_free(lbuf, SPA_MAXBLOCKSIZE); 4280 free(dup); 4281 } 4282 4283 static void 4284 zdb_embedded_block(char *thing) 4285 { 4286 blkptr_t bp; 4287 unsigned long long *words = (void *)&bp; 4288 char buf[SPA_MAXBLOCKSIZE]; 4289 int err; 4290 4291 bzero(&bp, sizeof (bp)); 4292 err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:" 4293 "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx", 4294 words + 0, words + 1, words + 2, words + 3, 4295 words + 4, words + 5, words + 6, words + 7, 4296 words + 8, words + 9, words + 10, words + 11, 4297 words + 12, words + 13, words + 14, words + 15); 4298 if (err != 16) { 4299 (void) printf("invalid input format\n"); 4300 exit(1); 4301 } 4302 ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE); 4303 err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp)); 4304 if (err != 0) { 4305 (void) printf("decode failed: %u\n", err); 4306 exit(1); 4307 } 4308 zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0); 4309 } 4310 4311 static boolean_t 4312 pool_match(nvlist_t *cfg, char *tgt) 4313 { 4314 uint64_t v, guid = strtoull(tgt, NULL, 0); 4315 char *s; 4316 4317 if (guid != 0) { 4318 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0) 4319 return (v == guid); 4320 } else { 4321 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0) 4322 return (strcmp(s, tgt) == 0); 4323 } 4324 return (B_FALSE); 4325 } 4326 4327 static char * 4328 find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv) 4329 { 4330 nvlist_t *pools; 4331 nvlist_t *match = NULL; 4332 char *name = NULL; 4333 char *sepp = NULL; 4334 char sep = '\0'; 4335 int count = 0; 4336 importargs_t args; 4337 4338 bzero(&args, sizeof (args)); 4339 args.paths = dirc; 4340 args.path = dirv; 4341 args.can_be_active = B_TRUE; 4342 4343 if ((sepp = strpbrk(*target, "/@")) != NULL) { 4344 sep = *sepp; 4345 *sepp = '\0'; 4346 } 4347 4348 pools = zpool_search_import(g_zfs, &args); 4349 4350 if (pools != NULL) { 4351 nvpair_t *elem = NULL; 4352 while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { 4353 verify(nvpair_value_nvlist(elem, configp) == 0); 4354 if (pool_match(*configp, *target)) { 4355 count++; 4356 if (match != NULL) { 4357 /* print previously found config */ 4358 if (name != NULL) { 4359 (void) printf("%s\n", name); 4360 dump_nvlist(match, 8); 4361 name = NULL; 4362 } 4363 (void) printf("%s\n", 4364 nvpair_name(elem)); 4365 dump_nvlist(*configp, 8); 4366 } else { 4367 match = *configp; 4368 name = nvpair_name(elem); 4369 } 4370 } 4371 } 4372 } 4373 if (count > 1) 4374 (void) fatal("\tMatched %d pools - use pool GUID " 4375 "instead of pool name or \n" 4376 "\tpool name part of a dataset name to select pool", count); 4377 4378 if (sepp) 4379 *sepp = sep; 4380 /* 4381 * If pool GUID was specified for pool id, replace it with pool name 4382 */ 4383 if (name && (strstr(*target, name) != *target)) { 4384 int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0); 4385 4386 *target = umem_alloc(sz, UMEM_NOFAIL); 4387 (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : ""); 4388 } 4389 4390 *configp = name ? match : NULL; 4391 4392 return (name); 4393 } 4394 4395 int 4396 main(int argc, char **argv) 4397 { 4398 int c; 4399 struct rlimit rl = { 1024, 1024 }; 4400 spa_t *spa = NULL; 4401 objset_t *os = NULL; 4402 int dump_all = 1; 4403 int verbose = 0; 4404 int error = 0; 4405 char **searchdirs = NULL; 4406 int nsearch = 0; 4407 char *target; 4408 nvlist_t *policy = NULL; 4409 uint64_t max_txg = UINT64_MAX; 4410 int flags = ZFS_IMPORT_MISSING_LOG; 4411 int rewind = ZPOOL_NEVER_REWIND; 4412 char *spa_config_path_env; 4413 boolean_t target_is_spa = B_TRUE; 4414 4415 (void) setrlimit(RLIMIT_NOFILE, &rl); 4416 (void) enable_extended_FILE_stdio(-1, -1); 4417 4418 dprintf_setup(&argc, argv); 4419 4420 /* 4421 * If there is an environment variable SPA_CONFIG_PATH it overrides 4422 * default spa_config_path setting. If -U flag is specified it will 4423 * override this environment variable settings once again. 4424 */ 4425 spa_config_path_env = getenv("SPA_CONFIG_PATH"); 4426 if (spa_config_path_env != NULL) 4427 spa_config_path = spa_config_path_env; 4428 4429 while ((c = getopt(argc, argv, 4430 "AbcCdDeEFGhiI:lLmMo:Op:PqRsSt:uU:vVx:X")) != -1) { 4431 switch (c) { 4432 case 'b': 4433 case 'c': 4434 case 'C': 4435 case 'd': 4436 case 'D': 4437 case 'E': 4438 case 'G': 4439 case 'h': 4440 case 'i': 4441 case 'l': 4442 case 'm': 4443 case 'M': 4444 case 'O': 4445 case 'R': 4446 case 's': 4447 case 'S': 4448 case 'u': 4449 dump_opt[c]++; 4450 dump_all = 0; 4451 break; 4452 case 'A': 4453 case 'e': 4454 case 'F': 4455 case 'L': 4456 case 'P': 4457 case 'q': 4458 case 'X': 4459 dump_opt[c]++; 4460 break; 4461 /* NB: Sort single match options below. */ 4462 case 'I': 4463 max_inflight = strtoull(optarg, NULL, 0); 4464 if (max_inflight == 0) { 4465 (void) fprintf(stderr, "maximum number " 4466 "of inflight I/Os must be greater " 4467 "than 0\n"); 4468 usage(); 4469 } 4470 break; 4471 case 'o': 4472 error = set_global_var(optarg); 4473 if (error != 0) 4474 usage(); 4475 break; 4476 case 'p': 4477 if (searchdirs == NULL) { 4478 searchdirs = umem_alloc(sizeof (char *), 4479 UMEM_NOFAIL); 4480 } else { 4481 char **tmp = umem_alloc((nsearch + 1) * 4482 sizeof (char *), UMEM_NOFAIL); 4483 bcopy(searchdirs, tmp, nsearch * 4484 sizeof (char *)); 4485 umem_free(searchdirs, 4486 nsearch * sizeof (char *)); 4487 searchdirs = tmp; 4488 } 4489 searchdirs[nsearch++] = optarg; 4490 break; 4491 case 't': 4492 max_txg = strtoull(optarg, NULL, 0); 4493 if (max_txg < TXG_INITIAL) { 4494 (void) fprintf(stderr, "incorrect txg " 4495 "specified: %s\n", optarg); 4496 usage(); 4497 } 4498 break; 4499 case 'U': 4500 spa_config_path = optarg; 4501 if (spa_config_path[0] != '/') { 4502 (void) fprintf(stderr, 4503 "cachefile must be an absolute path " 4504 "(i.e. start with a slash)\n"); 4505 usage(); 4506 } 4507 break; 4508 case 'v': 4509 verbose++; 4510 break; 4511 case 'V': 4512 flags = ZFS_IMPORT_VERBATIM; 4513 break; 4514 case 'x': 4515 vn_dumpdir = optarg; 4516 break; 4517 default: 4518 usage(); 4519 break; 4520 } 4521 } 4522 4523 if (!dump_opt['e'] && searchdirs != NULL) { 4524 (void) fprintf(stderr, "-p option requires use of -e\n"); 4525 usage(); 4526 } 4527 4528 /* 4529 * ZDB does not typically re-read blocks; therefore limit the ARC 4530 * to 256 MB, which can be used entirely for metadata. 4531 */ 4532 zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024; 4533 4534 /* 4535 * "zdb -c" uses checksum-verifying scrub i/os which are async reads. 4536 * "zdb -b" uses traversal prefetch which uses async reads. 4537 * For good performance, let several of them be active at once. 4538 */ 4539 zfs_vdev_async_read_max_active = 10; 4540 4541 /* 4542 * Disable reference tracking for better performance. 4543 */ 4544 reference_tracking_enable = B_FALSE; 4545 4546 kernel_init(FREAD); 4547 g_zfs = libzfs_init(); 4548 ASSERT(g_zfs != NULL); 4549 4550 if (dump_all) 4551 verbose = MAX(verbose, 1); 4552 4553 for (c = 0; c < 256; c++) { 4554 if (dump_all && strchr("AeEFlLOPRSX", c) == NULL) 4555 dump_opt[c] = 1; 4556 if (dump_opt[c]) 4557 dump_opt[c] += verbose; 4558 } 4559 4560 aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2); 4561 zfs_recover = (dump_opt['A'] > 1); 4562 4563 argc -= optind; 4564 argv += optind; 4565 4566 if (argc < 2 && dump_opt['R']) 4567 usage(); 4568 4569 if (dump_opt['E']) { 4570 if (argc != 1) 4571 usage(); 4572 zdb_embedded_block(argv[0]); 4573 return (0); 4574 } 4575 4576 if (argc < 1) { 4577 if (!dump_opt['e'] && dump_opt['C']) { 4578 dump_cachefile(spa_config_path); 4579 return (0); 4580 } 4581 usage(); 4582 } 4583 4584 if (dump_opt['l']) 4585 return (dump_label(argv[0])); 4586 4587 if (dump_opt['O']) { 4588 if (argc != 2) 4589 usage(); 4590 dump_opt['v'] = verbose + 3; 4591 return (dump_path(argv[0], argv[1])); 4592 } 4593 4594 if (dump_opt['X'] || dump_opt['F']) 4595 rewind = ZPOOL_DO_REWIND | 4596 (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0); 4597 4598 if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 || 4599 nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 || 4600 nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0) 4601 fatal("internal error: %s", strerror(ENOMEM)); 4602 4603 error = 0; 4604 target = argv[0]; 4605 4606 if (dump_opt['e']) { 4607 nvlist_t *cfg = NULL; 4608 char *name = find_zpool(&target, &cfg, nsearch, searchdirs); 4609 4610 error = ENOENT; 4611 if (name) { 4612 if (dump_opt['C'] > 1) { 4613 (void) printf("\nConfiguration for import:\n"); 4614 dump_nvlist(cfg, 8); 4615 } 4616 if (nvlist_add_nvlist(cfg, 4617 ZPOOL_REWIND_POLICY, policy) != 0) { 4618 fatal("can't open '%s': %s", 4619 target, strerror(ENOMEM)); 4620 } 4621 error = spa_import(name, cfg, NULL, flags); 4622 } 4623 } 4624 4625 if (strpbrk(target, "/@") != NULL) { 4626 size_t targetlen; 4627 4628 target_is_spa = B_FALSE; 4629 /* 4630 * Remove any trailing slash. Later code would get confused 4631 * by it, but we want to allow it so that "pool/" can 4632 * indicate that we want to dump the topmost filesystem, 4633 * rather than the whole pool. 4634 */ 4635 targetlen = strlen(target); 4636 if (targetlen != 0 && target[targetlen - 1] == '/') 4637 target[targetlen - 1] = '\0'; 4638 } 4639 4640 if (error == 0) { 4641 if (target_is_spa || dump_opt['R']) { 4642 error = spa_open_rewind(target, &spa, FTAG, policy, 4643 NULL); 4644 if (error) { 4645 /* 4646 * If we're missing the log device then 4647 * try opening the pool after clearing the 4648 * log state. 4649 */ 4650 mutex_enter(&spa_namespace_lock); 4651 if ((spa = spa_lookup(target)) != NULL && 4652 spa->spa_log_state == SPA_LOG_MISSING) { 4653 spa->spa_log_state = SPA_LOG_CLEAR; 4654 error = 0; 4655 } 4656 mutex_exit(&spa_namespace_lock); 4657 4658 if (!error) { 4659 error = spa_open_rewind(target, &spa, 4660 FTAG, policy, NULL); 4661 } 4662 } 4663 } else { 4664 error = open_objset(target, DMU_OST_ANY, FTAG, &os); 4665 } 4666 } 4667 nvlist_free(policy); 4668 4669 if (error) 4670 fatal("can't open '%s': %s", target, strerror(error)); 4671 4672 argv++; 4673 argc--; 4674 if (!dump_opt['R']) { 4675 if (argc > 0) { 4676 zopt_objects = argc; 4677 zopt_object = calloc(zopt_objects, sizeof (uint64_t)); 4678 for (unsigned i = 0; i < zopt_objects; i++) { 4679 errno = 0; 4680 zopt_object[i] = strtoull(argv[i], NULL, 0); 4681 if (zopt_object[i] == 0 && errno != 0) 4682 fatal("bad number %s: %s", 4683 argv[i], strerror(errno)); 4684 } 4685 } 4686 if (os != NULL) { 4687 dump_dir(os); 4688 } else if (zopt_objects > 0 && !dump_opt['m']) { 4689 dump_dir(spa->spa_meta_objset); 4690 } else { 4691 dump_zpool(spa); 4692 } 4693 } else { 4694 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR; 4695 flagbits['c'] = ZDB_FLAG_CHECKSUM; 4696 flagbits['d'] = ZDB_FLAG_DECOMPRESS; 4697 flagbits['e'] = ZDB_FLAG_BSWAP; 4698 flagbits['g'] = ZDB_FLAG_GBH; 4699 flagbits['i'] = ZDB_FLAG_INDIRECT; 4700 flagbits['p'] = ZDB_FLAG_PHYS; 4701 flagbits['r'] = ZDB_FLAG_RAW; 4702 4703 for (int i = 0; i < argc; i++) 4704 zdb_read_block(argv[i], spa); 4705 } 4706 4707 if (os != NULL) 4708 close_objset(os, FTAG); 4709 else 4710 spa_close(spa, FTAG); 4711 4712 fuid_table_destroy(); 4713 4714 dump_debug_buffer(); 4715 4716 libzfs_fini(g_zfs); 4717 kernel_fini(); 4718 4719 return (0); 4720 } 4721