xref: /illumos-gate/usr/src/cmd/zdb/zdb.c (revision 0a586cea3ceec7e5e50e7e54c745082a7a333ac2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <stdio.h>
27 #include <stdio_ext.h>
28 #include <stdlib.h>
29 #include <ctype.h>
30 #include <sys/zfs_context.h>
31 #include <sys/spa.h>
32 #include <sys/spa_impl.h>
33 #include <sys/dmu.h>
34 #include <sys/zap.h>
35 #include <sys/fs/zfs.h>
36 #include <sys/zfs_znode.h>
37 #include <sys/zfs_sa.h>
38 #include <sys/sa.h>
39 #include <sys/sa_impl.h>
40 #include <sys/vdev.h>
41 #include <sys/vdev_impl.h>
42 #include <sys/metaslab_impl.h>
43 #include <sys/dmu_objset.h>
44 #include <sys/dsl_dir.h>
45 #include <sys/dsl_dataset.h>
46 #include <sys/dsl_pool.h>
47 #include <sys/dbuf.h>
48 #include <sys/zil.h>
49 #include <sys/zil_impl.h>
50 #include <sys/stat.h>
51 #include <sys/resource.h>
52 #include <sys/dmu_traverse.h>
53 #include <sys/zio_checksum.h>
54 #include <sys/zio_compress.h>
55 #include <sys/zfs_fuid.h>
56 #include <sys/arc.h>
57 #include <sys/ddt.h>
58 #undef ZFS_MAXNAMELEN
59 #undef verify
60 #include <libzfs.h>
61 
/*
 * Bounds-checked table lookups.  Each *_NAME macro yields the name stored in
 * the corresponding table when idx is below the table's size constant and
 * the literal "UNKNOWN" otherwise.  ZDB_OT_TYPE clamps an out-of-range object
 * type to DMU_OT_NUMTYPES.  Note that idx is evaluated more than once.
 */
#define	ZDB_COMPRESS_NAME(idx) \
	((idx) >= ZIO_COMPRESS_FUNCTIONS ? "UNKNOWN" : \
	zio_compress_table[(idx)].ci_name)
#define	ZDB_CHECKSUM_NAME(idx) \
	((idx) >= ZIO_CHECKSUM_FUNCTIONS ? "UNKNOWN" : \
	zio_checksum_table[(idx)].ci_name)
#define	ZDB_OT_NAME(idx) \
	((idx) >= DMU_OT_NUMTYPES ? "UNKNOWN" : \
	dmu_ot[(idx)].ot_name)
#define	ZDB_OT_TYPE(idx) \
	((idx) >= DMU_OT_NUMTYPES ? DMU_OT_NUMTYPES : (idx))
69 
70 #ifndef lint
71 extern int zfs_recover;
72 #else
73 int zfs_recover;
74 #endif
75 
76 const char cmdname[] = "zdb";
77 uint8_t dump_opt[256];
78 
79 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
80 
81 extern void dump_intent_log(zilog_t *);
82 uint64_t *zopt_object = NULL;
83 int zopt_objects = 0;
84 libzfs_handle_t *g_zfs;
85 
86 /*
87  * These libumem hooks provide a reasonable set of defaults for the allocator's
88  * debugging facilities.
89  */
/*
 * libumem hook: returns the default value to use for $UMEM_DEBUG.
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype, matching _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
95 
/*
 * libumem hook: returns the default value to use for $UMEM_LOGGING.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging[] = "fail,contents";

	return (umem_logging);
}
101 
102 static void
103 usage(void)
104 {
105 	(void) fprintf(stderr,
106 	    "Usage: %s [-CumdibcsDvhL] poolname [object...]\n"
107 	    "       %s [-div] dataset [object...]\n"
108 	    "       %s -m [-L] poolname [vdev [metaslab...]]\n"
109 	    "       %s -R poolname vdev:offset:size[:flags]\n"
110 	    "       %s -S poolname\n"
111 	    "       %s -l [-u] device\n"
112 	    "       %s -C\n\n",
113 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
114 
115 	(void) fprintf(stderr, "    Dataset name must include at least one "
116 	    "separator character '/' or '@'\n");
117 	(void) fprintf(stderr, "    If dataset name is specified, only that "
118 	    "dataset is dumped\n");
119 	(void) fprintf(stderr, "    If object numbers are specified, only "
120 	    "those objects are dumped\n\n");
121 	(void) fprintf(stderr, "    Options to control amount of output:\n");
122 	(void) fprintf(stderr, "        -u uberblock\n");
123 	(void) fprintf(stderr, "        -d dataset(s)\n");
124 	(void) fprintf(stderr, "        -i intent logs\n");
125 	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
126 	(void) fprintf(stderr, "        -h pool history\n");
127 	(void) fprintf(stderr, "        -b block statistics\n");
128 	(void) fprintf(stderr, "        -m metaslabs\n");
129 	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
130 	    "all data) blocks\n");
131 	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
132 	(void) fprintf(stderr, "        -D dedup statistics\n");
133 	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
134 	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
135 	(void) fprintf(stderr, "        -l dump label contents\n");
136 	(void) fprintf(stderr, "        -L disable leak tracking (do not "
137 	    "load spacemaps)\n");
138 	(void) fprintf(stderr, "        -R read and display block from a "
139 	    "device\n\n");
140 	(void) fprintf(stderr, "    Below options are intended for use "
141 	    "with other options (except -l):\n");
142 	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
143 	    "panic recovery (-AA) or both (-AAA)\n");
144 	(void) fprintf(stderr, "        -F attempt automatic rewind within "
145 	    "safe range of transaction groups\n");
146 	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
147 	    "cachefile\n");
148 	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
149 	    "work with dataset)\n");
150 	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
151 	    "has altroot/not in a cachefile\n");
152 	(void) fprintf(stderr, "        -p <path> -- use one or more with "
153 	    "-e to specify path to vdev dir\n");
154 	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
155 	    "searching for uberblocks\n");
156 	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
157 	    "to make only that option verbose\n");
158 	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
159 	exit(1);
160 }
161 
162 /*
163  * Called for usage errors that are discovered after a call to spa_open(),
164  * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
165  */
166 
167 static void
168 fatal(const char *fmt, ...)
169 {
170 	va_list ap;
171 
172 	va_start(ap, fmt);
173 	(void) fprintf(stderr, "%s: ", cmdname);
174 	(void) vfprintf(stderr, fmt, ap);
175 	va_end(ap);
176 	(void) fprintf(stderr, "\n");
177 
178 	exit(1);
179 }
180 
181 /* ARGSUSED */
182 static void
183 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
184 {
185 	nvlist_t *nv;
186 	size_t nvsize = *(uint64_t *)data;
187 	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
188 
189 	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
190 
191 	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
192 
193 	umem_free(packed, nvsize);
194 
195 	dump_nvlist(nv, 8);
196 
197 	nvlist_free(nv);
198 }
199 
200 const char dump_zap_stars[] = "****************************************";
201 const int dump_zap_width = sizeof (dump_zap_stars) - 1;
202 
203 static void
204 dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
205 {
206 	int i;
207 	int minidx = ZAP_HISTOGRAM_SIZE - 1;
208 	int maxidx = 0;
209 	uint64_t max = 0;
210 
211 	for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
212 		if (histo[i] > max)
213 			max = histo[i];
214 		if (histo[i] > 0 && i > maxidx)
215 			maxidx = i;
216 		if (histo[i] > 0 && i < minidx)
217 			minidx = i;
218 	}
219 
220 	if (max < dump_zap_width)
221 		max = dump_zap_width;
222 
223 	for (i = minidx; i <= maxidx; i++)
224 		(void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
225 		    &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
226 }
227 
228 static void
229 dump_zap_stats(objset_t *os, uint64_t object)
230 {
231 	int error;
232 	zap_stats_t zs;
233 
234 	error = zap_get_stats(os, object, &zs);
235 	if (error)
236 		return;
237 
238 	if (zs.zs_ptrtbl_len == 0) {
239 		ASSERT(zs.zs_num_blocks == 1);
240 		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
241 		    (u_longlong_t)zs.zs_blocksize,
242 		    (u_longlong_t)zs.zs_num_entries);
243 		return;
244 	}
245 
246 	(void) printf("\tFat ZAP stats:\n");
247 
248 	(void) printf("\t\tPointer table:\n");
249 	(void) printf("\t\t\t%llu elements\n",
250 	    (u_longlong_t)zs.zs_ptrtbl_len);
251 	(void) printf("\t\t\tzt_blk: %llu\n",
252 	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
253 	(void) printf("\t\t\tzt_numblks: %llu\n",
254 	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
255 	(void) printf("\t\t\tzt_shift: %llu\n",
256 	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
257 	(void) printf("\t\t\tzt_blks_copied: %llu\n",
258 	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
259 	(void) printf("\t\t\tzt_nextblk: %llu\n",
260 	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
261 
262 	(void) printf("\t\tZAP entries: %llu\n",
263 	    (u_longlong_t)zs.zs_num_entries);
264 	(void) printf("\t\tLeaf blocks: %llu\n",
265 	    (u_longlong_t)zs.zs_num_leafs);
266 	(void) printf("\t\tTotal blocks: %llu\n",
267 	    (u_longlong_t)zs.zs_num_blocks);
268 	(void) printf("\t\tzap_block_type: 0x%llx\n",
269 	    (u_longlong_t)zs.zs_block_type);
270 	(void) printf("\t\tzap_magic: 0x%llx\n",
271 	    (u_longlong_t)zs.zs_magic);
272 	(void) printf("\t\tzap_salt: 0x%llx\n",
273 	    (u_longlong_t)zs.zs_salt);
274 
275 	(void) printf("\t\tLeafs with 2^n pointers:\n");
276 	dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
277 
278 	(void) printf("\t\tBlocks with n*5 entries:\n");
279 	dump_zap_histogram(zs.zs_blocks_with_n5_entries);
280 
281 	(void) printf("\t\tBlocks n/10 full:\n");
282 	dump_zap_histogram(zs.zs_blocks_n_tenths_full);
283 
284 	(void) printf("\t\tEntries with n chunks:\n");
285 	dump_zap_histogram(zs.zs_entries_using_n_chunks);
286 
287 	(void) printf("\t\tBuckets with n entries:\n");
288 	dump_zap_histogram(zs.zs_buckets_with_n_entries);
289 }
290 
291 /*ARGSUSED*/
292 static void
293 dump_none(objset_t *os, uint64_t object, void *data, size_t size)
294 {
295 }
296 
297 /*ARGSUSED*/
298 static void
299 dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
300 {
301 	(void) printf("\tUNKNOWN OBJECT TYPE\n");
302 }
303 
304 /*ARGSUSED*/
305 void
306 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
307 {
308 }
309 
310 /*ARGSUSED*/
311 static void
312 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
313 {
314 }
315 
316 /*ARGSUSED*/
317 static void
318 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
319 {
320 	zap_cursor_t zc;
321 	zap_attribute_t attr;
322 	void *prop;
323 	int i;
324 
325 	dump_zap_stats(os, object);
326 	(void) printf("\n");
327 
328 	for (zap_cursor_init(&zc, os, object);
329 	    zap_cursor_retrieve(&zc, &attr) == 0;
330 	    zap_cursor_advance(&zc)) {
331 		(void) printf("\t\t%s = ", attr.za_name);
332 		if (attr.za_num_integers == 0) {
333 			(void) printf("\n");
334 			continue;
335 		}
336 		prop = umem_zalloc(attr.za_num_integers *
337 		    attr.za_integer_length, UMEM_NOFAIL);
338 		(void) zap_lookup(os, object, attr.za_name,
339 		    attr.za_integer_length, attr.za_num_integers, prop);
340 		if (attr.za_integer_length == 1) {
341 			(void) printf("%s", (char *)prop);
342 		} else {
343 			for (i = 0; i < attr.za_num_integers; i++) {
344 				switch (attr.za_integer_length) {
345 				case 2:
346 					(void) printf("%u ",
347 					    ((uint16_t *)prop)[i]);
348 					break;
349 				case 4:
350 					(void) printf("%u ",
351 					    ((uint32_t *)prop)[i]);
352 					break;
353 				case 8:
354 					(void) printf("%lld ",
355 					    (u_longlong_t)((int64_t *)prop)[i]);
356 					break;
357 				}
358 			}
359 		}
360 		(void) printf("\n");
361 		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
362 	}
363 	zap_cursor_fini(&zc);
364 }
365 
366 /*ARGSUSED*/
367 static void
368 dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
369 {
370 	dump_zap_stats(os, object);
371 	/* contents are printed elsewhere, properly decoded */
372 }
373 
374 /*ARGSUSED*/
375 static void
376 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
377 {
378 	zap_cursor_t zc;
379 	zap_attribute_t attr;
380 
381 	dump_zap_stats(os, object);
382 	(void) printf("\n");
383 
384 	for (zap_cursor_init(&zc, os, object);
385 	    zap_cursor_retrieve(&zc, &attr) == 0;
386 	    zap_cursor_advance(&zc)) {
387 		(void) printf("\t\t%s = ", attr.za_name);
388 		if (attr.za_num_integers == 0) {
389 			(void) printf("\n");
390 			continue;
391 		}
392 		(void) printf(" %llx : [%d:%d:%d]\n",
393 		    (u_longlong_t)attr.za_first_integer,
394 		    (int)ATTR_LENGTH(attr.za_first_integer),
395 		    (int)ATTR_BSWAP(attr.za_first_integer),
396 		    (int)ATTR_NUM(attr.za_first_integer));
397 	}
398 	zap_cursor_fini(&zc);
399 }
400 
401 /*ARGSUSED*/
402 static void
403 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
404 {
405 	zap_cursor_t zc;
406 	zap_attribute_t attr;
407 	uint16_t *layout_attrs;
408 	int i;
409 
410 	dump_zap_stats(os, object);
411 	(void) printf("\n");
412 
413 	for (zap_cursor_init(&zc, os, object);
414 	    zap_cursor_retrieve(&zc, &attr) == 0;
415 	    zap_cursor_advance(&zc)) {
416 		(void) printf("\t\t%s = [", attr.za_name);
417 		if (attr.za_num_integers == 0) {
418 			(void) printf("\n");
419 			continue;
420 		}
421 
422 		VERIFY(attr.za_integer_length == 2);
423 		layout_attrs = umem_zalloc(attr.za_num_integers *
424 		    attr.za_integer_length, UMEM_NOFAIL);
425 
426 		VERIFY(zap_lookup(os, object, attr.za_name,
427 		    attr.za_integer_length,
428 		    attr.za_num_integers, layout_attrs) == 0);
429 
430 		for (i = 0; i != attr.za_num_integers; i++)
431 			(void) printf(" %d ", (int)layout_attrs[i]);
432 		(void) printf("]\n");
433 		umem_free(layout_attrs,
434 		    attr.za_num_integers * attr.za_integer_length);
435 	}
436 	zap_cursor_fini(&zc);
437 }
438 
439 /*ARGSUSED*/
440 static void
441 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
442 {
443 	zap_cursor_t zc;
444 	zap_attribute_t attr;
445 	const char *typenames[] = {
446 		/* 0 */ "not specified",
447 		/* 1 */ "FIFO",
448 		/* 2 */ "Character Device",
449 		/* 3 */ "3 (invalid)",
450 		/* 4 */ "Directory",
451 		/* 5 */ "5 (invalid)",
452 		/* 6 */ "Block Device",
453 		/* 7 */ "7 (invalid)",
454 		/* 8 */ "Regular File",
455 		/* 9 */ "9 (invalid)",
456 		/* 10 */ "Symbolic Link",
457 		/* 11 */ "11 (invalid)",
458 		/* 12 */ "Socket",
459 		/* 13 */ "Door",
460 		/* 14 */ "Event Port",
461 		/* 15 */ "15 (invalid)",
462 	};
463 
464 	dump_zap_stats(os, object);
465 	(void) printf("\n");
466 
467 	for (zap_cursor_init(&zc, os, object);
468 	    zap_cursor_retrieve(&zc, &attr) == 0;
469 	    zap_cursor_advance(&zc)) {
470 		(void) printf("\t\t%s = %lld (type: %s)\n",
471 		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
472 		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
473 	}
474 	zap_cursor_fini(&zc);
475 }
476 
477 static void
478 dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
479 {
480 	uint64_t alloc, offset, entry;
481 	uint8_t mapshift = sm->sm_shift;
482 	uint64_t mapstart = sm->sm_start;
483 	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
484 			    "INVALID", "INVALID", "INVALID", "INVALID" };
485 
486 	if (smo->smo_object == 0)
487 		return;
488 
489 	/*
490 	 * Print out the freelist entries in both encoded and decoded form.
491 	 */
492 	alloc = 0;
493 	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
494 		VERIFY(0 == dmu_read(os, smo->smo_object, offset,
495 		    sizeof (entry), &entry, DMU_READ_PREFETCH));
496 		if (SM_DEBUG_DECODE(entry)) {
497 			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
498 			    (u_longlong_t)(offset / sizeof (entry)),
499 			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
500 			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
501 			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
502 		} else {
503 			(void) printf("\t    [%6llu]    %c  range:"
504 			    " %010llx-%010llx  size: %06llx\n",
505 			    (u_longlong_t)(offset / sizeof (entry)),
506 			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
507 			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
508 			    mapshift) + mapstart),
509 			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
510 			    mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
511 			    mapshift)),
512 			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
513 			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
514 				alloc += SM_RUN_DECODE(entry) << mapshift;
515 			else
516 				alloc -= SM_RUN_DECODE(entry) << mapshift;
517 		}
518 	}
519 	if (alloc != smo->smo_alloc) {
520 		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
521 		    "with space map summary (%llu)\n",
522 		    (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
523 	}
524 }
525 
526 static void
527 dump_metaslab_stats(metaslab_t *msp)
528 {
529 	char maxbuf[5];
530 	space_map_t *sm = &msp->ms_map;
531 	avl_tree_t *t = sm->sm_pp_root;
532 	int free_pct = sm->sm_space * 100 / sm->sm_size;
533 
534 	nicenum(space_map_maxsize(sm), maxbuf);
535 
536 	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
537 	    "segments", avl_numnodes(t), "maxsize", maxbuf,
538 	    "freepct", free_pct);
539 }
540 
541 static void
542 dump_metaslab(metaslab_t *msp)
543 {
544 	vdev_t *vd = msp->ms_group->mg_vd;
545 	spa_t *spa = vd->vdev_spa;
546 	space_map_t *sm = &msp->ms_map;
547 	space_map_obj_t *smo = &msp->ms_smo;
548 	char freebuf[5];
549 
550 	nicenum(sm->sm_size - smo->smo_alloc, freebuf);
551 
552 	(void) printf(
553 	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
554 	    (u_longlong_t)(sm->sm_start / sm->sm_size),
555 	    (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
556 
557 	if (dump_opt['m'] > 1 && !dump_opt['L']) {
558 		mutex_enter(&msp->ms_lock);
559 		space_map_load_wait(sm);
560 		if (!sm->sm_loaded)
561 			VERIFY(space_map_load(sm, zfs_metaslab_ops,
562 			    SM_FREE, smo, spa->spa_meta_objset) == 0);
563 		dump_metaslab_stats(msp);
564 		space_map_unload(sm);
565 		mutex_exit(&msp->ms_lock);
566 	}
567 
568 	if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
569 		ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift));
570 
571 		mutex_enter(&msp->ms_lock);
572 		dump_spacemap(spa->spa_meta_objset, smo, sm);
573 		mutex_exit(&msp->ms_lock);
574 	}
575 }
576 
577 static void
578 print_vdev_metaslab_header(vdev_t *vd)
579 {
580 	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
581 	    (u_longlong_t)vd->vdev_id,
582 	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
583 	    "offset", "spacemap", "free");
584 	(void) printf("\t%15s   %19s   %15s   %10s\n",
585 	    "---------------", "-------------------",
586 	    "---------------", "-------------");
587 }
588 
589 static void
590 dump_metaslabs(spa_t *spa)
591 {
592 	vdev_t *vd, *rvd = spa->spa_root_vdev;
593 	uint64_t m, c = 0, children = rvd->vdev_children;
594 
595 	(void) printf("\nMetaslabs:\n");
596 
597 	if (!dump_opt['d'] && zopt_objects > 0) {
598 		c = zopt_object[0];
599 
600 		if (c >= children)
601 			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
602 
603 		if (zopt_objects > 1) {
604 			vd = rvd->vdev_child[c];
605 			print_vdev_metaslab_header(vd);
606 
607 			for (m = 1; m < zopt_objects; m++) {
608 				if (zopt_object[m] < vd->vdev_ms_count)
609 					dump_metaslab(
610 					    vd->vdev_ms[zopt_object[m]]);
611 				else
612 					(void) fprintf(stderr, "bad metaslab "
613 					    "number %llu\n",
614 					    (u_longlong_t)zopt_object[m]);
615 			}
616 			(void) printf("\n");
617 			return;
618 		}
619 		children = c + 1;
620 	}
621 	for (; c < children; c++) {
622 		vd = rvd->vdev_child[c];
623 		print_vdev_metaslab_header(vd);
624 
625 		for (m = 0; m < vd->vdev_ms_count; m++)
626 			dump_metaslab(vd->vdev_ms[m]);
627 		(void) printf("\n");
628 	}
629 }
630 
631 static void
632 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
633 {
634 	const ddt_phys_t *ddp = dde->dde_phys;
635 	const ddt_key_t *ddk = &dde->dde_key;
636 	char *types[4] = { "ditto", "single", "double", "triple" };
637 	char blkbuf[BP_SPRINTF_LEN];
638 	blkptr_t blk;
639 
640 	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
641 		if (ddp->ddp_phys_birth == 0)
642 			continue;
643 		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
644 		sprintf_blkptr(blkbuf, &blk);
645 		(void) printf("index %llx refcnt %llu %s %s\n",
646 		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
647 		    types[p], blkbuf);
648 	}
649 }
650 
651 static void
652 dump_dedup_ratio(const ddt_stat_t *dds)
653 {
654 	double rL, rP, rD, D, dedup, compress, copies;
655 
656 	if (dds->dds_blocks == 0)
657 		return;
658 
659 	rL = (double)dds->dds_ref_lsize;
660 	rP = (double)dds->dds_ref_psize;
661 	rD = (double)dds->dds_ref_dsize;
662 	D = (double)dds->dds_dsize;
663 
664 	dedup = rD / D;
665 	compress = rL / rP;
666 	copies = rD / rP;
667 
668 	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
669 	    "dedup * compress / copies = %.2f\n\n",
670 	    dedup, compress, copies, dedup * compress / copies);
671 }
672 
673 static void
674 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
675 {
676 	char name[DDT_NAMELEN];
677 	ddt_entry_t dde;
678 	uint64_t walk = 0;
679 	dmu_object_info_t doi;
680 	uint64_t count, dspace, mspace;
681 	int error;
682 
683 	error = ddt_object_info(ddt, type, class, &doi);
684 
685 	if (error == ENOENT)
686 		return;
687 	ASSERT(error == 0);
688 
689 	count = ddt_object_count(ddt, type, class);
690 	dspace = doi.doi_physical_blocks_512 << 9;
691 	mspace = doi.doi_fill_count * doi.doi_data_block_size;
692 
693 	ASSERT(count != 0);	/* we should have destroyed it */
694 
695 	ddt_object_name(ddt, type, class, name);
696 
697 	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
698 	    name,
699 	    (u_longlong_t)count,
700 	    (u_longlong_t)(dspace / count),
701 	    (u_longlong_t)(mspace / count));
702 
703 	if (dump_opt['D'] < 3)
704 		return;
705 
706 	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
707 
708 	if (dump_opt['D'] < 4)
709 		return;
710 
711 	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
712 		return;
713 
714 	(void) printf("%s contents:\n\n", name);
715 
716 	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
717 		dump_dde(ddt, &dde, walk);
718 
719 	ASSERT(error == ENOENT);
720 
721 	(void) printf("\n");
722 }
723 
724 static void
725 dump_all_ddts(spa_t *spa)
726 {
727 	ddt_histogram_t ddh_total = { 0 };
728 	ddt_stat_t dds_total = { 0 };
729 
730 	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
731 		ddt_t *ddt = spa->spa_ddt[c];
732 		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
733 			for (enum ddt_class class = 0; class < DDT_CLASSES;
734 			    class++) {
735 				dump_ddt(ddt, type, class);
736 			}
737 		}
738 	}
739 
740 	ddt_get_dedup_stats(spa, &dds_total);
741 
742 	if (dds_total.dds_blocks == 0) {
743 		(void) printf("All DDTs are empty\n");
744 		return;
745 	}
746 
747 	(void) printf("\n");
748 
749 	if (dump_opt['D'] > 1) {
750 		(void) printf("DDT histogram (aggregated over all DDTs):\n");
751 		ddt_get_dedup_histogram(spa, &ddh_total);
752 		zpool_dump_ddt(&dds_total, &ddh_total);
753 	}
754 
755 	dump_dedup_ratio(&dds_total);
756 }
757 
758 static void
759 dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
760 {
761 	char *prefix = (void *)sm;
762 
763 	(void) printf("%s [%llu,%llu) length %llu\n",
764 	    prefix,
765 	    (u_longlong_t)start,
766 	    (u_longlong_t)(start + size),
767 	    (u_longlong_t)(size));
768 }
769 
770 static void
771 dump_dtl(vdev_t *vd, int indent)
772 {
773 	spa_t *spa = vd->vdev_spa;
774 	boolean_t required;
775 	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
776 	char prefix[256];
777 
778 	spa_vdev_state_enter(spa, SCL_NONE);
779 	required = vdev_dtl_required(vd);
780 	(void) spa_vdev_state_exit(spa, NULL, 0);
781 
782 	if (indent == 0)
783 		(void) printf("\nDirty time logs:\n\n");
784 
785 	(void) printf("\t%*s%s [%s]\n", indent, "",
786 	    vd->vdev_path ? vd->vdev_path :
787 	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
788 	    required ? "DTL-required" : "DTL-expendable");
789 
790 	for (int t = 0; t < DTL_TYPES; t++) {
791 		space_map_t *sm = &vd->vdev_dtl[t];
792 		if (sm->sm_space == 0)
793 			continue;
794 		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
795 		    indent + 2, "", name[t]);
796 		mutex_enter(sm->sm_lock);
797 		space_map_walk(sm, dump_dtl_seg, (void *)prefix);
798 		mutex_exit(sm->sm_lock);
799 		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
800 			dump_spacemap(spa->spa_meta_objset,
801 			    &vd->vdev_dtl_smo, sm);
802 	}
803 
804 	for (int c = 0; c < vd->vdev_children; c++)
805 		dump_dtl(vd->vdev_child[c], indent + 4);
806 }
807 
808 static void
809 dump_history(spa_t *spa)
810 {
811 	nvlist_t **events = NULL;
812 	char buf[SPA_MAXBLOCKSIZE];
813 	uint64_t resid, len, off = 0;
814 	uint_t num = 0;
815 	int error;
816 	time_t tsec;
817 	struct tm t;
818 	char tbuf[30];
819 	char internalstr[MAXPATHLEN];
820 
821 	do {
822 		len = sizeof (buf);
823 
824 		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
825 			(void) fprintf(stderr, "Unable to read history: "
826 			    "error %d\n", error);
827 			return;
828 		}
829 
830 		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
831 			break;
832 
833 		off -= resid;
834 	} while (len != 0);
835 
836 	(void) printf("\nHistory:\n");
837 	for (int i = 0; i < num; i++) {
838 		uint64_t time, txg, ievent;
839 		char *cmd, *intstr;
840 
841 		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
842 		    &time) != 0)
843 			continue;
844 		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
845 		    &cmd) != 0) {
846 			if (nvlist_lookup_uint64(events[i],
847 			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
848 				continue;
849 			verify(nvlist_lookup_uint64(events[i],
850 			    ZPOOL_HIST_TXG, &txg) == 0);
851 			verify(nvlist_lookup_string(events[i],
852 			    ZPOOL_HIST_INT_STR, &intstr) == 0);
853 			if (ievent >= LOG_END)
854 				continue;
855 
856 			(void) snprintf(internalstr,
857 			    sizeof (internalstr),
858 			    "[internal %s txg:%lld] %s",
859 			    hist_event_table[ievent], txg,
860 			    intstr);
861 			cmd = internalstr;
862 		}
863 		tsec = time;
864 		(void) localtime_r(&tsec, &t);
865 		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
866 		(void) printf("%s %s\n", tbuf, cmd);
867 	}
868 }
869 
870 /*ARGSUSED*/
871 static void
872 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
873 {
874 }
875 
876 static uint64_t
877 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
878 {
879 	if (dnp == NULL) {
880 		ASSERT(zb->zb_level < 0);
881 		if (zb->zb_object == 0)
882 			return (zb->zb_blkid);
883 		return (zb->zb_blkid * BP_GET_LSIZE(bp));
884 	}
885 
886 	ASSERT(zb->zb_level >= 0);
887 
888 	return ((zb->zb_blkid <<
889 	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
890 	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
891 }
892 
893 static void
894 sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp)
895 {
896 	dva_t *dva = bp->blk_dva;
897 	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
898 
899 	if (dump_opt['b'] >= 5) {
900 		sprintf_blkptr(blkbuf, bp);
901 		return;
902 	}
903 
904 	blkbuf[0] = '\0';
905 
906 	for (int i = 0; i < ndvas; i++)
907 		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
908 		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
909 		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
910 		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
911 
912 	(void) sprintf(blkbuf + strlen(blkbuf),
913 	    "%llxL/%llxP F=%llu B=%llu/%llu",
914 	    (u_longlong_t)BP_GET_LSIZE(bp),
915 	    (u_longlong_t)BP_GET_PSIZE(bp),
916 	    (u_longlong_t)bp->blk_fill,
917 	    (u_longlong_t)bp->blk_birth,
918 	    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
919 }
920 
921 static void
922 print_indirect(blkptr_t *bp, const zbookmark_t *zb,
923     const dnode_phys_t *dnp)
924 {
925 	char blkbuf[BP_SPRINTF_LEN];
926 	int l;
927 
928 	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
929 	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
930 
931 	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
932 
933 	ASSERT(zb->zb_level >= 0);
934 
935 	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
936 		if (l == zb->zb_level) {
937 			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
938 		} else {
939 			(void) printf(" ");
940 		}
941 	}
942 
943 	sprintf_blkptr_compact(blkbuf, bp);
944 	(void) printf("%s\n", blkbuf);
945 }
946 
947 static int
948 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
949     blkptr_t *bp, const zbookmark_t *zb)
950 {
951 	int err = 0;
952 
953 	if (bp->blk_birth == 0)
954 		return (0);
955 
956 	print_indirect(bp, zb, dnp);
957 
958 	if (BP_GET_LEVEL(bp) > 0) {
959 		uint32_t flags = ARC_WAIT;
960 		int i;
961 		blkptr_t *cbp;
962 		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
963 		arc_buf_t *buf;
964 		uint64_t fill = 0;
965 
966 		err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
967 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
968 		if (err)
969 			return (err);
970 
971 		/* recursively visit blocks below this */
972 		cbp = buf->b_data;
973 		for (i = 0; i < epb; i++, cbp++) {
974 			zbookmark_t czb;
975 
976 			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
977 			    zb->zb_level - 1,
978 			    zb->zb_blkid * epb + i);
979 			err = visit_indirect(spa, dnp, cbp, &czb);
980 			if (err)
981 				break;
982 			fill += cbp->blk_fill;
983 		}
984 		if (!err)
985 			ASSERT3U(fill, ==, bp->blk_fill);
986 		(void) arc_buf_remove_ref(buf, &buf);
987 	}
988 
989 	return (err);
990 }
991 
992 /*ARGSUSED*/
993 static void
994 dump_indirect(dnode_t *dn)
995 {
996 	dnode_phys_t *dnp = dn->dn_phys;
997 	int j;
998 	zbookmark_t czb;
999 
1000 	(void) printf("Indirect blocks:\n");
1001 
1002 	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1003 	    dn->dn_object, dnp->dn_nlevels - 1, 0);
1004 	for (j = 0; j < dnp->dn_nblkptr; j++) {
1005 		czb.zb_blkid = j;
1006 		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1007 		    &dnp->dn_blkptr[j], &czb);
1008 	}
1009 
1010 	(void) printf("\n");
1011 }
1012 
1013 /*ARGSUSED*/
1014 static void
1015 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1016 {
1017 	dsl_dir_phys_t *dd = data;
1018 	time_t crtime;
1019 	char nice[6];
1020 
1021 	if (dd == NULL)
1022 		return;
1023 
1024 	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1025 
1026 	crtime = dd->dd_creation_time;
1027 	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1028 	(void) printf("\t\thead_dataset_obj = %llu\n",
1029 	    (u_longlong_t)dd->dd_head_dataset_obj);
1030 	(void) printf("\t\tparent_dir_obj = %llu\n",
1031 	    (u_longlong_t)dd->dd_parent_obj);
1032 	(void) printf("\t\torigin_obj = %llu\n",
1033 	    (u_longlong_t)dd->dd_origin_obj);
1034 	(void) printf("\t\tchild_dir_zapobj = %llu\n",
1035 	    (u_longlong_t)dd->dd_child_dir_zapobj);
1036 	nicenum(dd->dd_used_bytes, nice);
1037 	(void) printf("\t\tused_bytes = %s\n", nice);
1038 	nicenum(dd->dd_compressed_bytes, nice);
1039 	(void) printf("\t\tcompressed_bytes = %s\n", nice);
1040 	nicenum(dd->dd_uncompressed_bytes, nice);
1041 	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
1042 	nicenum(dd->dd_quota, nice);
1043 	(void) printf("\t\tquota = %s\n", nice);
1044 	nicenum(dd->dd_reserved, nice);
1045 	(void) printf("\t\treserved = %s\n", nice);
1046 	(void) printf("\t\tprops_zapobj = %llu\n",
1047 	    (u_longlong_t)dd->dd_props_zapobj);
1048 	(void) printf("\t\tdeleg_zapobj = %llu\n",
1049 	    (u_longlong_t)dd->dd_deleg_zapobj);
1050 	(void) printf("\t\tflags = %llx\n",
1051 	    (u_longlong_t)dd->dd_flags);
1052 
1053 #define	DO(which) \
1054 	nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1055 	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1056 	DO(HEAD);
1057 	DO(SNAP);
1058 	DO(CHILD);
1059 	DO(CHILD_RSRV);
1060 	DO(REFRSRV);
1061 #undef DO
1062 }
1063 
1064 /*ARGSUSED*/
1065 static void
1066 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1067 {
1068 	dsl_dataset_phys_t *ds = data;
1069 	time_t crtime;
1070 	char used[6], compressed[6], uncompressed[6], unique[6];
1071 	char blkbuf[BP_SPRINTF_LEN];
1072 
1073 	if (ds == NULL)
1074 		return;
1075 
1076 	ASSERT(size == sizeof (*ds));
1077 	crtime = ds->ds_creation_time;
1078 	nicenum(ds->ds_used_bytes, used);
1079 	nicenum(ds->ds_compressed_bytes, compressed);
1080 	nicenum(ds->ds_uncompressed_bytes, uncompressed);
1081 	nicenum(ds->ds_unique_bytes, unique);
1082 	sprintf_blkptr(blkbuf, &ds->ds_bp);
1083 
1084 	(void) printf("\t\tdir_obj = %llu\n",
1085 	    (u_longlong_t)ds->ds_dir_obj);
1086 	(void) printf("\t\tprev_snap_obj = %llu\n",
1087 	    (u_longlong_t)ds->ds_prev_snap_obj);
1088 	(void) printf("\t\tprev_snap_txg = %llu\n",
1089 	    (u_longlong_t)ds->ds_prev_snap_txg);
1090 	(void) printf("\t\tnext_snap_obj = %llu\n",
1091 	    (u_longlong_t)ds->ds_next_snap_obj);
1092 	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1093 	    (u_longlong_t)ds->ds_snapnames_zapobj);
1094 	(void) printf("\t\tnum_children = %llu\n",
1095 	    (u_longlong_t)ds->ds_num_children);
1096 	(void) printf("\t\tuserrefs_obj = %llu\n",
1097 	    (u_longlong_t)ds->ds_userrefs_obj);
1098 	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1099 	(void) printf("\t\tcreation_txg = %llu\n",
1100 	    (u_longlong_t)ds->ds_creation_txg);
1101 	(void) printf("\t\tdeadlist_obj = %llu\n",
1102 	    (u_longlong_t)ds->ds_deadlist_obj);
1103 	(void) printf("\t\tused_bytes = %s\n", used);
1104 	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1105 	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1106 	(void) printf("\t\tunique = %s\n", unique);
1107 	(void) printf("\t\tfsid_guid = %llu\n",
1108 	    (u_longlong_t)ds->ds_fsid_guid);
1109 	(void) printf("\t\tguid = %llu\n",
1110 	    (u_longlong_t)ds->ds_guid);
1111 	(void) printf("\t\tflags = %llx\n",
1112 	    (u_longlong_t)ds->ds_flags);
1113 	(void) printf("\t\tnext_clones_obj = %llu\n",
1114 	    (u_longlong_t)ds->ds_next_clones_obj);
1115 	(void) printf("\t\tprops_obj = %llu\n",
1116 	    (u_longlong_t)ds->ds_props_obj);
1117 	(void) printf("\t\tbp = %s\n", blkbuf);
1118 }
1119 
1120 static void
1121 dump_bplist(objset_t *mos, uint64_t object, char *name)
1122 {
1123 	bplist_t bpl = { 0 };
1124 	blkptr_t blk, *bp = &blk;
1125 	uint64_t itor = 0;
1126 	char bytes[6];
1127 	char comp[6];
1128 	char uncomp[6];
1129 
1130 	if (dump_opt['d'] < 3)
1131 		return;
1132 
1133 	bplist_init(&bpl);
1134 	VERIFY(0 == bplist_open(&bpl, mos, object));
1135 	if (bplist_empty(&bpl)) {
1136 		bplist_close(&bpl);
1137 		bplist_fini(&bpl);
1138 		return;
1139 	}
1140 
1141 	nicenum(bpl.bpl_phys->bpl_bytes, bytes);
1142 	if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
1143 		nicenum(bpl.bpl_phys->bpl_comp, comp);
1144 		nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
1145 		(void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
1146 		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
1147 		    bytes, comp, uncomp);
1148 	} else {
1149 		(void) printf("\n    %s: %llu entries, %s\n",
1150 		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
1151 	}
1152 
1153 	if (dump_opt['d'] < 5) {
1154 		bplist_close(&bpl);
1155 		bplist_fini(&bpl);
1156 		return;
1157 	}
1158 
1159 	(void) printf("\n");
1160 
1161 	while (bplist_iterate(&bpl, &itor, bp) == 0) {
1162 		char blkbuf[BP_SPRINTF_LEN];
1163 
1164 		ASSERT(bp->blk_birth != 0);
1165 		sprintf_blkptr_compact(blkbuf, bp);
1166 		(void) printf("\tItem %3llu: %s\n",
1167 		    (u_longlong_t)itor - 1, blkbuf);
1168 	}
1169 
1170 	bplist_close(&bpl);
1171 	bplist_fini(&bpl);
1172 }
1173 
1174 static avl_tree_t idx_tree;
1175 static avl_tree_t domain_tree;
1176 static boolean_t fuid_table_loaded;
1177 static boolean_t sa_loaded;
1178 sa_attr_type_t *sa_attr_table;
1179 
1180 static void
1181 fuid_table_destroy()
1182 {
1183 	if (fuid_table_loaded) {
1184 		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1185 		fuid_table_loaded = B_FALSE;
1186 	}
1187 }
1188 
1189 /*
1190  * print uid or gid information.
1191  * For normal POSIX id just the id is printed in decimal format.
1192  * For CIFS files with FUID the fuid is printed in hex followed by
1193  * the doman-rid string.
1194  */
1195 static void
1196 print_idstr(uint64_t id, const char *id_type)
1197 {
1198 	if (FUID_INDEX(id)) {
1199 		char *domain;
1200 
1201 		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1202 		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1203 		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1204 	} else {
1205 		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1206 	}
1207 
1208 }
1209 
1210 static void
1211 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1212 {
1213 	uint32_t uid_idx, gid_idx;
1214 
1215 	uid_idx = FUID_INDEX(uid);
1216 	gid_idx = FUID_INDEX(gid);
1217 
1218 	/* Load domain table, if not already loaded */
1219 	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1220 		uint64_t fuid_obj;
1221 
1222 		/* first find the fuid object.  It lives in the master node */
1223 		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1224 		    8, 1, &fuid_obj) == 0);
1225 		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1226 		(void) zfs_fuid_table_load(os, fuid_obj,
1227 		    &idx_tree, &domain_tree);
1228 		fuid_table_loaded = B_TRUE;
1229 	}
1230 
1231 	print_idstr(uid, "uid");
1232 	print_idstr(gid, "gid");
1233 }
1234 
1235 /*ARGSUSED*/
1236 static void
1237 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1238 {
1239 	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1240 	sa_handle_t *hdl;
1241 	uint64_t xattr, rdev, gen;
1242 	uint64_t uid, gid, mode, fsize, parent, links;
1243 	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1244 	time_t z_crtime, z_atime, z_mtime, z_ctime;
1245 	sa_bulk_attr_t bulk[11];
1246 	int idx = 0;
1247 	int error;
1248 
1249 	if (!sa_loaded) {
1250 		uint64_t sa_attrs = 0;
1251 		uint64_t version;
1252 
1253 		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1254 		    8, 1, &version) == 0);
1255 		if (version >= ZPL_VERSION_SA) {
1256 			VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1257 			    8, 1, &sa_attrs) == 0);
1258 		}
1259 		sa_attr_table = sa_setup(os, sa_attrs,
1260 		    zfs_attr_table, ZPL_END);
1261 		sa_loaded = B_TRUE;
1262 	}
1263 
1264 	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1265 		(void) printf("Failed to get handle for SA znode\n");
1266 		return;
1267 	}
1268 
1269 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1270 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1271 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1272 	    &links, 8);
1273 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1274 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1275 	    &mode, 8);
1276 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1277 	    NULL, &parent, 8);
1278 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1279 	    &fsize, 8);
1280 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1281 	    acctm, 16);
1282 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1283 	    modtm, 16);
1284 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1285 	    crtm, 16);
1286 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1287 	    chgtm, 16);
1288 
1289 	if (sa_bulk_lookup(hdl, bulk, idx)) {
1290 		(void) sa_handle_destroy(hdl);
1291 		return;
1292 	}
1293 
1294 	error = zfs_obj_to_path(os, object, path, sizeof (path));
1295 	if (error != 0) {
1296 		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1297 		    (u_longlong_t)object);
1298 	}
1299 	if (dump_opt['d'] < 3) {
1300 		(void) printf("\t%s\n", path);
1301 		(void) sa_handle_destroy(hdl);
1302 		return;
1303 	}
1304 
1305 	z_crtime = (time_t)crtm[0];
1306 	z_atime = (time_t)acctm[0];
1307 	z_mtime = (time_t)modtm[0];
1308 	z_ctime = (time_t)chgtm[0];
1309 
1310 	(void) printf("\tpath	%s\n", path);
1311 	dump_uidgid(os, uid, gid);
1312 	(void) printf("\tatime	%s", ctime(&z_atime));
1313 	(void) printf("\tmtime	%s", ctime(&z_mtime));
1314 	(void) printf("\tctime	%s", ctime(&z_ctime));
1315 	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1316 	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
1317 	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
1318 	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
1319 	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
1320 	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
1321 	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1322 	    sizeof (uint64_t)) == 0)
1323 		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
1324 	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1325 	    sizeof (uint64_t)) == 0)
1326 		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
1327 	sa_handle_destroy(hdl);
1328 }
1329 
1330 /*ARGSUSED*/
1331 static void
1332 dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
1333 {
1334 }
1335 
1336 /*ARGSUSED*/
1337 static void
1338 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
1339 {
1340 }
1341 
1342 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
1343 	dump_none,		/* unallocated			*/
1344 	dump_zap,		/* object directory		*/
1345 	dump_uint64,		/* object array			*/
1346 	dump_none,		/* packed nvlist		*/
1347 	dump_packed_nvlist,	/* packed nvlist size		*/
1348 	dump_none,		/* bplist			*/
1349 	dump_none,		/* bplist header		*/
1350 	dump_none,		/* SPA space map header		*/
1351 	dump_none,		/* SPA space map		*/
1352 	dump_none,		/* ZIL intent log		*/
1353 	dump_dnode,		/* DMU dnode			*/
1354 	dump_dmu_objset,	/* DMU objset			*/
1355 	dump_dsl_dir,		/* DSL directory		*/
1356 	dump_zap,		/* DSL directory child map	*/
1357 	dump_zap,		/* DSL dataset snap map		*/
1358 	dump_zap,		/* DSL props			*/
1359 	dump_dsl_dataset,	/* DSL dataset			*/
1360 	dump_znode,		/* ZFS znode			*/
1361 	dump_acl,		/* ZFS V0 ACL			*/
1362 	dump_uint8,		/* ZFS plain file		*/
1363 	dump_zpldir,		/* ZFS directory		*/
1364 	dump_zap,		/* ZFS master node		*/
1365 	dump_zap,		/* ZFS delete queue		*/
1366 	dump_uint8,		/* zvol object			*/
1367 	dump_zap,		/* zvol prop			*/
1368 	dump_uint8,		/* other uint8[]		*/
1369 	dump_uint64,		/* other uint64[]		*/
1370 	dump_zap,		/* other ZAP			*/
1371 	dump_zap,		/* persistent error log		*/
1372 	dump_uint8,		/* SPA history			*/
1373 	dump_uint64,		/* SPA history offsets		*/
1374 	dump_zap,		/* Pool properties		*/
1375 	dump_zap,		/* DSL permissions		*/
1376 	dump_acl,		/* ZFS ACL			*/
1377 	dump_uint8,		/* ZFS SYSACL			*/
1378 	dump_none,		/* FUID nvlist			*/
1379 	dump_packed_nvlist,	/* FUID nvlist size		*/
1380 	dump_zap,		/* DSL dataset next clones	*/
1381 	dump_zap,		/* DSL scrub queue		*/
1382 	dump_zap,		/* ZFS user/group used		*/
1383 	dump_zap,		/* ZFS user/group quota		*/
1384 	dump_zap,		/* snapshot refcount tags	*/
1385 	dump_ddt_zap,		/* DDT ZAP object		*/
1386 	dump_zap,		/* DDT statistics		*/
1387 	dump_znode,		/* SA object			*/
1388 	dump_zap,		/* SA Master Node		*/
1389 	dump_sa_attrs,		/* SA attribute registration	*/
1390 	dump_sa_layouts,	/* SA attribute layouts		*/
1391 	dump_unknown,		/* Unknown type, must be last	*/
1392 };
1393 
1394 static void
1395 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1396 {
1397 	dmu_buf_t *db = NULL;
1398 	dmu_object_info_t doi;
1399 	dnode_t *dn;
1400 	void *bonus = NULL;
1401 	size_t bsize = 0;
1402 	char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], fill[7];
1403 	char aux[50];
1404 	int error;
1405 
1406 	if (*print_header) {
1407 		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1408 		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1409 		    "%full", "type");
1410 		*print_header = 0;
1411 	}
1412 
1413 	if (object == 0) {
1414 		dn = os->os_meta_dnode;
1415 	} else {
1416 		error = dmu_bonus_hold(os, object, FTAG, &db);
1417 		if (error)
1418 			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1419 			    object, error);
1420 		bonus = db->db_data;
1421 		bsize = db->db_size;
1422 		dn = ((dmu_buf_impl_t *)db)->db_dnode;
1423 	}
1424 	dmu_object_info_from_dnode(dn, &doi);
1425 
1426 	nicenum(doi.doi_metadata_block_size, iblk);
1427 	nicenum(doi.doi_data_block_size, dblk);
1428 	nicenum(doi.doi_max_offset, lsize);
1429 	nicenum(doi.doi_physical_blocks_512 << 9, asize);
1430 	nicenum(doi.doi_bonus_size, bonus_size);
1431 	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1432 	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1433 	    doi.doi_max_offset);
1434 
1435 	aux[0] = '\0';
1436 
1437 	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1438 		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1439 		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1440 	}
1441 
1442 	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1443 		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1444 		    ZDB_COMPRESS_NAME(doi.doi_compress));
1445 	}
1446 
1447 	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1448 	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1449 	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1450 
1451 	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1452 		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1453 		    "", "", "", "", "", bonus_size, "bonus",
1454 		    ZDB_OT_NAME(doi.doi_bonus_type));
1455 	}
1456 
1457 	if (verbosity >= 4) {
1458 		(void) printf("\tdnode flags: %s%s%s\n",
1459 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1460 		    "USED_BYTES " : "",
1461 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1462 		    "USERUSED_ACCOUNTED " : "",
1463 		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
1464 		    "SPILL_BLKPTR" : "");
1465 		(void) printf("\tdnode maxblkid: %llu\n",
1466 		    (longlong_t)dn->dn_phys->dn_maxblkid);
1467 
1468 		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1469 		    bonus, bsize);
1470 		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1471 		*print_header = 1;
1472 	}
1473 
1474 	if (verbosity >= 5)
1475 		dump_indirect(dn);
1476 
1477 	if (verbosity >= 5) {
1478 		/*
1479 		 * Report the list of segments that comprise the object.
1480 		 */
1481 		uint64_t start = 0;
1482 		uint64_t end;
1483 		uint64_t blkfill = 1;
1484 		int minlvl = 1;
1485 
1486 		if (dn->dn_type == DMU_OT_DNODE) {
1487 			minlvl = 0;
1488 			blkfill = DNODES_PER_BLOCK;
1489 		}
1490 
1491 		for (;;) {
1492 			char segsize[6];
1493 			error = dnode_next_offset(dn,
1494 			    0, &start, minlvl, blkfill, 0);
1495 			if (error)
1496 				break;
1497 			end = start;
1498 			error = dnode_next_offset(dn,
1499 			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1500 			nicenum(end - start, segsize);
1501 			(void) printf("\t\tsegment [%016llx, %016llx)"
1502 			    " size %5s\n", (u_longlong_t)start,
1503 			    (u_longlong_t)end, segsize);
1504 			if (error)
1505 				break;
1506 			start = end;
1507 		}
1508 	}
1509 
1510 	if (db != NULL)
1511 		dmu_buf_rele(db, FTAG);
1512 }
1513 
1514 static char *objset_types[DMU_OST_NUMTYPES] = {
1515 	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1516 
1517 static void
1518 dump_dir(objset_t *os)
1519 {
1520 	dmu_objset_stats_t dds;
1521 	uint64_t object, object_count;
1522 	uint64_t refdbytes, usedobjs, scratch;
1523 	char numbuf[8];
1524 	char blkbuf[BP_SPRINTF_LEN + 20];
1525 	char osname[MAXNAMELEN];
1526 	char *type = "UNKNOWN";
1527 	int verbosity = dump_opt['d'];
1528 	int print_header = 1;
1529 	int i, error;
1530 
1531 	dmu_objset_fast_stat(os, &dds);
1532 
1533 	if (dds.dds_type < DMU_OST_NUMTYPES)
1534 		type = objset_types[dds.dds_type];
1535 
1536 	if (dds.dds_type == DMU_OST_META) {
1537 		dds.dds_creation_txg = TXG_INITIAL;
1538 		usedobjs = os->os_rootbp->blk_fill;
1539 		refdbytes = os->os_spa->spa_dsl_pool->
1540 		    dp_mos_dir->dd_phys->dd_used_bytes;
1541 	} else {
1542 		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1543 	}
1544 
1545 	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1546 
1547 	nicenum(refdbytes, numbuf);
1548 
1549 	if (verbosity >= 4) {
1550 		(void) sprintf(blkbuf, ", rootbp ");
1551 		(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
1552 	} else {
1553 		blkbuf[0] = '\0';
1554 	}
1555 
1556 	dmu_objset_name(os, osname);
1557 
1558 	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1559 	    "%s, %llu objects%s\n",
1560 	    osname, type, (u_longlong_t)dmu_objset_id(os),
1561 	    (u_longlong_t)dds.dds_creation_txg,
1562 	    numbuf, (u_longlong_t)usedobjs, blkbuf);
1563 
1564 	if (zopt_objects != 0) {
1565 		for (i = 0; i < zopt_objects; i++)
1566 			dump_object(os, zopt_object[i], verbosity,
1567 			    &print_header);
1568 		(void) printf("\n");
1569 		return;
1570 	}
1571 
1572 	if (dump_opt['i'] != 0 || verbosity >= 2)
1573 		dump_intent_log(dmu_objset_zil(os));
1574 
1575 	if (dmu_objset_ds(os) != NULL)
1576 		dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
1577 		    dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
1578 
1579 	if (verbosity < 2)
1580 		return;
1581 
1582 	if (os->os_rootbp->blk_birth == 0)
1583 		return;
1584 
1585 	dump_object(os, 0, verbosity, &print_header);
1586 	object_count = 0;
1587 	if (os->os_userused_dnode &&
1588 	    os->os_userused_dnode->dn_type != 0) {
1589 		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1590 		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1591 	}
1592 
1593 	object = 0;
1594 	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1595 		dump_object(os, object, verbosity, &print_header);
1596 		object_count++;
1597 	}
1598 
1599 	ASSERT3U(object_count, ==, usedobjs);
1600 
1601 	(void) printf("\n");
1602 
1603 	if (error != ESRCH) {
1604 		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1605 		abort();
1606 	}
1607 }
1608 
1609 static void
1610 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
1611 {
1612 	time_t timestamp = ub->ub_timestamp;
1613 
1614 	(void) printf(header ? header : "");
1615 	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1616 	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1617 	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1618 	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1619 	(void) printf("\ttimestamp = %llu UTC = %s",
1620 	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1621 	if (dump_opt['u'] >= 3) {
1622 		char blkbuf[BP_SPRINTF_LEN];
1623 		sprintf_blkptr(blkbuf, &ub->ub_rootbp);
1624 		(void) printf("\trootbp = %s\n", blkbuf);
1625 	}
1626 	(void) printf(footer ? footer : "");
1627 }
1628 
1629 static void
1630 dump_config(spa_t *spa)
1631 {
1632 	dmu_buf_t *db;
1633 	size_t nvsize = 0;
1634 	int error = 0;
1635 
1636 
1637 	error = dmu_bonus_hold(spa->spa_meta_objset,
1638 	    spa->spa_config_object, FTAG, &db);
1639 
1640 	if (error == 0) {
1641 		nvsize = *(uint64_t *)db->db_data;
1642 		dmu_buf_rele(db, FTAG);
1643 
1644 		(void) printf("\nMOS Configuration:\n");
1645 		dump_packed_nvlist(spa->spa_meta_objset,
1646 		    spa->spa_config_object, (void *)&nvsize, 1);
1647 	} else {
1648 		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1649 		    (u_longlong_t)spa->spa_config_object, error);
1650 	}
1651 }
1652 
1653 static void
1654 dump_cachefile(const char *cachefile)
1655 {
1656 	int fd;
1657 	struct stat64 statbuf;
1658 	char *buf;
1659 	nvlist_t *config;
1660 
1661 	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1662 		(void) printf("cannot open '%s': %s\n", cachefile,
1663 		    strerror(errno));
1664 		exit(1);
1665 	}
1666 
1667 	if (fstat64(fd, &statbuf) != 0) {
1668 		(void) printf("failed to stat '%s': %s\n", cachefile,
1669 		    strerror(errno));
1670 		exit(1);
1671 	}
1672 
1673 	if ((buf = malloc(statbuf.st_size)) == NULL) {
1674 		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
1675 		    (u_longlong_t)statbuf.st_size);
1676 		exit(1);
1677 	}
1678 
1679 	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1680 		(void) fprintf(stderr, "failed to read %llu bytes\n",
1681 		    (u_longlong_t)statbuf.st_size);
1682 		exit(1);
1683 	}
1684 
1685 	(void) close(fd);
1686 
1687 	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1688 		(void) fprintf(stderr, "failed to unpack nvlist\n");
1689 		exit(1);
1690 	}
1691 
1692 	free(buf);
1693 
1694 	dump_nvlist(config, 0);
1695 
1696 	nvlist_free(config);
1697 }
1698 
1699 #define	ZDB_MAX_UB_HEADER_SIZE 32
1700 
1701 static void
1702 dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
1703 {
1704 	vdev_t vd;
1705 	vdev_t *vdp = &vd;
1706 	char header[ZDB_MAX_UB_HEADER_SIZE];
1707 
1708 	vd.vdev_ashift = ashift;
1709 	vdp->vdev_top = vdp;
1710 
1711 	for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
1712 		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
1713 		uberblock_t *ub = (void *)((char *)lbl + uoff);
1714 
1715 		if (uberblock_verify(ub))
1716 			continue;
1717 		(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
1718 		    "Uberblock[%d]\n", i);
1719 		dump_uberblock(ub, header, "");
1720 	}
1721 }
1722 
1723 static void
1724 dump_label(const char *dev)
1725 {
1726 	int fd;
1727 	vdev_label_t label;
1728 	char *path, *buf = label.vl_vdev_phys.vp_nvlist;
1729 	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1730 	struct stat64 statbuf;
1731 	uint64_t psize, ashift;
1732 	int len = strlen(dev) + 1;
1733 
1734 	if (strncmp(dev, "/dev/dsk/", 9) == 0) {
1735 		len++;
1736 		path = malloc(len);
1737 		(void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
1738 	} else {
1739 		path = strdup(dev);
1740 	}
1741 
1742 	if ((fd = open64(path, O_RDONLY)) < 0) {
1743 		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
1744 		free(path);
1745 		exit(1);
1746 	}
1747 
1748 	if (fstat64(fd, &statbuf) != 0) {
1749 		(void) printf("failed to stat '%s': %s\n", path,
1750 		    strerror(errno));
1751 		free(path);
1752 		(void) close(fd);
1753 		exit(1);
1754 	}
1755 
1756 	if (S_ISBLK(statbuf.st_mode)) {
1757 		(void) printf("cannot use '%s': character device required\n",
1758 		    path);
1759 		free(path);
1760 		(void) close(fd);
1761 		exit(1);
1762 	}
1763 
1764 	psize = statbuf.st_size;
1765 	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1766 
1767 	for (int l = 0; l < VDEV_LABELS; l++) {
1768 		nvlist_t *config = NULL;
1769 
1770 		(void) printf("--------------------------------------------\n");
1771 		(void) printf("LABEL %d\n", l);
1772 		(void) printf("--------------------------------------------\n");
1773 
1774 		if (pread64(fd, &label, sizeof (label),
1775 		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1776 			(void) printf("failed to read label %d\n", l);
1777 			continue;
1778 		}
1779 
1780 		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1781 			(void) printf("failed to unpack label %d\n", l);
1782 			ashift = SPA_MINBLOCKSHIFT;
1783 		} else {
1784 			nvlist_t *vdev_tree = NULL;
1785 
1786 			dump_nvlist(config, 4);
1787 			if ((nvlist_lookup_nvlist(config,
1788 			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
1789 			    (nvlist_lookup_uint64(vdev_tree,
1790 			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
1791 				ashift = SPA_MINBLOCKSHIFT;
1792 			nvlist_free(config);
1793 		}
1794 		if (dump_opt['u'])
1795 			dump_label_uberblocks(&label, ashift);
1796 	}
1797 
1798 	free(path);
1799 	(void) close(fd);
1800 }
1801 
1802 /*ARGSUSED*/
1803 static int
1804 dump_one_dir(const char *dsname, void *arg)
1805 {
1806 	int error;
1807 	objset_t *os;
1808 
1809 	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
1810 	if (error) {
1811 		(void) printf("Could not open %s, error %d\n", dsname, error);
1812 		return (0);
1813 	}
1814 	dump_dir(os);
1815 	dmu_objset_disown(os, FTAG);
1816 	fuid_table_destroy();
1817 	sa_loaded = B_FALSE;
1818 	return (0);
1819 }
1820 
/*
 * Block statistics: running totals kept for one (level, type) bucket.
 */
typedef struct zdb_blkstats {
	uint64_t	zb_asize;	/* sum of BP_GET_ASIZE() */
	uint64_t	zb_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zb_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zb_count;	/* number of blocks counted */
} zdb_blkstats_t;
1830 
/*
 * Extended object types, numbered directly after the DMU object types,
 * used to report deferred frees and dedup auto-ditto blocks and to hold
 * the grand total.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 2)

/* Names of the extended types, in the same order as the defines above. */
static char *zdb_ot_extname[] = {
	"deferred free",	/* ZDB_OT_DEFERRED */
	"dedup ditto",		/* ZDB_OT_DITTO */
	"Total",		/* ZDB_OT_TOTAL */
};
1843 
1844 #define	ZB_TOTAL	DN_MAX_LEVELS
1845 
1846 typedef struct zdb_cb {
1847 	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
1848 	uint64_t	zcb_dedup_asize;
1849 	uint64_t	zcb_dedup_blocks;
1850 	uint64_t	zcb_errors[256];
1851 	int		zcb_readfails;
1852 	int		zcb_haderrors;
1853 } zdb_cb_t;
1854 
1855 static void
1856 zdb_count_block(spa_t *spa, zilog_t *zilog, zdb_cb_t *zcb, const blkptr_t *bp,
1857     dmu_object_type_t type)
1858 {
1859 	uint64_t refcnt = 0;
1860 
1861 	ASSERT(type < ZDB_OT_TOTAL);
1862 
1863 	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
1864 		return;
1865 
1866 	for (int i = 0; i < 4; i++) {
1867 		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1868 		int t = (i & 1) ? type : ZDB_OT_TOTAL;
1869 		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1870 
1871 		zb->zb_asize += BP_GET_ASIZE(bp);
1872 		zb->zb_lsize += BP_GET_LSIZE(bp);
1873 		zb->zb_psize += BP_GET_PSIZE(bp);
1874 		zb->zb_count++;
1875 	}
1876 
1877 	if (dump_opt['L'])
1878 		return;
1879 
1880 	if (BP_GET_DEDUP(bp)) {
1881 		ddt_t *ddt;
1882 		ddt_entry_t *dde;
1883 
1884 		ddt = ddt_select(spa, bp);
1885 		ddt_enter(ddt);
1886 		dde = ddt_lookup(ddt, bp, B_FALSE);
1887 
1888 		if (dde == NULL) {
1889 			refcnt = 0;
1890 		} else {
1891 			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
1892 			ddt_phys_decref(ddp);
1893 			refcnt = ddp->ddp_refcnt;
1894 			if (ddt_phys_total_refcnt(dde) == 0)
1895 				ddt_remove(ddt, dde);
1896 		}
1897 		ddt_exit(ddt);
1898 	}
1899 
1900 	VERIFY3U(zio_wait(zio_claim(NULL, spa,
1901 	    refcnt ? 0 : spa_first_txg(spa),
1902 	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
1903 }
1904 
1905 static int
1906 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
1907     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1908 {
1909 	zdb_cb_t *zcb = arg;
1910 	char blkbuf[BP_SPRINTF_LEN];
1911 	dmu_object_type_t type;
1912 	boolean_t is_metadata;
1913 
1914 	if (bp == NULL)
1915 		return (0);
1916 
1917 	type = BP_GET_TYPE(bp);
1918 
1919 	zdb_count_block(spa, zilog, zcb, bp, type);
1920 
1921 	is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
1922 
1923 	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
1924 		int ioerr;
1925 		size_t size = BP_GET_PSIZE(bp);
1926 		void *data = malloc(size);
1927 		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
1928 
1929 		/* If it's an intent log block, failure is expected. */
1930 		if (zb->zb_level == ZB_ZIL_LEVEL)
1931 			flags |= ZIO_FLAG_SPECULATIVE;
1932 
1933 		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
1934 		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
1935 
1936 		free(data);
1937 
1938 		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
1939 			zcb->zcb_haderrors = 1;
1940 			zcb->zcb_errors[ioerr]++;
1941 
1942 			if (dump_opt['b'] >= 2)
1943 				sprintf_blkptr(blkbuf, bp);
1944 			else
1945 				blkbuf[0] = '\0';
1946 
1947 			(void) printf("zdb_blkptr_cb: "
1948 			    "Got error %d reading "
1949 			    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
1950 			    ioerr,
1951 			    (u_longlong_t)zb->zb_objset,
1952 			    (u_longlong_t)zb->zb_object,
1953 			    (u_longlong_t)zb->zb_level,
1954 			    (u_longlong_t)zb->zb_blkid,
1955 			    blkbuf);
1956 		}
1957 	}
1958 
1959 	zcb->zcb_readfails = 0;
1960 
1961 	if (dump_opt['b'] >= 4) {
1962 		sprintf_blkptr(blkbuf, bp);
1963 		(void) printf("objset %llu object %llu "
1964 		    "level %lld offset 0x%llx %s\n",
1965 		    (u_longlong_t)zb->zb_objset,
1966 		    (u_longlong_t)zb->zb_object,
1967 		    (longlong_t)zb->zb_level,
1968 		    (u_longlong_t)blkid2offset(dnp, bp, zb),
1969 		    blkbuf);
1970 	}
1971 
1972 	return (0);
1973 }
1974 
1975 static void
1976 zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
1977 {
1978 	vdev_t *vd = sm->sm_ppd;
1979 
1980 	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
1981 	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
1982 }
1983 
1984 /* ARGSUSED */
1985 static void
1986 zdb_space_map_load(space_map_t *sm)
1987 {
1988 }
1989 
1990 static void
1991 zdb_space_map_unload(space_map_t *sm)
1992 {
1993 	space_map_vacate(sm, zdb_leak, sm);
1994 }
1995 
1996 /* ARGSUSED */
1997 static void
1998 zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
1999 {
2000 }
2001 
2002 static space_map_ops_t zdb_space_map_ops = {
2003 	zdb_space_map_load,
2004 	zdb_space_map_unload,
2005 	NULL,	/* alloc */
2006 	zdb_space_map_claim,
2007 	NULL,	/* free */
2008 	NULL	/* maxsize */
2009 };
2010 
2011 static void
2012 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2013 {
2014 	ddt_bookmark_t ddb = { 0 };
2015 	ddt_entry_t dde;
2016 	int error;
2017 
2018 	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2019 		blkptr_t blk;
2020 		ddt_phys_t *ddp = dde.dde_phys;
2021 
2022 		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2023 			return;
2024 
2025 		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2026 
2027 		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2028 			if (ddp->ddp_phys_birth == 0)
2029 				continue;
2030 			ddt_bp_create(ddb.ddb_checksum,
2031 			    &dde.dde_key, ddp, &blk);
2032 			if (p == DDT_PHYS_DITTO) {
2033 				zdb_count_block(spa, NULL, zcb, &blk,
2034 				    ZDB_OT_DITTO);
2035 			} else {
2036 				zcb->zcb_dedup_asize +=
2037 				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2038 				zcb->zcb_dedup_blocks++;
2039 			}
2040 		}
2041 		if (!dump_opt['L']) {
2042 			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2043 			ddt_enter(ddt);
2044 			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2045 			ddt_exit(ddt);
2046 		}
2047 	}
2048 
2049 	ASSERT(error == ENOENT);
2050 }
2051 
2052 static void
2053 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
2054 {
2055 	if (!dump_opt['L']) {
2056 		vdev_t *rvd = spa->spa_root_vdev;
2057 		for (int c = 0; c < rvd->vdev_children; c++) {
2058 			vdev_t *vd = rvd->vdev_child[c];
2059 			for (int m = 0; m < vd->vdev_ms_count; m++) {
2060 				metaslab_t *msp = vd->vdev_ms[m];
2061 				mutex_enter(&msp->ms_lock);
2062 				space_map_unload(&msp->ms_map);
2063 				VERIFY(space_map_load(&msp->ms_map,
2064 				    &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
2065 				    spa->spa_meta_objset) == 0);
2066 				msp->ms_map.sm_ppd = vd;
2067 				mutex_exit(&msp->ms_lock);
2068 			}
2069 		}
2070 	}
2071 
2072 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2073 
2074 	zdb_ddt_leak_init(spa, zcb);
2075 
2076 	spa_config_exit(spa, SCL_CONFIG, FTAG);
2077 }
2078 
2079 static void
2080 zdb_leak_fini(spa_t *spa)
2081 {
2082 	if (!dump_opt['L']) {
2083 		vdev_t *rvd = spa->spa_root_vdev;
2084 		for (int c = 0; c < rvd->vdev_children; c++) {
2085 			vdev_t *vd = rvd->vdev_child[c];
2086 			for (int m = 0; m < vd->vdev_ms_count; m++) {
2087 				metaslab_t *msp = vd->vdev_ms[m];
2088 				mutex_enter(&msp->ms_lock);
2089 				space_map_unload(&msp->ms_map);
2090 				mutex_exit(&msp->ms_lock);
2091 			}
2092 		}
2093 	}
2094 }
2095 
2096 static int
2097 dump_block_stats(spa_t *spa)
2098 {
2099 	zdb_cb_t zcb = { 0 };
2100 	zdb_blkstats_t *zb, *tzb;
2101 	uint64_t norm_alloc, norm_space, total_alloc, total_found;
2102 	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
2103 	int leaks = 0;
2104 
2105 	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
2106 	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
2107 	    (dump_opt['c'] == 1) ? "metadata " : "",
2108 	    dump_opt['c'] ? "checksums " : "",
2109 	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
2110 	    !dump_opt['L'] ? "nothing leaked " : "");
2111 
2112 	/*
2113 	 * Load all space maps as SM_ALLOC maps, then traverse the pool
2114 	 * claiming each block we discover.  If the pool is perfectly
2115 	 * consistent, the space maps will be empty when we're done.
2116 	 * Anything left over is a leak; any block we can't claim (because
2117 	 * it's not part of any space map) is a double allocation,
2118 	 * reference to a freed block, or an unclaimed log block.
2119 	 */
2120 	zdb_leak_init(spa, &zcb);
2121 
2122 	/*
2123 	 * If there's a deferred-free bplist, process that first.
2124 	 */
2125 	if (spa->spa_deferred_bplist_obj != 0) {
2126 		bplist_t *bpl = &spa->spa_deferred_bplist;
2127 		blkptr_t blk;
2128 		uint64_t itor = 0;
2129 
2130 		VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
2131 		    spa->spa_deferred_bplist_obj));
2132 
2133 		while (bplist_iterate(bpl, &itor, &blk) == 0) {
2134 			if (dump_opt['b'] >= 4) {
2135 				char blkbuf[BP_SPRINTF_LEN];
2136 				sprintf_blkptr(blkbuf, &blk);
2137 				(void) printf("[%s] %s\n",
2138 				    "deferred free", blkbuf);
2139 			}
2140 			zdb_count_block(spa, NULL, &zcb, &blk, ZDB_OT_DEFERRED);
2141 		}
2142 
2143 		bplist_close(bpl);
2144 	}
2145 
2146 	if (dump_opt['c'] > 1)
2147 		flags |= TRAVERSE_PREFETCH_DATA;
2148 
2149 	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2150 
2151 	if (zcb.zcb_haderrors) {
2152 		(void) printf("\nError counts:\n\n");
2153 		(void) printf("\t%5s  %s\n", "errno", "count");
2154 		for (int e = 0; e < 256; e++) {
2155 			if (zcb.zcb_errors[e] != 0) {
2156 				(void) printf("\t%5d  %llu\n",
2157 				    e, (u_longlong_t)zcb.zcb_errors[e]);
2158 			}
2159 		}
2160 	}
2161 
2162 	/*
2163 	 * Report any leaked segments.
2164 	 */
2165 	zdb_leak_fini(spa);
2166 
2167 	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2168 
2169 	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2170 	norm_space = metaslab_class_get_space(spa_normal_class(spa));
2171 
2172 	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2173 	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2174 
2175 	if (total_found == total_alloc) {
2176 		if (!dump_opt['L'])
2177 			(void) printf("\n\tNo leaks (block sum matches space"
2178 			    " maps exactly)\n");
2179 	} else {
2180 		(void) printf("block traversal size %llu != alloc %llu "
2181 		    "(%s %lld)\n",
2182 		    (u_longlong_t)total_found,
2183 		    (u_longlong_t)total_alloc,
2184 		    (dump_opt['L']) ? "unreachable" : "leaked",
2185 		    (longlong_t)(total_alloc - total_found));
2186 		leaks = 1;
2187 	}
2188 
2189 	if (tzb->zb_count == 0)
2190 		return (2);
2191 
2192 	(void) printf("\n");
2193 	(void) printf("\tbp count:      %10llu\n",
2194 	    (u_longlong_t)tzb->zb_count);
2195 	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
2196 	    (u_longlong_t)tzb->zb_lsize,
2197 	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
2198 	(void) printf("\tbp physical:   %10llu      avg:"
2199 	    " %6llu     compression: %6.2f\n",
2200 	    (u_longlong_t)tzb->zb_psize,
2201 	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
2202 	    (double)tzb->zb_lsize / tzb->zb_psize);
2203 	(void) printf("\tbp allocated:  %10llu      avg:"
2204 	    " %6llu     compression: %6.2f\n",
2205 	    (u_longlong_t)tzb->zb_asize,
2206 	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
2207 	    (double)tzb->zb_lsize / tzb->zb_asize);
2208 	(void) printf("\tbp deduped:    %10llu    ref>1:"
2209 	    " %6llu   deduplication: %6.2f\n",
2210 	    (u_longlong_t)zcb.zcb_dedup_asize,
2211 	    (u_longlong_t)zcb.zcb_dedup_blocks,
2212 	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
2213 	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
2214 	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
2215 
2216 	if (dump_opt['b'] >= 2) {
2217 		int l, t, level;
2218 		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2219 		    "\t  avg\t comp\t%%Total\tType\n");
2220 
2221 		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
2222 			char csize[6], lsize[6], psize[6], asize[6], avg[6];
2223 			char *typename;
2224 
2225 			if (t < DMU_OT_NUMTYPES)
2226 				typename = dmu_ot[t].ot_name;
2227 			else
2228 				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
2229 
2230 			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
2231 				(void) printf("%6s\t%5s\t%5s\t%5s"
2232 				    "\t%5s\t%5s\t%6s\t%s\n",
2233 				    "-",
2234 				    "-",
2235 				    "-",
2236 				    "-",
2237 				    "-",
2238 				    "-",
2239 				    "-",
2240 				    typename);
2241 				continue;
2242 			}
2243 
2244 			for (l = ZB_TOTAL - 1; l >= -1; l--) {
2245 				level = (l == -1 ? ZB_TOTAL : l);
2246 				zb = &zcb.zcb_type[level][t];
2247 
2248 				if (zb->zb_asize == 0)
2249 					continue;
2250 
2251 				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
2252 					continue;
2253 
2254 				if (level == 0 && zb->zb_asize ==
2255 				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
2256 					continue;
2257 
2258 				nicenum(zb->zb_count, csize);
2259 				nicenum(zb->zb_lsize, lsize);
2260 				nicenum(zb->zb_psize, psize);
2261 				nicenum(zb->zb_asize, asize);
2262 				nicenum(zb->zb_asize / zb->zb_count, avg);
2263 
2264 				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
2265 				    "\t%5.2f\t%6.2f\t",
2266 				    csize, lsize, psize, asize, avg,
2267 				    (double)zb->zb_lsize / zb->zb_psize,
2268 				    100.0 * zb->zb_asize / tzb->zb_asize);
2269 
2270 				if (level == ZB_TOTAL)
2271 					(void) printf("%s\n", typename);
2272 				else
2273 					(void) printf("    L%d %s\n",
2274 					    level, typename);
2275 			}
2276 		}
2277 	}
2278 
2279 	(void) printf("\n");
2280 
2281 	if (leaks)
2282 		return (2);
2283 
2284 	if (zcb.zcb_haderrors)
2285 		return (3);
2286 
2287 	return (0);
2288 }
2289 
2290 typedef struct zdb_ddt_entry {
2291 	ddt_key_t	zdde_key;
2292 	uint64_t	zdde_ref_blocks;
2293 	uint64_t	zdde_ref_lsize;
2294 	uint64_t	zdde_ref_psize;
2295 	uint64_t	zdde_ref_dsize;
2296 	avl_node_t	zdde_node;
2297 } zdb_ddt_entry_t;
2298 
2299 /* ARGSUSED */
2300 static int
2301 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2302     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2303 {
2304 	avl_tree_t *t = arg;
2305 	avl_index_t where;
2306 	zdb_ddt_entry_t *zdde, zdde_search;
2307 
2308 	if (bp == NULL)
2309 		return (0);
2310 
2311 	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2312 		(void) printf("traversing objset %llu, %llu objects, "
2313 		    "%lu blocks so far\n",
2314 		    (u_longlong_t)zb->zb_objset,
2315 		    (u_longlong_t)bp->blk_fill,
2316 		    avl_numnodes(t));
2317 	}
2318 
2319 	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2320 	    BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
2321 		return (0);
2322 
2323 	ddt_key_fill(&zdde_search.zdde_key, bp);
2324 
2325 	zdde = avl_find(t, &zdde_search, &where);
2326 
2327 	if (zdde == NULL) {
2328 		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2329 		zdde->zdde_key = zdde_search.zdde_key;
2330 		avl_insert(t, zdde, where);
2331 	}
2332 
2333 	zdde->zdde_ref_blocks += 1;
2334 	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2335 	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2336 	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2337 
2338 	return (0);
2339 }
2340 
2341 static void
2342 dump_simulated_ddt(spa_t *spa)
2343 {
2344 	avl_tree_t t;
2345 	void *cookie = NULL;
2346 	zdb_ddt_entry_t *zdde;
2347 	ddt_histogram_t ddh_total = { 0 };
2348 	ddt_stat_t dds_total = { 0 };
2349 
2350 	avl_create(&t, ddt_entry_compare,
2351 	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2352 
2353 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2354 
2355 	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2356 	    zdb_ddt_add_cb, &t);
2357 
2358 	spa_config_exit(spa, SCL_CONFIG, FTAG);
2359 
2360 	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2361 		ddt_stat_t dds;
2362 		uint64_t refcnt = zdde->zdde_ref_blocks;
2363 		ASSERT(refcnt != 0);
2364 
2365 		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2366 		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2367 		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2368 		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2369 
2370 		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2371 		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2372 		dds.dds_ref_psize = zdde->zdde_ref_psize;
2373 		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2374 
2375 		ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
2376 
2377 		umem_free(zdde, sizeof (*zdde));
2378 	}
2379 
2380 	avl_destroy(&t);
2381 
2382 	ddt_histogram_stat(&dds_total, &ddh_total);
2383 
2384 	(void) printf("Simulated DDT histogram:\n");
2385 
2386 	zpool_dump_ddt(&dds_total, &ddh_total);
2387 
2388 	dump_dedup_ratio(&dds_total);
2389 }
2390 
2391 static void
2392 dump_zpool(spa_t *spa)
2393 {
2394 	dsl_pool_t *dp = spa_get_dsl(spa);
2395 	int rc = 0;
2396 
2397 	if (dump_opt['S']) {
2398 		dump_simulated_ddt(spa);
2399 		return;
2400 	}
2401 
2402 	if (!dump_opt['e'] && dump_opt['C'] > 1) {
2403 		(void) printf("\nCached configuration:\n");
2404 		dump_nvlist(spa->spa_config, 8);
2405 	}
2406 
2407 	if (dump_opt['C'])
2408 		dump_config(spa);
2409 
2410 	if (dump_opt['u'])
2411 		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
2412 
2413 	if (dump_opt['D'])
2414 		dump_all_ddts(spa);
2415 
2416 	if (dump_opt['d'] > 2 || dump_opt['m'])
2417 		dump_metaslabs(spa);
2418 
2419 	if (dump_opt['d'] || dump_opt['i']) {
2420 		dump_dir(dp->dp_meta_objset);
2421 		if (dump_opt['d'] >= 3) {
2422 			dump_bplist(dp->dp_meta_objset,
2423 			    spa->spa_deferred_bplist_obj, "Deferred frees");
2424 			dump_dtl(spa->spa_root_vdev, 0);
2425 		}
2426 		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
2427 		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2428 	}
2429 	if (dump_opt['b'] || dump_opt['c'])
2430 		rc = dump_block_stats(spa);
2431 
2432 	if (dump_opt['s'])
2433 		show_pool_stats(spa);
2434 
2435 	if (dump_opt['h'])
2436 		dump_history(spa);
2437 
2438 	if (rc != 0)
2439 		exit(rc);
2440 }
2441 
/*
 * Flag bits for the -R block reader.  main() maps one flag letter to each
 * bit through flagbits[] (letter shown on the right).
 */
#define	ZDB_FLAG_CHECKSUM	(1 << 0)	/* 'c' */
#define	ZDB_FLAG_DECOMPRESS	(1 << 1)	/* 'd' */
#define	ZDB_FLAG_BSWAP		(1 << 2)	/* 'e' */
#define	ZDB_FLAG_GBH		(1 << 3)	/* 'g' */
#define	ZDB_FLAG_INDIRECT	(1 << 4)	/* 'i' */
#define	ZDB_FLAG_PHYS		(1 << 5)	/* 'p' */
#define	ZDB_FLAG_RAW		(1 << 6)	/* 'r' */
#define	ZDB_FLAG_PRINT_BLKPTR	(1 << 7)	/* 'b' */

/* Flag letter (as an unsigned char) -> ZDB_FLAG_* bit; 0 means invalid. */
int flagbits[256];
2452 
2453 static void
2454 zdb_print_blkptr(blkptr_t *bp, int flags)
2455 {
2456 	char blkbuf[BP_SPRINTF_LEN];
2457 
2458 	if (flags & ZDB_FLAG_BSWAP)
2459 		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2460 
2461 	sprintf_blkptr(blkbuf, bp);
2462 	(void) printf("%s\n", blkbuf);
2463 }
2464 
2465 static void
2466 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2467 {
2468 	int i;
2469 
2470 	for (i = 0; i < nbps; i++)
2471 		zdb_print_blkptr(&bp[i], flags);
2472 }
2473 
2474 static void
2475 zdb_dump_gbh(void *buf, int flags)
2476 {
2477 	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2478 }
2479 
2480 static void
2481 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
2482 {
2483 	if (flags & ZDB_FLAG_BSWAP)
2484 		byteswap_uint64_array(buf, size);
2485 	(void) write(1, buf, size);
2486 }
2487 
2488 static void
2489 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2490 {
2491 	uint64_t *d = (uint64_t *)buf;
2492 	int nwords = size / sizeof (uint64_t);
2493 	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2494 	int i, j;
2495 	char *hdr, *c;
2496 
2497 
2498 	if (do_bswap)
2499 		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
2500 	else
2501 		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
2502 
2503 	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
2504 
2505 	for (i = 0; i < nwords; i += 2) {
2506 		(void) printf("%06llx:  %016llx  %016llx  ",
2507 		    (u_longlong_t)(i * sizeof (uint64_t)),
2508 		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
2509 		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
2510 
2511 		c = (char *)&d[i];
2512 		for (j = 0; j < 2 * sizeof (uint64_t); j++)
2513 			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
2514 		(void) printf("\n");
2515 	}
2516 }
2517 
2518 /*
2519  * There are two acceptable formats:
2520  *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
2521  *	child[.child]*    - For example: 0.1.1
2522  *
2523  * The second form can be used to specify arbitrary vdevs anywhere
2524  * in the heirarchy.  For example, in a pool with a mirror of
2525  * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
2526  */
2527 static vdev_t *
2528 zdb_vdev_lookup(vdev_t *vdev, char *path)
2529 {
2530 	char *s, *p, *q;
2531 	int i;
2532 
2533 	if (vdev == NULL)
2534 		return (NULL);
2535 
2536 	/* First, assume the x.x.x.x format */
2537 	i = (int)strtoul(path, &s, 10);
2538 	if (s == path || (s && *s != '.' && *s != '\0'))
2539 		goto name;
2540 	if (i < 0 || i >= vdev->vdev_children)
2541 		return (NULL);
2542 
2543 	vdev = vdev->vdev_child[i];
2544 	if (*s == '\0')
2545 		return (vdev);
2546 	return (zdb_vdev_lookup(vdev, s+1));
2547 
2548 name:
2549 	for (i = 0; i < vdev->vdev_children; i++) {
2550 		vdev_t *vc = vdev->vdev_child[i];
2551 
2552 		if (vc->vdev_path == NULL) {
2553 			vc = zdb_vdev_lookup(vc, path);
2554 			if (vc == NULL)
2555 				continue;
2556 			else
2557 				return (vc);
2558 		}
2559 
2560 		p = strrchr(vc->vdev_path, '/');
2561 		p = p ? p + 1 : vc->vdev_path;
2562 		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
2563 
2564 		if (strcmp(vc->vdev_path, path) == 0)
2565 			return (vc);
2566 		if (strcmp(p, path) == 0)
2567 			return (vc);
2568 		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
2569 			return (vc);
2570 	}
2571 
2572 	return (NULL);
2573 }
2574 
2575 /*
2576  * Read a block from a pool and print it out.  The syntax of the
2577  * block descriptor is:
2578  *
2579  *	pool:vdev_specifier:offset:size[:flags]
2580  *
2581  *	pool           - The name of the pool you wish to read from
2582  *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
2583  *	offset         - offset, in hex, in bytes
2584  *	size           - Amount of data to read, in hex, in bytes
2585  *	flags          - A string of characters specifying options
2586  *		 b: Decode a blkptr at given offset within block
2587  *		*c: Calculate and display checksums
2588  *		 d: Decompress data before dumping
2589  *		 e: Byteswap data before dumping
2590  *		 g: Display data as a gang block header
2591  *		 i: Display as an indirect block
2592  *		 p: Do I/O to physical offset
2593  *		 r: Dump raw data to stdout
2594  *
2595  *              * = not yet implemented
2596  */
2597 static void
2598 zdb_read_block(char *thing, spa_t *spa)
2599 {
2600 	blkptr_t blk, *bp = &blk;
2601 	dva_t *dva = bp->blk_dva;
2602 	int flags = 0;
2603 	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
2604 	zio_t *zio;
2605 	vdev_t *vd;
2606 	void *pbuf, *lbuf, *buf;
2607 	char *s, *p, *dup, *vdev, *flagstr;
2608 	int i, error;
2609 
2610 	dup = strdup(thing);
2611 	s = strtok(dup, ":");
2612 	vdev = s ? s : "";
2613 	s = strtok(NULL, ":");
2614 	offset = strtoull(s ? s : "", NULL, 16);
2615 	s = strtok(NULL, ":");
2616 	size = strtoull(s ? s : "", NULL, 16);
2617 	s = strtok(NULL, ":");
2618 	flagstr = s ? s : "";
2619 
2620 	s = NULL;
2621 	if (size == 0)
2622 		s = "size must not be zero";
2623 	if (!IS_P2ALIGNED(size, DEV_BSIZE))
2624 		s = "size must be a multiple of sector size";
2625 	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
2626 		s = "offset must be a multiple of sector size";
2627 	if (s) {
2628 		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
2629 		free(dup);
2630 		return;
2631 	}
2632 
2633 	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
2634 		for (i = 0; flagstr[i]; i++) {
2635 			int bit = flagbits[(uchar_t)flagstr[i]];
2636 
2637 			if (bit == 0) {
2638 				(void) printf("***Invalid flag: %c\n",
2639 				    flagstr[i]);
2640 				continue;
2641 			}
2642 			flags |= bit;
2643 
2644 			/* If it's not something with an argument, keep going */
2645 			if ((bit & (ZDB_FLAG_CHECKSUM |
2646 			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
2647 				continue;
2648 
2649 			p = &flagstr[i + 1];
2650 			if (bit == ZDB_FLAG_PRINT_BLKPTR)
2651 				blkptr_offset = strtoull(p, &p, 16);
2652 			if (*p != ':' && *p != '\0') {
2653 				(void) printf("***Invalid flag arg: '%s'\n", s);
2654 				free(dup);
2655 				return;
2656 			}
2657 		}
2658 	}
2659 
2660 	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
2661 	if (vd == NULL) {
2662 		(void) printf("***Invalid vdev: %s\n", vdev);
2663 		free(dup);
2664 		return;
2665 	} else {
2666 		if (vd->vdev_path)
2667 			(void) fprintf(stderr, "Found vdev: %s\n",
2668 			    vd->vdev_path);
2669 		else
2670 			(void) fprintf(stderr, "Found vdev type: %s\n",
2671 			    vd->vdev_ops->vdev_op_type);
2672 	}
2673 
2674 	psize = size;
2675 	lsize = size;
2676 
2677 	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2678 	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2679 
2680 	BP_ZERO(bp);
2681 
2682 	DVA_SET_VDEV(&dva[0], vd->vdev_id);
2683 	DVA_SET_OFFSET(&dva[0], offset);
2684 	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
2685 	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
2686 
2687 	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
2688 
2689 	BP_SET_LSIZE(bp, lsize);
2690 	BP_SET_PSIZE(bp, psize);
2691 	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
2692 	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
2693 	BP_SET_TYPE(bp, DMU_OT_NONE);
2694 	BP_SET_LEVEL(bp, 0);
2695 	BP_SET_DEDUP(bp, 0);
2696 	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
2697 
2698 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2699 	zio = zio_root(spa, NULL, NULL, 0);
2700 
2701 	if (vd == vd->vdev_top) {
2702 		/*
2703 		 * Treat this as a normal block read.
2704 		 */
2705 		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
2706 		    ZIO_PRIORITY_SYNC_READ,
2707 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
2708 	} else {
2709 		/*
2710 		 * Treat this as a vdev child I/O.
2711 		 */
2712 		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
2713 		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
2714 		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
2715 		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
2716 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
2717 	}
2718 
2719 	error = zio_wait(zio);
2720 	spa_config_exit(spa, SCL_STATE, FTAG);
2721 
2722 	if (error) {
2723 		(void) printf("Read of %s failed, error: %d\n", thing, error);
2724 		goto out;
2725 	}
2726 
2727 	if (flags & ZDB_FLAG_DECOMPRESS) {
2728 		/*
2729 		 * We don't know how the data was compressed, so just try
2730 		 * every decompress function at every inflated blocksize.
2731 		 */
2732 		enum zio_compress c;
2733 		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2734 		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2735 
2736 		bcopy(pbuf, pbuf2, psize);
2737 
2738 		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
2739 		    SPA_MAXBLOCKSIZE - psize) == 0);
2740 
2741 		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
2742 		    SPA_MAXBLOCKSIZE - psize) == 0);
2743 
2744 		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
2745 		    lsize -= SPA_MINBLOCKSIZE) {
2746 			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
2747 				if (zio_decompress_data(c, pbuf, lbuf,
2748 				    psize, lsize) == 0 &&
2749 				    zio_decompress_data(c, pbuf2, lbuf2,
2750 				    psize, lsize) == 0 &&
2751 				    bcmp(lbuf, lbuf2, lsize) == 0)
2752 					break;
2753 			}
2754 			if (c != ZIO_COMPRESS_FUNCTIONS)
2755 				break;
2756 			lsize -= SPA_MINBLOCKSIZE;
2757 		}
2758 
2759 		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
2760 		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
2761 
2762 		if (lsize <= psize) {
2763 			(void) printf("Decompress of %s failed\n", thing);
2764 			goto out;
2765 		}
2766 		buf = lbuf;
2767 		size = lsize;
2768 	} else {
2769 		buf = pbuf;
2770 		size = psize;
2771 	}
2772 
2773 	if (flags & ZDB_FLAG_PRINT_BLKPTR)
2774 		zdb_print_blkptr((blkptr_t *)(void *)
2775 		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2776 	else if (flags & ZDB_FLAG_RAW)
2777 		zdb_dump_block_raw(buf, size, flags);
2778 	else if (flags & ZDB_FLAG_INDIRECT)
2779 		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2780 		    flags);
2781 	else if (flags & ZDB_FLAG_GBH)
2782 		zdb_dump_gbh(buf, flags);
2783 	else
2784 		zdb_dump_block(thing, buf, size, flags);
2785 
2786 out:
2787 	umem_free(pbuf, SPA_MAXBLOCKSIZE);
2788 	umem_free(lbuf, SPA_MAXBLOCKSIZE);
2789 	free(dup);
2790 }
2791 
2792 static boolean_t
2793 pool_match(nvlist_t *cfg, char *tgt)
2794 {
2795 	uint64_t v, guid = strtoull(tgt, NULL, 0);
2796 	char *s;
2797 
2798 	if (guid != 0) {
2799 		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
2800 			return (v == guid);
2801 	} else {
2802 		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
2803 			return (strcmp(s, tgt) == 0);
2804 	}
2805 	return (B_FALSE);
2806 }
2807 
2808 static char *
2809 find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
2810 {
2811 	nvlist_t *pools;
2812 	nvlist_t *match = NULL;
2813 	char *name = NULL;
2814 	char *sepp = NULL;
2815 	char sep;
2816 	int count = 0;
2817 	importargs_t args = { 0 };
2818 
2819 	args.paths = dirc;
2820 	args.path = dirv;
2821 	args.can_be_active = B_TRUE;
2822 
2823 	if ((sepp = strpbrk(*target, "/@")) != NULL) {
2824 		sep = *sepp;
2825 		*sepp = '\0';
2826 	}
2827 
2828 	pools = zpool_search_import(g_zfs, &args);
2829 
2830 	if (pools != NULL) {
2831 		nvpair_t *elem = NULL;
2832 		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
2833 			verify(nvpair_value_nvlist(elem, configp) == 0);
2834 			if (pool_match(*configp, *target)) {
2835 				count++;
2836 				if (match != NULL) {
2837 					/* print previously found config */
2838 					if (name != NULL) {
2839 						(void) printf("%s\n", name);
2840 						dump_nvlist(match, 8);
2841 						name = NULL;
2842 					}
2843 					(void) printf("%s\n",
2844 					    nvpair_name(elem));
2845 					dump_nvlist(*configp, 8);
2846 				} else {
2847 					match = *configp;
2848 					name = nvpair_name(elem);
2849 				}
2850 			}
2851 		}
2852 	}
2853 	if (count > 1)
2854 		(void) fatal("\tMatched %d pools - use pool GUID "
2855 		    "instead of pool name or \n"
2856 		    "\tpool name part of a dataset name to select pool", count);
2857 
2858 	if (sepp)
2859 		*sepp = sep;
2860 	/*
2861 	 * If pool GUID was specified for pool id, replace it with pool name
2862 	 */
2863 	if (name && (strstr(*target, name) != *target)) {
2864 		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
2865 
2866 		*target = umem_alloc(sz, UMEM_NOFAIL);
2867 		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
2868 	}
2869 
2870 	*configp = name ? match : NULL;
2871 
2872 	return (name);
2873 }
2874 
2875 int
2876 main(int argc, char **argv)
2877 {
2878 	int i, c;
2879 	struct rlimit rl = { 1024, 1024 };
2880 	spa_t *spa = NULL;
2881 	objset_t *os = NULL;
2882 	int dump_all = 1;
2883 	int verbose = 0;
2884 	int error = 0;
2885 	char **searchdirs = NULL;
2886 	int nsearch = 0;
2887 	char *target;
2888 	nvlist_t *policy = NULL;
2889 	uint64_t max_txg = UINT64_MAX;
2890 	int rewind = ZPOOL_NEVER_REWIND;
2891 
2892 	(void) setrlimit(RLIMIT_NOFILE, &rl);
2893 	(void) enable_extended_FILE_stdio(-1, -1);
2894 
2895 	dprintf_setup(&argc, argv);
2896 
2897 	while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:")) != -1) {
2898 		switch (c) {
2899 		case 'b':
2900 		case 'c':
2901 		case 'd':
2902 		case 'h':
2903 		case 'i':
2904 		case 'l':
2905 		case 'm':
2906 		case 's':
2907 		case 'u':
2908 		case 'C':
2909 		case 'D':
2910 		case 'R':
2911 		case 'S':
2912 			dump_opt[c]++;
2913 			dump_all = 0;
2914 			break;
2915 		case 'A':
2916 		case 'F':
2917 		case 'L':
2918 		case 'X':
2919 		case 'e':
2920 			dump_opt[c]++;
2921 			break;
2922 		case 'v':
2923 			verbose++;
2924 			break;
2925 		case 'p':
2926 			if (searchdirs == NULL) {
2927 				searchdirs = umem_alloc(sizeof (char *),
2928 				    UMEM_NOFAIL);
2929 			} else {
2930 				char **tmp = umem_alloc((nsearch + 1) *
2931 				    sizeof (char *), UMEM_NOFAIL);
2932 				bcopy(searchdirs, tmp, nsearch *
2933 				    sizeof (char *));
2934 				umem_free(searchdirs,
2935 				    nsearch * sizeof (char *));
2936 				searchdirs = tmp;
2937 			}
2938 			searchdirs[nsearch++] = optarg;
2939 			break;
2940 		case 't':
2941 			max_txg = strtoull(optarg, NULL, 0);
2942 			if (max_txg < TXG_INITIAL) {
2943 				(void) fprintf(stderr, "incorrect txg "
2944 				    "specified: %s\n", optarg);
2945 				usage();
2946 			}
2947 			break;
2948 		case 'U':
2949 			spa_config_path = optarg;
2950 			break;
2951 		default:
2952 			usage();
2953 			break;
2954 		}
2955 	}
2956 
2957 	if (!dump_opt['e'] && searchdirs != NULL) {
2958 		(void) fprintf(stderr, "-p option requires use of -e\n");
2959 		usage();
2960 	}
2961 
2962 	kernel_init(FREAD);
2963 	g_zfs = libzfs_init();
2964 	ASSERT(g_zfs != NULL);
2965 
2966 	if (dump_all)
2967 		verbose = MAX(verbose, 1);
2968 
2969 	for (c = 0; c < 256; c++) {
2970 		if (dump_all && !strchr("elAFLRSX", c))
2971 			dump_opt[c] = 1;
2972 		if (dump_opt[c])
2973 			dump_opt[c] += verbose;
2974 	}
2975 
2976 	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
2977 	zfs_recover = (dump_opt['A'] > 1);
2978 
2979 	argc -= optind;
2980 	argv += optind;
2981 
2982 	if (argc < 2 && dump_opt['R'])
2983 		usage();
2984 	if (argc < 1) {
2985 		if (!dump_opt['e'] && dump_opt['C']) {
2986 			dump_cachefile(spa_config_path);
2987 			return (0);
2988 		}
2989 		usage();
2990 	}
2991 
2992 	if (dump_opt['l']) {
2993 		dump_label(argv[0]);
2994 		return (0);
2995 	}
2996 
2997 	if (dump_opt['X'] || dump_opt['F'])
2998 		rewind = ZPOOL_DO_REWIND |
2999 		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
3000 
3001 	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
3002 	    nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
3003 	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
3004 		fatal("internal error: %s", strerror(ENOMEM));
3005 
3006 	error = 0;
3007 	target = argv[0];
3008 
3009 	if (dump_opt['e']) {
3010 		nvlist_t *cfg = NULL;
3011 		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
3012 
3013 		error = ENOENT;
3014 		if (name) {
3015 			if (dump_opt['C'] > 1) {
3016 				(void) printf("\nConfiguration for import:\n");
3017 				dump_nvlist(cfg, 8);
3018 			}
3019 			if (nvlist_add_nvlist(cfg,
3020 			    ZPOOL_REWIND_POLICY, policy) != 0) {
3021 				fatal("can't open '%s': %s",
3022 				    target, strerror(ENOMEM));
3023 			}
3024 			if ((error = spa_import(name, cfg, NULL)) != 0)
3025 				error = spa_import_verbatim(name, cfg, NULL);
3026 		}
3027 	}
3028 
3029 	if (error == 0) {
3030 		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
3031 			error = spa_open_rewind(target, &spa, FTAG, policy,
3032 			    NULL);
3033 			if (error) {
3034 				/*
3035 				 * If we're missing the log device then
3036 				 * try opening the pool after clearing the
3037 				 * log state.
3038 				 */
3039 				mutex_enter(&spa_namespace_lock);
3040 				if ((spa = spa_lookup(target)) != NULL &&
3041 				    spa->spa_log_state == SPA_LOG_MISSING) {
3042 					spa->spa_log_state = SPA_LOG_CLEAR;
3043 					error = 0;
3044 				}
3045 				mutex_exit(&spa_namespace_lock);
3046 
3047 				if (!error) {
3048 					error = spa_open_rewind(target, &spa,
3049 					    FTAG, policy, NULL);
3050 				}
3051 			}
3052 		} else {
3053 			error = dmu_objset_own(target, DMU_OST_ANY,
3054 			    B_TRUE, FTAG, &os);
3055 		}
3056 	}
3057 	nvlist_free(policy);
3058 
3059 	if (error)
3060 		fatal("can't open '%s': %s", target, strerror(error));
3061 
3062 	argv++;
3063 	argc--;
3064 	if (!dump_opt['R']) {
3065 		if (argc > 0) {
3066 			zopt_objects = argc;
3067 			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
3068 			for (i = 0; i < zopt_objects; i++) {
3069 				errno = 0;
3070 				zopt_object[i] = strtoull(argv[i], NULL, 0);
3071 				if (zopt_object[i] == 0 && errno != 0)
3072 					fatal("bad number %s: %s",
3073 					    argv[i], strerror(errno));
3074 			}
3075 		}
3076 		(os != NULL) ? dump_dir(os) : dump_zpool(spa);
3077 	} else {
3078 		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
3079 		flagbits['c'] = ZDB_FLAG_CHECKSUM;
3080 		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
3081 		flagbits['e'] = ZDB_FLAG_BSWAP;
3082 		flagbits['g'] = ZDB_FLAG_GBH;
3083 		flagbits['i'] = ZDB_FLAG_INDIRECT;
3084 		flagbits['p'] = ZDB_FLAG_PHYS;
3085 		flagbits['r'] = ZDB_FLAG_RAW;
3086 
3087 		for (i = 0; i < argc; i++)
3088 			zdb_read_block(argv[i], spa);
3089 	}
3090 
3091 	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
3092 
3093 	fuid_table_destroy();
3094 	sa_loaded = B_FALSE;
3095 
3096 	libzfs_fini(g_zfs);
3097 	kernel_fini();
3098 
3099 	return (0);
3100 }
3101