zdb.c revision 06be98028b8a84e7f327188613dd09a708294d1a
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
25 */
26
27#include <stdio.h>
28#include <unistd.h>
29#include <stdio_ext.h>
30#include <stdlib.h>
31#include <ctype.h>
32#include <sys/zfs_context.h>
33#include <sys/spa.h>
34#include <sys/spa_impl.h>
35#include <sys/dmu.h>
36#include <sys/zap.h>
37#include <sys/fs/zfs.h>
38#include <sys/zfs_znode.h>
39#include <sys/zfs_sa.h>
40#include <sys/sa.h>
41#include <sys/sa_impl.h>
42#include <sys/vdev.h>
43#include <sys/vdev_impl.h>
44#include <sys/metaslab_impl.h>
45#include <sys/dmu_objset.h>
46#include <sys/dsl_dir.h>
47#include <sys/dsl_dataset.h>
48#include <sys/dsl_pool.h>
49#include <sys/dbuf.h>
50#include <sys/zil.h>
51#include <sys/zil_impl.h>
52#include <sys/stat.h>
53#include <sys/resource.h>
54#include <sys/dmu_traverse.h>
55#include <sys/zio_checksum.h>
56#include <sys/zio_compress.h>
57#include <sys/zfs_fuid.h>
58#include <sys/arc.h>
59#include <sys/ddt.h>
60#include <sys/zfeature.h>
61#include <zfs_comutil.h>
62#undef ZFS_MAXNAMELEN
63#undef verify
64#include <libzfs.h>
65
/*
 * Name-lookup helpers.  Each one yields the table entry's name when the
 * index is below the corresponding table-size constant and the literal
 * string "UNKNOWN" otherwise.  ZDB_OT_NAME additionally consults
 * dmu_ot_byteswap[] for indices that satisfy DMU_OT_IS_VALID().
 */
#define	ZDB_COMPRESS_NAME(idx)						\
	((idx) < ZIO_COMPRESS_FUNCTIONS ?				\
	zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx)						\
	((idx) < ZIO_CHECKSUM_FUNCTIONS ?				\
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_OT_NAME(idx)						\
	((idx) < DMU_OT_NUMTYPES ?					\
	dmu_ot[(idx)].ot_name :						\
	DMU_OT_IS_VALID(idx) ?						\
	dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")

/*
 * Collapse an object type index: values below DMU_OT_NUMTYPES map to
 * themselves, the two DMU_OTN_ZAP_* values map to DMU_OT_ZAP_OTHER, and
 * everything else maps to DMU_OT_NUMTYPES.
 */
#define	ZDB_OT_TYPE(idx)						\
	((idx) < DMU_OT_NUMTYPES ?					\
	(idx) :								\
	(((idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA) ?	\
	DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES))
76
77#ifndef lint
78extern boolean_t zfs_recover;
79extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
80#else
81boolean_t zfs_recover;
82uint64_t zfs_arc_max, zfs_arc_meta_limit;
83#endif
84
85const char cmdname[] = "zdb";
86uint8_t dump_opt[256];
87
88typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
89
90extern void dump_intent_log(zilog_t *);
91uint64_t *zopt_object = NULL;
92int zopt_objects = 0;
93libzfs_handle_t *g_zfs;
94uint64_t max_inflight = 1000;
95
96/*
97 * These libumem hooks provide a reasonable set of defaults for the allocator's
98 * debugging facilities.
99 */
/*
 * libumem hook: returns the string to be used as the $UMEM_DEBUG setting.
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype and matches _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
105
/*
 * libumem hook: returns the string to be used as the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging_setting[] = "fail,contents";

	return (umem_logging_setting);
}
111
112static void
113usage(void)
114{
115	(void) fprintf(stderr,
116	    "Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
117	    "[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n"
118	    "       %s [-divPA] [-e -p path...] [-U config] dataset "
119	    "[object...]\n"
120	    "       %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
121	    "poolname [vdev [metaslab...]]\n"
122	    "       %s -R [-A] [-e [-p path...]] poolname "
123	    "vdev:offset:size[:flags]\n"
124	    "       %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
125	    "       %s -l [-uA] device\n"
126	    "       %s -C [-A] [-U config]\n\n",
127	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
128
129	(void) fprintf(stderr, "    Dataset name must include at least one "
130	    "separator character '/' or '@'\n");
131	(void) fprintf(stderr, "    If dataset name is specified, only that "
132	    "dataset is dumped\n");
133	(void) fprintf(stderr, "    If object numbers are specified, only "
134	    "those objects are dumped\n\n");
135	(void) fprintf(stderr, "    Options to control amount of output:\n");
136	(void) fprintf(stderr, "        -u uberblock\n");
137	(void) fprintf(stderr, "        -d dataset(s)\n");
138	(void) fprintf(stderr, "        -i intent logs\n");
139	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
140	(void) fprintf(stderr, "        -h pool history\n");
141	(void) fprintf(stderr, "        -b block statistics\n");
142	(void) fprintf(stderr, "        -m metaslabs\n");
143	(void) fprintf(stderr, "        -M metaslab groups\n");
144	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
145	    "all data) blocks\n");
146	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
147	(void) fprintf(stderr, "        -D dedup statistics\n");
148	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
149	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
150	(void) fprintf(stderr, "        -l dump label contents\n");
151	(void) fprintf(stderr, "        -L disable leak tracking (do not "
152	    "load spacemaps)\n");
153	(void) fprintf(stderr, "        -R read and display block from a "
154	    "device\n\n");
155	(void) fprintf(stderr, "    Below options are intended for use "
156	    "with other options:\n");
157	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
158	    "panic recovery (-AA) or both (-AAA)\n");
159	(void) fprintf(stderr, "        -F attempt automatic rewind within "
160	    "safe range of transaction groups\n");
161	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
162	    "cachefile\n");
163	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
164	    "work with dataset)\n");
165	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
166	    "has altroot/not in a cachefile\n");
167	(void) fprintf(stderr, "        -p <path> -- use one or more with "
168	    "-e to specify path to vdev dir\n");
169	(void) fprintf(stderr, "        -x <dumpdir> -- "
170	    "dump all read blocks into specified directory\n");
171	(void) fprintf(stderr, "        -P print numbers in parseable form\n");
172	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
173	    "searching for uberblocks\n");
174	(void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
175	    "specify the maximum number of "
176	    "checksumming I/Os [default is 200]\n");
177	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
178	    "to make only that option verbose\n");
179	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
180	exit(1);
181}
182
183/*
184 * Called for usage errors that are discovered after a call to spa_open(),
185 * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
186 */
187
188static void
189fatal(const char *fmt, ...)
190{
191	va_list ap;
192
193	va_start(ap, fmt);
194	(void) fprintf(stderr, "%s: ", cmdname);
195	(void) vfprintf(stderr, fmt, ap);
196	va_end(ap);
197	(void) fprintf(stderr, "\n");
198
199	exit(1);
200}
201
202/* ARGSUSED */
203static void
204dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
205{
206	nvlist_t *nv;
207	size_t nvsize = *(uint64_t *)data;
208	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
209
210	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
211
212	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
213
214	umem_free(packed, nvsize);
215
216	dump_nvlist(nv, 8);
217
218	nvlist_free(nv);
219}
220
221/* ARGSUSED */
222static void
223dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
224{
225	spa_history_phys_t *shp = data;
226
227	if (shp == NULL)
228		return;
229
230	(void) printf("\t\tpool_create_len = %llu\n",
231	    (u_longlong_t)shp->sh_pool_create_len);
232	(void) printf("\t\tphys_max_off = %llu\n",
233	    (u_longlong_t)shp->sh_phys_max_off);
234	(void) printf("\t\tbof = %llu\n",
235	    (u_longlong_t)shp->sh_bof);
236	(void) printf("\t\teof = %llu\n",
237	    (u_longlong_t)shp->sh_eof);
238	(void) printf("\t\trecords_lost = %llu\n",
239	    (u_longlong_t)shp->sh_records_lost);
240}
241
242static void
243zdb_nicenum(uint64_t num, char *buf)
244{
245	if (dump_opt['P'])
246		(void) sprintf(buf, "%llu", (longlong_t)num);
247	else
248		nicenum(num, buf);
249}
250
251const char histo_stars[] = "****************************************";
252const int histo_width = sizeof (histo_stars) - 1;
253
254static void
255dump_histogram(const uint64_t *histo, int size, int offset)
256{
257	int i;
258	int minidx = size - 1;
259	int maxidx = 0;
260	uint64_t max = 0;
261
262	for (i = 0; i < size; i++) {
263		if (histo[i] > max)
264			max = histo[i];
265		if (histo[i] > 0 && i > maxidx)
266			maxidx = i;
267		if (histo[i] > 0 && i < minidx)
268			minidx = i;
269	}
270
271	if (max < histo_width)
272		max = histo_width;
273
274	for (i = minidx; i <= maxidx; i++) {
275		(void) printf("\t\t\t%3u: %6llu %s\n",
276		    i + offset, (u_longlong_t)histo[i],
277		    &histo_stars[(max - histo[i]) * histo_width / max]);
278	}
279}
280
281static void
282dump_zap_stats(objset_t *os, uint64_t object)
283{
284	int error;
285	zap_stats_t zs;
286
287	error = zap_get_stats(os, object, &zs);
288	if (error)
289		return;
290
291	if (zs.zs_ptrtbl_len == 0) {
292		ASSERT(zs.zs_num_blocks == 1);
293		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
294		    (u_longlong_t)zs.zs_blocksize,
295		    (u_longlong_t)zs.zs_num_entries);
296		return;
297	}
298
299	(void) printf("\tFat ZAP stats:\n");
300
301	(void) printf("\t\tPointer table:\n");
302	(void) printf("\t\t\t%llu elements\n",
303	    (u_longlong_t)zs.zs_ptrtbl_len);
304	(void) printf("\t\t\tzt_blk: %llu\n",
305	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
306	(void) printf("\t\t\tzt_numblks: %llu\n",
307	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
308	(void) printf("\t\t\tzt_shift: %llu\n",
309	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
310	(void) printf("\t\t\tzt_blks_copied: %llu\n",
311	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
312	(void) printf("\t\t\tzt_nextblk: %llu\n",
313	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
314
315	(void) printf("\t\tZAP entries: %llu\n",
316	    (u_longlong_t)zs.zs_num_entries);
317	(void) printf("\t\tLeaf blocks: %llu\n",
318	    (u_longlong_t)zs.zs_num_leafs);
319	(void) printf("\t\tTotal blocks: %llu\n",
320	    (u_longlong_t)zs.zs_num_blocks);
321	(void) printf("\t\tzap_block_type: 0x%llx\n",
322	    (u_longlong_t)zs.zs_block_type);
323	(void) printf("\t\tzap_magic: 0x%llx\n",
324	    (u_longlong_t)zs.zs_magic);
325	(void) printf("\t\tzap_salt: 0x%llx\n",
326	    (u_longlong_t)zs.zs_salt);
327
328	(void) printf("\t\tLeafs with 2^n pointers:\n");
329	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
330
331	(void) printf("\t\tBlocks with n*5 entries:\n");
332	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
333
334	(void) printf("\t\tBlocks n/10 full:\n");
335	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
336
337	(void) printf("\t\tEntries with n chunks:\n");
338	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
339
340	(void) printf("\t\tBuckets with n entries:\n");
341	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
342}
343
344/*ARGSUSED*/
345static void
346dump_none(objset_t *os, uint64_t object, void *data, size_t size)
347{
348}
349
350/*ARGSUSED*/
351static void
352dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
353{
354	(void) printf("\tUNKNOWN OBJECT TYPE\n");
355}
356
357/*ARGSUSED*/
358void
359dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
360{
361}
362
363/*ARGSUSED*/
364static void
365dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
366{
367}
368
369/*ARGSUSED*/
370static void
371dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
372{
373	zap_cursor_t zc;
374	zap_attribute_t attr;
375	void *prop;
376	int i;
377
378	dump_zap_stats(os, object);
379	(void) printf("\n");
380
381	for (zap_cursor_init(&zc, os, object);
382	    zap_cursor_retrieve(&zc, &attr) == 0;
383	    zap_cursor_advance(&zc)) {
384		(void) printf("\t\t%s = ", attr.za_name);
385		if (attr.za_num_integers == 0) {
386			(void) printf("\n");
387			continue;
388		}
389		prop = umem_zalloc(attr.za_num_integers *
390		    attr.za_integer_length, UMEM_NOFAIL);
391		(void) zap_lookup(os, object, attr.za_name,
392		    attr.za_integer_length, attr.za_num_integers, prop);
393		if (attr.za_integer_length == 1) {
394			(void) printf("%s", (char *)prop);
395		} else {
396			for (i = 0; i < attr.za_num_integers; i++) {
397				switch (attr.za_integer_length) {
398				case 2:
399					(void) printf("%u ",
400					    ((uint16_t *)prop)[i]);
401					break;
402				case 4:
403					(void) printf("%u ",
404					    ((uint32_t *)prop)[i]);
405					break;
406				case 8:
407					(void) printf("%lld ",
408					    (u_longlong_t)((int64_t *)prop)[i]);
409					break;
410				}
411			}
412		}
413		(void) printf("\n");
414		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
415	}
416	zap_cursor_fini(&zc);
417}
418
419/*ARGSUSED*/
420static void
421dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
422{
423	dump_zap_stats(os, object);
424	/* contents are printed elsewhere, properly decoded */
425}
426
427/*ARGSUSED*/
428static void
429dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
430{
431	zap_cursor_t zc;
432	zap_attribute_t attr;
433
434	dump_zap_stats(os, object);
435	(void) printf("\n");
436
437	for (zap_cursor_init(&zc, os, object);
438	    zap_cursor_retrieve(&zc, &attr) == 0;
439	    zap_cursor_advance(&zc)) {
440		(void) printf("\t\t%s = ", attr.za_name);
441		if (attr.za_num_integers == 0) {
442			(void) printf("\n");
443			continue;
444		}
445		(void) printf(" %llx : [%d:%d:%d]\n",
446		    (u_longlong_t)attr.za_first_integer,
447		    (int)ATTR_LENGTH(attr.za_first_integer),
448		    (int)ATTR_BSWAP(attr.za_first_integer),
449		    (int)ATTR_NUM(attr.za_first_integer));
450	}
451	zap_cursor_fini(&zc);
452}
453
454/*ARGSUSED*/
455static void
456dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
457{
458	zap_cursor_t zc;
459	zap_attribute_t attr;
460	uint16_t *layout_attrs;
461	int i;
462
463	dump_zap_stats(os, object);
464	(void) printf("\n");
465
466	for (zap_cursor_init(&zc, os, object);
467	    zap_cursor_retrieve(&zc, &attr) == 0;
468	    zap_cursor_advance(&zc)) {
469		(void) printf("\t\t%s = [", attr.za_name);
470		if (attr.za_num_integers == 0) {
471			(void) printf("\n");
472			continue;
473		}
474
475		VERIFY(attr.za_integer_length == 2);
476		layout_attrs = umem_zalloc(attr.za_num_integers *
477		    attr.za_integer_length, UMEM_NOFAIL);
478
479		VERIFY(zap_lookup(os, object, attr.za_name,
480		    attr.za_integer_length,
481		    attr.za_num_integers, layout_attrs) == 0);
482
483		for (i = 0; i != attr.za_num_integers; i++)
484			(void) printf(" %d ", (int)layout_attrs[i]);
485		(void) printf("]\n");
486		umem_free(layout_attrs,
487		    attr.za_num_integers * attr.za_integer_length);
488	}
489	zap_cursor_fini(&zc);
490}
491
492/*ARGSUSED*/
493static void
494dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
495{
496	zap_cursor_t zc;
497	zap_attribute_t attr;
498	const char *typenames[] = {
499		/* 0 */ "not specified",
500		/* 1 */ "FIFO",
501		/* 2 */ "Character Device",
502		/* 3 */ "3 (invalid)",
503		/* 4 */ "Directory",
504		/* 5 */ "5 (invalid)",
505		/* 6 */ "Block Device",
506		/* 7 */ "7 (invalid)",
507		/* 8 */ "Regular File",
508		/* 9 */ "9 (invalid)",
509		/* 10 */ "Symbolic Link",
510		/* 11 */ "11 (invalid)",
511		/* 12 */ "Socket",
512		/* 13 */ "Door",
513		/* 14 */ "Event Port",
514		/* 15 */ "15 (invalid)",
515	};
516
517	dump_zap_stats(os, object);
518	(void) printf("\n");
519
520	for (zap_cursor_init(&zc, os, object);
521	    zap_cursor_retrieve(&zc, &attr) == 0;
522	    zap_cursor_advance(&zc)) {
523		(void) printf("\t\t%s = %lld (type: %s)\n",
524		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
525		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
526	}
527	zap_cursor_fini(&zc);
528}
529
530int
531get_dtl_refcount(vdev_t *vd)
532{
533	int refcount = 0;
534
535	if (vd->vdev_ops->vdev_op_leaf) {
536		space_map_t *sm = vd->vdev_dtl_sm;
537
538		if (sm != NULL &&
539		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
540			return (1);
541		return (0);
542	}
543
544	for (int c = 0; c < vd->vdev_children; c++)
545		refcount += get_dtl_refcount(vd->vdev_child[c]);
546	return (refcount);
547}
548
549int
550get_metaslab_refcount(vdev_t *vd)
551{
552	int refcount = 0;
553
554	if (vd->vdev_top == vd && !vd->vdev_removing) {
555		for (int m = 0; m < vd->vdev_ms_count; m++) {
556			space_map_t *sm = vd->vdev_ms[m]->ms_sm;
557
558			if (sm != NULL &&
559			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
560				refcount++;
561		}
562	}
563	for (int c = 0; c < vd->vdev_children; c++)
564		refcount += get_metaslab_refcount(vd->vdev_child[c]);
565
566	return (refcount);
567}
568
569static int
570verify_spacemap_refcounts(spa_t *spa)
571{
572	uint64_t expected_refcount = 0;
573	uint64_t actual_refcount;
574
575	(void) feature_get_refcount(spa,
576	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
577	    &expected_refcount);
578	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
579	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
580
581	if (expected_refcount != actual_refcount) {
582		(void) printf("space map refcount mismatch: expected %lld != "
583		    "actual %lld\n",
584		    (longlong_t)expected_refcount,
585		    (longlong_t)actual_refcount);
586		return (2);
587	}
588	return (0);
589}
590
591static void
592dump_spacemap(objset_t *os, space_map_t *sm)
593{
594	uint64_t alloc, offset, entry;
595	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
596			    "INVALID", "INVALID", "INVALID", "INVALID" };
597
598	if (sm == NULL)
599		return;
600
601	/*
602	 * Print out the freelist entries in both encoded and decoded form.
603	 */
604	alloc = 0;
605	for (offset = 0; offset < space_map_length(sm);
606	    offset += sizeof (entry)) {
607		uint8_t mapshift = sm->sm_shift;
608
609		VERIFY0(dmu_read(os, space_map_object(sm), offset,
610		    sizeof (entry), &entry, DMU_READ_PREFETCH));
611		if (SM_DEBUG_DECODE(entry)) {
612
613			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
614			    (u_longlong_t)(offset / sizeof (entry)),
615			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
616			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
617			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
618		} else {
619			(void) printf("\t    [%6llu]    %c  range:"
620			    " %010llx-%010llx  size: %06llx\n",
621			    (u_longlong_t)(offset / sizeof (entry)),
622			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
623			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
624			    mapshift) + sm->sm_start),
625			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
626			    mapshift) + sm->sm_start +
627			    (SM_RUN_DECODE(entry) << mapshift)),
628			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
629			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
630				alloc += SM_RUN_DECODE(entry) << mapshift;
631			else
632				alloc -= SM_RUN_DECODE(entry) << mapshift;
633		}
634	}
635	if (alloc != space_map_allocated(sm)) {
636		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
637		    "with space map summary (%llu)\n",
638		    (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
639	}
640}
641
642static void
643dump_metaslab_stats(metaslab_t *msp)
644{
645	char maxbuf[32];
646	range_tree_t *rt = msp->ms_tree;
647	avl_tree_t *t = &msp->ms_size_tree;
648	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
649
650	zdb_nicenum(metaslab_block_maxsize(msp), maxbuf);
651
652	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
653	    "segments", avl_numnodes(t), "maxsize", maxbuf,
654	    "freepct", free_pct);
655	(void) printf("\tIn-memory histogram:\n");
656	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
657}
658
659static void
660dump_metaslab(metaslab_t *msp)
661{
662	vdev_t *vd = msp->ms_group->mg_vd;
663	spa_t *spa = vd->vdev_spa;
664	space_map_t *sm = msp->ms_sm;
665	char freebuf[32];
666
667	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf);
668
669	(void) printf(
670	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
671	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
672	    (u_longlong_t)space_map_object(sm), freebuf);
673
674	if (dump_opt['m'] > 2 && !dump_opt['L']) {
675		mutex_enter(&msp->ms_lock);
676		metaslab_load_wait(msp);
677		if (!msp->ms_loaded) {
678			VERIFY0(metaslab_load(msp));
679			range_tree_stat_verify(msp->ms_tree);
680		}
681		dump_metaslab_stats(msp);
682		metaslab_unload(msp);
683		mutex_exit(&msp->ms_lock);
684	}
685
686	if (dump_opt['m'] > 1 && sm != NULL &&
687	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
688		/*
689		 * The space map histogram represents free space in chunks
690		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
691		 */
692		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
693		    (u_longlong_t)msp->ms_fragmentation);
694		dump_histogram(sm->sm_phys->smp_histogram,
695		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
696	}
697
698	if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
699		ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
700
701		mutex_enter(&msp->ms_lock);
702		dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
703		mutex_exit(&msp->ms_lock);
704	}
705}
706
707static void
708print_vdev_metaslab_header(vdev_t *vd)
709{
710	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
711	    (u_longlong_t)vd->vdev_id,
712	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
713	    "offset", "spacemap", "free");
714	(void) printf("\t%15s   %19s   %15s   %10s\n",
715	    "---------------", "-------------------",
716	    "---------------", "-------------");
717}
718
719static void
720dump_metaslab_groups(spa_t *spa)
721{
722	vdev_t *rvd = spa->spa_root_vdev;
723	metaslab_class_t *mc = spa_normal_class(spa);
724	uint64_t fragmentation;
725
726	metaslab_class_histogram_verify(mc);
727
728	for (int c = 0; c < rvd->vdev_children; c++) {
729		vdev_t *tvd = rvd->vdev_child[c];
730		metaslab_group_t *mg = tvd->vdev_mg;
731
732		if (mg->mg_class != mc)
733			continue;
734
735		metaslab_group_histogram_verify(mg);
736		mg->mg_fragmentation = metaslab_group_fragmentation(mg);
737
738		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
739		    "fragmentation",
740		    (u_longlong_t)tvd->vdev_id,
741		    (u_longlong_t)tvd->vdev_ms_count);
742		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
743			(void) printf("%3s\n", "-");
744		} else {
745			(void) printf("%3llu%%\n",
746			    (u_longlong_t)mg->mg_fragmentation);
747		}
748		dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
749	}
750
751	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
752	fragmentation = metaslab_class_fragmentation(mc);
753	if (fragmentation == ZFS_FRAG_INVALID)
754		(void) printf("\t%3s\n", "-");
755	else
756		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
757	dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
758}
759
760static void
761dump_metaslabs(spa_t *spa)
762{
763	vdev_t *vd, *rvd = spa->spa_root_vdev;
764	uint64_t m, c = 0, children = rvd->vdev_children;
765
766	(void) printf("\nMetaslabs:\n");
767
768	if (!dump_opt['d'] && zopt_objects > 0) {
769		c = zopt_object[0];
770
771		if (c >= children)
772			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
773
774		if (zopt_objects > 1) {
775			vd = rvd->vdev_child[c];
776			print_vdev_metaslab_header(vd);
777
778			for (m = 1; m < zopt_objects; m++) {
779				if (zopt_object[m] < vd->vdev_ms_count)
780					dump_metaslab(
781					    vd->vdev_ms[zopt_object[m]]);
782				else
783					(void) fprintf(stderr, "bad metaslab "
784					    "number %llu\n",
785					    (u_longlong_t)zopt_object[m]);
786			}
787			(void) printf("\n");
788			return;
789		}
790		children = c + 1;
791	}
792	for (; c < children; c++) {
793		vd = rvd->vdev_child[c];
794		print_vdev_metaslab_header(vd);
795
796		for (m = 0; m < vd->vdev_ms_count; m++)
797			dump_metaslab(vd->vdev_ms[m]);
798		(void) printf("\n");
799	}
800}
801
802static void
803dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
804{
805	const ddt_phys_t *ddp = dde->dde_phys;
806	const ddt_key_t *ddk = &dde->dde_key;
807	char *types[4] = { "ditto", "single", "double", "triple" };
808	char blkbuf[BP_SPRINTF_LEN];
809	blkptr_t blk;
810
811	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
812		if (ddp->ddp_phys_birth == 0)
813			continue;
814		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
815		snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
816		(void) printf("index %llx refcnt %llu %s %s\n",
817		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
818		    types[p], blkbuf);
819	}
820}
821
822static void
823dump_dedup_ratio(const ddt_stat_t *dds)
824{
825	double rL, rP, rD, D, dedup, compress, copies;
826
827	if (dds->dds_blocks == 0)
828		return;
829
830	rL = (double)dds->dds_ref_lsize;
831	rP = (double)dds->dds_ref_psize;
832	rD = (double)dds->dds_ref_dsize;
833	D = (double)dds->dds_dsize;
834
835	dedup = rD / D;
836	compress = rL / rP;
837	copies = rD / rP;
838
839	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
840	    "dedup * compress / copies = %.2f\n\n",
841	    dedup, compress, copies, dedup * compress / copies);
842}
843
844static void
845dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
846{
847	char name[DDT_NAMELEN];
848	ddt_entry_t dde;
849	uint64_t walk = 0;
850	dmu_object_info_t doi;
851	uint64_t count, dspace, mspace;
852	int error;
853
854	error = ddt_object_info(ddt, type, class, &doi);
855
856	if (error == ENOENT)
857		return;
858	ASSERT(error == 0);
859
860	if ((count = ddt_object_count(ddt, type, class)) == 0)
861		return;
862
863	dspace = doi.doi_physical_blocks_512 << 9;
864	mspace = doi.doi_fill_count * doi.doi_data_block_size;
865
866	ddt_object_name(ddt, type, class, name);
867
868	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
869	    name,
870	    (u_longlong_t)count,
871	    (u_longlong_t)(dspace / count),
872	    (u_longlong_t)(mspace / count));
873
874	if (dump_opt['D'] < 3)
875		return;
876
877	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
878
879	if (dump_opt['D'] < 4)
880		return;
881
882	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
883		return;
884
885	(void) printf("%s contents:\n\n", name);
886
887	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
888		dump_dde(ddt, &dde, walk);
889
890	ASSERT(error == ENOENT);
891
892	(void) printf("\n");
893}
894
895static void
896dump_all_ddts(spa_t *spa)
897{
898	ddt_histogram_t ddh_total = { 0 };
899	ddt_stat_t dds_total = { 0 };
900
901	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
902		ddt_t *ddt = spa->spa_ddt[c];
903		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
904			for (enum ddt_class class = 0; class < DDT_CLASSES;
905			    class++) {
906				dump_ddt(ddt, type, class);
907			}
908		}
909	}
910
911	ddt_get_dedup_stats(spa, &dds_total);
912
913	if (dds_total.dds_blocks == 0) {
914		(void) printf("All DDTs are empty\n");
915		return;
916	}
917
918	(void) printf("\n");
919
920	if (dump_opt['D'] > 1) {
921		(void) printf("DDT histogram (aggregated over all DDTs):\n");
922		ddt_get_dedup_histogram(spa, &ddh_total);
923		zpool_dump_ddt(&dds_total, &ddh_total);
924	}
925
926	dump_dedup_ratio(&dds_total);
927}
928
929static void
930dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
931{
932	char *prefix = arg;
933
934	(void) printf("%s [%llu,%llu) length %llu\n",
935	    prefix,
936	    (u_longlong_t)start,
937	    (u_longlong_t)(start + size),
938	    (u_longlong_t)(size));
939}
940
941static void
942dump_dtl(vdev_t *vd, int indent)
943{
944	spa_t *spa = vd->vdev_spa;
945	boolean_t required;
946	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
947	char prefix[256];
948
949	spa_vdev_state_enter(spa, SCL_NONE);
950	required = vdev_dtl_required(vd);
951	(void) spa_vdev_state_exit(spa, NULL, 0);
952
953	if (indent == 0)
954		(void) printf("\nDirty time logs:\n\n");
955
956	(void) printf("\t%*s%s [%s]\n", indent, "",
957	    vd->vdev_path ? vd->vdev_path :
958	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
959	    required ? "DTL-required" : "DTL-expendable");
960
961	for (int t = 0; t < DTL_TYPES; t++) {
962		range_tree_t *rt = vd->vdev_dtl[t];
963		if (range_tree_space(rt) == 0)
964			continue;
965		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
966		    indent + 2, "", name[t]);
967		mutex_enter(rt->rt_lock);
968		range_tree_walk(rt, dump_dtl_seg, prefix);
969		mutex_exit(rt->rt_lock);
970		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
971			dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
972	}
973
974	for (int c = 0; c < vd->vdev_children; c++)
975		dump_dtl(vd->vdev_child[c], indent + 4);
976}
977
978static void
979dump_history(spa_t *spa)
980{
981	nvlist_t **events = NULL;
982	char buf[SPA_MAXBLOCKSIZE];
983	uint64_t resid, len, off = 0;
984	uint_t num = 0;
985	int error;
986	time_t tsec;
987	struct tm t;
988	char tbuf[30];
989	char internalstr[MAXPATHLEN];
990
991	do {
992		len = sizeof (buf);
993
994		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
995			(void) fprintf(stderr, "Unable to read history: "
996			    "error %d\n", error);
997			return;
998		}
999
1000		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
1001			break;
1002
1003		off -= resid;
1004	} while (len != 0);
1005
1006	(void) printf("\nHistory:\n");
1007	for (int i = 0; i < num; i++) {
1008		uint64_t time, txg, ievent;
1009		char *cmd, *intstr;
1010		boolean_t printed = B_FALSE;
1011
1012		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
1013		    &time) != 0)
1014			goto next;
1015		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
1016		    &cmd) != 0) {
1017			if (nvlist_lookup_uint64(events[i],
1018			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
1019				goto next;
1020			verify(nvlist_lookup_uint64(events[i],
1021			    ZPOOL_HIST_TXG, &txg) == 0);
1022			verify(nvlist_lookup_string(events[i],
1023			    ZPOOL_HIST_INT_STR, &intstr) == 0);
1024			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
1025				goto next;
1026
1027			(void) snprintf(internalstr,
1028			    sizeof (internalstr),
1029			    "[internal %s txg:%lld] %s",
1030			    zfs_history_event_names[ievent], txg,
1031			    intstr);
1032			cmd = internalstr;
1033		}
1034		tsec = time;
1035		(void) localtime_r(&tsec, &t);
1036		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
1037		(void) printf("%s %s\n", tbuf, cmd);
1038		printed = B_TRUE;
1039
1040next:
1041		if (dump_opt['h'] > 1) {
1042			if (!printed)
1043				(void) printf("unrecognized record:\n");
1044			dump_nvlist(events[i], 2);
1045		}
1046	}
1047}
1048
1049/*ARGSUSED*/
1050static void
1051dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
1052{
1053}
1054
1055static uint64_t
1056blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
1057    const zbookmark_phys_t *zb)
1058{
1059	if (dnp == NULL) {
1060		ASSERT(zb->zb_level < 0);
1061		if (zb->zb_object == 0)
1062			return (zb->zb_blkid);
1063		return (zb->zb_blkid * BP_GET_LSIZE(bp));
1064	}
1065
1066	ASSERT(zb->zb_level >= 0);
1067
1068	return ((zb->zb_blkid <<
1069	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
1070	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
1071}
1072
1073static void
1074snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
1075{
1076	const dva_t *dva = bp->blk_dva;
1077	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
1078
1079	if (dump_opt['b'] >= 6) {
1080		snprintf_blkptr(blkbuf, buflen, bp);
1081		return;
1082	}
1083
1084	if (BP_IS_EMBEDDED(bp)) {
1085		(void) sprintf(blkbuf,
1086		    "EMBEDDED et=%u %llxL/%llxP B=%llu",
1087		    (int)BPE_GET_ETYPE(bp),
1088		    (u_longlong_t)BPE_GET_LSIZE(bp),
1089		    (u_longlong_t)BPE_GET_PSIZE(bp),
1090		    (u_longlong_t)bp->blk_birth);
1091		return;
1092	}
1093
1094	blkbuf[0] = '\0';
1095	for (int i = 0; i < ndvas; i++)
1096		(void) snprintf(blkbuf + strlen(blkbuf),
1097		    buflen - strlen(blkbuf), "%llu:%llx:%llx ",
1098		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
1099		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
1100		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
1101
1102	if (BP_IS_HOLE(bp)) {
1103		(void) snprintf(blkbuf + strlen(blkbuf),
1104		    buflen - strlen(blkbuf), "B=%llu",
1105		    (u_longlong_t)bp->blk_birth);
1106	} else {
1107		(void) snprintf(blkbuf + strlen(blkbuf),
1108		    buflen - strlen(blkbuf),
1109		    "%llxL/%llxP F=%llu B=%llu/%llu",
1110		    (u_longlong_t)BP_GET_LSIZE(bp),
1111		    (u_longlong_t)BP_GET_PSIZE(bp),
1112		    (u_longlong_t)BP_GET_FILL(bp),
1113		    (u_longlong_t)bp->blk_birth,
1114		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
1115	}
1116}
1117
1118static void
1119print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
1120    const dnode_phys_t *dnp)
1121{
1122	char blkbuf[BP_SPRINTF_LEN];
1123	int l;
1124
1125	if (!BP_IS_EMBEDDED(bp)) {
1126		ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1127		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1128	}
1129
1130	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
1131
1132	ASSERT(zb->zb_level >= 0);
1133
1134	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
1135		if (l == zb->zb_level) {
1136			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
1137		} else {
1138			(void) printf(" ");
1139		}
1140	}
1141
1142	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1143	(void) printf("%s\n", blkbuf);
1144}
1145
1146static int
1147visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
1148    blkptr_t *bp, const zbookmark_phys_t *zb)
1149{
1150	int err = 0;
1151
1152	if (bp->blk_birth == 0)
1153		return (0);
1154
1155	print_indirect(bp, zb, dnp);
1156
1157	if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
1158		uint32_t flags = ARC_WAIT;
1159		int i;
1160		blkptr_t *cbp;
1161		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
1162		arc_buf_t *buf;
1163		uint64_t fill = 0;
1164
1165		err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
1166		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
1167		if (err)
1168			return (err);
1169		ASSERT(buf->b_data);
1170
1171		/* recursively visit blocks below this */
1172		cbp = buf->b_data;
1173		for (i = 0; i < epb; i++, cbp++) {
1174			zbookmark_phys_t czb;
1175
1176			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
1177			    zb->zb_level - 1,
1178			    zb->zb_blkid * epb + i);
1179			err = visit_indirect(spa, dnp, cbp, &czb);
1180			if (err)
1181				break;
1182			fill += BP_GET_FILL(cbp);
1183		}
1184		if (!err)
1185			ASSERT3U(fill, ==, BP_GET_FILL(bp));
1186		(void) arc_buf_remove_ref(buf, &buf);
1187	}
1188
1189	return (err);
1190}
1191
1192/*ARGSUSED*/
1193static void
1194dump_indirect(dnode_t *dn)
1195{
1196	dnode_phys_t *dnp = dn->dn_phys;
1197	int j;
1198	zbookmark_phys_t czb;
1199
1200	(void) printf("Indirect blocks:\n");
1201
1202	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1203	    dn->dn_object, dnp->dn_nlevels - 1, 0);
1204	for (j = 0; j < dnp->dn_nblkptr; j++) {
1205		czb.zb_blkid = j;
1206		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1207		    &dnp->dn_blkptr[j], &czb);
1208	}
1209
1210	(void) printf("\n");
1211}
1212
1213/*ARGSUSED*/
1214static void
1215dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1216{
1217	dsl_dir_phys_t *dd = data;
1218	time_t crtime;
1219	char nice[32];
1220
1221	if (dd == NULL)
1222		return;
1223
1224	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1225
1226	crtime = dd->dd_creation_time;
1227	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1228	(void) printf("\t\thead_dataset_obj = %llu\n",
1229	    (u_longlong_t)dd->dd_head_dataset_obj);
1230	(void) printf("\t\tparent_dir_obj = %llu\n",
1231	    (u_longlong_t)dd->dd_parent_obj);
1232	(void) printf("\t\torigin_obj = %llu\n",
1233	    (u_longlong_t)dd->dd_origin_obj);
1234	(void) printf("\t\tchild_dir_zapobj = %llu\n",
1235	    (u_longlong_t)dd->dd_child_dir_zapobj);
1236	zdb_nicenum(dd->dd_used_bytes, nice);
1237	(void) printf("\t\tused_bytes = %s\n", nice);
1238	zdb_nicenum(dd->dd_compressed_bytes, nice);
1239	(void) printf("\t\tcompressed_bytes = %s\n", nice);
1240	zdb_nicenum(dd->dd_uncompressed_bytes, nice);
1241	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
1242	zdb_nicenum(dd->dd_quota, nice);
1243	(void) printf("\t\tquota = %s\n", nice);
1244	zdb_nicenum(dd->dd_reserved, nice);
1245	(void) printf("\t\treserved = %s\n", nice);
1246	(void) printf("\t\tprops_zapobj = %llu\n",
1247	    (u_longlong_t)dd->dd_props_zapobj);
1248	(void) printf("\t\tdeleg_zapobj = %llu\n",
1249	    (u_longlong_t)dd->dd_deleg_zapobj);
1250	(void) printf("\t\tflags = %llx\n",
1251	    (u_longlong_t)dd->dd_flags);
1252
1253#define	DO(which) \
1254	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1255	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1256	DO(HEAD);
1257	DO(SNAP);
1258	DO(CHILD);
1259	DO(CHILD_RSRV);
1260	DO(REFRSRV);
1261#undef DO
1262}
1263
1264/*ARGSUSED*/
1265static void
1266dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1267{
1268	dsl_dataset_phys_t *ds = data;
1269	time_t crtime;
1270	char used[32], compressed[32], uncompressed[32], unique[32];
1271	char blkbuf[BP_SPRINTF_LEN];
1272
1273	if (ds == NULL)
1274		return;
1275
1276	ASSERT(size == sizeof (*ds));
1277	crtime = ds->ds_creation_time;
1278	zdb_nicenum(ds->ds_referenced_bytes, used);
1279	zdb_nicenum(ds->ds_compressed_bytes, compressed);
1280	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
1281	zdb_nicenum(ds->ds_unique_bytes, unique);
1282	snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
1283
1284	(void) printf("\t\tdir_obj = %llu\n",
1285	    (u_longlong_t)ds->ds_dir_obj);
1286	(void) printf("\t\tprev_snap_obj = %llu\n",
1287	    (u_longlong_t)ds->ds_prev_snap_obj);
1288	(void) printf("\t\tprev_snap_txg = %llu\n",
1289	    (u_longlong_t)ds->ds_prev_snap_txg);
1290	(void) printf("\t\tnext_snap_obj = %llu\n",
1291	    (u_longlong_t)ds->ds_next_snap_obj);
1292	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1293	    (u_longlong_t)ds->ds_snapnames_zapobj);
1294	(void) printf("\t\tnum_children = %llu\n",
1295	    (u_longlong_t)ds->ds_num_children);
1296	(void) printf("\t\tuserrefs_obj = %llu\n",
1297	    (u_longlong_t)ds->ds_userrefs_obj);
1298	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1299	(void) printf("\t\tcreation_txg = %llu\n",
1300	    (u_longlong_t)ds->ds_creation_txg);
1301	(void) printf("\t\tdeadlist_obj = %llu\n",
1302	    (u_longlong_t)ds->ds_deadlist_obj);
1303	(void) printf("\t\tused_bytes = %s\n", used);
1304	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1305	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1306	(void) printf("\t\tunique = %s\n", unique);
1307	(void) printf("\t\tfsid_guid = %llu\n",
1308	    (u_longlong_t)ds->ds_fsid_guid);
1309	(void) printf("\t\tguid = %llu\n",
1310	    (u_longlong_t)ds->ds_guid);
1311	(void) printf("\t\tflags = %llx\n",
1312	    (u_longlong_t)ds->ds_flags);
1313	(void) printf("\t\tnext_clones_obj = %llu\n",
1314	    (u_longlong_t)ds->ds_next_clones_obj);
1315	(void) printf("\t\tprops_obj = %llu\n",
1316	    (u_longlong_t)ds->ds_props_obj);
1317	(void) printf("\t\tbp = %s\n", blkbuf);
1318}
1319
1320/* ARGSUSED */
1321static int
1322dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1323{
1324	char blkbuf[BP_SPRINTF_LEN];
1325
1326	if (bp->blk_birth != 0) {
1327		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
1328		(void) printf("\t%s\n", blkbuf);
1329	}
1330	return (0);
1331}
1332
1333static void
1334dump_bptree(objset_t *os, uint64_t obj, char *name)
1335{
1336	char bytes[32];
1337	bptree_phys_t *bt;
1338	dmu_buf_t *db;
1339
1340	if (dump_opt['d'] < 3)
1341		return;
1342
1343	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1344	bt = db->db_data;
1345	zdb_nicenum(bt->bt_bytes, bytes);
1346	(void) printf("\n    %s: %llu datasets, %s\n",
1347	    name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1348	dmu_buf_rele(db, FTAG);
1349
1350	if (dump_opt['d'] < 5)
1351		return;
1352
1353	(void) printf("\n");
1354
1355	(void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1356}
1357
1358/* ARGSUSED */
1359static int
1360dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1361{
1362	char blkbuf[BP_SPRINTF_LEN];
1363
1364	ASSERT(bp->blk_birth != 0);
1365	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1366	(void) printf("\t%s\n", blkbuf);
1367	return (0);
1368}
1369
1370static void
1371dump_bpobj(bpobj_t *bpo, char *name, int indent)
1372{
1373	char bytes[32];
1374	char comp[32];
1375	char uncomp[32];
1376
1377	if (dump_opt['d'] < 3)
1378		return;
1379
1380	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
1381	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1382		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
1383		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
1384		(void) printf("    %*s: object %llu, %llu local blkptrs, "
1385		    "%llu subobjs, %s (%s/%s comp)\n",
1386		    indent * 8, name,
1387		    (u_longlong_t)bpo->bpo_object,
1388		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1389		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1390		    bytes, comp, uncomp);
1391
1392		for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1393			uint64_t subobj;
1394			bpobj_t subbpo;
1395			int error;
1396			VERIFY0(dmu_read(bpo->bpo_os,
1397			    bpo->bpo_phys->bpo_subobjs,
1398			    i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1399			error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1400			if (error != 0) {
1401				(void) printf("ERROR %u while trying to open "
1402				    "subobj id %llu\n",
1403				    error, (u_longlong_t)subobj);
1404				continue;
1405			}
1406			dump_bpobj(&subbpo, "subobj", indent + 1);
1407			bpobj_close(&subbpo);
1408		}
1409	} else {
1410		(void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
1411		    indent * 8, name,
1412		    (u_longlong_t)bpo->bpo_object,
1413		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1414		    bytes);
1415	}
1416
1417	if (dump_opt['d'] < 5)
1418		return;
1419
1420
1421	if (indent == 0) {
1422		(void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1423		(void) printf("\n");
1424	}
1425}
1426
1427static void
1428dump_deadlist(dsl_deadlist_t *dl)
1429{
1430	dsl_deadlist_entry_t *dle;
1431	uint64_t unused;
1432	char bytes[32];
1433	char comp[32];
1434	char uncomp[32];
1435
1436	if (dump_opt['d'] < 3)
1437		return;
1438
1439	if (dl->dl_oldfmt) {
1440		dump_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
1441		return;
1442	}
1443
1444	zdb_nicenum(dl->dl_phys->dl_used, bytes);
1445	zdb_nicenum(dl->dl_phys->dl_comp, comp);
1446	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
1447	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
1448	    bytes, comp, uncomp);
1449
1450	if (dump_opt['d'] < 4)
1451		return;
1452
1453	(void) printf("\n");
1454
1455	/* force the tree to be loaded */
1456	dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
1457
1458	for (dle = avl_first(&dl->dl_tree); dle;
1459	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
1460		if (dump_opt['d'] >= 5) {
1461			char buf[128];
1462			(void) snprintf(buf, sizeof (buf), "mintxg %llu -> ",
1463			    (longlong_t)dle->dle_mintxg,
1464			    (longlong_t)dle->dle_bpobj.bpo_object);
1465
1466			dump_bpobj(&dle->dle_bpobj, buf, 0);
1467		} else {
1468			(void) printf("mintxg %llu -> obj %llu\n",
1469			    (longlong_t)dle->dle_mintxg,
1470			    (longlong_t)dle->dle_bpobj.bpo_object);
1471
1472		}
1473	}
1474}
1475
/*
 * Per-dataset state used while dumping znodes.
 *
 * idx_tree and domain_tree hold the FUID tables loaded by dump_uidgid()
 * and released by fuid_table_destroy(); fuid_table_loaded records whether
 * they are currently populated.  sa_loaded records whether sa_setup() has
 * succeeded for the current objset, and sa_attr_table is the attribute
 * table that sa_setup() filled in.  dump_one_dir() resets this state
 * after each dataset.
 */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
static boolean_t sa_loaded;
sa_attr_type_t *sa_attr_table;
1481
1482static void
1483fuid_table_destroy()
1484{
1485	if (fuid_table_loaded) {
1486		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1487		fuid_table_loaded = B_FALSE;
1488	}
1489}
1490
1491/*
1492 * print uid or gid information.
1493 * For normal POSIX id just the id is printed in decimal format.
1494 * For CIFS files with FUID the fuid is printed in hex followed by
1495 * the domain-rid string.
1496 */
1497static void
1498print_idstr(uint64_t id, const char *id_type)
1499{
1500	if (FUID_INDEX(id)) {
1501		char *domain;
1502
1503		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1504		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1505		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1506	} else {
1507		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1508	}
1509
1510}
1511
1512static void
1513dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1514{
1515	uint32_t uid_idx, gid_idx;
1516
1517	uid_idx = FUID_INDEX(uid);
1518	gid_idx = FUID_INDEX(gid);
1519
1520	/* Load domain table, if not already loaded */
1521	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1522		uint64_t fuid_obj;
1523
1524		/* first find the fuid object.  It lives in the master node */
1525		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1526		    8, 1, &fuid_obj) == 0);
1527		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1528		(void) zfs_fuid_table_load(os, fuid_obj,
1529		    &idx_tree, &domain_tree);
1530		fuid_table_loaded = B_TRUE;
1531	}
1532
1533	print_idstr(uid, "uid");
1534	print_idstr(gid, "gid");
1535}
1536
1537/*ARGSUSED*/
1538static void
1539dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1540{
1541	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1542	sa_handle_t *hdl;
1543	uint64_t xattr, rdev, gen;
1544	uint64_t uid, gid, mode, fsize, parent, links;
1545	uint64_t pflags;
1546	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1547	time_t z_crtime, z_atime, z_mtime, z_ctime;
1548	sa_bulk_attr_t bulk[12];
1549	int idx = 0;
1550	int error;
1551
1552	if (!sa_loaded) {
1553		uint64_t sa_attrs = 0;
1554		uint64_t version;
1555
1556		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1557		    8, 1, &version) == 0);
1558		if (version >= ZPL_VERSION_SA) {
1559			VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1560			    8, 1, &sa_attrs) == 0);
1561		}
1562		if ((error = sa_setup(os, sa_attrs, zfs_attr_table,
1563		    ZPL_END, &sa_attr_table)) != 0) {
1564			(void) printf("sa_setup failed errno %d, can't "
1565			    "display znode contents\n", error);
1566			return;
1567		}
1568		sa_loaded = B_TRUE;
1569	}
1570
1571	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1572		(void) printf("Failed to get handle for SA znode\n");
1573		return;
1574	}
1575
1576	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1577	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1578	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1579	    &links, 8);
1580	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1581	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1582	    &mode, 8);
1583	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1584	    NULL, &parent, 8);
1585	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1586	    &fsize, 8);
1587	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1588	    acctm, 16);
1589	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1590	    modtm, 16);
1591	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1592	    crtm, 16);
1593	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1594	    chgtm, 16);
1595	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
1596	    &pflags, 8);
1597
1598	if (sa_bulk_lookup(hdl, bulk, idx)) {
1599		(void) sa_handle_destroy(hdl);
1600		return;
1601	}
1602
1603	error = zfs_obj_to_path(os, object, path, sizeof (path));
1604	if (error != 0) {
1605		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1606		    (u_longlong_t)object);
1607	}
1608	if (dump_opt['d'] < 3) {
1609		(void) printf("\t%s\n", path);
1610		(void) sa_handle_destroy(hdl);
1611		return;
1612	}
1613
1614	z_crtime = (time_t)crtm[0];
1615	z_atime = (time_t)acctm[0];
1616	z_mtime = (time_t)modtm[0];
1617	z_ctime = (time_t)chgtm[0];
1618
1619	(void) printf("\tpath	%s\n", path);
1620	dump_uidgid(os, uid, gid);
1621	(void) printf("\tatime	%s", ctime(&z_atime));
1622	(void) printf("\tmtime	%s", ctime(&z_mtime));
1623	(void) printf("\tctime	%s", ctime(&z_ctime));
1624	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1625	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
1626	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
1627	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
1628	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
1629	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
1630	(void) printf("\tpflags	%llx\n", (u_longlong_t)pflags);
1631	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1632	    sizeof (uint64_t)) == 0)
1633		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
1634	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1635	    sizeof (uint64_t)) == 0)
1636		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
1637	sa_handle_destroy(hdl);
1638}
1639
/*
 * Object viewer registered for the ACL slots of object_viewer[].
 * The body is empty, so no type-specific output is produced.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1645
/*
 * Object viewer registered for the "DMU objset" slot of object_viewer[].
 * The body is empty, so no type-specific output is produced.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1651
/*
 * Per-type viewer functions.  dump_object() indexes this table with
 * ZDB_OT_TYPE() of an object's type and bonus type, so the entries must
 * stay in DMU object type order.  The table has DMU_OT_NUMTYPES + 1
 * slots; the final slot is the catch-all dump_unknown entry.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
	dump_none,		/* unallocated			*/
	dump_zap,		/* object directory		*/
	dump_uint64,		/* object array			*/
	dump_none,		/* packed nvlist		*/
	dump_packed_nvlist,	/* packed nvlist size		*/
	dump_none,		/* bplist			*/
	dump_none,		/* bplist header		*/
	dump_none,		/* SPA space map header		*/
	dump_none,		/* SPA space map		*/
	dump_none,		/* ZIL intent log		*/
	dump_dnode,		/* DMU dnode			*/
	dump_dmu_objset,	/* DMU objset			*/
	dump_dsl_dir,		/* DSL directory		*/
	dump_zap,		/* DSL directory child map	*/
	dump_zap,		/* DSL dataset snap map		*/
	dump_zap,		/* DSL props			*/
	dump_dsl_dataset,	/* DSL dataset			*/
	dump_znode,		/* ZFS znode			*/
	dump_acl,		/* ZFS V0 ACL			*/
	dump_uint8,		/* ZFS plain file		*/
	dump_zpldir,		/* ZFS directory		*/
	dump_zap,		/* ZFS master node		*/
	dump_zap,		/* ZFS delete queue		*/
	dump_uint8,		/* zvol object			*/
	dump_zap,		/* zvol prop			*/
	dump_uint8,		/* other uint8[]		*/
	dump_uint64,		/* other uint64[]		*/
	dump_zap,		/* other ZAP			*/
	dump_zap,		/* persistent error log		*/
	dump_uint8,		/* SPA history			*/
	dump_history_offsets,	/* SPA history offsets		*/
	dump_zap,		/* Pool properties		*/
	dump_zap,		/* DSL permissions		*/
	dump_acl,		/* ZFS ACL			*/
	dump_uint8,		/* ZFS SYSACL			*/
	dump_none,		/* FUID nvlist			*/
	dump_packed_nvlist,	/* FUID nvlist size		*/
	dump_zap,		/* DSL dataset next clones	*/
	dump_zap,		/* DSL scrub queue		*/
	dump_zap,		/* ZFS user/group used		*/
	dump_zap,		/* ZFS user/group quota		*/
	dump_zap,		/* snapshot refcount tags	*/
	dump_ddt_zap,		/* DDT ZAP object		*/
	dump_zap,		/* DDT statistics		*/
	dump_znode,		/* SA object			*/
	dump_zap,		/* SA Master Node		*/
	dump_sa_attrs,		/* SA attribute registration	*/
	dump_sa_layouts,	/* SA attribute layouts		*/
	dump_zap,		/* DSL scrub translations	*/
	dump_none,		/* fake dedup BP		*/
	dump_zap,		/* deadlist			*/
	dump_none,		/* deadlist hdr			*/
	dump_zap,		/* dsl clones			*/
	dump_none,		/* bpobj subobjs		*/
	dump_unknown,		/* Unknown type, must be last	*/
};
1709
1710static void
1711dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1712{
1713	dmu_buf_t *db = NULL;
1714	dmu_object_info_t doi;
1715	dnode_t *dn;
1716	void *bonus = NULL;
1717	size_t bsize = 0;
1718	char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
1719	char bonus_size[32];
1720	char aux[50];
1721	int error;
1722
1723	if (*print_header) {
1724		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1725		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1726		    "%full", "type");
1727		*print_header = 0;
1728	}
1729
1730	if (object == 0) {
1731		dn = DMU_META_DNODE(os);
1732	} else {
1733		error = dmu_bonus_hold(os, object, FTAG, &db);
1734		if (error)
1735			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1736			    object, error);
1737		bonus = db->db_data;
1738		bsize = db->db_size;
1739		dn = DB_DNODE((dmu_buf_impl_t *)db);
1740	}
1741	dmu_object_info_from_dnode(dn, &doi);
1742
1743	zdb_nicenum(doi.doi_metadata_block_size, iblk);
1744	zdb_nicenum(doi.doi_data_block_size, dblk);
1745	zdb_nicenum(doi.doi_max_offset, lsize);
1746	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
1747	zdb_nicenum(doi.doi_bonus_size, bonus_size);
1748	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1749	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1750	    doi.doi_max_offset);
1751
1752	aux[0] = '\0';
1753
1754	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1755		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1756		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1757	}
1758
1759	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1760		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1761		    ZDB_COMPRESS_NAME(doi.doi_compress));
1762	}
1763
1764	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1765	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1766	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1767
1768	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1769		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1770		    "", "", "", "", "", bonus_size, "bonus",
1771		    ZDB_OT_NAME(doi.doi_bonus_type));
1772	}
1773
1774	if (verbosity >= 4) {
1775		(void) printf("\tdnode flags: %s%s%s\n",
1776		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1777		    "USED_BYTES " : "",
1778		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1779		    "USERUSED_ACCOUNTED " : "",
1780		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
1781		    "SPILL_BLKPTR" : "");
1782		(void) printf("\tdnode maxblkid: %llu\n",
1783		    (longlong_t)dn->dn_phys->dn_maxblkid);
1784
1785		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1786		    bonus, bsize);
1787		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1788		*print_header = 1;
1789	}
1790
1791	if (verbosity >= 5)
1792		dump_indirect(dn);
1793
1794	if (verbosity >= 5) {
1795		/*
1796		 * Report the list of segments that comprise the object.
1797		 */
1798		uint64_t start = 0;
1799		uint64_t end;
1800		uint64_t blkfill = 1;
1801		int minlvl = 1;
1802
1803		if (dn->dn_type == DMU_OT_DNODE) {
1804			minlvl = 0;
1805			blkfill = DNODES_PER_BLOCK;
1806		}
1807
1808		for (;;) {
1809			char segsize[32];
1810			error = dnode_next_offset(dn,
1811			    0, &start, minlvl, blkfill, 0);
1812			if (error)
1813				break;
1814			end = start;
1815			error = dnode_next_offset(dn,
1816			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1817			zdb_nicenum(end - start, segsize);
1818			(void) printf("\t\tsegment [%016llx, %016llx)"
1819			    " size %5s\n", (u_longlong_t)start,
1820			    (u_longlong_t)end, segsize);
1821			if (error)
1822				break;
1823			start = end;
1824		}
1825	}
1826
1827	if (db != NULL)
1828		dmu_buf_rele(db, FTAG);
1829}
1830
/*
 * Printable names for objset types.  dump_dir() indexes this with
 * dds_type after checking that it is below DMU_OST_NUMTYPES.
 */
static char *objset_types[DMU_OST_NUMTYPES] = {
	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1833
1834static void
1835dump_dir(objset_t *os)
1836{
1837	dmu_objset_stats_t dds;
1838	uint64_t object, object_count;
1839	uint64_t refdbytes, usedobjs, scratch;
1840	char numbuf[32];
1841	char blkbuf[BP_SPRINTF_LEN + 20];
1842	char osname[MAXNAMELEN];
1843	char *type = "UNKNOWN";
1844	int verbosity = dump_opt['d'];
1845	int print_header = 1;
1846	int i, error;
1847
1848	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
1849	dmu_objset_fast_stat(os, &dds);
1850	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
1851
1852	if (dds.dds_type < DMU_OST_NUMTYPES)
1853		type = objset_types[dds.dds_type];
1854
1855	if (dds.dds_type == DMU_OST_META) {
1856		dds.dds_creation_txg = TXG_INITIAL;
1857		usedobjs = BP_GET_FILL(os->os_rootbp);
1858		refdbytes = os->os_spa->spa_dsl_pool->
1859		    dp_mos_dir->dd_phys->dd_used_bytes;
1860	} else {
1861		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1862	}
1863
1864	ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
1865
1866	zdb_nicenum(refdbytes, numbuf);
1867
1868	if (verbosity >= 4) {
1869		(void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
1870		(void) snprintf_blkptr(blkbuf + strlen(blkbuf),
1871		    sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
1872	} else {
1873		blkbuf[0] = '\0';
1874	}
1875
1876	dmu_objset_name(os, osname);
1877
1878	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1879	    "%s, %llu objects%s\n",
1880	    osname, type, (u_longlong_t)dmu_objset_id(os),
1881	    (u_longlong_t)dds.dds_creation_txg,
1882	    numbuf, (u_longlong_t)usedobjs, blkbuf);
1883
1884	if (zopt_objects != 0) {
1885		for (i = 0; i < zopt_objects; i++)
1886			dump_object(os, zopt_object[i], verbosity,
1887			    &print_header);
1888		(void) printf("\n");
1889		return;
1890	}
1891
1892	if (dump_opt['i'] != 0 || verbosity >= 2)
1893		dump_intent_log(dmu_objset_zil(os));
1894
1895	if (dmu_objset_ds(os) != NULL)
1896		dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
1897
1898	if (verbosity < 2)
1899		return;
1900
1901	if (BP_IS_HOLE(os->os_rootbp))
1902		return;
1903
1904	dump_object(os, 0, verbosity, &print_header);
1905	object_count = 0;
1906	if (DMU_USERUSED_DNODE(os) != NULL &&
1907	    DMU_USERUSED_DNODE(os)->dn_type != 0) {
1908		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1909		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1910	}
1911
1912	object = 0;
1913	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1914		dump_object(os, object, verbosity, &print_header);
1915		object_count++;
1916	}
1917
1918	ASSERT3U(object_count, ==, usedobjs);
1919
1920	(void) printf("\n");
1921
1922	if (error != ESRCH) {
1923		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1924		abort();
1925	}
1926}
1927
1928static void
1929dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
1930{
1931	time_t timestamp = ub->ub_timestamp;
1932
1933	(void) printf(header ? header : "");
1934	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1935	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1936	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1937	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1938	(void) printf("\ttimestamp = %llu UTC = %s",
1939	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1940	if (dump_opt['u'] >= 3) {
1941		char blkbuf[BP_SPRINTF_LEN];
1942		snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
1943		(void) printf("\trootbp = %s\n", blkbuf);
1944	}
1945	(void) printf(footer ? footer : "");
1946}
1947
1948static void
1949dump_config(spa_t *spa)
1950{
1951	dmu_buf_t *db;
1952	size_t nvsize = 0;
1953	int error = 0;
1954
1955
1956	error = dmu_bonus_hold(spa->spa_meta_objset,
1957	    spa->spa_config_object, FTAG, &db);
1958
1959	if (error == 0) {
1960		nvsize = *(uint64_t *)db->db_data;
1961		dmu_buf_rele(db, FTAG);
1962
1963		(void) printf("\nMOS Configuration:\n");
1964		dump_packed_nvlist(spa->spa_meta_objset,
1965		    spa->spa_config_object, (void *)&nvsize, 1);
1966	} else {
1967		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1968		    (u_longlong_t)spa->spa_config_object, error);
1969	}
1970}
1971
1972static void
1973dump_cachefile(const char *cachefile)
1974{
1975	int fd;
1976	struct stat64 statbuf;
1977	char *buf;
1978	nvlist_t *config;
1979
1980	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1981		(void) printf("cannot open '%s': %s\n", cachefile,
1982		    strerror(errno));
1983		exit(1);
1984	}
1985
1986	if (fstat64(fd, &statbuf) != 0) {
1987		(void) printf("failed to stat '%s': %s\n", cachefile,
1988		    strerror(errno));
1989		exit(1);
1990	}
1991
1992	if ((buf = malloc(statbuf.st_size)) == NULL) {
1993		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
1994		    (u_longlong_t)statbuf.st_size);
1995		exit(1);
1996	}
1997
1998	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1999		(void) fprintf(stderr, "failed to read %llu bytes\n",
2000		    (u_longlong_t)statbuf.st_size);
2001		exit(1);
2002	}
2003
2004	(void) close(fd);
2005
2006	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
2007		(void) fprintf(stderr, "failed to unpack nvlist\n");
2008		exit(1);
2009	}
2010
2011	free(buf);
2012
2013	dump_nvlist(config, 0);
2014
2015	nvlist_free(config);
2016}
2017
2018#define	ZDB_MAX_UB_HEADER_SIZE 32
2019
2020static void
2021dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
2022{
2023	vdev_t vd;
2024	vdev_t *vdp = &vd;
2025	char header[ZDB_MAX_UB_HEADER_SIZE];
2026
2027	vd.vdev_ashift = ashift;
2028	vdp->vdev_top = vdp;
2029
2030	for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
2031		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
2032		uberblock_t *ub = (void *)((char *)lbl + uoff);
2033
2034		if (uberblock_verify(ub))
2035			continue;
2036		(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
2037		    "Uberblock[%d]\n", i);
2038		dump_uberblock(ub, header, "");
2039	}
2040}
2041
2042static void
2043dump_label(const char *dev)
2044{
2045	int fd;
2046	vdev_label_t label;
2047	char *path, *buf = label.vl_vdev_phys.vp_nvlist;
2048	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
2049	struct stat64 statbuf;
2050	uint64_t psize, ashift;
2051	int len = strlen(dev) + 1;
2052
2053	if (strncmp(dev, "/dev/dsk/", 9) == 0) {
2054		len++;
2055		path = malloc(len);
2056		(void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
2057	} else {
2058		path = strdup(dev);
2059	}
2060
2061	if ((fd = open64(path, O_RDONLY)) < 0) {
2062		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
2063		free(path);
2064		exit(1);
2065	}
2066
2067	if (fstat64(fd, &statbuf) != 0) {
2068		(void) printf("failed to stat '%s': %s\n", path,
2069		    strerror(errno));
2070		free(path);
2071		(void) close(fd);
2072		exit(1);
2073	}
2074
2075	if (S_ISBLK(statbuf.st_mode)) {
2076		(void) printf("cannot use '%s': character device required\n",
2077		    path);
2078		free(path);
2079		(void) close(fd);
2080		exit(1);
2081	}
2082
2083	psize = statbuf.st_size;
2084	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
2085
2086	for (int l = 0; l < VDEV_LABELS; l++) {
2087		nvlist_t *config = NULL;
2088
2089		(void) printf("--------------------------------------------\n");
2090		(void) printf("LABEL %d\n", l);
2091		(void) printf("--------------------------------------------\n");
2092
2093		if (pread64(fd, &label, sizeof (label),
2094		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
2095			(void) printf("failed to read label %d\n", l);
2096			continue;
2097		}
2098
2099		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
2100			(void) printf("failed to unpack label %d\n", l);
2101			ashift = SPA_MINBLOCKSHIFT;
2102		} else {
2103			nvlist_t *vdev_tree = NULL;
2104
2105			dump_nvlist(config, 4);
2106			if ((nvlist_lookup_nvlist(config,
2107			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
2108			    (nvlist_lookup_uint64(vdev_tree,
2109			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
2110				ashift = SPA_MINBLOCKSHIFT;
2111			nvlist_free(config);
2112		}
2113		if (dump_opt['u'])
2114			dump_label_uberblocks(&label, ashift);
2115	}
2116
2117	free(path);
2118	(void) close(fd);
2119}
2120
2121/*ARGSUSED*/
2122static int
2123dump_one_dir(const char *dsname, void *arg)
2124{
2125	int error;
2126	objset_t *os;
2127
2128	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
2129	if (error) {
2130		(void) printf("Could not open %s, error %d\n", dsname, error);
2131		return (0);
2132	}
2133	dump_dir(os);
2134	dmu_objset_disown(os, FTAG);
2135	fuid_table_destroy();
2136	sa_loaded = B_FALSE;
2137	return (0);
2138}
2139
/*
 * Block statistics.
 *
 * One zdb_blkstats_t accumulates the totals for every block pointer counted
 * into a given (level, type) bucket by zdb_count_block().
 */
/* One histogram bucket per SPA_MINBLOCKSIZE multiple, plus one for zero. */
#define	PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1)
typedef struct zdb_blkstats {
	uint64_t zb_asize;		/* sum of BP_GET_ASIZE() */
	uint64_t zb_lsize;		/* sum of BP_GET_LSIZE() */
	uint64_t zb_psize;		/* sum of BP_GET_PSIZE() */
	uint64_t zb_count;		/* number of block pointers counted */
	uint64_t zb_gangs;		/* sum of BP_COUNT_GANG() */
	uint64_t zb_ditto_samevdev;	/* bps with >1 DVA on the same vdev */
	/* indexed by BP_GET_PSIZE() >> SPA_MINBLOCKSHIFT */
	uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
} zdb_blkstats_t;
2153
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 *
 * These extend the DMU object type space so they can be used as the second
 * index of zdb_cb_t's zcb_type[][] table.  ZDB_OT_OTHER collects blocks
 * whose type has DMU_OT_NEWTYPE set; ZDB_OT_TOTAL is the all-types total.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_OTHER	(DMU_OT_NUMTYPES + 2)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 3)

/*
 * Display names for the extended types, indexed by (type - DMU_OT_NUMTYPES);
 * the order must match the ZDB_OT_* definitions above.
 */
static char *zdb_ot_extname[] = {
	"deferred free",
	"dedup ditto",
	"other",
	"Total",
};
2168
/* Level index used in zcb_type[][] for the total across all levels. */
#define	ZB_TOTAL	DN_MAX_LEVELS

/*
 * State shared by the block traversal callbacks during dump_block_stats().
 */
typedef struct zdb_cb {
	/* statistics indexed by [block level or ZB_TOTAL][type or ZDB_OT_*] */
	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
	/* asize attributed to extra dedup references (refcnt - 1) */
	uint64_t	zcb_dedup_asize;
	/* number of non-ditto DDT phys entries visited */
	uint64_t	zcb_dedup_blocks;
	/* embedded bp counts, indexed by BPE_GET_ETYPE() */
	uint64_t	zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
	/* embedded bp counts, indexed by [BPE_GET_ETYPE()][BPE_GET_PSIZE()] */
	uint64_t	zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
	    [BPE_PAYLOAD_SIZE];
	/* gethrtime() when the traversal started */
	uint64_t	zcb_start;
	/* gethrtime() of the last progress line */
	uint64_t	zcb_lastprint;
	/* allocated size of the normal class, used for progress estimates */
	uint64_t	zcb_totalasize;
	/* read error counts, indexed by error number */
	uint64_t	zcb_errors[256];
	/* reset to 0 by zdb_blkptr_cb(); other uses are not visible here */
	int		zcb_readfails;
	/* nonzero once any read error or traversal error was seen */
	int		zcb_haderrors;
	/* pool being traversed */
	spa_t		*zcb_spa;
} zdb_cb_t;
2186
2187static void
2188zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
2189    dmu_object_type_t type)
2190{
2191	uint64_t refcnt = 0;
2192
2193	ASSERT(type < ZDB_OT_TOTAL);
2194
2195	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
2196		return;
2197
2198	for (int i = 0; i < 4; i++) {
2199		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
2200		int t = (i & 1) ? type : ZDB_OT_TOTAL;
2201		int equal;
2202		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
2203
2204		zb->zb_asize += BP_GET_ASIZE(bp);
2205		zb->zb_lsize += BP_GET_LSIZE(bp);
2206		zb->zb_psize += BP_GET_PSIZE(bp);
2207		zb->zb_count++;
2208		zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
2209
2210		zb->zb_gangs += BP_COUNT_GANG(bp);
2211
2212		switch (BP_GET_NDVAS(bp)) {
2213		case 2:
2214			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2215			    DVA_GET_VDEV(&bp->blk_dva[1]))
2216				zb->zb_ditto_samevdev++;
2217			break;
2218		case 3:
2219			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2220			    DVA_GET_VDEV(&bp->blk_dva[1])) +
2221			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2222			    DVA_GET_VDEV(&bp->blk_dva[2])) +
2223			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
2224			    DVA_GET_VDEV(&bp->blk_dva[2]));
2225			if (equal != 0)
2226				zb->zb_ditto_samevdev++;
2227			break;
2228		}
2229
2230	}
2231
2232	if (BP_IS_EMBEDDED(bp)) {
2233		zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
2234		zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
2235		    [BPE_GET_PSIZE(bp)]++;
2236		return;
2237	}
2238
2239	if (dump_opt['L'])
2240		return;
2241
2242	if (BP_GET_DEDUP(bp)) {
2243		ddt_t *ddt;
2244		ddt_entry_t *dde;
2245
2246		ddt = ddt_select(zcb->zcb_spa, bp);
2247		ddt_enter(ddt);
2248		dde = ddt_lookup(ddt, bp, B_FALSE);
2249
2250		if (dde == NULL) {
2251			refcnt = 0;
2252		} else {
2253			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
2254			ddt_phys_decref(ddp);
2255			refcnt = ddp->ddp_refcnt;
2256			if (ddt_phys_total_refcnt(dde) == 0)
2257				ddt_remove(ddt, dde);
2258		}
2259		ddt_exit(ddt);
2260	}
2261
2262	VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
2263	    refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
2264	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
2265}
2266
2267static void
2268zdb_blkptr_done(zio_t *zio)
2269{
2270	spa_t *spa = zio->io_spa;
2271	blkptr_t *bp = zio->io_bp;
2272	int ioerr = zio->io_error;
2273	zdb_cb_t *zcb = zio->io_private;
2274	zbookmark_phys_t *zb = &zio->io_bookmark;
2275
2276	zio_data_buf_free(zio->io_data, zio->io_size);
2277
2278	mutex_enter(&spa->spa_scrub_lock);
2279	spa->spa_scrub_inflight--;
2280	cv_broadcast(&spa->spa_scrub_io_cv);
2281
2282	if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
2283		char blkbuf[BP_SPRINTF_LEN];
2284
2285		zcb->zcb_haderrors = 1;
2286		zcb->zcb_errors[ioerr]++;
2287
2288		if (dump_opt['b'] >= 2)
2289			snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2290		else
2291			blkbuf[0] = '\0';
2292
2293		(void) printf("zdb_blkptr_cb: "
2294		    "Got error %d reading "
2295		    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2296		    ioerr,
2297		    (u_longlong_t)zb->zb_objset,
2298		    (u_longlong_t)zb->zb_object,
2299		    (u_longlong_t)zb->zb_level,
2300		    (u_longlong_t)zb->zb_blkid,
2301		    blkbuf);
2302	}
2303	mutex_exit(&spa->spa_scrub_lock);
2304}
2305
2306static int
2307zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2308    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
2309{
2310	zdb_cb_t *zcb = arg;
2311	dmu_object_type_t type;
2312	boolean_t is_metadata;
2313
2314	if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
2315		char blkbuf[BP_SPRINTF_LEN];
2316		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2317		(void) printf("objset %llu object %llu "
2318		    "level %lld offset 0x%llx %s\n",
2319		    (u_longlong_t)zb->zb_objset,
2320		    (u_longlong_t)zb->zb_object,
2321		    (longlong_t)zb->zb_level,
2322		    (u_longlong_t)blkid2offset(dnp, bp, zb),
2323		    blkbuf);
2324	}
2325
2326	if (BP_IS_HOLE(bp))
2327		return (0);
2328
2329	type = BP_GET_TYPE(bp);
2330
2331	zdb_count_block(zcb, zilog, bp,
2332	    (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
2333
2334	is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
2335
2336	if (!BP_IS_EMBEDDED(bp) &&
2337	    (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
2338		size_t size = BP_GET_PSIZE(bp);
2339		void *data = zio_data_buf_alloc(size);
2340		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
2341
2342		/* If it's an intent log block, failure is expected. */
2343		if (zb->zb_level == ZB_ZIL_LEVEL)
2344			flags |= ZIO_FLAG_SPECULATIVE;
2345
2346		mutex_enter(&spa->spa_scrub_lock);
2347		while (spa->spa_scrub_inflight > max_inflight)
2348			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
2349		spa->spa_scrub_inflight++;
2350		mutex_exit(&spa->spa_scrub_lock);
2351
2352		zio_nowait(zio_read(NULL, spa, bp, data, size,
2353		    zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
2354	}
2355
2356	zcb->zcb_readfails = 0;
2357
2358	if (dump_opt['b'] < 5 &&
2359	    gethrtime() > zcb->zcb_lastprint + NANOSEC) {
2360		uint64_t now = gethrtime();
2361		char buf[10];
2362		uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
2363		int kb_per_sec =
2364		    1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
2365		int sec_remaining =
2366		    (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
2367
2368		zfs_nicenum(bytes, buf, sizeof (buf));
2369		(void) fprintf(stderr,
2370		    "\r%5s completed (%4dMB/s) "
2371		    "estimated time remaining: %uhr %02umin %02usec        ",
2372		    buf, kb_per_sec / 1024,
2373		    sec_remaining / 60 / 60,
2374		    sec_remaining / 60 % 60,
2375		    sec_remaining % 60);
2376
2377		zcb->zcb_lastprint = now;
2378	}
2379
2380	return (0);
2381}
2382
2383static void
2384zdb_leak(void *arg, uint64_t start, uint64_t size)
2385{
2386	vdev_t *vd = arg;
2387
2388	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
2389	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
2390}
2391
/*
 * Metaslab ops installed by zdb_leak_init() on every metaslab whose space
 * map it loads.  The alloc entry point is deliberately left NULL.
 * NOTE(review): presumably nothing allocates from these metaslabs while
 * their trees hold allocated segments -- confirm against metaslab.c.
 */
static metaslab_ops_t zdb_metaslab_ops = {
	NULL	/* alloc */
};
2395
2396static void
2397zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2398{
2399	ddt_bookmark_t ddb = { 0 };
2400	ddt_entry_t dde;
2401	int error;
2402
2403	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2404		blkptr_t blk;
2405		ddt_phys_t *ddp = dde.dde_phys;
2406
2407		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2408			return;
2409
2410		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2411
2412		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2413			if (ddp->ddp_phys_birth == 0)
2414				continue;
2415			ddt_bp_create(ddb.ddb_checksum,
2416			    &dde.dde_key, ddp, &blk);
2417			if (p == DDT_PHYS_DITTO) {
2418				zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
2419			} else {
2420				zcb->zcb_dedup_asize +=
2421				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2422				zcb->zcb_dedup_blocks++;
2423			}
2424		}
2425		if (!dump_opt['L']) {
2426			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2427			ddt_enter(ddt);
2428			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2429			ddt_exit(ddt);
2430		}
2431	}
2432
2433	ASSERT(error == ENOENT);
2434}
2435
2436static void
2437zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
2438{
2439	zcb->zcb_spa = spa;
2440
2441	if (!dump_opt['L']) {
2442		vdev_t *rvd = spa->spa_root_vdev;
2443		for (uint64_t c = 0; c < rvd->vdev_children; c++) {
2444			vdev_t *vd = rvd->vdev_child[c];
2445			for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
2446				metaslab_t *msp = vd->vdev_ms[m];
2447				mutex_enter(&msp->ms_lock);
2448				metaslab_unload(msp);
2449
2450				/*
2451				 * For leak detection, we overload the metaslab
2452				 * ms_tree to contain allocated segments
2453				 * instead of free segments. As a result,
2454				 * we can't use the normal metaslab_load/unload
2455				 * interfaces.
2456				 */
2457				if (msp->ms_sm != NULL) {
2458					(void) fprintf(stderr,
2459					    "\rloading space map for "
2460					    "vdev %llu of %llu, "
2461					    "metaslab %llu of %llu ...",
2462					    (longlong_t)c,
2463					    (longlong_t)rvd->vdev_children,
2464					    (longlong_t)m,
2465					    (longlong_t)vd->vdev_ms_count);
2466
2467					msp->ms_ops = &zdb_metaslab_ops;
2468					VERIFY0(space_map_load(msp->ms_sm,
2469					    msp->ms_tree, SM_ALLOC));
2470					msp->ms_loaded = B_TRUE;
2471				}
2472				mutex_exit(&msp->ms_lock);
2473			}
2474		}
2475		(void) fprintf(stderr, "\n");
2476	}
2477
2478	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2479
2480	zdb_ddt_leak_init(spa, zcb);
2481
2482	spa_config_exit(spa, SCL_CONFIG, FTAG);
2483}
2484
2485static void
2486zdb_leak_fini(spa_t *spa)
2487{
2488	if (!dump_opt['L']) {
2489		vdev_t *rvd = spa->spa_root_vdev;
2490		for (int c = 0; c < rvd->vdev_children; c++) {
2491			vdev_t *vd = rvd->vdev_child[c];
2492			for (int m = 0; m < vd->vdev_ms_count; m++) {
2493				metaslab_t *msp = vd->vdev_ms[m];
2494				mutex_enter(&msp->ms_lock);
2495
2496				/*
2497				 * The ms_tree has been overloaded to
2498				 * contain allocated segments. Now that we
2499				 * finished traversing all blocks, any
2500				 * block that remains in the ms_tree
2501				 * represents an allocated block that we
2502				 * did not claim during the traversal.
2503				 * Claimed blocks would have been removed
2504				 * from the ms_tree.
2505				 */
2506				range_tree_vacate(msp->ms_tree, zdb_leak, vd);
2507				msp->ms_loaded = B_FALSE;
2508
2509				mutex_exit(&msp->ms_lock);
2510			}
2511		}
2512	}
2513}
2514
2515/* ARGSUSED */
2516static int
2517count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2518{
2519	zdb_cb_t *zcb = arg;
2520
2521	if (dump_opt['b'] >= 5) {
2522		char blkbuf[BP_SPRINTF_LEN];
2523		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2524		(void) printf("[%s] %s\n",
2525		    "deferred free", blkbuf);
2526	}
2527	zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
2528	return (0);
2529}
2530
2531static int
2532dump_block_stats(spa_t *spa)
2533{
2534	zdb_cb_t zcb = { 0 };
2535	zdb_blkstats_t *zb, *tzb;
2536	uint64_t norm_alloc, norm_space, total_alloc, total_found;
2537	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
2538	boolean_t leaks = B_FALSE;
2539
2540	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
2541	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
2542	    (dump_opt['c'] == 1) ? "metadata " : "",
2543	    dump_opt['c'] ? "checksums " : "",
2544	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
2545	    !dump_opt['L'] ? "nothing leaked " : "");
2546
2547	/*
2548	 * Load all space maps as SM_ALLOC maps, then traverse the pool
2549	 * claiming each block we discover.  If the pool is perfectly
2550	 * consistent, the space maps will be empty when we're done.
2551	 * Anything left over is a leak; any block we can't claim (because
2552	 * it's not part of any space map) is a double allocation,
2553	 * reference to a freed block, or an unclaimed log block.
2554	 */
2555	zdb_leak_init(spa, &zcb);
2556
2557	/*
2558	 * If there's a deferred-free bplist, process that first.
2559	 */
2560	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
2561	    count_block_cb, &zcb, NULL);
2562	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2563		(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
2564		    count_block_cb, &zcb, NULL);
2565	}
2566	if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
2567		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
2568		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
2569		    &zcb, NULL));
2570	}
2571
2572	if (dump_opt['c'] > 1)
2573		flags |= TRAVERSE_PREFETCH_DATA;
2574
2575	zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
2576	zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
2577	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2578
2579	/*
2580	 * If we've traversed the data blocks then we need to wait for those
2581	 * I/Os to complete. We leverage "The Godfather" zio to wait on
2582	 * all async I/Os to complete.
2583	 */
2584	if (dump_opt['c']) {
2585		for (int i = 0; i < max_ncpus; i++) {
2586			(void) zio_wait(spa->spa_async_zio_root[i]);
2587			spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
2588			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
2589			    ZIO_FLAG_GODFATHER);
2590		}
2591	}
2592
2593	if (zcb.zcb_haderrors) {
2594		(void) printf("\nError counts:\n\n");
2595		(void) printf("\t%5s  %s\n", "errno", "count");
2596		for (int e = 0; e < 256; e++) {
2597			if (zcb.zcb_errors[e] != 0) {
2598				(void) printf("\t%5d  %llu\n",
2599				    e, (u_longlong_t)zcb.zcb_errors[e]);
2600			}
2601		}
2602	}
2603
2604	/*
2605	 * Report any leaked segments.
2606	 */
2607	zdb_leak_fini(spa);
2608
2609	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2610
2611	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2612	norm_space = metaslab_class_get_space(spa_normal_class(spa));
2613
2614	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2615	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2616
2617	if (total_found == total_alloc) {
2618		if (!dump_opt['L'])
2619			(void) printf("\n\tNo leaks (block sum matches space"
2620			    " maps exactly)\n");
2621	} else {
2622		(void) printf("block traversal size %llu != alloc %llu "
2623		    "(%s %lld)\n",
2624		    (u_longlong_t)total_found,
2625		    (u_longlong_t)total_alloc,
2626		    (dump_opt['L']) ? "unreachable" : "leaked",
2627		    (longlong_t)(total_alloc - total_found));
2628		leaks = B_TRUE;
2629	}
2630
2631	if (tzb->zb_count == 0)
2632		return (2);
2633
2634	(void) printf("\n");
2635	(void) printf("\tbp count:      %10llu\n",
2636	    (u_longlong_t)tzb->zb_count);
2637	(void) printf("\tganged count:  %10llu\n",
2638	    (longlong_t)tzb->zb_gangs);
2639	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
2640	    (u_longlong_t)tzb->zb_lsize,
2641	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
2642	(void) printf("\tbp physical:   %10llu      avg:"
2643	    " %6llu     compression: %6.2f\n",
2644	    (u_longlong_t)tzb->zb_psize,
2645	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
2646	    (double)tzb->zb_lsize / tzb->zb_psize);
2647	(void) printf("\tbp allocated:  %10llu      avg:"
2648	    " %6llu     compression: %6.2f\n",
2649	    (u_longlong_t)tzb->zb_asize,
2650	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
2651	    (double)tzb->zb_lsize / tzb->zb_asize);
2652	(void) printf("\tbp deduped:    %10llu    ref>1:"
2653	    " %6llu   deduplication: %6.2f\n",
2654	    (u_longlong_t)zcb.zcb_dedup_asize,
2655	    (u_longlong_t)zcb.zcb_dedup_blocks,
2656	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
2657	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
2658	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
2659
2660	for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
2661		if (zcb.zcb_embedded_blocks[i] == 0)
2662			continue;
2663		(void) printf("\n");
2664		(void) printf("\tadditional, non-pointer bps of type %u: "
2665		    "%10llu\n",
2666		    i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
2667
2668		if (dump_opt['b'] >= 3) {
2669			(void) printf("\t number of (compressed) bytes:  "
2670			    "number of bps\n");
2671			dump_histogram(zcb.zcb_embedded_histogram[i],
2672			    sizeof (zcb.zcb_embedded_histogram[i]) /
2673			    sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
2674		}
2675	}
2676
2677	if (tzb->zb_ditto_samevdev != 0) {
2678		(void) printf("\tDittoed blocks on same vdev: %llu\n",
2679		    (longlong_t)tzb->zb_ditto_samevdev);
2680	}
2681
2682	if (dump_opt['b'] >= 2) {
2683		int l, t, level;
2684		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2685		    "\t  avg\t comp\t%%Total\tType\n");
2686
2687		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
2688			char csize[32], lsize[32], psize[32], asize[32];
2689			char avg[32], gang[32];
2690			char *typename;
2691
2692			if (t < DMU_OT_NUMTYPES)
2693				typename = dmu_ot[t].ot_name;
2694			else
2695				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
2696
2697			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
2698				(void) printf("%6s\t%5s\t%5s\t%5s"
2699				    "\t%5s\t%5s\t%6s\t%s\n",
2700				    "-",
2701				    "-",
2702				    "-",
2703				    "-",
2704				    "-",
2705				    "-",
2706				    "-",
2707				    typename);
2708				continue;
2709			}
2710
2711			for (l = ZB_TOTAL - 1; l >= -1; l--) {
2712				level = (l == -1 ? ZB_TOTAL : l);
2713				zb = &zcb.zcb_type[level][t];
2714
2715				if (zb->zb_asize == 0)
2716					continue;
2717
2718				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
2719					continue;
2720
2721				if (level == 0 && zb->zb_asize ==
2722				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
2723					continue;
2724
2725				zdb_nicenum(zb->zb_count, csize);
2726				zdb_nicenum(zb->zb_lsize, lsize);
2727				zdb_nicenum(zb->zb_psize, psize);
2728				zdb_nicenum(zb->zb_asize, asize);
2729				zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2730				zdb_nicenum(zb->zb_gangs, gang);
2731
2732				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
2733				    "\t%5.2f\t%6.2f\t",
2734				    csize, lsize, psize, asize, avg,
2735				    (double)zb->zb_lsize / zb->zb_psize,
2736				    100.0 * zb->zb_asize / tzb->zb_asize);
2737
2738				if (level == ZB_TOTAL)
2739					(void) printf("%s\n", typename);
2740				else
2741					(void) printf("    L%d %s\n",
2742					    level, typename);
2743
2744				if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
2745					(void) printf("\t number of ganged "
2746					    "blocks: %s\n", gang);
2747				}
2748
2749				if (dump_opt['b'] >= 4) {
2750					(void) printf("psize "
2751					    "(in 512-byte sectors): "
2752					    "number of blocks\n");
2753					dump_histogram(zb->zb_psize_histogram,
2754					    PSIZE_HISTO_SIZE, 0);
2755				}
2756			}
2757		}
2758	}
2759
2760	(void) printf("\n");
2761
2762	if (leaks)
2763		return (2);
2764
2765	if (zcb.zcb_haderrors)
2766		return (3);
2767
2768	return (0);
2769}
2770
/*
 * One node of the AVL tree built by dump_simulated_ddt(): the running
 * totals for all blocks that share a dedup key.  Filled in by
 * zdb_ddt_add_cb().
 */
typedef struct zdb_ddt_entry {
	ddt_key_t	zdde_key;	/* key from ddt_key_fill() */
	uint64_t	zdde_ref_blocks;	/* number of blocks seen */
	uint64_t	zdde_ref_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zdde_ref_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zdde_ref_dsize;	/* sum of bp_get_dsize_sync() */
	avl_node_t	zdde_node;	/* AVL linkage */
} zdb_ddt_entry_t;
2779
2780/* ARGSUSED */
2781static int
2782zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2783    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
2784{
2785	avl_tree_t *t = arg;
2786	avl_index_t where;
2787	zdb_ddt_entry_t *zdde, zdde_search;
2788
2789	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
2790		return (0);
2791
2792	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2793		(void) printf("traversing objset %llu, %llu objects, "
2794		    "%lu blocks so far\n",
2795		    (u_longlong_t)zb->zb_objset,
2796		    (u_longlong_t)BP_GET_FILL(bp),
2797		    avl_numnodes(t));
2798	}
2799
2800	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2801	    BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
2802		return (0);
2803
2804	ddt_key_fill(&zdde_search.zdde_key, bp);
2805
2806	zdde = avl_find(t, &zdde_search, &where);
2807
2808	if (zdde == NULL) {
2809		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2810		zdde->zdde_key = zdde_search.zdde_key;
2811		avl_insert(t, zdde, where);
2812	}
2813
2814	zdde->zdde_ref_blocks += 1;
2815	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2816	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2817	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2818
2819	return (0);
2820}
2821
2822static void
2823dump_simulated_ddt(spa_t *spa)
2824{
2825	avl_tree_t t;
2826	void *cookie = NULL;
2827	zdb_ddt_entry_t *zdde;
2828	ddt_histogram_t ddh_total = { 0 };
2829	ddt_stat_t dds_total = { 0 };
2830
2831	avl_create(&t, ddt_entry_compare,
2832	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2833
2834	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2835
2836	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2837	    zdb_ddt_add_cb, &t);
2838
2839	spa_config_exit(spa, SCL_CONFIG, FTAG);
2840
2841	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2842		ddt_stat_t dds;
2843		uint64_t refcnt = zdde->zdde_ref_blocks;
2844		ASSERT(refcnt != 0);
2845
2846		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2847		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2848		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2849		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2850
2851		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2852		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2853		dds.dds_ref_psize = zdde->zdde_ref_psize;
2854		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2855
2856		ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
2857		    &dds, 0);
2858
2859		umem_free(zdde, sizeof (*zdde));
2860	}
2861
2862	avl_destroy(&t);
2863
2864	ddt_histogram_stat(&dds_total, &ddh_total);
2865
2866	(void) printf("Simulated DDT histogram:\n");
2867
2868	zpool_dump_ddt(&dds_total, &ddh_total);
2869
2870	dump_dedup_ratio(&dds_total);
2871}
2872
2873static void
2874dump_zpool(spa_t *spa)
2875{
2876	dsl_pool_t *dp = spa_get_dsl(spa);
2877	int rc = 0;
2878
2879	if (dump_opt['S']) {
2880		dump_simulated_ddt(spa);
2881		return;
2882	}
2883
2884	if (!dump_opt['e'] && dump_opt['C'] > 1) {
2885		(void) printf("\nCached configuration:\n");
2886		dump_nvlist(spa->spa_config, 8);
2887	}
2888
2889	if (dump_opt['C'])
2890		dump_config(spa);
2891
2892	if (dump_opt['u'])
2893		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
2894
2895	if (dump_opt['D'])
2896		dump_all_ddts(spa);
2897
2898	if (dump_opt['d'] > 2 || dump_opt['m'])
2899		dump_metaslabs(spa);
2900	if (dump_opt['M'])
2901		dump_metaslab_groups(spa);
2902
2903	if (dump_opt['d'] || dump_opt['i']) {
2904		dump_dir(dp->dp_meta_objset);
2905		if (dump_opt['d'] >= 3) {
2906			dump_bpobj(&spa->spa_deferred_bpobj,
2907			    "Deferred frees", 0);
2908			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2909				dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
2910				    "Pool snapshot frees", 0);
2911			}
2912
2913			if (spa_feature_is_active(spa,
2914			    SPA_FEATURE_ASYNC_DESTROY)) {
2915				dump_bptree(spa->spa_meta_objset,
2916				    spa->spa_dsl_pool->dp_bptree_obj,
2917				    "Pool dataset frees");
2918			}
2919			dump_dtl(spa->spa_root_vdev, 0);
2920		}
2921		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
2922		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2923	}
2924	if (dump_opt['b'] || dump_opt['c'])
2925		rc = dump_block_stats(spa);
2926
2927	if (rc == 0)
2928		rc = verify_spacemap_refcounts(spa);
2929
2930	if (dump_opt['s'])
2931		show_pool_stats(spa);
2932
2933	if (dump_opt['h'])
2934		dump_history(spa);
2935
2936	if (rc != 0)
2937		exit(rc);
2938}
2939
/*
 * Option bits for zdb_read_block() and the block dumping helpers.
 * NOTE(review): the flag character that selects each bit is set up outside
 * this section; the names suggest the flags listed in the comment above
 * zdb_read_block() -- confirm against the flagbits[] initialization.
 */
#define	ZDB_FLAG_CHECKSUM	0x0001
#define	ZDB_FLAG_DECOMPRESS	0x0002
#define	ZDB_FLAG_BSWAP		0x0004
#define	ZDB_FLAG_GBH		0x0008
#define	ZDB_FLAG_INDIRECT	0x0010
#define	ZDB_FLAG_PHYS		0x0020
#define	ZDB_FLAG_RAW		0x0040
#define	ZDB_FLAG_PRINT_BLKPTR	0x0080

/*
 * Maps a flag character (used as an unsigned index) to its ZDB_FLAG_* bit;
 * zdb_read_block() treats a zero entry as an invalid flag.
 */
int flagbits[256];
2950
2951static void
2952zdb_print_blkptr(blkptr_t *bp, int flags)
2953{
2954	char blkbuf[BP_SPRINTF_LEN];
2955
2956	if (flags & ZDB_FLAG_BSWAP)
2957		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2958
2959	snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2960	(void) printf("%s\n", blkbuf);
2961}
2962
2963static void
2964zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2965{
2966	int i;
2967
2968	for (i = 0; i < nbps; i++)
2969		zdb_print_blkptr(&bp[i], flags);
2970}
2971
2972static void
2973zdb_dump_gbh(void *buf, int flags)
2974{
2975	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2976}
2977
2978static void
2979zdb_dump_block_raw(void *buf, uint64_t size, int flags)
2980{
2981	if (flags & ZDB_FLAG_BSWAP)
2982		byteswap_uint64_array(buf, size);
2983	(void) write(1, buf, size);
2984}
2985
2986static void
2987zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2988{
2989	uint64_t *d = (uint64_t *)buf;
2990	int nwords = size / sizeof (uint64_t);
2991	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2992	int i, j;
2993	char *hdr, *c;
2994
2995
2996	if (do_bswap)
2997		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
2998	else
2999		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
3000
3001	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
3002
3003	for (i = 0; i < nwords; i += 2) {
3004		(void) printf("%06llx:  %016llx  %016llx  ",
3005		    (u_longlong_t)(i * sizeof (uint64_t)),
3006		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
3007		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
3008
3009		c = (char *)&d[i];
3010		for (j = 0; j < 2 * sizeof (uint64_t); j++)
3011			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
3012		(void) printf("\n");
3013	}
3014}
3015
3016/*
3017 * There are two acceptable formats:
3018 *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
3019 *	child[.child]*    - For example: 0.1.1
3020 *
3021 * The second form can be used to specify arbitrary vdevs anywhere
3022 * in the heirarchy.  For example, in a pool with a mirror of
3023 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
3024 */
3025static vdev_t *
3026zdb_vdev_lookup(vdev_t *vdev, char *path)
3027{
3028	char *s, *p, *q;
3029	int i;
3030
3031	if (vdev == NULL)
3032		return (NULL);
3033
3034	/* First, assume the x.x.x.x format */
3035	i = (int)strtoul(path, &s, 10);
3036	if (s == path || (s && *s != '.' && *s != '\0'))
3037		goto name;
3038	if (i < 0 || i >= vdev->vdev_children)
3039		return (NULL);
3040
3041	vdev = vdev->vdev_child[i];
3042	if (*s == '\0')
3043		return (vdev);
3044	return (zdb_vdev_lookup(vdev, s+1));
3045
3046name:
3047	for (i = 0; i < vdev->vdev_children; i++) {
3048		vdev_t *vc = vdev->vdev_child[i];
3049
3050		if (vc->vdev_path == NULL) {
3051			vc = zdb_vdev_lookup(vc, path);
3052			if (vc == NULL)
3053				continue;
3054			else
3055				return (vc);
3056		}
3057
3058		p = strrchr(vc->vdev_path, '/');
3059		p = p ? p + 1 : vc->vdev_path;
3060		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
3061
3062		if (strcmp(vc->vdev_path, path) == 0)
3063			return (vc);
3064		if (strcmp(p, path) == 0)
3065			return (vc);
3066		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
3067			return (vc);
3068	}
3069
3070	return (NULL);
3071}
3072
3073/*
3074 * Read a block from a pool and print it out.  The syntax of the
3075 * block descriptor is:
3076 *
3077 *	pool:vdev_specifier:offset:size[:flags]
3078 *
3079 *	pool           - The name of the pool you wish to read from
3080 *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
3081 *	offset         - offset, in hex, in bytes
3082 *	size           - Amount of data to read, in hex, in bytes
3083 *	flags          - A string of characters specifying options
3084 *		 b: Decode a blkptr at given offset within block
3085 *		*c: Calculate and display checksums
3086 *		 d: Decompress data before dumping
3087 *		 e: Byteswap data before dumping
3088 *		 g: Display data as a gang block header
3089 *		 i: Display as an indirect block
3090 *		 p: Do I/O to physical offset
3091 *		 r: Dump raw data to stdout
3092 *
3093 *              * = not yet implemented
3094 */
3095static void
3096zdb_read_block(char *thing, spa_t *spa)
3097{
3098	blkptr_t blk, *bp = &blk;
3099	dva_t *dva = bp->blk_dva;
3100	int flags = 0;
3101	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
3102	zio_t *zio;
3103	vdev_t *vd;
3104	void *pbuf, *lbuf, *buf;
3105	char *s, *p, *dup, *vdev, *flagstr;
3106	int i, error;
3107
3108	dup = strdup(thing);
3109	s = strtok(dup, ":");
3110	vdev = s ? s : "";
3111	s = strtok(NULL, ":");
3112	offset = strtoull(s ? s : "", NULL, 16);
3113	s = strtok(NULL, ":");
3114	size = strtoull(s ? s : "", NULL, 16);
3115	s = strtok(NULL, ":");
3116	flagstr = s ? s : "";
3117
3118	s = NULL;
3119	if (size == 0)
3120		s = "size must not be zero";
3121	if (!IS_P2ALIGNED(size, DEV_BSIZE))
3122		s = "size must be a multiple of sector size";
3123	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
3124		s = "offset must be a multiple of sector size";
3125	if (s) {
3126		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
3127		free(dup);
3128		return;
3129	}
3130
3131	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
3132		for (i = 0; flagstr[i]; i++) {
3133			int bit = flagbits[(uchar_t)flagstr[i]];
3134
3135			if (bit == 0) {
3136				(void) printf("***Invalid flag: %c\n",
3137				    flagstr[i]);
3138				continue;
3139			}
3140			flags |= bit;
3141
3142			/* If it's not something with an argument, keep going */
3143			if ((bit & (ZDB_FLAG_CHECKSUM |
3144			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
3145				continue;
3146
3147			p = &flagstr[i + 1];
3148			if (bit == ZDB_FLAG_PRINT_BLKPTR)
3149				blkptr_offset = strtoull(p, &p, 16);
3150			if (*p != ':' && *p != '\0') {
3151				(void) printf("***Invalid flag arg: '%s'\n", s);
3152				free(dup);
3153				return;
3154			}
3155		}
3156	}
3157
3158	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
3159	if (vd == NULL) {
3160		(void) printf("***Invalid vdev: %s\n", vdev);
3161		free(dup);
3162		return;
3163	} else {
3164		if (vd->vdev_path)
3165			(void) fprintf(stderr, "Found vdev: %s\n",
3166			    vd->vdev_path);
3167		else
3168			(void) fprintf(stderr, "Found vdev type: %s\n",
3169			    vd->vdev_ops->vdev_op_type);
3170	}
3171
3172	psize = size;
3173	lsize = size;
3174
3175	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3176	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3177
3178	BP_ZERO(bp);
3179
3180	DVA_SET_VDEV(&dva[0], vd->vdev_id);
3181	DVA_SET_OFFSET(&dva[0], offset);
3182	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
3183	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
3184
3185	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
3186
3187	BP_SET_LSIZE(bp, lsize);
3188	BP_SET_PSIZE(bp, psize);
3189	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
3190	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
3191	BP_SET_TYPE(bp, DMU_OT_NONE);
3192	BP_SET_LEVEL(bp, 0);
3193	BP_SET_DEDUP(bp, 0);
3194	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
3195
3196	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
3197	zio = zio_root(spa, NULL, NULL, 0);
3198
3199	if (vd == vd->vdev_top) {
3200		/*
3201		 * Treat this as a normal block read.
3202		 */
3203		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
3204		    ZIO_PRIORITY_SYNC_READ,
3205		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
3206	} else {
3207		/*
3208		 * Treat this as a vdev child I/O.
3209		 */
3210		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
3211		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
3212		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
3213		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
3214		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
3215	}
3216
3217	error = zio_wait(zio);
3218	spa_config_exit(spa, SCL_STATE, FTAG);
3219
3220	if (error) {
3221		(void) printf("Read of %s failed, error: %d\n", thing, error);
3222		goto out;
3223	}
3224
3225	if (flags & ZDB_FLAG_DECOMPRESS) {
3226		/*
3227		 * We don't know how the data was compressed, so just try
3228		 * every decompress function at every inflated blocksize.
3229		 */
3230		enum zio_compress c;
3231		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3232		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3233
3234		bcopy(pbuf, pbuf2, psize);
3235
3236		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
3237		    SPA_MAXBLOCKSIZE - psize) == 0);
3238
3239		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
3240		    SPA_MAXBLOCKSIZE - psize) == 0);
3241
3242		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
3243		    lsize -= SPA_MINBLOCKSIZE) {
3244			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
3245				if (zio_decompress_data(c, pbuf, lbuf,
3246				    psize, lsize) == 0 &&
3247				    zio_decompress_data(c, pbuf2, lbuf2,
3248				    psize, lsize) == 0 &&
3249				    bcmp(lbuf, lbuf2, lsize) == 0)
3250					break;
3251			}
3252			if (c != ZIO_COMPRESS_FUNCTIONS)
3253				break;
3254			lsize -= SPA_MINBLOCKSIZE;
3255		}
3256
3257		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
3258		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
3259
3260		if (lsize <= psize) {
3261			(void) printf("Decompress of %s failed\n", thing);
3262			goto out;
3263		}
3264		buf = lbuf;
3265		size = lsize;
3266	} else {
3267		buf = pbuf;
3268		size = psize;
3269	}
3270
3271	if (flags & ZDB_FLAG_PRINT_BLKPTR)
3272		zdb_print_blkptr((blkptr_t *)(void *)
3273		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
3274	else if (flags & ZDB_FLAG_RAW)
3275		zdb_dump_block_raw(buf, size, flags);
3276	else if (flags & ZDB_FLAG_INDIRECT)
3277		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
3278		    flags);
3279	else if (flags & ZDB_FLAG_GBH)
3280		zdb_dump_gbh(buf, flags);
3281	else
3282		zdb_dump_block(thing, buf, size, flags);
3283
3284out:
3285	umem_free(pbuf, SPA_MAXBLOCKSIZE);
3286	umem_free(lbuf, SPA_MAXBLOCKSIZE);
3287	free(dup);
3288}
3289
3290static boolean_t
3291pool_match(nvlist_t *cfg, char *tgt)
3292{
3293	uint64_t v, guid = strtoull(tgt, NULL, 0);
3294	char *s;
3295
3296	if (guid != 0) {
3297		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
3298			return (v == guid);
3299	} else {
3300		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
3301			return (strcmp(s, tgt) == 0);
3302	}
3303	return (B_FALSE);
3304}
3305
3306static char *
3307find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
3308{
3309	nvlist_t *pools;
3310	nvlist_t *match = NULL;
3311	char *name = NULL;
3312	char *sepp = NULL;
3313	char sep;
3314	int count = 0;
3315	importargs_t args = { 0 };
3316
3317	args.paths = dirc;
3318	args.path = dirv;
3319	args.can_be_active = B_TRUE;
3320
3321	if ((sepp = strpbrk(*target, "/@")) != NULL) {
3322		sep = *sepp;
3323		*sepp = '\0';
3324	}
3325
3326	pools = zpool_search_import(g_zfs, &args);
3327
3328	if (pools != NULL) {
3329		nvpair_t *elem = NULL;
3330		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
3331			verify(nvpair_value_nvlist(elem, configp) == 0);
3332			if (pool_match(*configp, *target)) {
3333				count++;
3334				if (match != NULL) {
3335					/* print previously found config */
3336					if (name != NULL) {
3337						(void) printf("%s\n", name);
3338						dump_nvlist(match, 8);
3339						name = NULL;
3340					}
3341					(void) printf("%s\n",
3342					    nvpair_name(elem));
3343					dump_nvlist(*configp, 8);
3344				} else {
3345					match = *configp;
3346					name = nvpair_name(elem);
3347				}
3348			}
3349		}
3350	}
3351	if (count > 1)
3352		(void) fatal("\tMatched %d pools - use pool GUID "
3353		    "instead of pool name or \n"
3354		    "\tpool name part of a dataset name to select pool", count);
3355
3356	if (sepp)
3357		*sepp = sep;
3358	/*
3359	 * If pool GUID was specified for pool id, replace it with pool name
3360	 */
3361	if (name && (strstr(*target, name) != *target)) {
3362		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
3363
3364		*target = umem_alloc(sz, UMEM_NOFAIL);
3365		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
3366	}
3367
3368	*configp = name ? match : NULL;
3369
3370	return (name);
3371}
3372
3373int
3374main(int argc, char **argv)
3375{
3376	int i, c;
3377	struct rlimit rl = { 1024, 1024 };
3378	spa_t *spa = NULL;
3379	objset_t *os = NULL;
3380	int dump_all = 1;
3381	int verbose = 0;
3382	int error = 0;
3383	char **searchdirs = NULL;
3384	int nsearch = 0;
3385	char *target;
3386	nvlist_t *policy = NULL;
3387	uint64_t max_txg = UINT64_MAX;
3388	int rewind = ZPOOL_NEVER_REWIND;
3389
3390	(void) setrlimit(RLIMIT_NOFILE, &rl);
3391	(void) enable_extended_FILE_stdio(-1, -1);
3392
3393	dprintf_setup(&argc, argv);
3394
3395	while ((c = getopt(argc, argv,
3396	    "bcdhilmMI:suCDRSAFLXx:evp:t:U:P")) != -1) {
3397		switch (c) {
3398		case 'b':
3399		case 'c':
3400		case 'd':
3401		case 'h':
3402		case 'i':
3403		case 'l':
3404		case 'm':
3405		case 's':
3406		case 'u':
3407		case 'C':
3408		case 'D':
3409		case 'M':
3410		case 'R':
3411		case 'S':
3412			dump_opt[c]++;
3413			dump_all = 0;
3414			break;
3415		case 'A':
3416		case 'F':
3417		case 'L':
3418		case 'X':
3419		case 'e':
3420		case 'P':
3421			dump_opt[c]++;
3422			break;
3423		case 'I':
3424			max_inflight = strtoull(optarg, NULL, 0);
3425			if (max_inflight == 0) {
3426				(void) fprintf(stderr, "maximum number "
3427				    "of inflight I/Os must be greater "
3428				    "than 0\n");
3429				usage();
3430			}
3431			break;
3432		case 'p':
3433			if (searchdirs == NULL) {
3434				searchdirs = umem_alloc(sizeof (char *),
3435				    UMEM_NOFAIL);
3436			} else {
3437				char **tmp = umem_alloc((nsearch + 1) *
3438				    sizeof (char *), UMEM_NOFAIL);
3439				bcopy(searchdirs, tmp, nsearch *
3440				    sizeof (char *));
3441				umem_free(searchdirs,
3442				    nsearch * sizeof (char *));
3443				searchdirs = tmp;
3444			}
3445			searchdirs[nsearch++] = optarg;
3446			break;
3447		case 't':
3448			max_txg = strtoull(optarg, NULL, 0);
3449			if (max_txg < TXG_INITIAL) {
3450				(void) fprintf(stderr, "incorrect txg "
3451				    "specified: %s\n", optarg);
3452				usage();
3453			}
3454			break;
3455		case 'U':
3456			spa_config_path = optarg;
3457			break;
3458		case 'v':
3459			verbose++;
3460			break;
3461		case 'x':
3462			vn_dumpdir = optarg;
3463			break;
3464		default:
3465			usage();
3466			break;
3467		}
3468	}
3469
3470	if (!dump_opt['e'] && searchdirs != NULL) {
3471		(void) fprintf(stderr, "-p option requires use of -e\n");
3472		usage();
3473	}
3474
3475	/*
3476	 * ZDB does not typically re-read blocks; therefore limit the ARC
3477	 * to 256 MB, which can be used entirely for metadata.
3478	 */
3479	zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
3480
3481	kernel_init(FREAD);
3482	g_zfs = libzfs_init();
3483	ASSERT(g_zfs != NULL);
3484
3485	if (dump_all)
3486		verbose = MAX(verbose, 1);
3487
3488	for (c = 0; c < 256; c++) {
3489		if (dump_all && !strchr("elAFLRSXP", c))
3490			dump_opt[c] = 1;
3491		if (dump_opt[c])
3492			dump_opt[c] += verbose;
3493	}
3494
3495	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
3496	zfs_recover = (dump_opt['A'] > 1);
3497
3498	argc -= optind;
3499	argv += optind;
3500
3501	if (argc < 2 && dump_opt['R'])
3502		usage();
3503	if (argc < 1) {
3504		if (!dump_opt['e'] && dump_opt['C']) {
3505			dump_cachefile(spa_config_path);
3506			return (0);
3507		}
3508		usage();
3509	}
3510
3511	if (dump_opt['l']) {
3512		dump_label(argv[0]);
3513		return (0);
3514	}
3515
3516	if (dump_opt['X'] || dump_opt['F'])
3517		rewind = ZPOOL_DO_REWIND |
3518		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
3519
3520	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
3521	    nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
3522	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
3523		fatal("internal error: %s", strerror(ENOMEM));
3524
3525	error = 0;
3526	target = argv[0];
3527
3528	if (dump_opt['e']) {
3529		nvlist_t *cfg = NULL;
3530		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
3531
3532		error = ENOENT;
3533		if (name) {
3534			if (dump_opt['C'] > 1) {
3535				(void) printf("\nConfiguration for import:\n");
3536				dump_nvlist(cfg, 8);
3537			}
3538			if (nvlist_add_nvlist(cfg,
3539			    ZPOOL_REWIND_POLICY, policy) != 0) {
3540				fatal("can't open '%s': %s",
3541				    target, strerror(ENOMEM));
3542			}
3543			if ((error = spa_import(name, cfg, NULL,
3544			    ZFS_IMPORT_MISSING_LOG)) != 0) {
3545				error = spa_import(name, cfg, NULL,
3546				    ZFS_IMPORT_VERBATIM);
3547			}
3548		}
3549	}
3550
3551	if (error == 0) {
3552		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
3553			error = spa_open_rewind(target, &spa, FTAG, policy,
3554			    NULL);
3555			if (error) {
3556				/*
3557				 * If we're missing the log device then
3558				 * try opening the pool after clearing the
3559				 * log state.
3560				 */
3561				mutex_enter(&spa_namespace_lock);
3562				if ((spa = spa_lookup(target)) != NULL &&
3563				    spa->spa_log_state == SPA_LOG_MISSING) {
3564					spa->spa_log_state = SPA_LOG_CLEAR;
3565					error = 0;
3566				}
3567				mutex_exit(&spa_namespace_lock);
3568
3569				if (!error) {
3570					error = spa_open_rewind(target, &spa,
3571					    FTAG, policy, NULL);
3572				}
3573			}
3574		} else {
3575			error = dmu_objset_own(target, DMU_OST_ANY,
3576			    B_TRUE, FTAG, &os);
3577		}
3578	}
3579	nvlist_free(policy);
3580
3581	if (error)
3582		fatal("can't open '%s': %s", target, strerror(error));
3583
3584	argv++;
3585	argc--;
3586	if (!dump_opt['R']) {
3587		if (argc > 0) {
3588			zopt_objects = argc;
3589			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
3590			for (i = 0; i < zopt_objects; i++) {
3591				errno = 0;
3592				zopt_object[i] = strtoull(argv[i], NULL, 0);
3593				if (zopt_object[i] == 0 && errno != 0)
3594					fatal("bad number %s: %s",
3595					    argv[i], strerror(errno));
3596			}
3597		}
3598		if (os != NULL) {
3599			dump_dir(os);
3600		} else if (zopt_objects > 0 && !dump_opt['m']) {
3601			dump_dir(spa->spa_meta_objset);
3602		} else {
3603			dump_zpool(spa);
3604		}
3605	} else {
3606		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
3607		flagbits['c'] = ZDB_FLAG_CHECKSUM;
3608		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
3609		flagbits['e'] = ZDB_FLAG_BSWAP;
3610		flagbits['g'] = ZDB_FLAG_GBH;
3611		flagbits['i'] = ZDB_FLAG_INDIRECT;
3612		flagbits['p'] = ZDB_FLAG_PHYS;
3613		flagbits['r'] = ZDB_FLAG_RAW;
3614
3615		for (i = 0; i < argc; i++)
3616			zdb_read_block(argv[i], spa);
3617	}
3618
3619	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
3620
3621	fuid_table_destroy();
3622	sa_loaded = B_FALSE;
3623
3624	libzfs_fini(g_zfs);
3625	kernel_fini();
3626
3627	return (0);
3628}
3629