zdb.c revision 80eb36f241abf8c076119fb4c49a55fd61ebc710
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <stdio.h>
27#include <stdio_ext.h>
28#include <stdlib.h>
29#include <ctype.h>
30#include <sys/zfs_context.h>
31#include <sys/spa.h>
32#include <sys/spa_impl.h>
33#include <sys/dmu.h>
34#include <sys/zap.h>
35#include <sys/fs/zfs.h>
36#include <sys/zfs_znode.h>
37#include <sys/vdev.h>
38#include <sys/vdev_impl.h>
39#include <sys/metaslab_impl.h>
40#include <sys/dmu_objset.h>
41#include <sys/dsl_dir.h>
42#include <sys/dsl_dataset.h>
43#include <sys/dsl_pool.h>
44#include <sys/dbuf.h>
45#include <sys/zil.h>
46#include <sys/zil_impl.h>
47#include <sys/stat.h>
48#include <sys/resource.h>
49#include <sys/dmu_traverse.h>
50#include <sys/zio_checksum.h>
51#include <sys/zio_compress.h>
52#include <sys/zfs_fuid.h>
53#include <sys/arc.h>
54#include <sys/ddt.h>
55#undef ZFS_MAXNAMELEN
56#undef verify
57#include <libzfs.h>
58
/*
 * Name lookups guarded by a range check on the index.  An index at or
 * beyond the corresponding *_FUNCTIONS / DMU_OT_NUMTYPES limit yields the
 * string "UNKNOWN" instead of indexing the table.  Each macro evaluates
 * its argument more than once, so do not pass expressions with side effects.
 */
#define	ZDB_COMPRESS_NAME(idx) ((idx) >= ZIO_COMPRESS_FUNCTIONS ? \
    "UNKNOWN" : zio_compress_table[(idx)].ci_name)
#define	ZDB_CHECKSUM_NAME(idx) ((idx) >= ZIO_CHECKSUM_FUNCTIONS ? \
    "UNKNOWN" : zio_checksum_table[(idx)].ci_name)
#define	ZDB_OT_NAME(idx) ((idx) >= DMU_OT_NUMTYPES ? \
    "UNKNOWN" : dmu_ot[(idx)].ot_name)
/* Maps any out-of-range object type to DMU_OT_NUMTYPES. */
#define	ZDB_OT_TYPE(idx) ((idx) >= DMU_OT_NUMTYPES ? DMU_OT_NUMTYPES : (idx))
66
67const char cmdname[] = "zdb";
68uint8_t dump_opt[256];
69
70typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
71
72extern void dump_intent_log(zilog_t *);
73uint64_t *zopt_object = NULL;
74int zopt_objects = 0;
75libzfs_handle_t *g_zfs;
76
/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */

/*
 * Returns the string to be used as the $UMEM_DEBUG setting.
 *
 * Declared with an explicit (void) parameter list so that it is a proper
 * prototype, matching _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
86
/*
 * Returns the string to be used as the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char logging_setting[] = "fail,contents";

	return (logging_setting);
}
92
93static void
94usage(void)
95{
96	(void) fprintf(stderr,
97	    "Usage: %s [-CumdibcsvhL] "
98	    "poolname [object...]\n"
99	    "       %s [-div] dataset [object...]\n"
100	    "       %s -m [-L] poolname [vdev [metaslab...]]\n"
101	    "       %s -R poolname vdev:offset:size[:flags]\n"
102	    "       %s -S poolname\n"
103	    "       %s -l device\n"
104	    "       %s -C\n\n",
105	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
106
107	(void) fprintf(stderr, "    Dataset name must include at least one "
108	    "separator character '/' or '@'\n");
109	(void) fprintf(stderr, "    If dataset name is specified, only that "
110	    "dataset is dumped\n");
111	(void) fprintf(stderr, "    If object numbers are specified, only "
112	    "those objects are dumped\n\n");
113	(void) fprintf(stderr, "    Options to control amount of output:\n");
114	(void) fprintf(stderr, "        -u uberblock\n");
115	(void) fprintf(stderr, "        -d dataset(s)\n");
116	(void) fprintf(stderr, "        -i intent logs\n");
117	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
118	(void) fprintf(stderr, "        -h pool history\n");
119	(void) fprintf(stderr, "        -b block statistics\n");
120	(void) fprintf(stderr, "        -m metaslabs\n");
121	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
122	    "all data) blocks\n");
123	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
124	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
125	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
126	(void) fprintf(stderr, "        -l dump label contents\n");
127	(void) fprintf(stderr, "        -L disable leak tracking (do not "
128	    "load spacemaps)\n");
129	(void) fprintf(stderr, "        -R read and display block from a "
130	    "device\n\n");
131	(void) fprintf(stderr, "    Below options are intended for use "
132	    "with other options (except -l):\n");
133	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
134	    "cachefile\n");
135	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
136	    "has altroot/not in a cachefile\n");
137	(void) fprintf(stderr, "        -p <path> -- use one or more with "
138	    "-e to specify path to vdev dir\n");
139	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
140	    "searching for uberblocks\n");
141	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
142	    "to make only that option verbose\n");
143	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
144	exit(1);
145}
146
147/*
148 * Called for usage errors that are discovered after a call to spa_open(),
149 * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
150 */
151
152static void
153fatal(const char *fmt, ...)
154{
155	va_list ap;
156
157	va_start(ap, fmt);
158	(void) fprintf(stderr, "%s: ", cmdname);
159	(void) vfprintf(stderr, fmt, ap);
160	va_end(ap);
161	(void) fprintf(stderr, "\n");
162
163	exit(1);
164}
165
166/* ARGSUSED */
167static void
168dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
169{
170	nvlist_t *nv;
171	size_t nvsize = *(uint64_t *)data;
172	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
173
174	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
175
176	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
177
178	umem_free(packed, nvsize);
179
180	dump_nvlist(nv, 8);
181
182	nvlist_free(nv);
183}
184
185const char dump_zap_stars[] = "****************************************";
186const int dump_zap_width = sizeof (dump_zap_stars) - 1;
187
188static void
189dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
190{
191	int i;
192	int minidx = ZAP_HISTOGRAM_SIZE - 1;
193	int maxidx = 0;
194	uint64_t max = 0;
195
196	for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
197		if (histo[i] > max)
198			max = histo[i];
199		if (histo[i] > 0 && i > maxidx)
200			maxidx = i;
201		if (histo[i] > 0 && i < minidx)
202			minidx = i;
203	}
204
205	if (max < dump_zap_width)
206		max = dump_zap_width;
207
208	for (i = minidx; i <= maxidx; i++)
209		(void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
210		    &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
211}
212
213static void
214dump_zap_stats(objset_t *os, uint64_t object)
215{
216	int error;
217	zap_stats_t zs;
218
219	error = zap_get_stats(os, object, &zs);
220	if (error)
221		return;
222
223	if (zs.zs_ptrtbl_len == 0) {
224		ASSERT(zs.zs_num_blocks == 1);
225		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
226		    (u_longlong_t)zs.zs_blocksize,
227		    (u_longlong_t)zs.zs_num_entries);
228		return;
229	}
230
231	(void) printf("\tFat ZAP stats:\n");
232
233	(void) printf("\t\tPointer table:\n");
234	(void) printf("\t\t\t%llu elements\n",
235	    (u_longlong_t)zs.zs_ptrtbl_len);
236	(void) printf("\t\t\tzt_blk: %llu\n",
237	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
238	(void) printf("\t\t\tzt_numblks: %llu\n",
239	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
240	(void) printf("\t\t\tzt_shift: %llu\n",
241	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
242	(void) printf("\t\t\tzt_blks_copied: %llu\n",
243	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
244	(void) printf("\t\t\tzt_nextblk: %llu\n",
245	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
246
247	(void) printf("\t\tZAP entries: %llu\n",
248	    (u_longlong_t)zs.zs_num_entries);
249	(void) printf("\t\tLeaf blocks: %llu\n",
250	    (u_longlong_t)zs.zs_num_leafs);
251	(void) printf("\t\tTotal blocks: %llu\n",
252	    (u_longlong_t)zs.zs_num_blocks);
253	(void) printf("\t\tzap_block_type: 0x%llx\n",
254	    (u_longlong_t)zs.zs_block_type);
255	(void) printf("\t\tzap_magic: 0x%llx\n",
256	    (u_longlong_t)zs.zs_magic);
257	(void) printf("\t\tzap_salt: 0x%llx\n",
258	    (u_longlong_t)zs.zs_salt);
259
260	(void) printf("\t\tLeafs with 2^n pointers:\n");
261	dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
262
263	(void) printf("\t\tBlocks with n*5 entries:\n");
264	dump_zap_histogram(zs.zs_blocks_with_n5_entries);
265
266	(void) printf("\t\tBlocks n/10 full:\n");
267	dump_zap_histogram(zs.zs_blocks_n_tenths_full);
268
269	(void) printf("\t\tEntries with n chunks:\n");
270	dump_zap_histogram(zs.zs_entries_using_n_chunks);
271
272	(void) printf("\t\tBuckets with n entries:\n");
273	dump_zap_histogram(zs.zs_buckets_with_n_entries);
274}
275
276/*ARGSUSED*/
277static void
278dump_none(objset_t *os, uint64_t object, void *data, size_t size)
279{
280}
281
282/*ARGSUSED*/
283static void
284dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
285{
286	(void) printf("\tUNKNOWN OBJECT TYPE\n");
287}
288
289/*ARGSUSED*/
290void
291dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
292{
293}
294
295/*ARGSUSED*/
296static void
297dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
298{
299}
300
301/*ARGSUSED*/
302static void
303dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
304{
305	zap_cursor_t zc;
306	zap_attribute_t attr;
307	void *prop;
308	int i;
309
310	dump_zap_stats(os, object);
311	(void) printf("\n");
312
313	for (zap_cursor_init(&zc, os, object);
314	    zap_cursor_retrieve(&zc, &attr) == 0;
315	    zap_cursor_advance(&zc)) {
316		(void) printf("\t\t%s = ", attr.za_name);
317		if (attr.za_num_integers == 0) {
318			(void) printf("\n");
319			continue;
320		}
321		prop = umem_zalloc(attr.za_num_integers *
322		    attr.za_integer_length, UMEM_NOFAIL);
323		(void) zap_lookup(os, object, attr.za_name,
324		    attr.za_integer_length, attr.za_num_integers, prop);
325		if (attr.za_integer_length == 1) {
326			(void) printf("%s", (char *)prop);
327		} else {
328			for (i = 0; i < attr.za_num_integers; i++) {
329				switch (attr.za_integer_length) {
330				case 2:
331					(void) printf("%u ",
332					    ((uint16_t *)prop)[i]);
333					break;
334				case 4:
335					(void) printf("%u ",
336					    ((uint32_t *)prop)[i]);
337					break;
338				case 8:
339					(void) printf("%lld ",
340					    (u_longlong_t)((int64_t *)prop)[i]);
341					break;
342				}
343			}
344		}
345		(void) printf("\n");
346		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
347	}
348	zap_cursor_fini(&zc);
349}
350
351/*ARGSUSED*/
352static void
353dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
354{
355	zap_cursor_t zc;
356	zap_attribute_t attr;
357	const char *typenames[] = {
358		/* 0 */ "not specified",
359		/* 1 */ "FIFO",
360		/* 2 */ "Character Device",
361		/* 3 */ "3 (invalid)",
362		/* 4 */ "Directory",
363		/* 5 */ "5 (invalid)",
364		/* 6 */ "Block Device",
365		/* 7 */ "7 (invalid)",
366		/* 8 */ "Regular File",
367		/* 9 */ "9 (invalid)",
368		/* 10 */ "Symbolic Link",
369		/* 11 */ "11 (invalid)",
370		/* 12 */ "Socket",
371		/* 13 */ "Door",
372		/* 14 */ "Event Port",
373		/* 15 */ "15 (invalid)",
374	};
375
376	dump_zap_stats(os, object);
377	(void) printf("\n");
378
379	for (zap_cursor_init(&zc, os, object);
380	    zap_cursor_retrieve(&zc, &attr) == 0;
381	    zap_cursor_advance(&zc)) {
382		(void) printf("\t\t%s = %lld (type: %s)\n",
383		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
384		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
385	}
386	zap_cursor_fini(&zc);
387}
388
389static void
390dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
391{
392	uint64_t alloc, offset, entry;
393	uint8_t mapshift = sm->sm_shift;
394	uint64_t mapstart = sm->sm_start;
395	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
396			    "INVALID", "INVALID", "INVALID", "INVALID" };
397
398	if (smo->smo_object == 0)
399		return;
400
401	/*
402	 * Print out the freelist entries in both encoded and decoded form.
403	 */
404	alloc = 0;
405	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
406		VERIFY(0 == dmu_read(os, smo->smo_object, offset,
407		    sizeof (entry), &entry, DMU_READ_PREFETCH));
408		if (SM_DEBUG_DECODE(entry)) {
409			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
410			    (u_longlong_t)(offset / sizeof (entry)),
411			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
412			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
413			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
414		} else {
415			(void) printf("\t    [%6llu]    %c  range:"
416			    " %010llx-%010llx  size: %06llx\n",
417			    (u_longlong_t)(offset / sizeof (entry)),
418			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
419			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
420			    mapshift) + mapstart),
421			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
422			    mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
423			    mapshift)),
424			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
425			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
426				alloc += SM_RUN_DECODE(entry) << mapshift;
427			else
428				alloc -= SM_RUN_DECODE(entry) << mapshift;
429		}
430	}
431	if (alloc != smo->smo_alloc) {
432		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
433		    "with space map summary (%llu)\n",
434		    (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
435	}
436}
437
438static void
439dump_metaslab_stats(metaslab_t *msp)
440{
441	char maxbuf[5];
442	space_map_t *sm = &msp->ms_map;
443	avl_tree_t *t = sm->sm_pp_root;
444	int free_pct = sm->sm_space * 100 / sm->sm_size;
445
446	nicenum(space_map_maxsize(sm), maxbuf);
447
448	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
449	    "segments", avl_numnodes(t), "maxsize", maxbuf,
450	    "freepct", free_pct);
451}
452
453static void
454dump_metaslab(metaslab_t *msp)
455{
456	vdev_t *vd = msp->ms_group->mg_vd;
457	spa_t *spa = vd->vdev_spa;
458	space_map_t *sm = &msp->ms_map;
459	space_map_obj_t *smo = &msp->ms_smo;
460	char freebuf[5];
461
462	nicenum(sm->sm_size - smo->smo_alloc, freebuf);
463
464	(void) printf(
465	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
466	    (u_longlong_t)(sm->sm_start / sm->sm_size),
467	    (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
468
469	if (dump_opt['m'] > 1 && !dump_opt['L']) {
470		mutex_enter(&msp->ms_lock);
471		space_map_load_wait(sm);
472		if (!sm->sm_loaded &&
473		    (smo->smo_object != 0 || dump_opt['m'] > 2)) {
474			VERIFY(space_map_load(sm, zfs_metaslab_ops,
475			    SM_FREE, smo, spa->spa_meta_objset) == 0);
476			dump_metaslab_stats(msp);
477			space_map_unload(sm);
478		}
479		mutex_exit(&msp->ms_lock);
480	}
481
482	if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
483		ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift));
484
485		mutex_enter(&msp->ms_lock);
486		dump_spacemap(spa->spa_meta_objset, smo, sm);
487		mutex_exit(&msp->ms_lock);
488	}
489}
490
491static void
492print_vdev_metaslab_header(vdev_t *vd)
493{
494	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
495	    (u_longlong_t)vd->vdev_id,
496	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
497	    "offset", "spacemap", "free");
498	(void) printf("\t%15s   %19s   %15s   %10s\n",
499	    "---------------", "-------------------",
500	    "---------------", "-------------");
501}
502
503static void
504dump_metaslabs(spa_t *spa)
505{
506	vdev_t *vd, *rvd = spa->spa_root_vdev;
507	uint64_t m, c = 0, children = rvd->vdev_children;
508
509	(void) printf("\nMetaslabs:\n");
510
511	if (!dump_opt['d'] && zopt_objects > 0) {
512		c = zopt_object[0];
513
514		if (c >= children)
515			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
516
517		if (zopt_objects > 1) {
518			vd = rvd->vdev_child[c];
519			print_vdev_metaslab_header(vd);
520
521			for (m = 1; m < zopt_objects; m++) {
522				if (zopt_object[m] < vd->vdev_ms_count)
523					dump_metaslab(
524					    vd->vdev_ms[zopt_object[m]]);
525				else
526					(void) fprintf(stderr, "bad metaslab "
527					    "number %llu\n",
528					    (u_longlong_t)zopt_object[m]);
529			}
530			(void) printf("\n");
531			return;
532		}
533		children = c + 1;
534	}
535	for (; c < children; c++) {
536		vd = rvd->vdev_child[c];
537		print_vdev_metaslab_header(vd);
538
539		for (m = 0; m < vd->vdev_ms_count; m++)
540			dump_metaslab(vd->vdev_ms[m]);
541		(void) printf("\n");
542	}
543}
544
545static void
546dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
547{
548	const ddt_phys_t *ddp = dde->dde_phys;
549	const ddt_key_t *ddk = &dde->dde_key;
550	char *types[4] = { "ditto", "single", "double", "triple" };
551	char blkbuf[BP_SPRINTF_LEN];
552	blkptr_t blk;
553
554	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
555		if (ddp->ddp_phys_birth == 0)
556			continue;
557		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
558		sprintf_blkptr(blkbuf, &blk);
559		(void) printf("index %llx refcnt %llu %s %s\n",
560		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
561		    types[p], blkbuf);
562	}
563}
564
565static void
566dump_dedup_ratio(const ddt_stat_t *dds)
567{
568	double rL, rP, rD, D, dedup, compress, copies;
569
570	if (dds->dds_blocks == 0)
571		return;
572
573	rL = (double)dds->dds_ref_lsize;
574	rP = (double)dds->dds_ref_psize;
575	rD = (double)dds->dds_ref_dsize;
576	D = (double)dds->dds_dsize;
577
578	dedup = rD / D;
579	compress = rL / rP;
580	copies = rD / rP;
581
582	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
583	    "dedup * compress / copies = %.2f\n\n",
584	    dedup, compress, copies, dedup * compress / copies);
585}
586
587static void
588dump_ddt_stat(const ddt_stat_t *dds, int h)
589{
590	char refcnt[6];
591	char blocks[6], lsize[6], psize[6], dsize[6];
592	char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
593
594	if (dds->dds_blocks == 0)
595		return;
596
597	if (h == -1)
598		(void) strcpy(refcnt, "Total");
599	else
600		nicenum(1ULL << h, refcnt);
601
602	nicenum(dds->dds_blocks, blocks);
603	nicenum(dds->dds_lsize, lsize);
604	nicenum(dds->dds_psize, psize);
605	nicenum(dds->dds_dsize, dsize);
606	nicenum(dds->dds_ref_blocks, ref_blocks);
607	nicenum(dds->dds_ref_lsize, ref_lsize);
608	nicenum(dds->dds_ref_psize, ref_psize);
609	nicenum(dds->dds_ref_dsize, ref_dsize);
610
611	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
612	    refcnt,
613	    blocks, lsize, psize, dsize,
614	    ref_blocks, ref_lsize, ref_psize, ref_dsize);
615}
616
617static void
618dump_ddt_histogram(const ddt_histogram_t *ddh)
619{
620	ddt_stat_t dds_total = { 0 };
621
622	ddt_histogram_stat(&dds_total, ddh);
623
624	(void) printf("\n");
625
626	(void) printf("bucket   "
627	    "           allocated             "
628	    "          referenced          \n");
629	(void) printf("______   "
630	    "______________________________   "
631	    "______________________________\n");
632
633	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
634	    "refcnt",
635	    "blocks", "LSIZE", "PSIZE", "DSIZE",
636	    "blocks", "LSIZE", "PSIZE", "DSIZE");
637
638	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
639	    "------",
640	    "------", "-----", "-----", "-----",
641	    "------", "-----", "-----", "-----");
642
643	for (int h = 0; h < 64; h++)
644		dump_ddt_stat(&ddh->ddh_stat[h], h);
645
646	dump_ddt_stat(&dds_total, -1);
647
648	(void) printf("\n");
649}
650
651static void
652dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
653{
654	char name[DDT_NAMELEN];
655	ddt_entry_t dde;
656	uint64_t walk = 0;
657	dmu_object_info_t doi;
658	uint64_t count, dspace, mspace;
659	int error;
660
661	error = ddt_object_info(ddt, type, class, &doi);
662
663	if (error == ENOENT)
664		return;
665	ASSERT(error == 0);
666
667	count = ddt_object_count(ddt, type, class);
668	dspace = doi.doi_physical_blocks_512 << 9;
669	mspace = doi.doi_fill_count * doi.doi_data_block_size;
670
671	ASSERT(count != 0);	/* we should have destroyed it */
672
673	ddt_object_name(ddt, type, class, name);
674
675	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
676	    name,
677	    (u_longlong_t)count,
678	    (u_longlong_t)(dspace / count),
679	    (u_longlong_t)(mspace / count));
680
681	if (dump_opt['D'] < 3)
682		return;
683
684	dump_ddt_histogram(&ddt->ddt_histogram[type][class]);
685
686	if (dump_opt['D'] < 4)
687		return;
688
689	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
690		return;
691
692	(void) printf("%s contents:\n\n", name);
693
694	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
695		dump_dde(ddt, &dde, walk);
696
697	ASSERT(error == ENOENT);
698
699	(void) printf("\n");
700}
701
702static void
703dump_all_ddts(spa_t *spa)
704{
705	ddt_histogram_t ddh_total = { 0 };
706	ddt_stat_t dds_total = { 0 };
707
708	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
709		ddt_t *ddt = spa->spa_ddt[c];
710		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
711			for (enum ddt_class class = 0; class < DDT_CLASSES;
712			    class++) {
713				ddt_histogram_add(&ddh_total,
714				    &ddt->ddt_histogram[type][class]);
715				dump_ddt(ddt, type, class);
716			}
717		}
718	}
719
720	ddt_histogram_stat(&dds_total, &ddh_total);
721
722	if (dds_total.dds_blocks == 0) {
723		(void) printf("All DDTs are empty\n");
724		return;
725	}
726
727	(void) printf("\n");
728
729	if (dump_opt['D'] > 1) {
730		(void) printf("DDT histogram (aggregated over all DDTs):\n");
731		dump_ddt_histogram(&ddh_total);
732	}
733
734	dump_dedup_ratio(&dds_total);
735}
736
737static void
738dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
739{
740	char *prefix = (void *)sm;
741
742	(void) printf("%s [%llu,%llu) length %llu\n",
743	    prefix,
744	    (u_longlong_t)start,
745	    (u_longlong_t)(start + size),
746	    (u_longlong_t)(size));
747}
748
749static void
750dump_dtl(vdev_t *vd, int indent)
751{
752	spa_t *spa = vd->vdev_spa;
753	boolean_t required;
754	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
755	char prefix[256];
756
757	spa_vdev_state_enter(spa, SCL_NONE);
758	required = vdev_dtl_required(vd);
759	(void) spa_vdev_state_exit(spa, NULL, 0);
760
761	if (indent == 0)
762		(void) printf("\nDirty time logs:\n\n");
763
764	(void) printf("\t%*s%s [%s]\n", indent, "",
765	    vd->vdev_path ? vd->vdev_path :
766	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
767	    required ? "DTL-required" : "DTL-expendable");
768
769	for (int t = 0; t < DTL_TYPES; t++) {
770		space_map_t *sm = &vd->vdev_dtl[t];
771		if (sm->sm_space == 0)
772			continue;
773		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
774		    indent + 2, "", name[t]);
775		mutex_enter(sm->sm_lock);
776		space_map_walk(sm, dump_dtl_seg, (void *)prefix);
777		mutex_exit(sm->sm_lock);
778		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
779			dump_spacemap(spa->spa_meta_objset,
780			    &vd->vdev_dtl_smo, sm);
781	}
782
783	for (int c = 0; c < vd->vdev_children; c++)
784		dump_dtl(vd->vdev_child[c], indent + 4);
785}
786
787static void
788dump_history(spa_t *spa)
789{
790	nvlist_t **events = NULL;
791	char buf[SPA_MAXBLOCKSIZE];
792	uint64_t resid, len, off = 0;
793	uint_t num = 0;
794	int error;
795	time_t tsec;
796	struct tm t;
797	char tbuf[30];
798	char internalstr[MAXPATHLEN];
799
800	do {
801		len = sizeof (buf);
802
803		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
804			(void) fprintf(stderr, "Unable to read history: "
805			    "error %d\n", error);
806			return;
807		}
808
809		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
810			break;
811
812		off -= resid;
813	} while (len != 0);
814
815	(void) printf("\nHistory:\n");
816	for (int i = 0; i < num; i++) {
817		uint64_t time, txg, ievent;
818		char *cmd, *intstr;
819
820		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
821		    &time) != 0)
822			continue;
823		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
824		    &cmd) != 0) {
825			if (nvlist_lookup_uint64(events[i],
826			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
827				continue;
828			verify(nvlist_lookup_uint64(events[i],
829			    ZPOOL_HIST_TXG, &txg) == 0);
830			verify(nvlist_lookup_string(events[i],
831			    ZPOOL_HIST_INT_STR, &intstr) == 0);
832			if (ievent >= LOG_END)
833				continue;
834
835			(void) snprintf(internalstr,
836			    sizeof (internalstr),
837			    "[internal %s txg:%lld] %s",
838			    hist_event_table[ievent], txg,
839			    intstr);
840			cmd = internalstr;
841		}
842		tsec = time;
843		(void) localtime_r(&tsec, &t);
844		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
845		(void) printf("%s %s\n", tbuf, cmd);
846	}
847}
848
849/*ARGSUSED*/
850static void
851dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
852{
853}
854
855static uint64_t
856blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
857{
858	if (dnp == NULL) {
859		ASSERT(zb->zb_level < 0);
860		if (zb->zb_object == 0)
861			return (zb->zb_blkid);
862		return (zb->zb_blkid * BP_GET_LSIZE(bp));
863	}
864
865	ASSERT(zb->zb_level >= 0);
866
867	return ((zb->zb_blkid <<
868	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
869	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
870}
871
872static void
873sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp)
874{
875	dva_t *dva = bp->blk_dva;
876	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
877
878	if (dump_opt['b'] >= 5) {
879		sprintf_blkptr(blkbuf, bp);
880		return;
881	}
882
883	blkbuf[0] = '\0';
884
885	for (int i = 0; i < ndvas; i++)
886		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
887		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
888		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
889		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
890
891	(void) sprintf(blkbuf + strlen(blkbuf),
892	    "%llxL/%llxP F=%llu B=%llu/%llu",
893	    (u_longlong_t)BP_GET_LSIZE(bp),
894	    (u_longlong_t)BP_GET_PSIZE(bp),
895	    (u_longlong_t)bp->blk_fill,
896	    (u_longlong_t)bp->blk_birth,
897	    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
898}
899
900static void
901print_indirect(blkptr_t *bp, const zbookmark_t *zb,
902    const dnode_phys_t *dnp)
903{
904	char blkbuf[BP_SPRINTF_LEN];
905	int l;
906
907	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
908	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
909
910	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
911
912	ASSERT(zb->zb_level >= 0);
913
914	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
915		if (l == zb->zb_level) {
916			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
917		} else {
918			(void) printf(" ");
919		}
920	}
921
922	sprintf_blkptr_compact(blkbuf, bp);
923	(void) printf("%s\n", blkbuf);
924}
925
926static int
927visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
928    blkptr_t *bp, const zbookmark_t *zb)
929{
930	int err = 0;
931
932	if (bp->blk_birth == 0)
933		return (0);
934
935	print_indirect(bp, zb, dnp);
936
937	if (BP_GET_LEVEL(bp) > 0) {
938		uint32_t flags = ARC_WAIT;
939		int i;
940		blkptr_t *cbp;
941		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
942		arc_buf_t *buf;
943		uint64_t fill = 0;
944
945		err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
946		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
947		if (err)
948			return (err);
949
950		/* recursively visit blocks below this */
951		cbp = buf->b_data;
952		for (i = 0; i < epb; i++, cbp++) {
953			zbookmark_t czb;
954
955			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
956			    zb->zb_level - 1,
957			    zb->zb_blkid * epb + i);
958			err = visit_indirect(spa, dnp, cbp, &czb);
959			if (err)
960				break;
961			fill += cbp->blk_fill;
962		}
963		if (!err)
964			ASSERT3U(fill, ==, bp->blk_fill);
965		(void) arc_buf_remove_ref(buf, &buf);
966	}
967
968	return (err);
969}
970
971/*ARGSUSED*/
972static void
973dump_indirect(dnode_t *dn)
974{
975	dnode_phys_t *dnp = dn->dn_phys;
976	int j;
977	zbookmark_t czb;
978
979	(void) printf("Indirect blocks:\n");
980
981	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
982	    dn->dn_object, dnp->dn_nlevels - 1, 0);
983	for (j = 0; j < dnp->dn_nblkptr; j++) {
984		czb.zb_blkid = j;
985		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
986		    &dnp->dn_blkptr[j], &czb);
987	}
988
989	(void) printf("\n");
990}
991
992/*ARGSUSED*/
993static void
994dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
995{
996	dsl_dir_phys_t *dd = data;
997	time_t crtime;
998	char nice[6];
999
1000	if (dd == NULL)
1001		return;
1002
1003	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1004
1005	crtime = dd->dd_creation_time;
1006	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1007	(void) printf("\t\thead_dataset_obj = %llu\n",
1008	    (u_longlong_t)dd->dd_head_dataset_obj);
1009	(void) printf("\t\tparent_dir_obj = %llu\n",
1010	    (u_longlong_t)dd->dd_parent_obj);
1011	(void) printf("\t\torigin_obj = %llu\n",
1012	    (u_longlong_t)dd->dd_origin_obj);
1013	(void) printf("\t\tchild_dir_zapobj = %llu\n",
1014	    (u_longlong_t)dd->dd_child_dir_zapobj);
1015	nicenum(dd->dd_used_bytes, nice);
1016	(void) printf("\t\tused_bytes = %s\n", nice);
1017	nicenum(dd->dd_compressed_bytes, nice);
1018	(void) printf("\t\tcompressed_bytes = %s\n", nice);
1019	nicenum(dd->dd_uncompressed_bytes, nice);
1020	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
1021	nicenum(dd->dd_quota, nice);
1022	(void) printf("\t\tquota = %s\n", nice);
1023	nicenum(dd->dd_reserved, nice);
1024	(void) printf("\t\treserved = %s\n", nice);
1025	(void) printf("\t\tprops_zapobj = %llu\n",
1026	    (u_longlong_t)dd->dd_props_zapobj);
1027	(void) printf("\t\tdeleg_zapobj = %llu\n",
1028	    (u_longlong_t)dd->dd_deleg_zapobj);
1029	(void) printf("\t\tflags = %llx\n",
1030	    (u_longlong_t)dd->dd_flags);
1031
1032#define	DO(which) \
1033	nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1034	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1035	DO(HEAD);
1036	DO(SNAP);
1037	DO(CHILD);
1038	DO(CHILD_RSRV);
1039	DO(REFRSRV);
1040#undef DO
1041}
1042
1043/*ARGSUSED*/
1044static void
1045dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1046{
1047	dsl_dataset_phys_t *ds = data;
1048	time_t crtime;
1049	char used[6], compressed[6], uncompressed[6], unique[6];
1050	char blkbuf[BP_SPRINTF_LEN];
1051
1052	if (ds == NULL)
1053		return;
1054
1055	ASSERT(size == sizeof (*ds));
1056	crtime = ds->ds_creation_time;
1057	nicenum(ds->ds_used_bytes, used);
1058	nicenum(ds->ds_compressed_bytes, compressed);
1059	nicenum(ds->ds_uncompressed_bytes, uncompressed);
1060	nicenum(ds->ds_unique_bytes, unique);
1061	sprintf_blkptr(blkbuf, &ds->ds_bp);
1062
1063	(void) printf("\t\tdir_obj = %llu\n",
1064	    (u_longlong_t)ds->ds_dir_obj);
1065	(void) printf("\t\tprev_snap_obj = %llu\n",
1066	    (u_longlong_t)ds->ds_prev_snap_obj);
1067	(void) printf("\t\tprev_snap_txg = %llu\n",
1068	    (u_longlong_t)ds->ds_prev_snap_txg);
1069	(void) printf("\t\tnext_snap_obj = %llu\n",
1070	    (u_longlong_t)ds->ds_next_snap_obj);
1071	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1072	    (u_longlong_t)ds->ds_snapnames_zapobj);
1073	(void) printf("\t\tnum_children = %llu\n",
1074	    (u_longlong_t)ds->ds_num_children);
1075	(void) printf("\t\tuserrefs_obj = %llu\n",
1076	    (u_longlong_t)ds->ds_userrefs_obj);
1077	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1078	(void) printf("\t\tcreation_txg = %llu\n",
1079	    (u_longlong_t)ds->ds_creation_txg);
1080	(void) printf("\t\tdeadlist_obj = %llu\n",
1081	    (u_longlong_t)ds->ds_deadlist_obj);
1082	(void) printf("\t\tused_bytes = %s\n", used);
1083	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1084	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1085	(void) printf("\t\tunique = %s\n", unique);
1086	(void) printf("\t\tfsid_guid = %llu\n",
1087	    (u_longlong_t)ds->ds_fsid_guid);
1088	(void) printf("\t\tguid = %llu\n",
1089	    (u_longlong_t)ds->ds_guid);
1090	(void) printf("\t\tflags = %llx\n",
1091	    (u_longlong_t)ds->ds_flags);
1092	(void) printf("\t\tnext_clones_obj = %llu\n",
1093	    (u_longlong_t)ds->ds_next_clones_obj);
1094	(void) printf("\t\tprops_obj = %llu\n",
1095	    (u_longlong_t)ds->ds_props_obj);
1096	(void) printf("\t\tbp = %s\n", blkbuf);
1097}
1098
1099static void
1100dump_bplist(objset_t *mos, uint64_t object, char *name)
1101{
1102	bplist_t bpl = { 0 };
1103	blkptr_t blk, *bp = &blk;
1104	uint64_t itor = 0;
1105	char bytes[6];
1106	char comp[6];
1107	char uncomp[6];
1108
1109	if (dump_opt['d'] < 3)
1110		return;
1111
1112	bplist_init(&bpl);
1113	VERIFY(0 == bplist_open(&bpl, mos, object));
1114	if (bplist_empty(&bpl)) {
1115		bplist_close(&bpl);
1116		bplist_fini(&bpl);
1117		return;
1118	}
1119
1120	nicenum(bpl.bpl_phys->bpl_bytes, bytes);
1121	if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
1122		nicenum(bpl.bpl_phys->bpl_comp, comp);
1123		nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
1124		(void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
1125		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
1126		    bytes, comp, uncomp);
1127	} else {
1128		(void) printf("\n    %s: %llu entries, %s\n",
1129		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
1130	}
1131
1132	if (dump_opt['d'] < 5) {
1133		bplist_close(&bpl);
1134		bplist_fini(&bpl);
1135		return;
1136	}
1137
1138	(void) printf("\n");
1139
1140	while (bplist_iterate(&bpl, &itor, bp) == 0) {
1141		char blkbuf[BP_SPRINTF_LEN];
1142
1143		ASSERT(bp->blk_birth != 0);
1144		sprintf_blkptr_compact(blkbuf, bp);
1145		(void) printf("\tItem %3llu: %s\n",
1146		    (u_longlong_t)itor - 1, blkbuf);
1147	}
1148
1149	bplist_close(&bpl);
1150	bplist_fini(&bpl);
1151}
1152
1153static avl_tree_t idx_tree;
1154static avl_tree_t domain_tree;
1155static boolean_t fuid_table_loaded;
1156
1157static void
1158fuid_table_destroy()
1159{
1160	if (fuid_table_loaded) {
1161		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1162		fuid_table_loaded = B_FALSE;
1163	}
1164}
1165
1166/*
1167 * print uid or gid information.
1168 * For normal POSIX id just the id is printed in decimal format.
1169 * For CIFS files with FUID the fuid is printed in hex followed by
1170 * the doman-rid string.
1171 */
1172static void
1173print_idstr(uint64_t id, const char *id_type)
1174{
1175	if (FUID_INDEX(id)) {
1176		char *domain;
1177
1178		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1179		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1180		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1181	} else {
1182		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1183	}
1184
1185}
1186
1187static void
1188dump_uidgid(objset_t *os, znode_phys_t *zp)
1189{
1190	uint32_t uid_idx, gid_idx;
1191
1192	uid_idx = FUID_INDEX(zp->zp_uid);
1193	gid_idx = FUID_INDEX(zp->zp_gid);
1194
1195	/* Load domain table, if not already loaded */
1196	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1197		uint64_t fuid_obj;
1198
1199		/* first find the fuid object.  It lives in the master node */
1200		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1201		    8, 1, &fuid_obj) == 0);
1202		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1203		(void) zfs_fuid_table_load(os, fuid_obj,
1204		    &idx_tree, &domain_tree);
1205		fuid_table_loaded = B_TRUE;
1206	}
1207
1208	print_idstr(zp->zp_uid, "uid");
1209	print_idstr(zp->zp_gid, "gid");
1210}
1211
1212/*ARGSUSED*/
1213static void
1214dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1215{
1216	znode_phys_t *zp = data;
1217	time_t z_crtime, z_atime, z_mtime, z_ctime;
1218	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1219	int error;
1220
1221	ASSERT(size >= sizeof (znode_phys_t));
1222
1223	error = zfs_obj_to_path(os, object, path, sizeof (path));
1224	if (error != 0) {
1225		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1226		    (u_longlong_t)object);
1227	}
1228
1229	if (dump_opt['d'] < 3) {
1230		(void) printf("\t%s\n", path);
1231		return;
1232	}
1233
1234	z_crtime = (time_t)zp->zp_crtime[0];
1235	z_atime = (time_t)zp->zp_atime[0];
1236	z_mtime = (time_t)zp->zp_mtime[0];
1237	z_ctime = (time_t)zp->zp_ctime[0];
1238
1239	(void) printf("\tpath	%s\n", path);
1240	dump_uidgid(os, zp);
1241	(void) printf("\tatime	%s", ctime(&z_atime));
1242	(void) printf("\tmtime	%s", ctime(&z_mtime));
1243	(void) printf("\tctime	%s", ctime(&z_ctime));
1244	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1245	(void) printf("\tgen	%llu\n", (u_longlong_t)zp->zp_gen);
1246	(void) printf("\tmode	%llo\n", (u_longlong_t)zp->zp_mode);
1247	(void) printf("\tsize	%llu\n", (u_longlong_t)zp->zp_size);
1248	(void) printf("\tparent	%llu\n", (u_longlong_t)zp->zp_parent);
1249	(void) printf("\tlinks	%llu\n", (u_longlong_t)zp->zp_links);
1250	(void) printf("\txattr	%llu\n", (u_longlong_t)zp->zp_xattr);
1251	(void) printf("\trdev	0x%016llx\n", (u_longlong_t)zp->zp_rdev);
1252}
1253
/*
 * Object viewer for ACL objects.  Intentionally empty: it prints nothing
 * and exists so the ACL slots of object_viewer[] have a callable entry.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1259
/*
 * Object viewer for DMU objset objects.  Intentionally empty: it prints
 * nothing and exists so the objset slot of object_viewer[] is callable.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1265
/*
 * Table of per-type viewer callbacks.  dump_object() indexes it with
 * ZDB_OT_TYPE() applied to an object's type or bonus type and calls the
 * selected viewer.  Entries are positional, one per DMU object type in the
 * order given by the trailing comments, so the order must not be changed;
 * the extra final slot holds the viewer for unknown types.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
	dump_none,		/* unallocated			*/
	dump_zap,		/* object directory		*/
	dump_uint64,		/* object array			*/
	dump_none,		/* packed nvlist		*/
	dump_packed_nvlist,	/* packed nvlist size		*/
	dump_none,		/* bplist			*/
	dump_none,		/* bplist header		*/
	dump_none,		/* SPA space map header		*/
	dump_none,		/* SPA space map		*/
	dump_none,		/* ZIL intent log		*/
	dump_dnode,		/* DMU dnode			*/
	dump_dmu_objset,	/* DMU objset			*/
	dump_dsl_dir,		/* DSL directory		*/
	dump_zap,		/* DSL directory child map	*/
	dump_zap,		/* DSL dataset snap map		*/
	dump_zap,		/* DSL props			*/
	dump_dsl_dataset,	/* DSL dataset			*/
	dump_znode,		/* ZFS znode			*/
	dump_acl,		/* ZFS V0 ACL			*/
	dump_uint8,		/* ZFS plain file		*/
	dump_zpldir,		/* ZFS directory		*/
	dump_zap,		/* ZFS master node		*/
	dump_zap,		/* ZFS delete queue		*/
	dump_uint8,		/* zvol object			*/
	dump_zap,		/* zvol prop			*/
	dump_uint8,		/* other uint8[]		*/
	dump_uint64,		/* other uint64[]		*/
	dump_zap,		/* other ZAP			*/
	dump_zap,		/* persistent error log		*/
	dump_uint8,		/* SPA history			*/
	dump_uint64,		/* SPA history offsets		*/
	dump_zap,		/* Pool properties		*/
	dump_zap,		/* DSL permissions		*/
	dump_acl,		/* ZFS ACL			*/
	dump_uint8,		/* ZFS SYSACL			*/
	dump_none,		/* FUID nvlist			*/
	dump_packed_nvlist,	/* FUID nvlist size		*/
	dump_zap,		/* DSL dataset next clones	*/
	dump_zap,		/* DSL scrub queue		*/
	dump_zap,		/* ZFS user/group used		*/
	dump_zap,		/* ZFS user/group quota		*/
	dump_zap,		/* snapshot refcount tags	*/
	dump_none,		/* DDT ZAP object		*/
	dump_zap,		/* DDT statistics		*/
	dump_unknown		/* Unknown type, must be last	*/
};
1313
1314static void
1315dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1316{
1317	dmu_buf_t *db = NULL;
1318	dmu_object_info_t doi;
1319	dnode_t *dn;
1320	void *bonus = NULL;
1321	size_t bsize = 0;
1322	char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], fill[7];
1323	char aux[50];
1324	int error;
1325
1326	if (*print_header) {
1327		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1328		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1329		    "%full", "type");
1330		*print_header = 0;
1331	}
1332
1333	if (object == 0) {
1334		dn = os->os_meta_dnode;
1335	} else {
1336		error = dmu_bonus_hold(os, object, FTAG, &db);
1337		if (error)
1338			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1339			    object, error);
1340		bonus = db->db_data;
1341		bsize = db->db_size;
1342		dn = ((dmu_buf_impl_t *)db)->db_dnode;
1343	}
1344	dmu_object_info_from_dnode(dn, &doi);
1345
1346	nicenum(doi.doi_metadata_block_size, iblk);
1347	nicenum(doi.doi_data_block_size, dblk);
1348	nicenum(doi.doi_max_offset, lsize);
1349	nicenum(doi.doi_physical_blocks_512 << 9, asize);
1350	nicenum(doi.doi_bonus_size, bonus_size);
1351	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1352	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1353	    doi.doi_max_offset);
1354
1355	aux[0] = '\0';
1356
1357	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1358		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1359		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1360	}
1361
1362	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1363		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1364		    ZDB_COMPRESS_NAME(doi.doi_compress));
1365	}
1366
1367	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1368	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1369	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1370
1371	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1372		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1373		    "", "", "", "", "", bonus_size, "bonus",
1374		    ZDB_OT_NAME(doi.doi_bonus_type));
1375	}
1376
1377	if (verbosity >= 4) {
1378		(void) printf("\tdnode flags: %s%s\n",
1379		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1380		    "USED_BYTES " : "",
1381		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1382		    "USERUSED_ACCOUNTED " : "");
1383		(void) printf("\tdnode maxblkid: %llu\n",
1384		    (longlong_t)dn->dn_phys->dn_maxblkid);
1385
1386		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1387		    bonus, bsize);
1388		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1389		*print_header = 1;
1390	}
1391
1392	if (verbosity >= 5)
1393		dump_indirect(dn);
1394
1395	if (verbosity >= 5) {
1396		/*
1397		 * Report the list of segments that comprise the object.
1398		 */
1399		uint64_t start = 0;
1400		uint64_t end;
1401		uint64_t blkfill = 1;
1402		int minlvl = 1;
1403
1404		if (dn->dn_type == DMU_OT_DNODE) {
1405			minlvl = 0;
1406			blkfill = DNODES_PER_BLOCK;
1407		}
1408
1409		for (;;) {
1410			char segsize[6];
1411			error = dnode_next_offset(dn,
1412			    0, &start, minlvl, blkfill, 0);
1413			if (error)
1414				break;
1415			end = start;
1416			error = dnode_next_offset(dn,
1417			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1418			nicenum(end - start, segsize);
1419			(void) printf("\t\tsegment [%016llx, %016llx)"
1420			    " size %5s\n", (u_longlong_t)start,
1421			    (u_longlong_t)end, segsize);
1422			if (error)
1423				break;
1424			start = end;
1425		}
1426	}
1427
1428	if (db != NULL)
1429		dmu_buf_rele(db, FTAG);
1430}
1431
1432static char *objset_types[DMU_OST_NUMTYPES] = {
1433	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1434
1435static void
1436dump_dir(objset_t *os)
1437{
1438	dmu_objset_stats_t dds;
1439	uint64_t object, object_count;
1440	uint64_t refdbytes, usedobjs, scratch;
1441	char numbuf[8];
1442	char blkbuf[BP_SPRINTF_LEN + 20];
1443	char osname[MAXNAMELEN];
1444	char *type = "UNKNOWN";
1445	int verbosity = dump_opt['d'];
1446	int print_header = 1;
1447	int i, error;
1448
1449	dmu_objset_fast_stat(os, &dds);
1450
1451	if (dds.dds_type < DMU_OST_NUMTYPES)
1452		type = objset_types[dds.dds_type];
1453
1454	if (dds.dds_type == DMU_OST_META) {
1455		dds.dds_creation_txg = TXG_INITIAL;
1456		usedobjs = os->os_rootbp->blk_fill;
1457		refdbytes = os->os_spa->spa_dsl_pool->
1458		    dp_mos_dir->dd_phys->dd_used_bytes;
1459	} else {
1460		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1461	}
1462
1463	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1464
1465	nicenum(refdbytes, numbuf);
1466
1467	if (verbosity >= 4) {
1468		(void) sprintf(blkbuf, ", rootbp ");
1469		(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
1470	} else {
1471		blkbuf[0] = '\0';
1472	}
1473
1474	dmu_objset_name(os, osname);
1475
1476	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1477	    "%s, %llu objects%s\n",
1478	    osname, type, (u_longlong_t)dmu_objset_id(os),
1479	    (u_longlong_t)dds.dds_creation_txg,
1480	    numbuf, (u_longlong_t)usedobjs, blkbuf);
1481
1482	if (zopt_objects != 0) {
1483		for (i = 0; i < zopt_objects; i++)
1484			dump_object(os, zopt_object[i], verbosity,
1485			    &print_header);
1486		(void) printf("\n");
1487		return;
1488	}
1489
1490	if (dump_opt['i'] != 0 || verbosity >= 2)
1491		dump_intent_log(dmu_objset_zil(os));
1492
1493	if (dmu_objset_ds(os) != NULL)
1494		dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
1495		    dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
1496
1497	if (verbosity < 2)
1498		return;
1499
1500	if (os->os_rootbp->blk_birth == 0)
1501		return;
1502
1503	dump_object(os, 0, verbosity, &print_header);
1504	object_count = 0;
1505	if (os->os_userused_dnode &&
1506	    os->os_userused_dnode->dn_type != 0) {
1507		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1508		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1509	}
1510
1511	object = 0;
1512	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1513		dump_object(os, object, verbosity, &print_header);
1514		object_count++;
1515	}
1516
1517	ASSERT3U(object_count, ==, usedobjs);
1518
1519	(void) printf("\n");
1520
1521	if (error != ESRCH) {
1522		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1523		abort();
1524	}
1525}
1526
1527static void
1528dump_uberblock(uberblock_t *ub)
1529{
1530	time_t timestamp = ub->ub_timestamp;
1531
1532	(void) printf("\nUberblock:\n");
1533	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1534	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1535	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1536	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1537	(void) printf("\ttimestamp = %llu UTC = %s",
1538	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1539	if (dump_opt['u'] >= 3) {
1540		char blkbuf[BP_SPRINTF_LEN];
1541		sprintf_blkptr(blkbuf, &ub->ub_rootbp);
1542		(void) printf("\trootbp = %s\n", blkbuf);
1543	}
1544	(void) printf("\n");
1545}
1546
1547static void
1548dump_config(spa_t *spa)
1549{
1550	dmu_buf_t *db;
1551	size_t nvsize = 0;
1552	int error = 0;
1553
1554
1555	error = dmu_bonus_hold(spa->spa_meta_objset,
1556	    spa->spa_config_object, FTAG, &db);
1557
1558	if (error == 0) {
1559		nvsize = *(uint64_t *)db->db_data;
1560		dmu_buf_rele(db, FTAG);
1561
1562		(void) printf("\nMOS Configuration:\n");
1563		dump_packed_nvlist(spa->spa_meta_objset,
1564		    spa->spa_config_object, (void *)&nvsize, 1);
1565	} else {
1566		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1567		    (u_longlong_t)spa->spa_config_object, error);
1568	}
1569}
1570
1571static void
1572dump_cachefile(const char *cachefile)
1573{
1574	int fd;
1575	struct stat64 statbuf;
1576	char *buf;
1577	nvlist_t *config;
1578
1579	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1580		(void) printf("cannot open '%s': %s\n", cachefile,
1581		    strerror(errno));
1582		exit(1);
1583	}
1584
1585	if (fstat64(fd, &statbuf) != 0) {
1586		(void) printf("failed to stat '%s': %s\n", cachefile,
1587		    strerror(errno));
1588		exit(1);
1589	}
1590
1591	if ((buf = malloc(statbuf.st_size)) == NULL) {
1592		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
1593		    (u_longlong_t)statbuf.st_size);
1594		exit(1);
1595	}
1596
1597	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1598		(void) fprintf(stderr, "failed to read %llu bytes\n",
1599		    (u_longlong_t)statbuf.st_size);
1600		exit(1);
1601	}
1602
1603	(void) close(fd);
1604
1605	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1606		(void) fprintf(stderr, "failed to unpack nvlist\n");
1607		exit(1);
1608	}
1609
1610	free(buf);
1611
1612	dump_nvlist(config, 0);
1613
1614	nvlist_free(config);
1615}
1616
1617static void
1618dump_label(const char *dev)
1619{
1620	int fd;
1621	vdev_label_t label;
1622	char *buf = label.vl_vdev_phys.vp_nvlist;
1623	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1624	struct stat64 statbuf;
1625	uint64_t psize;
1626	int l;
1627
1628	if ((fd = open64(dev, O_RDONLY)) < 0) {
1629		(void) printf("cannot open '%s': %s\n", dev, strerror(errno));
1630		exit(1);
1631	}
1632
1633	if (fstat64(fd, &statbuf) != 0) {
1634		(void) printf("failed to stat '%s': %s\n", dev,
1635		    strerror(errno));
1636		exit(1);
1637	}
1638
1639	psize = statbuf.st_size;
1640	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1641
1642	for (l = 0; l < VDEV_LABELS; l++) {
1643
1644		nvlist_t *config = NULL;
1645
1646		(void) printf("--------------------------------------------\n");
1647		(void) printf("LABEL %d\n", l);
1648		(void) printf("--------------------------------------------\n");
1649
1650		if (pread64(fd, &label, sizeof (label),
1651		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1652			(void) printf("failed to read label %d\n", l);
1653			continue;
1654		}
1655
1656		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1657			(void) printf("failed to unpack label %d\n", l);
1658			continue;
1659		}
1660		dump_nvlist(config, 4);
1661		nvlist_free(config);
1662	}
1663}
1664
1665/*ARGSUSED*/
1666static int
1667dump_one_dir(char *dsname, void *arg)
1668{
1669	int error;
1670	objset_t *os;
1671
1672	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
1673	if (error) {
1674		(void) printf("Could not open %s, error %d\n", dsname, error);
1675		return (0);
1676	}
1677	dump_dir(os);
1678	dmu_objset_disown(os, FTAG);
1679	fuid_table_destroy();
1680	return (0);
1681}
1682
/*
 * Block statistics.
 *
 * One accumulator cell: running sums of the allocated, logical and
 * physical sizes of the block pointers counted into it, plus how many
 * block pointers that was.
 */
typedef struct zdb_blkstats {
	uint64_t	zb_asize;	/* sum of BP_GET_ASIZE() */
	uint64_t	zb_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zb_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zb_count;	/* number of block pointers counted */
} zdb_blkstats_t;
1692
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 * They are numbered directly after the real DMU object types, and
 * ZDB_OT_TOTAL is the extra column used for totals across all types.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 2)

/*
 * Display names for the extended types, indexed by
 * (type - DMU_OT_NUMTYPES); must stay in the same order as the defines.
 */
static char *zdb_ot_extname[] = {
	"deferred free",
	"dedup ditto",
	"Total",
};
1705
/* Extra level index used for statistics summed over all levels. */
#define	ZB_TOTAL	DN_MAX_LEVELS

/*
 * State shared by the block traversal callbacks.
 */
typedef struct zdb_cb {
	/* Statistics indexed by [level][type], including the total slots. */
	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
	/* Allocated size attributed to dedup references beyond the first. */
	uint64_t	zcb_dedup_asize;
	/* Count of non-ditto dedup entries visited by zdb_ddt_leak_init(). */
	uint64_t	zcb_dedup_blocks;
	/* Number of read failures seen, indexed by error number. */
	uint64_t	zcb_errors[256];
	/* Reset to 0 by zdb_blkptr_cb() on every block it processes. */
	int		zcb_readfails;
	/* Set to 1 once any non-speculative read has failed. */
	int		zcb_haderrors;
} zdb_cb_t;
1716
1717static void
1718zdb_count_block(spa_t *spa, zilog_t *zilog, zdb_cb_t *zcb, const blkptr_t *bp,
1719    dmu_object_type_t type)
1720{
1721	uint64_t refcnt = 0;
1722
1723	ASSERT(type < ZDB_OT_TOTAL);
1724
1725	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
1726		return;
1727
1728	for (int i = 0; i < 4; i++) {
1729		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1730		int t = (i & 1) ? type : ZDB_OT_TOTAL;
1731		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1732
1733		zb->zb_asize += BP_GET_ASIZE(bp);
1734		zb->zb_lsize += BP_GET_LSIZE(bp);
1735		zb->zb_psize += BP_GET_PSIZE(bp);
1736		zb->zb_count++;
1737	}
1738
1739	if (dump_opt['L'])
1740		return;
1741
1742	if (BP_GET_DEDUP(bp)) {
1743		ddt_t *ddt;
1744		ddt_entry_t *dde;
1745
1746		ddt = ddt_select(spa, bp);
1747		ddt_enter(ddt);
1748		dde = ddt_lookup(ddt, bp, B_FALSE);
1749
1750		if (dde == NULL) {
1751			refcnt = 0;
1752		} else {
1753			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
1754			ddt_phys_decref(ddp);
1755			refcnt = ddp->ddp_refcnt;
1756			if (ddt_phys_total_refcnt(dde) == 0)
1757				ddt_remove(ddt, dde);
1758		}
1759		ddt_exit(ddt);
1760	}
1761
1762	VERIFY3U(zio_wait(zio_claim(NULL, spa,
1763	    refcnt ? 0 : spa_first_txg(spa),
1764	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
1765}
1766
1767static int
1768zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
1769    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1770{
1771	zdb_cb_t *zcb = arg;
1772	char blkbuf[BP_SPRINTF_LEN];
1773	dmu_object_type_t type;
1774	boolean_t is_metadata;
1775
1776	if (bp == NULL)
1777		return (0);
1778
1779	type = BP_GET_TYPE(bp);
1780
1781	zdb_count_block(spa, zilog, zcb, bp, type);
1782
1783	is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
1784
1785	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
1786		int ioerr;
1787		size_t size = BP_GET_PSIZE(bp);
1788		void *data = malloc(size);
1789		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
1790
1791		/* If it's an intent log block, failure is expected. */
1792		if (zb->zb_level == ZB_ZIL_LEVEL)
1793			flags |= ZIO_FLAG_SPECULATIVE;
1794
1795		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
1796		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
1797
1798		free(data);
1799
1800		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
1801			zcb->zcb_haderrors = 1;
1802			zcb->zcb_errors[ioerr]++;
1803
1804			if (dump_opt['b'] >= 2)
1805				sprintf_blkptr(blkbuf, bp);
1806			else
1807				blkbuf[0] = '\0';
1808
1809			(void) printf("zdb_blkptr_cb: "
1810			    "Got error %d reading "
1811			    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
1812			    ioerr,
1813			    (u_longlong_t)zb->zb_objset,
1814			    (u_longlong_t)zb->zb_object,
1815			    (u_longlong_t)zb->zb_level,
1816			    (u_longlong_t)zb->zb_blkid,
1817			    blkbuf);
1818		}
1819	}
1820
1821	zcb->zcb_readfails = 0;
1822
1823	if (dump_opt['b'] >= 4) {
1824		sprintf_blkptr(blkbuf, bp);
1825		(void) printf("objset %llu object %llu "
1826		    "level %lld offset 0x%llx %s\n",
1827		    (u_longlong_t)zb->zb_objset,
1828		    (u_longlong_t)zb->zb_object,
1829		    (longlong_t)zb->zb_level,
1830		    (u_longlong_t)blkid2offset(dnp, bp, zb),
1831		    blkbuf);
1832	}
1833
1834	return (0);
1835}
1836
1837static void
1838zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
1839{
1840	vdev_t *vd = sm->sm_ppd;
1841
1842	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
1843	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
1844}
1845
/*
 * Load callback of zdb_space_map_ops.  Intentionally empty: nothing
 * needs to happen when a space map is loaded.
 */
/* ARGSUSED */
static void
zdb_space_map_load(space_map_t *sm)
{
}
1851
/*
 * Unload callback of zdb_space_map_ops.  Vacates the map with zdb_leak()
 * as the callback and the map itself as its argument, which is how
 * leftover space gets reported as leaked.
 * NOTE(review): presumably space_map_vacate() calls back once per
 * remaining segment - confirm against its definition.
 */
static void
zdb_space_map_unload(space_map_t *sm)
{
	space_map_vacate(sm, zdb_leak, sm);
}
1857
/*
 * Claim callback of zdb_space_map_ops.  Intentionally empty: no extra
 * bookkeeping is done when a range is claimed.
 */
/* ARGSUSED */
static void
zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
}
1863
/*
 * Space map operations installed by zdb_leak_init() when the space maps
 * are reloaded for leak detection.  Only load, unload and claim are
 * provided; the remaining slots are left NULL.
 */
static space_map_ops_t zdb_space_map_ops = {
	zdb_space_map_load,
	zdb_space_map_unload,
	NULL,	/* alloc */
	zdb_space_map_claim,
	NULL,	/* free */
	NULL	/* maxsize */
};
1872
1873static void
1874zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
1875{
1876	ddt_bookmark_t ddb = { 0 };
1877	ddt_entry_t dde;
1878	int error;
1879
1880	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
1881		blkptr_t blk;
1882		ddt_phys_t *ddp = dde.dde_phys;
1883
1884		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
1885			return;
1886
1887		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
1888
1889		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1890			if (ddp->ddp_phys_birth == 0)
1891				continue;
1892			ddt_bp_create(ddb.ddb_checksum,
1893			    &dde.dde_key, ddp, &blk);
1894			if (p == DDT_PHYS_DITTO) {
1895				zdb_count_block(spa, NULL, zcb, &blk,
1896				    ZDB_OT_DITTO);
1897			} else {
1898				zcb->zcb_dedup_asize +=
1899				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
1900				zcb->zcb_dedup_blocks++;
1901			}
1902		}
1903		if (!dump_opt['L']) {
1904			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
1905			ddt_enter(ddt);
1906			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
1907			ddt_exit(ddt);
1908		}
1909	}
1910
1911	ASSERT(error == ENOENT);
1912}
1913
1914static void
1915zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
1916{
1917	if (!dump_opt['L']) {
1918		vdev_t *rvd = spa->spa_root_vdev;
1919		for (int c = 0; c < rvd->vdev_children; c++) {
1920			vdev_t *vd = rvd->vdev_child[c];
1921			for (int m = 0; m < vd->vdev_ms_count; m++) {
1922				metaslab_t *msp = vd->vdev_ms[m];
1923				mutex_enter(&msp->ms_lock);
1924				space_map_unload(&msp->ms_map);
1925				VERIFY(space_map_load(&msp->ms_map,
1926				    &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
1927				    spa->spa_meta_objset) == 0);
1928				msp->ms_map.sm_ppd = vd;
1929				mutex_exit(&msp->ms_lock);
1930			}
1931		}
1932	}
1933
1934	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
1935
1936	zdb_ddt_leak_init(spa, zcb);
1937
1938	spa_config_exit(spa, SCL_CONFIG, FTAG);
1939}
1940
1941static void
1942zdb_leak_fini(spa_t *spa)
1943{
1944	if (!dump_opt['L']) {
1945		vdev_t *rvd = spa->spa_root_vdev;
1946		for (int c = 0; c < rvd->vdev_children; c++) {
1947			vdev_t *vd = rvd->vdev_child[c];
1948			for (int m = 0; m < vd->vdev_ms_count; m++) {
1949				metaslab_t *msp = vd->vdev_ms[m];
1950				mutex_enter(&msp->ms_lock);
1951				space_map_unload(&msp->ms_map);
1952				mutex_exit(&msp->ms_lock);
1953			}
1954		}
1955	}
1956}
1957
1958static int
1959dump_block_stats(spa_t *spa)
1960{
1961	zdb_cb_t zcb = { 0 };
1962	zdb_blkstats_t *zb, *tzb;
1963	uint64_t norm_alloc, norm_space, total_alloc, total_found;
1964	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA;
1965	int leaks = 0;
1966
1967	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
1968	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
1969	    (dump_opt['c'] == 1) ? "metadata " : "",
1970	    dump_opt['c'] ? "checksums " : "",
1971	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
1972	    !dump_opt['L'] ? "nothing leaked " : "");
1973
1974	/*
1975	 * Load all space maps as SM_ALLOC maps, then traverse the pool
1976	 * claiming each block we discover.  If the pool is perfectly
1977	 * consistent, the space maps will be empty when we're done.
1978	 * Anything left over is a leak; any block we can't claim (because
1979	 * it's not part of any space map) is a double allocation,
1980	 * reference to a freed block, or an unclaimed log block.
1981	 */
1982	zdb_leak_init(spa, &zcb);
1983
1984	/*
1985	 * If there's a deferred-free bplist, process that first.
1986	 */
1987	if (spa->spa_deferred_bplist_obj != 0) {
1988		bplist_t *bpl = &spa->spa_deferred_bplist;
1989		blkptr_t blk;
1990		uint64_t itor = 0;
1991
1992		VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
1993		    spa->spa_deferred_bplist_obj));
1994
1995		while (bplist_iterate(bpl, &itor, &blk) == 0) {
1996			if (dump_opt['b'] >= 4) {
1997				char blkbuf[BP_SPRINTF_LEN];
1998				sprintf_blkptr(blkbuf, &blk);
1999				(void) printf("[%s] %s\n",
2000				    "deferred free", blkbuf);
2001			}
2002			zdb_count_block(spa, NULL, &zcb, &blk, ZDB_OT_DEFERRED);
2003		}
2004
2005		bplist_close(bpl);
2006	}
2007
2008	if (dump_opt['c'] > 1)
2009		flags |= TRAVERSE_PREFETCH_DATA;
2010
2011	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2012
2013	if (zcb.zcb_haderrors) {
2014		(void) printf("\nError counts:\n\n");
2015		(void) printf("\t%5s  %s\n", "errno", "count");
2016		for (int e = 0; e < 256; e++) {
2017			if (zcb.zcb_errors[e] != 0) {
2018				(void) printf("\t%5d  %llu\n",
2019				    e, (u_longlong_t)zcb.zcb_errors[e]);
2020			}
2021		}
2022	}
2023
2024	/*
2025	 * Report any leaked segments.
2026	 */
2027	zdb_leak_fini(spa);
2028
2029	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2030
2031	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2032	norm_space = metaslab_class_get_space(spa_normal_class(spa));
2033
2034	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2035	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2036
2037	if (total_found == total_alloc) {
2038		if (!dump_opt['L'])
2039			(void) printf("\n\tNo leaks (block sum matches space"
2040			    " maps exactly)\n");
2041	} else {
2042		(void) printf("block traversal size %llu != alloc %llu "
2043		    "(%s %lld)\n",
2044		    (u_longlong_t)total_found,
2045		    (u_longlong_t)total_alloc,
2046		    (dump_opt['L']) ? "unreachable" : "leaked",
2047		    (longlong_t)(total_alloc - total_found));
2048		leaks = 1;
2049	}
2050
2051	if (tzb->zb_count == 0)
2052		return (2);
2053
2054	(void) printf("\n");
2055	(void) printf("\tbp count:      %10llu\n",
2056	    (u_longlong_t)tzb->zb_count);
2057	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
2058	    (u_longlong_t)tzb->zb_lsize,
2059	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
2060	(void) printf("\tbp physical:   %10llu      avg:"
2061	    " %6llu     compression: %6.2f\n",
2062	    (u_longlong_t)tzb->zb_psize,
2063	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
2064	    (double)tzb->zb_lsize / tzb->zb_psize);
2065	(void) printf("\tbp allocated:  %10llu      avg:"
2066	    " %6llu     compression: %6.2f\n",
2067	    (u_longlong_t)tzb->zb_asize,
2068	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
2069	    (double)tzb->zb_lsize / tzb->zb_asize);
2070	(void) printf("\tbp deduped:    %10llu    ref>1:"
2071	    " %6llu   deduplication: %6.2f\n",
2072	    (u_longlong_t)zcb.zcb_dedup_asize,
2073	    (u_longlong_t)zcb.zcb_dedup_blocks,
2074	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
2075	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
2076	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
2077
2078	if (dump_opt['b'] >= 2) {
2079		int l, t, level;
2080		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2081		    "\t  avg\t comp\t%%Total\tType\n");
2082
2083		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
2084			char csize[6], lsize[6], psize[6], asize[6], avg[6];
2085			char *typename;
2086
2087			if (t < DMU_OT_NUMTYPES)
2088				typename = dmu_ot[t].ot_name;
2089			else
2090				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
2091
2092			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
2093				(void) printf("%6s\t%5s\t%5s\t%5s"
2094				    "\t%5s\t%5s\t%6s\t%s\n",
2095				    "-",
2096				    "-",
2097				    "-",
2098				    "-",
2099				    "-",
2100				    "-",
2101				    "-",
2102				    typename);
2103				continue;
2104			}
2105
2106			for (l = ZB_TOTAL - 1; l >= -1; l--) {
2107				level = (l == -1 ? ZB_TOTAL : l);
2108				zb = &zcb.zcb_type[level][t];
2109
2110				if (zb->zb_asize == 0)
2111					continue;
2112
2113				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
2114					continue;
2115
2116				if (level == 0 && zb->zb_asize ==
2117				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
2118					continue;
2119
2120				nicenum(zb->zb_count, csize);
2121				nicenum(zb->zb_lsize, lsize);
2122				nicenum(zb->zb_psize, psize);
2123				nicenum(zb->zb_asize, asize);
2124				nicenum(zb->zb_asize / zb->zb_count, avg);
2125
2126				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
2127				    "\t%5.2f\t%6.2f\t",
2128				    csize, lsize, psize, asize, avg,
2129				    (double)zb->zb_lsize / zb->zb_psize,
2130				    100.0 * zb->zb_asize / tzb->zb_asize);
2131
2132				if (level == ZB_TOTAL)
2133					(void) printf("%s\n", typename);
2134				else
2135					(void) printf("    L%d %s\n",
2136					    level, typename);
2137			}
2138		}
2139	}
2140
2141	(void) printf("\n");
2142
2143	if (leaks)
2144		return (2);
2145
2146	if (zcb.zcb_haderrors)
2147		return (3);
2148
2149	return (0);
2150}
2151
/*
 * One node of the AVL tree built by zdb_ddt_add_cb(): the dedup key of a
 * block together with running totals over every block pointer seen with
 * that key.
 */
typedef struct zdb_ddt_entry {
	ddt_key_t	zdde_key;		/* key filled from the bp */
	uint64_t	zdde_ref_blocks;	/* block pointers seen */
	uint64_t	zdde_ref_lsize;		/* sum of BP_GET_LSIZE() */
	uint64_t	zdde_ref_psize;		/* sum of BP_GET_PSIZE() */
	uint64_t	zdde_ref_dsize;		/* sum of bp_get_dsize_sync() */
	avl_node_t	zdde_node;		/* linkage in the AVL tree */
} zdb_ddt_entry_t;
2160
2161/* ARGSUSED */
2162static int
2163zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2164    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2165{
2166	avl_tree_t *t = arg;
2167	avl_index_t where;
2168	zdb_ddt_entry_t *zdde, zdde_search;
2169
2170	if (bp == NULL)
2171		return (0);
2172
2173	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2174		(void) printf("traversing objset %llu, %llu objects, "
2175		    "%lu blocks so far\n",
2176		    (u_longlong_t)zb->zb_objset,
2177		    (u_longlong_t)bp->blk_fill,
2178		    avl_numnodes(t));
2179	}
2180
2181	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2182	    BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
2183		return (0);
2184
2185	ddt_key_fill(&zdde_search.zdde_key, bp);
2186
2187	zdde = avl_find(t, &zdde_search, &where);
2188
2189	if (zdde == NULL) {
2190		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2191		zdde->zdde_key = zdde_search.zdde_key;
2192		avl_insert(t, zdde, where);
2193	}
2194
2195	zdde->zdde_ref_blocks += 1;
2196	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2197	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2198	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2199
2200	return (0);
2201}
2202
2203static void
2204dump_simulated_ddt(spa_t *spa)
2205{
2206	avl_tree_t t;
2207	void *cookie = NULL;
2208	zdb_ddt_entry_t *zdde;
2209	ddt_histogram_t ddh_total = { 0 };
2210	ddt_stat_t dds_total = { 0 };
2211
2212	avl_create(&t, ddt_entry_compare,
2213	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2214
2215	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2216
2217	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2218	    zdb_ddt_add_cb, &t);
2219
2220	spa_config_exit(spa, SCL_CONFIG, FTAG);
2221
2222	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2223		ddt_stat_t dds;
2224		uint64_t refcnt = zdde->zdde_ref_blocks;
2225		ASSERT(refcnt != 0);
2226
2227		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2228		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2229		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2230		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2231
2232		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2233		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2234		dds.dds_ref_psize = zdde->zdde_ref_psize;
2235		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2236
2237		ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
2238
2239		umem_free(zdde, sizeof (*zdde));
2240	}
2241
2242	avl_destroy(&t);
2243
2244	ddt_histogram_stat(&dds_total, &ddh_total);
2245
2246	(void) printf("Simulated DDT histogram:\n");
2247
2248	dump_ddt_histogram(&ddh_total);
2249
2250	dump_dedup_ratio(&dds_total);
2251}
2252
2253static void
2254dump_zpool(spa_t *spa)
2255{
2256	dsl_pool_t *dp = spa_get_dsl(spa);
2257	int rc = 0;
2258
2259	if (dump_opt['S']) {
2260		dump_simulated_ddt(spa);
2261		return;
2262	}
2263
2264	if (!dump_opt['e'] && dump_opt['C'] > 1) {
2265		(void) printf("\nCached configuration:\n");
2266		dump_nvlist(spa->spa_config, 8);
2267	}
2268
2269	if (dump_opt['C'])
2270		dump_config(spa);
2271
2272	if (dump_opt['u'])
2273		dump_uberblock(&spa->spa_uberblock);
2274
2275	if (dump_opt['D'])
2276		dump_all_ddts(spa);
2277
2278	if (dump_opt['d'] > 2 || dump_opt['m'])
2279		dump_metaslabs(spa);
2280
2281	if (dump_opt['d'] || dump_opt['i']) {
2282		dump_dir(dp->dp_meta_objset);
2283		if (dump_opt['d'] >= 3) {
2284			dump_bplist(dp->dp_meta_objset,
2285			    spa->spa_deferred_bplist_obj, "Deferred frees");
2286			dump_dtl(spa->spa_root_vdev, 0);
2287		}
2288		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
2289		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2290	}
2291	if (dump_opt['b'] || dump_opt['c'])
2292		rc = dump_block_stats(spa);
2293
2294	if (dump_opt['s'])
2295		show_pool_stats(spa);
2296
2297	if (dump_opt['h'])
2298		dump_history(spa);
2299
2300	if (rc != 0)
2301		exit(rc);
2302}
2303
/*
 * Option bits for the -R block reader.  Each bit corresponds to one flag
 * character in the block descriptor; main() fills in the character-to-bit
 * mapping in flagbits[] before calling zdb_read_block().
 */
#define	ZDB_FLAG_CHECKSUM	0x0001	/* 'c': checksums (not yet implemented) */
#define	ZDB_FLAG_DECOMPRESS	0x0002	/* 'd': decompress before dumping */
#define	ZDB_FLAG_BSWAP		0x0004	/* 'e': byteswap before dumping */
#define	ZDB_FLAG_GBH		0x0008	/* 'g': treat as gang block header */
#define	ZDB_FLAG_INDIRECT	0x0010	/* 'i': treat as indirect block */
#define	ZDB_FLAG_PHYS		0x0020	/* 'p': I/O to physical offset */
#define	ZDB_FLAG_RAW		0x0040	/* 'r': raw bytes to stdout */
#define	ZDB_FLAG_PRINT_BLKPTR	0x0080	/* 'b': decode a blkptr in the block */

/* Indexed by flag character; 0 means the character is not a valid flag. */
int flagbits[256];
2314
2315static void
2316zdb_print_blkptr(blkptr_t *bp, int flags)
2317{
2318	char blkbuf[BP_SPRINTF_LEN];
2319
2320	if (flags & ZDB_FLAG_BSWAP)
2321		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2322
2323	sprintf_blkptr(blkbuf, bp);
2324	(void) printf("%s\n", blkbuf);
2325}
2326
2327static void
2328zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2329{
2330	int i;
2331
2332	for (i = 0; i < nbps; i++)
2333		zdb_print_blkptr(&bp[i], flags);
2334}
2335
2336static void
2337zdb_dump_gbh(void *buf, int flags)
2338{
2339	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2340}
2341
2342static void
2343zdb_dump_block_raw(void *buf, uint64_t size, int flags)
2344{
2345	if (flags & ZDB_FLAG_BSWAP)
2346		byteswap_uint64_array(buf, size);
2347	(void) write(1, buf, size);
2348}
2349
2350static void
2351zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2352{
2353	uint64_t *d = (uint64_t *)buf;
2354	int nwords = size / sizeof (uint64_t);
2355	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2356	int i, j;
2357	char *hdr, *c;
2358
2359
2360	if (do_bswap)
2361		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
2362	else
2363		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
2364
2365	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
2366
2367	for (i = 0; i < nwords; i += 2) {
2368		(void) printf("%06llx:  %016llx  %016llx  ",
2369		    (u_longlong_t)(i * sizeof (uint64_t)),
2370		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
2371		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
2372
2373		c = (char *)&d[i];
2374		for (j = 0; j < 2 * sizeof (uint64_t); j++)
2375			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
2376		(void) printf("\n");
2377	}
2378}
2379
2380/*
2381 * There are two acceptable formats:
2382 *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
2383 *	child[.child]*    - For example: 0.1.1
2384 *
2385 * The second form can be used to specify arbitrary vdevs anywhere
2386 * in the heirarchy.  For example, in a pool with a mirror of
2387 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
2388 */
2389static vdev_t *
2390zdb_vdev_lookup(vdev_t *vdev, char *path)
2391{
2392	char *s, *p, *q;
2393	int i;
2394
2395	if (vdev == NULL)
2396		return (NULL);
2397
2398	/* First, assume the x.x.x.x format */
2399	i = (int)strtoul(path, &s, 10);
2400	if (s == path || (s && *s != '.' && *s != '\0'))
2401		goto name;
2402	if (i < 0 || i >= vdev->vdev_children)
2403		return (NULL);
2404
2405	vdev = vdev->vdev_child[i];
2406	if (*s == '\0')
2407		return (vdev);
2408	return (zdb_vdev_lookup(vdev, s+1));
2409
2410name:
2411	for (i = 0; i < vdev->vdev_children; i++) {
2412		vdev_t *vc = vdev->vdev_child[i];
2413
2414		if (vc->vdev_path == NULL) {
2415			vc = zdb_vdev_lookup(vc, path);
2416			if (vc == NULL)
2417				continue;
2418			else
2419				return (vc);
2420		}
2421
2422		p = strrchr(vc->vdev_path, '/');
2423		p = p ? p + 1 : vc->vdev_path;
2424		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
2425
2426		if (strcmp(vc->vdev_path, path) == 0)
2427			return (vc);
2428		if (strcmp(p, path) == 0)
2429			return (vc);
2430		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
2431			return (vc);
2432	}
2433
2434	return (NULL);
2435}
2436
2437/*
2438 * Read a block from a pool and print it out.  The syntax of the
2439 * block descriptor is:
2440 *
2441 *	pool:vdev_specifier:offset:size[:flags]
2442 *
2443 *	pool           - The name of the pool you wish to read from
2444 *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
2445 *	offset         - offset, in hex, in bytes
2446 *	size           - Amount of data to read, in hex, in bytes
2447 *	flags          - A string of characters specifying options
2448 *		 b: Decode a blkptr at given offset within block
2449 *		*c: Calculate and display checksums
2450 *		 d: Decompress data before dumping
2451 *		 e: Byteswap data before dumping
2452 *		 g: Display data as a gang block header
2453 *		 i: Display as an indirect block
2454 *		 p: Do I/O to physical offset
2455 *		 r: Dump raw data to stdout
2456 *
2457 *              * = not yet implemented
2458 */
2459static void
2460zdb_read_block(char *thing, spa_t *spa)
2461{
2462	blkptr_t blk, *bp = &blk;
2463	dva_t *dva = bp->blk_dva;
2464	int flags = 0;
2465	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
2466	zio_t *zio;
2467	vdev_t *vd;
2468	void *pbuf, *lbuf, *buf;
2469	char *s, *p, *dup, *vdev, *flagstr;
2470	int i, error;
2471
2472	dup = strdup(thing);
2473	s = strtok(dup, ":");
2474	vdev = s ? s : "";
2475	s = strtok(NULL, ":");
2476	offset = strtoull(s ? s : "", NULL, 16);
2477	s = strtok(NULL, ":");
2478	size = strtoull(s ? s : "", NULL, 16);
2479	s = strtok(NULL, ":");
2480	flagstr = s ? s : "";
2481
2482	s = NULL;
2483	if (size == 0)
2484		s = "size must not be zero";
2485	if (!IS_P2ALIGNED(size, DEV_BSIZE))
2486		s = "size must be a multiple of sector size";
2487	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
2488		s = "offset must be a multiple of sector size";
2489	if (s) {
2490		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
2491		free(dup);
2492		return;
2493	}
2494
2495	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
2496		for (i = 0; flagstr[i]; i++) {
2497			int bit = flagbits[(uchar_t)flagstr[i]];
2498
2499			if (bit == 0) {
2500				(void) printf("***Invalid flag: %c\n",
2501				    flagstr[i]);
2502				continue;
2503			}
2504			flags |= bit;
2505
2506			/* If it's not something with an argument, keep going */
2507			if ((bit & (ZDB_FLAG_CHECKSUM |
2508			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
2509				continue;
2510
2511			p = &flagstr[i + 1];
2512			if (bit == ZDB_FLAG_PRINT_BLKPTR)
2513				blkptr_offset = strtoull(p, &p, 16);
2514			if (*p != ':' && *p != '\0') {
2515				(void) printf("***Invalid flag arg: '%s'\n", s);
2516				free(dup);
2517				return;
2518			}
2519		}
2520	}
2521
2522	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
2523	if (vd == NULL) {
2524		(void) printf("***Invalid vdev: %s\n", vdev);
2525		free(dup);
2526		return;
2527	} else {
2528		if (vd->vdev_path)
2529			(void) fprintf(stderr, "Found vdev: %s\n",
2530			    vd->vdev_path);
2531		else
2532			(void) fprintf(stderr, "Found vdev type: %s\n",
2533			    vd->vdev_ops->vdev_op_type);
2534	}
2535
2536	psize = size;
2537	lsize = size;
2538
2539	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2540	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2541
2542	BP_ZERO(bp);
2543
2544	DVA_SET_VDEV(&dva[0], vd->vdev_id);
2545	DVA_SET_OFFSET(&dva[0], offset);
2546	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
2547	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
2548
2549	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
2550
2551	BP_SET_LSIZE(bp, lsize);
2552	BP_SET_PSIZE(bp, psize);
2553	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
2554	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
2555	BP_SET_TYPE(bp, DMU_OT_NONE);
2556	BP_SET_LEVEL(bp, 0);
2557	BP_SET_DEDUP(bp, 0);
2558	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
2559
2560	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2561	zio = zio_root(spa, NULL, NULL, 0);
2562
2563	if (vd == vd->vdev_top) {
2564		/*
2565		 * Treat this as a normal block read.
2566		 */
2567		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
2568		    ZIO_PRIORITY_SYNC_READ,
2569		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
2570	} else {
2571		/*
2572		 * Treat this as a vdev child I/O.
2573		 */
2574		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
2575		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
2576		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
2577		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
2578		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
2579	}
2580
2581	error = zio_wait(zio);
2582	spa_config_exit(spa, SCL_STATE, FTAG);
2583
2584	if (error) {
2585		(void) printf("Read of %s failed, error: %d\n", thing, error);
2586		goto out;
2587	}
2588
2589	if (flags & ZDB_FLAG_DECOMPRESS) {
2590		/*
2591		 * We don't know how the data was compressed, so just try
2592		 * every decompress function at every inflated blocksize.
2593		 */
2594		enum zio_compress c;
2595		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2596		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2597
2598		bcopy(pbuf, pbuf2, psize);
2599
2600		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
2601		    SPA_MAXBLOCKSIZE - psize) == 0);
2602
2603		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
2604		    SPA_MAXBLOCKSIZE - psize) == 0);
2605
2606		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
2607		    lsize -= SPA_MINBLOCKSIZE) {
2608			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
2609				if (zio_decompress_data(c, pbuf, lbuf,
2610				    psize, lsize) == 0 &&
2611				    zio_decompress_data(c, pbuf2, lbuf2,
2612				    psize, lsize) == 0 &&
2613				    bcmp(lbuf, lbuf2, lsize) == 0)
2614					break;
2615			}
2616			if (c != ZIO_COMPRESS_FUNCTIONS)
2617				break;
2618			lsize -= SPA_MINBLOCKSIZE;
2619		}
2620
2621		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
2622		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
2623
2624		if (lsize <= psize) {
2625			(void) printf("Decompress of %s failed\n", thing);
2626			goto out;
2627		}
2628		buf = lbuf;
2629		size = lsize;
2630	} else {
2631		buf = pbuf;
2632		size = psize;
2633	}
2634
2635	if (flags & ZDB_FLAG_PRINT_BLKPTR)
2636		zdb_print_blkptr((blkptr_t *)(void *)
2637		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2638	else if (flags & ZDB_FLAG_RAW)
2639		zdb_dump_block_raw(buf, size, flags);
2640	else if (flags & ZDB_FLAG_INDIRECT)
2641		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2642		    flags);
2643	else if (flags & ZDB_FLAG_GBH)
2644		zdb_dump_gbh(buf, flags);
2645	else
2646		zdb_dump_block(thing, buf, size, flags);
2647
2648out:
2649	umem_free(pbuf, SPA_MAXBLOCKSIZE);
2650	umem_free(lbuf, SPA_MAXBLOCKSIZE);
2651	free(dup);
2652}
2653
2654static boolean_t
2655pool_match(nvlist_t *cfg, char *tgt)
2656{
2657	uint64_t v, guid = strtoull(tgt, NULL, 0);
2658	char *s;
2659
2660	if (guid != 0) {
2661		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
2662			return (v == guid);
2663	} else {
2664		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
2665			return (strcmp(s, tgt) == 0);
2666	}
2667	return (B_FALSE);
2668}
2669
2670static char *
2671find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
2672{
2673	nvlist_t *pools;
2674	nvlist_t *match = NULL;
2675	char *name = NULL;
2676	char *sepp = NULL;
2677	char sep;
2678	int count = 0;
2679
2680	if ((sepp = strpbrk(*target, "/@")) != NULL) {
2681		sep = *sepp;
2682		*sepp = '\0';
2683	}
2684
2685	pools = zpool_find_import_activeok(g_zfs, dirc, dirv);
2686
2687	if (pools != NULL) {
2688		nvpair_t *elem = NULL;
2689		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
2690			verify(nvpair_value_nvlist(elem, configp) == 0);
2691			if (pool_match(*configp, *target)) {
2692				count++;
2693				if (match != NULL) {
2694					/* print previously found config */
2695					if (name != NULL) {
2696						(void) printf("%s\n", name);
2697						dump_nvlist(match, 8);
2698						name = NULL;
2699					}
2700					(void) printf("%s\n",
2701					    nvpair_name(elem));
2702					dump_nvlist(*configp, 8);
2703				} else {
2704					match = *configp;
2705					name = nvpair_name(elem);
2706				}
2707			}
2708		}
2709	}
2710	if (count > 1)
2711		(void) fatal("\tMatched %d pools - use pool GUID "
2712		    "instead of pool name or \n"
2713		    "\tpool name part of a dataset name to select pool", count);
2714
2715	if (sepp)
2716		*sepp = sep;
2717	/*
2718	 * If pool GUID was specified for pool id, replace it with pool name
2719	 */
2720	if (name && (strstr(*target, name) != *target)) {
2721		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
2722
2723		*target = umem_alloc(sz, UMEM_NOFAIL);
2724		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
2725	}
2726
2727	*configp = name ? match : NULL;
2728
2729	return (name);
2730}
2731
2732int
2733main(int argc, char **argv)
2734{
2735	int i, c;
2736	struct rlimit rl = { 1024, 1024 };
2737	spa_t *spa = NULL;
2738	objset_t *os = NULL;
2739	int dump_all = 1;
2740	int verbose = 0;
2741	int error;
2742	char **searchdirs = NULL;
2743	int nsearch = 0;
2744	char *target;
2745	nvlist_t *policy = NULL;
2746	uint64_t max_txg = UINT64_MAX;
2747
2748	(void) setrlimit(RLIMIT_NOFILE, &rl);
2749	(void) enable_extended_FILE_stdio(-1, -1);
2750
2751	dprintf_setup(&argc, argv);
2752
2753	while ((c = getopt(argc, argv, "bcdhilmsuCDRSLevp:t:U:")) != -1) {
2754		switch (c) {
2755		case 'b':
2756		case 'c':
2757		case 'd':
2758		case 'h':
2759		case 'i':
2760		case 'l':
2761		case 'm':
2762		case 's':
2763		case 'u':
2764		case 'C':
2765		case 'D':
2766		case 'R':
2767		case 'S':
2768			dump_opt[c]++;
2769			dump_all = 0;
2770			break;
2771		case 'L':
2772		case 'e':
2773			dump_opt[c]++;
2774			break;
2775		case 'v':
2776			verbose++;
2777			break;
2778		case 'p':
2779			if (searchdirs == NULL) {
2780				searchdirs = umem_alloc(sizeof (char *),
2781				    UMEM_NOFAIL);
2782			} else {
2783				char **tmp = umem_alloc((nsearch + 1) *
2784				    sizeof (char *), UMEM_NOFAIL);
2785				bcopy(searchdirs, tmp, nsearch *
2786				    sizeof (char *));
2787				umem_free(searchdirs,
2788				    nsearch * sizeof (char *));
2789				searchdirs = tmp;
2790			}
2791			searchdirs[nsearch++] = optarg;
2792			break;
2793		case 't':
2794			max_txg = strtoull(optarg, NULL, 0);
2795			if (max_txg < TXG_INITIAL) {
2796				(void) fprintf(stderr, "incorrect txg "
2797				    "specified: %s\n", optarg);
2798				usage();
2799			}
2800			break;
2801		case 'U':
2802			spa_config_path = optarg;
2803			break;
2804		default:
2805			usage();
2806			break;
2807		}
2808	}
2809
2810	if (!dump_opt['e'] && searchdirs != NULL) {
2811		(void) fprintf(stderr, "-p option requires use of -e\n");
2812		usage();
2813	}
2814
2815	kernel_init(FREAD);
2816	g_zfs = libzfs_init();
2817	ASSERT(g_zfs != NULL);
2818
2819	if (dump_all)
2820		verbose = MAX(verbose, 1);
2821
2822	for (c = 0; c < 256; c++) {
2823		if (dump_all && !strchr("elLRS", c))
2824			dump_opt[c] = 1;
2825		if (dump_opt[c])
2826			dump_opt[c] += verbose;
2827	}
2828
2829	argc -= optind;
2830	argv += optind;
2831
2832	if (argc < 2 && dump_opt['R'])
2833		usage();
2834	if (argc < 1) {
2835		if (!dump_opt['e'] && dump_opt['C']) {
2836			dump_cachefile(spa_config_path);
2837			return (0);
2838		}
2839		usage();
2840	}
2841
2842	if (dump_opt['l']) {
2843		dump_label(argv[0]);
2844		return (0);
2845	}
2846
2847	error = 0;
2848	target = argv[0];
2849
2850	VERIFY(nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) == 0);
2851
2852	if (dump_opt['e']) {
2853		nvlist_t *cfg = NULL;
2854		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
2855
2856		error = ENOENT;
2857		if (name) {
2858			if (dump_opt['C'] > 1) {
2859				(void) printf("\nConfiguration for import:\n");
2860				dump_nvlist(cfg, 8);
2861			}
2862			if (nvlist_add_uint64(policy,
2863			    ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
2864			    nvlist_add_nvlist(cfg,
2865			    ZPOOL_REWIND_POLICY, policy) != 0) {
2866				fatal("can't open '%s': %s",
2867				    target, strerror(ENOMEM));
2868			}
2869			if ((error = spa_import(name, cfg, NULL)) != 0)
2870				error = spa_import_verbatim(name, cfg, NULL);
2871		}
2872	} else {
2873		VERIFY(nvlist_add_uint64(policy, ZPOOL_REWIND_META_THRESH,
2874		    UINT64_MAX) == 0);
2875	}
2876
2877	if (error == 0) {
2878		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
2879			error = spa_open_rewind(target, &spa, FTAG, policy,
2880			    NULL);
2881			if (error) {
2882				/*
2883				 * If we're missing the log device then
2884				 * try opening the pool after clearing the
2885				 * log state.
2886				 */
2887				mutex_enter(&spa_namespace_lock);
2888				if ((spa = spa_lookup(target)) != NULL &&
2889				    spa->spa_log_state == SPA_LOG_MISSING) {
2890					spa->spa_log_state = SPA_LOG_CLEAR;
2891					error = 0;
2892				}
2893				mutex_exit(&spa_namespace_lock);
2894
2895				if (!error) {
2896					error = spa_open_rewind(target, &spa,
2897					    FTAG, policy, NULL);
2898				}
2899			}
2900		} else {
2901			error = dmu_objset_own(target, DMU_OST_ANY,
2902			    B_TRUE, FTAG, &os);
2903		}
2904	}
2905	nvlist_free(policy);
2906
2907	if (error)
2908		fatal("can't open '%s': %s", target, strerror(error));
2909
2910	argv++;
2911	argc--;
2912	if (!dump_opt['R']) {
2913		if (argc > 0) {
2914			zopt_objects = argc;
2915			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
2916			for (i = 0; i < zopt_objects; i++) {
2917				errno = 0;
2918				zopt_object[i] = strtoull(argv[i], NULL, 0);
2919				if (zopt_object[i] == 0 && errno != 0)
2920					fatal("bad number %s: %s",
2921					    argv[i], strerror(errno));
2922			}
2923		}
2924		(os != NULL) ? dump_dir(os) : dump_zpool(spa);
2925	} else {
2926		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
2927		flagbits['c'] = ZDB_FLAG_CHECKSUM;
2928		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
2929		flagbits['e'] = ZDB_FLAG_BSWAP;
2930		flagbits['g'] = ZDB_FLAG_GBH;
2931		flagbits['i'] = ZDB_FLAG_INDIRECT;
2932		flagbits['p'] = ZDB_FLAG_PHYS;
2933		flagbits['r'] = ZDB_FLAG_RAW;
2934
2935		for (i = 0; i < argc; i++)
2936			zdb_read_block(argv[i], spa);
2937	}
2938
2939	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
2940
2941	fuid_table_destroy();
2942
2943	libzfs_fini(g_zfs);
2944	kernel_fini();
2945
2946	return (0);
2947}
2948