zdb.c revision bbfd46c40e81c7d954cec28db66453ec5ab44613
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <stdio.h>
27#include <stdio_ext.h>
28#include <stdlib.h>
29#include <ctype.h>
30#include <sys/zfs_context.h>
31#include <sys/spa.h>
32#include <sys/spa_impl.h>
33#include <sys/dmu.h>
34#include <sys/zap.h>
35#include <sys/fs/zfs.h>
36#include <sys/zfs_znode.h>
37#include <sys/vdev.h>
38#include <sys/vdev_impl.h>
39#include <sys/metaslab_impl.h>
40#include <sys/dmu_objset.h>
41#include <sys/dsl_dir.h>
42#include <sys/dsl_dataset.h>
43#include <sys/dsl_pool.h>
44#include <sys/dbuf.h>
45#include <sys/zil.h>
46#include <sys/zil_impl.h>
47#include <sys/stat.h>
48#include <sys/resource.h>
49#include <sys/dmu_traverse.h>
50#include <sys/zio_checksum.h>
51#include <sys/zio_compress.h>
52#include <sys/zfs_fuid.h>
53#include <sys/arc.h>
54#include <sys/ddt.h>
55#undef ZFS_MAXNAMELEN
56#undef verify
57#include <libzfs.h>
58
/*
 * Guarded name lookups: each macro indexes its table only when idx is below
 * the corresponding limit constant and otherwise yields "UNKNOWN".
 */
#define	ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
    zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
    zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
    dmu_ot[(idx)].ot_name : "UNKNOWN")
/* Maps any idx that is not below DMU_OT_NUMTYPES to DMU_OT_NUMTYPES itself. */
#define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
66
/* Program name; printed as the prefix of usage() and fatal() messages. */
const char cmdname[] = "zdb";
/*
 * Per-option levels indexed by option character (e.g. dump_opt['d']); the
 * dump routines in this file compare them against verbosity thresholds.
 * NOTE(review): presumably filled in by option parsing, which is not shown
 * in this section.
 */
uint8_t dump_opt[256];

/* Signature shared by the dump_*() object viewers in this file. */
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

extern void dump_intent_log(zilog_t *);
/*
 * Array of zopt_objects numeric arguments.  dump_metaslabs() reads element 0
 * as a vdev id and the remaining elements as metaslab numbers.
 * NOTE(review): presumably these are the trailing command-line arguments
 * described in usage(); confirm against the argument parser.
 */
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
/* libzfs handle.  NOTE(review): initialization is outside this section. */
libzfs_handle_t *g_zfs;
76
/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */

/*
 * Returns the string to be used as the $UMEM_DEBUG setting.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and matches _umem_logging_init() below.
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
86
/*
 * Returns the string to be used as the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char logging_setting[] = "fail,contents";

	return (logging_setting);
}
92
93static void
94usage(void)
95{
96	(void) fprintf(stderr,
97	    "Usage: %s [-CumdibcsvhL] "
98	    "poolname [object...]\n"
99	    "       %s [-div] dataset [object...]\n"
100	    "       %s -m [-L] poolname [vdev [metaslab...]]\n"
101	    "       %s -R poolname vdev:offset:size[:flags]\n"
102	    "       %s -S poolname\n"
103	    "       %s -l device\n"
104	    "       %s -C\n\n",
105	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
106
107	(void) fprintf(stderr, "    Dataset name must include at least one "
108	    "separator character '/' or '@'\n");
109	(void) fprintf(stderr, "    If dataset name is specified, only that "
110	    "dataset is dumped\n");
111	(void) fprintf(stderr, "    If object numbers are specified, only "
112	    "those objects are dumped\n\n");
113	(void) fprintf(stderr, "    Options to control amount of output:\n");
114	(void) fprintf(stderr, "        -u uberblock\n");
115	(void) fprintf(stderr, "        -d dataset(s)\n");
116	(void) fprintf(stderr, "        -i intent logs\n");
117	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
118	(void) fprintf(stderr, "        -h pool history\n");
119	(void) fprintf(stderr, "        -b block statistics\n");
120	(void) fprintf(stderr, "        -m metaslabs\n");
121	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
122	    "all data) blocks\n");
123	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
124	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
125	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
126	(void) fprintf(stderr, "        -l dump label contents\n");
127	(void) fprintf(stderr, "        -L disable leak tracking (do not "
128	    "load spacemaps)\n");
129	(void) fprintf(stderr, "        -R read and display block from a "
130	    "device\n\n");
131	(void) fprintf(stderr, "    Below options are intended for use "
132	    "with other options (except -l):\n");
133	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
134	    "cachefile\n");
135	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
136	    "has altroot/not in a cachefile\n");
137	(void) fprintf(stderr, "        -p <path> -- use one or more with "
138	    "-e to specify path to vdev dir\n");
139	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
140	    "searching for uberblocks\n");
141	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
142	    "to make only that option verbose\n");
143	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
144	exit(1);
145}
146
147/*
148 * Called for usage errors that are discovered after a call to spa_open(),
149 * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
150 */
151
152static void
153fatal(const char *fmt, ...)
154{
155	va_list ap;
156
157	va_start(ap, fmt);
158	(void) fprintf(stderr, "%s: ", cmdname);
159	(void) vfprintf(stderr, fmt, ap);
160	va_end(ap);
161	(void) fprintf(stderr, "\n");
162
163	exit(1);
164}
165
166/* ARGSUSED */
167static void
168dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
169{
170	nvlist_t *nv;
171	size_t nvsize = *(uint64_t *)data;
172	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
173
174	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
175
176	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
177
178	umem_free(packed, nvsize);
179
180	dump_nvlist(nv, 8);
181
182	nvlist_free(nv);
183}
184
185const char dump_zap_stars[] = "****************************************";
186const int dump_zap_width = sizeof (dump_zap_stars) - 1;
187
188static void
189dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
190{
191	int i;
192	int minidx = ZAP_HISTOGRAM_SIZE - 1;
193	int maxidx = 0;
194	uint64_t max = 0;
195
196	for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
197		if (histo[i] > max)
198			max = histo[i];
199		if (histo[i] > 0 && i > maxidx)
200			maxidx = i;
201		if (histo[i] > 0 && i < minidx)
202			minidx = i;
203	}
204
205	if (max < dump_zap_width)
206		max = dump_zap_width;
207
208	for (i = minidx; i <= maxidx; i++)
209		(void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
210		    &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
211}
212
213static void
214dump_zap_stats(objset_t *os, uint64_t object)
215{
216	int error;
217	zap_stats_t zs;
218
219	error = zap_get_stats(os, object, &zs);
220	if (error)
221		return;
222
223	if (zs.zs_ptrtbl_len == 0) {
224		ASSERT(zs.zs_num_blocks == 1);
225		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
226		    (u_longlong_t)zs.zs_blocksize,
227		    (u_longlong_t)zs.zs_num_entries);
228		return;
229	}
230
231	(void) printf("\tFat ZAP stats:\n");
232
233	(void) printf("\t\tPointer table:\n");
234	(void) printf("\t\t\t%llu elements\n",
235	    (u_longlong_t)zs.zs_ptrtbl_len);
236	(void) printf("\t\t\tzt_blk: %llu\n",
237	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
238	(void) printf("\t\t\tzt_numblks: %llu\n",
239	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
240	(void) printf("\t\t\tzt_shift: %llu\n",
241	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
242	(void) printf("\t\t\tzt_blks_copied: %llu\n",
243	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
244	(void) printf("\t\t\tzt_nextblk: %llu\n",
245	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
246
247	(void) printf("\t\tZAP entries: %llu\n",
248	    (u_longlong_t)zs.zs_num_entries);
249	(void) printf("\t\tLeaf blocks: %llu\n",
250	    (u_longlong_t)zs.zs_num_leafs);
251	(void) printf("\t\tTotal blocks: %llu\n",
252	    (u_longlong_t)zs.zs_num_blocks);
253	(void) printf("\t\tzap_block_type: 0x%llx\n",
254	    (u_longlong_t)zs.zs_block_type);
255	(void) printf("\t\tzap_magic: 0x%llx\n",
256	    (u_longlong_t)zs.zs_magic);
257	(void) printf("\t\tzap_salt: 0x%llx\n",
258	    (u_longlong_t)zs.zs_salt);
259
260	(void) printf("\t\tLeafs with 2^n pointers:\n");
261	dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
262
263	(void) printf("\t\tBlocks with n*5 entries:\n");
264	dump_zap_histogram(zs.zs_blocks_with_n5_entries);
265
266	(void) printf("\t\tBlocks n/10 full:\n");
267	dump_zap_histogram(zs.zs_blocks_n_tenths_full);
268
269	(void) printf("\t\tEntries with n chunks:\n");
270	dump_zap_histogram(zs.zs_entries_using_n_chunks);
271
272	(void) printf("\t\tBuckets with n entries:\n");
273	dump_zap_histogram(zs.zs_buckets_with_n_entries);
274}
275
/*
 * Object viewer with an empty body: produces no output for the object.
 * All four arguments are unused.
 */
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}
281
282/*ARGSUSED*/
283static void
284dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
285{
286	(void) printf("\tUNKNOWN OBJECT TYPE\n");
287}
288
/*
 * Object viewer with an empty body: produces no output for the object.
 * NOTE(review): unlike the neighbouring viewers this one is not static;
 * confirm whether another file references it before changing linkage.
 */
/*ARGSUSED*/
void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}
294
/*
 * Object viewer with an empty body: produces no output for the object.
 * All four arguments are unused.
 */
/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
}
300
301/*ARGSUSED*/
302static void
303dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
304{
305	zap_cursor_t zc;
306	zap_attribute_t attr;
307	void *prop;
308	int i;
309
310	dump_zap_stats(os, object);
311	(void) printf("\n");
312
313	for (zap_cursor_init(&zc, os, object);
314	    zap_cursor_retrieve(&zc, &attr) == 0;
315	    zap_cursor_advance(&zc)) {
316		(void) printf("\t\t%s = ", attr.za_name);
317		if (attr.za_num_integers == 0) {
318			(void) printf("\n");
319			continue;
320		}
321		prop = umem_zalloc(attr.za_num_integers *
322		    attr.za_integer_length, UMEM_NOFAIL);
323		(void) zap_lookup(os, object, attr.za_name,
324		    attr.za_integer_length, attr.za_num_integers, prop);
325		if (attr.za_integer_length == 1) {
326			(void) printf("%s", (char *)prop);
327		} else {
328			for (i = 0; i < attr.za_num_integers; i++) {
329				switch (attr.za_integer_length) {
330				case 2:
331					(void) printf("%u ",
332					    ((uint16_t *)prop)[i]);
333					break;
334				case 4:
335					(void) printf("%u ",
336					    ((uint32_t *)prop)[i]);
337					break;
338				case 8:
339					(void) printf("%lld ",
340					    (u_longlong_t)((int64_t *)prop)[i]);
341					break;
342				}
343			}
344		}
345		(void) printf("\n");
346		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
347	}
348	zap_cursor_fini(&zc);
349}
350
351/*ARGSUSED*/
352static void
353dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
354{
355	zap_cursor_t zc;
356	zap_attribute_t attr;
357	const char *typenames[] = {
358		/* 0 */ "not specified",
359		/* 1 */ "FIFO",
360		/* 2 */ "Character Device",
361		/* 3 */ "3 (invalid)",
362		/* 4 */ "Directory",
363		/* 5 */ "5 (invalid)",
364		/* 6 */ "Block Device",
365		/* 7 */ "7 (invalid)",
366		/* 8 */ "Regular File",
367		/* 9 */ "9 (invalid)",
368		/* 10 */ "Symbolic Link",
369		/* 11 */ "11 (invalid)",
370		/* 12 */ "Socket",
371		/* 13 */ "Door",
372		/* 14 */ "Event Port",
373		/* 15 */ "15 (invalid)",
374	};
375
376	dump_zap_stats(os, object);
377	(void) printf("\n");
378
379	for (zap_cursor_init(&zc, os, object);
380	    zap_cursor_retrieve(&zc, &attr) == 0;
381	    zap_cursor_advance(&zc)) {
382		(void) printf("\t\t%s = %lld (type: %s)\n",
383		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
384		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
385	}
386	zap_cursor_fini(&zc);
387}
388
389static void
390dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
391{
392	uint64_t alloc, offset, entry;
393	uint8_t mapshift = sm->sm_shift;
394	uint64_t mapstart = sm->sm_start;
395	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
396			    "INVALID", "INVALID", "INVALID", "INVALID" };
397
398	if (smo->smo_object == 0)
399		return;
400
401	/*
402	 * Print out the freelist entries in both encoded and decoded form.
403	 */
404	alloc = 0;
405	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
406		VERIFY(0 == dmu_read(os, smo->smo_object, offset,
407		    sizeof (entry), &entry, DMU_READ_PREFETCH));
408		if (SM_DEBUG_DECODE(entry)) {
409			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
410			    (u_longlong_t)(offset / sizeof (entry)),
411			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
412			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
413			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
414		} else {
415			(void) printf("\t    [%6llu]    %c  range:"
416			    " %010llx-%010llx  size: %06llx\n",
417			    (u_longlong_t)(offset / sizeof (entry)),
418			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
419			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
420			    mapshift) + mapstart),
421			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
422			    mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
423			    mapshift)),
424			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
425			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
426				alloc += SM_RUN_DECODE(entry) << mapshift;
427			else
428				alloc -= SM_RUN_DECODE(entry) << mapshift;
429		}
430	}
431	if (alloc != smo->smo_alloc) {
432		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
433		    "with space map summary (%llu)\n",
434		    (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
435	}
436}
437
438static void
439dump_metaslab_stats(metaslab_t *msp)
440{
441	char maxbuf[5];
442	space_map_t *sm = &msp->ms_map;
443	avl_tree_t *t = sm->sm_pp_root;
444	int free_pct = sm->sm_space * 100 / sm->sm_size;
445
446	nicenum(space_map_maxsize(sm), maxbuf);
447
448	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
449	    "segments", avl_numnodes(t), "maxsize", maxbuf,
450	    "freepct", free_pct);
451}
452
453static void
454dump_metaslab(metaslab_t *msp)
455{
456	char freebuf[5];
457	space_map_obj_t *smo = &msp->ms_smo;
458	vdev_t *vd = msp->ms_group->mg_vd;
459	spa_t *spa = vd->vdev_spa;
460
461	nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
462
463	(void) printf(
464	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
465	    (u_longlong_t)(msp->ms_map.sm_start / msp->ms_map.sm_size),
466	    (u_longlong_t)msp->ms_map.sm_start, (u_longlong_t)smo->smo_object,
467	    freebuf);
468
469	if (dump_opt['m'] > 1 && !dump_opt['L']) {
470		mutex_enter(&msp->ms_lock);
471		VERIFY(space_map_load(&msp->ms_map, zfs_metaslab_ops,
472		    SM_FREE, &msp->ms_smo, spa->spa_meta_objset) == 0);
473		dump_metaslab_stats(msp);
474		space_map_unload(&msp->ms_map);
475		mutex_exit(&msp->ms_lock);
476	}
477
478	if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
479		ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
480
481		mutex_enter(&msp->ms_lock);
482		dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
483		mutex_exit(&msp->ms_lock);
484	}
485}
486
487static void
488print_vdev_metaslab_header(vdev_t *vd)
489{
490	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
491	    (u_longlong_t)vd->vdev_id,
492	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
493	    "offset", "spacemap", "free");
494	(void) printf("\t%15s   %19s   %15s   %10s\n",
495	    "---------------", "-------------------",
496	    "---------------", "-------------");
497}
498
499static void
500dump_metaslabs(spa_t *spa)
501{
502	vdev_t *vd, *rvd = spa->spa_root_vdev;
503	uint64_t m, c = 0, children = rvd->vdev_children;
504
505	(void) printf("\nMetaslabs:\n");
506
507	if (!dump_opt['d'] && zopt_objects > 0) {
508		c = zopt_object[0];
509
510		if (c >= children)
511			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
512
513		if (zopt_objects > 1) {
514			vd = rvd->vdev_child[c];
515			print_vdev_metaslab_header(vd);
516
517			for (m = 1; m < zopt_objects; m++) {
518				if (zopt_object[m] < vd->vdev_ms_count)
519					dump_metaslab(
520					    vd->vdev_ms[zopt_object[m]]);
521				else
522					(void) fprintf(stderr, "bad metaslab "
523					    "number %llu\n",
524					    (u_longlong_t)zopt_object[m]);
525			}
526			(void) printf("\n");
527			return;
528		}
529		children = c + 1;
530	}
531	for (; c < children; c++) {
532		vd = rvd->vdev_child[c];
533		print_vdev_metaslab_header(vd);
534
535		for (m = 0; m < vd->vdev_ms_count; m++)
536			dump_metaslab(vd->vdev_ms[m]);
537		(void) printf("\n");
538	}
539}
540
541static void
542dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
543{
544	const ddt_phys_t *ddp = dde->dde_phys;
545	const ddt_key_t *ddk = &dde->dde_key;
546	char *types[4] = { "ditto", "single", "double", "triple" };
547	char blkbuf[BP_SPRINTF_LEN];
548	blkptr_t blk;
549
550	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
551		if (ddp->ddp_phys_birth == 0)
552			continue;
553		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
554		sprintf_blkptr(blkbuf, &blk);
555		(void) printf("index %llx refcnt %llu %s %s\n",
556		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
557		    types[p], blkbuf);
558	}
559}
560
561static void
562dump_dedup_ratio(const ddt_stat_t *dds)
563{
564	double rL, rP, rD, D, dedup, compress, copies;
565
566	if (dds->dds_blocks == 0)
567		return;
568
569	rL = (double)dds->dds_ref_lsize;
570	rP = (double)dds->dds_ref_psize;
571	rD = (double)dds->dds_ref_dsize;
572	D = (double)dds->dds_dsize;
573
574	dedup = rD / D;
575	compress = rL / rP;
576	copies = rD / rP;
577
578	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
579	    "dedup * compress / copies = %.2f\n\n",
580	    dedup, compress, copies, dedup * compress / copies);
581}
582
583static void
584dump_ddt_stat(const ddt_stat_t *dds, int h)
585{
586	char refcnt[6];
587	char blocks[6], lsize[6], psize[6], dsize[6];
588	char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
589
590	if (dds->dds_blocks == 0)
591		return;
592
593	if (h == -1)
594		(void) strcpy(refcnt, "Total");
595	else
596		nicenum(1ULL << h, refcnt);
597
598	nicenum(dds->dds_blocks, blocks);
599	nicenum(dds->dds_lsize, lsize);
600	nicenum(dds->dds_psize, psize);
601	nicenum(dds->dds_dsize, dsize);
602	nicenum(dds->dds_ref_blocks, ref_blocks);
603	nicenum(dds->dds_ref_lsize, ref_lsize);
604	nicenum(dds->dds_ref_psize, ref_psize);
605	nicenum(dds->dds_ref_dsize, ref_dsize);
606
607	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
608	    refcnt,
609	    blocks, lsize, psize, dsize,
610	    ref_blocks, ref_lsize, ref_psize, ref_dsize);
611}
612
613static void
614dump_ddt_histogram(const ddt_histogram_t *ddh)
615{
616	ddt_stat_t dds_total = { 0 };
617
618	ddt_histogram_stat(&dds_total, ddh);
619
620	(void) printf("\n");
621
622	(void) printf("bucket   "
623	    "           allocated             "
624	    "          referenced          \n");
625	(void) printf("______   "
626	    "______________________________   "
627	    "______________________________\n");
628
629	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
630	    "refcnt",
631	    "blocks", "LSIZE", "PSIZE", "DSIZE",
632	    "blocks", "LSIZE", "PSIZE", "DSIZE");
633
634	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
635	    "------",
636	    "------", "-----", "-----", "-----",
637	    "------", "-----", "-----", "-----");
638
639	for (int h = 0; h < 64; h++)
640		dump_ddt_stat(&ddh->ddh_stat[h], h);
641
642	dump_ddt_stat(&dds_total, -1);
643
644	(void) printf("\n");
645}
646
647static void
648dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
649{
650	char name[DDT_NAMELEN];
651	ddt_entry_t dde;
652	uint64_t walk = 0;
653	dmu_object_info_t doi;
654	uint64_t count, dspace, mspace;
655	int error;
656
657	error = ddt_object_info(ddt, type, class, &doi);
658
659	if (error == ENOENT)
660		return;
661	ASSERT(error == 0);
662
663	count = ddt_object_count(ddt, type, class);
664	dspace = doi.doi_physical_blocks_512 << 9;
665	mspace = doi.doi_fill_count * doi.doi_data_block_size;
666
667	ASSERT(count != 0);	/* we should have destroyed it */
668
669	ddt_object_name(ddt, type, class, name);
670
671	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
672	    name,
673	    (u_longlong_t)count,
674	    (u_longlong_t)(dspace / count),
675	    (u_longlong_t)(mspace / count));
676
677	if (dump_opt['D'] < 3)
678		return;
679
680	dump_ddt_histogram(&ddt->ddt_histogram[type][class]);
681
682	if (dump_opt['D'] < 4)
683		return;
684
685	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
686		return;
687
688	(void) printf("%s contents:\n\n", name);
689
690	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
691		dump_dde(ddt, &dde, walk);
692
693	ASSERT(error == ENOENT);
694
695	(void) printf("\n");
696}
697
698static void
699dump_all_ddts(spa_t *spa)
700{
701	ddt_histogram_t ddh_total = { 0 };
702	ddt_stat_t dds_total = { 0 };
703
704	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
705		ddt_t *ddt = spa->spa_ddt[c];
706		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
707			for (enum ddt_class class = 0; class < DDT_CLASSES;
708			    class++) {
709				ddt_histogram_add(&ddh_total,
710				    &ddt->ddt_histogram[type][class]);
711				dump_ddt(ddt, type, class);
712			}
713		}
714	}
715
716	ddt_histogram_stat(&dds_total, &ddh_total);
717
718	if (dds_total.dds_blocks == 0) {
719		(void) printf("All DDTs are empty\n");
720		return;
721	}
722
723	(void) printf("\n");
724
725	if (dump_opt['D'] > 1) {
726		(void) printf("DDT histogram (aggregated over all DDTs):\n");
727		dump_ddt_histogram(&ddh_total);
728	}
729
730	dump_dedup_ratio(&dds_total);
731}
732
733static void
734dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
735{
736	char *prefix = (void *)sm;
737
738	(void) printf("%s [%llu,%llu) length %llu\n",
739	    prefix,
740	    (u_longlong_t)start,
741	    (u_longlong_t)(start + size),
742	    (u_longlong_t)(size));
743}
744
745static void
746dump_dtl(vdev_t *vd, int indent)
747{
748	spa_t *spa = vd->vdev_spa;
749	boolean_t required;
750	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
751	char prefix[256];
752
753	spa_vdev_state_enter(spa, SCL_NONE);
754	required = vdev_dtl_required(vd);
755	(void) spa_vdev_state_exit(spa, NULL, 0);
756
757	if (indent == 0)
758		(void) printf("\nDirty time logs:\n\n");
759
760	(void) printf("\t%*s%s [%s]\n", indent, "",
761	    vd->vdev_path ? vd->vdev_path :
762	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
763	    required ? "DTL-required" : "DTL-expendable");
764
765	for (int t = 0; t < DTL_TYPES; t++) {
766		space_map_t *sm = &vd->vdev_dtl[t];
767		if (sm->sm_space == 0)
768			continue;
769		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
770		    indent + 2, "", name[t]);
771		mutex_enter(sm->sm_lock);
772		space_map_walk(sm, dump_dtl_seg, (void *)prefix);
773		mutex_exit(sm->sm_lock);
774		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
775			dump_spacemap(spa->spa_meta_objset,
776			    &vd->vdev_dtl_smo, sm);
777	}
778
779	for (int c = 0; c < vd->vdev_children; c++)
780		dump_dtl(vd->vdev_child[c], indent + 4);
781}
782
783static void
784dump_history(spa_t *spa)
785{
786	nvlist_t **events = NULL;
787	char buf[SPA_MAXBLOCKSIZE];
788	uint64_t resid, len, off = 0;
789	uint_t num = 0;
790	int error;
791	time_t tsec;
792	struct tm t;
793	char tbuf[30];
794	char internalstr[MAXPATHLEN];
795
796	do {
797		len = sizeof (buf);
798
799		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
800			(void) fprintf(stderr, "Unable to read history: "
801			    "error %d\n", error);
802			return;
803		}
804
805		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
806			break;
807
808		off -= resid;
809	} while (len != 0);
810
811	(void) printf("\nHistory:\n");
812	for (int i = 0; i < num; i++) {
813		uint64_t time, txg, ievent;
814		char *cmd, *intstr;
815
816		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
817		    &time) != 0)
818			continue;
819		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
820		    &cmd) != 0) {
821			if (nvlist_lookup_uint64(events[i],
822			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
823				continue;
824			verify(nvlist_lookup_uint64(events[i],
825			    ZPOOL_HIST_TXG, &txg) == 0);
826			verify(nvlist_lookup_string(events[i],
827			    ZPOOL_HIST_INT_STR, &intstr) == 0);
828			if (ievent >= LOG_END)
829				continue;
830
831			(void) snprintf(internalstr,
832			    sizeof (internalstr),
833			    "[internal %s txg:%lld] %s",
834			    hist_event_table[ievent], txg,
835			    intstr);
836			cmd = internalstr;
837		}
838		tsec = time;
839		(void) localtime_r(&tsec, &t);
840		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
841		(void) printf("%s %s\n", tbuf, cmd);
842	}
843}
844
/*
 * Object viewer with an empty body: produces no output for the object.
 * All four arguments are unused.
 */
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
{
}
850
851static uint64_t
852blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
853{
854	if (dnp == NULL) {
855		ASSERT(zb->zb_level < 0);
856		if (zb->zb_object == 0)
857			return (zb->zb_blkid);
858		return (zb->zb_blkid * BP_GET_LSIZE(bp));
859	}
860
861	ASSERT(zb->zb_level >= 0);
862
863	return ((zb->zb_blkid <<
864	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
865	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
866}
867
868static void
869sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp)
870{
871	dva_t *dva = bp->blk_dva;
872	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
873
874	if (dump_opt['b'] >= 5) {
875		sprintf_blkptr(blkbuf, bp);
876		return;
877	}
878
879	blkbuf[0] = '\0';
880
881	for (int i = 0; i < ndvas; i++)
882		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
883		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
884		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
885		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
886
887	(void) sprintf(blkbuf + strlen(blkbuf),
888	    "%llxL/%llxP F=%llu B=%llu/%llu",
889	    (u_longlong_t)BP_GET_LSIZE(bp),
890	    (u_longlong_t)BP_GET_PSIZE(bp),
891	    (u_longlong_t)bp->blk_fill,
892	    (u_longlong_t)bp->blk_birth,
893	    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
894}
895
896static void
897print_indirect(blkptr_t *bp, const zbookmark_t *zb,
898    const dnode_phys_t *dnp)
899{
900	char blkbuf[BP_SPRINTF_LEN];
901	int l;
902
903	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
904	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
905
906	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
907
908	ASSERT(zb->zb_level >= 0);
909
910	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
911		if (l == zb->zb_level) {
912			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
913		} else {
914			(void) printf(" ");
915		}
916	}
917
918	sprintf_blkptr_compact(blkbuf, bp);
919	(void) printf("%s\n", blkbuf);
920}
921
922static int
923visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
924    blkptr_t *bp, const zbookmark_t *zb)
925{
926	int err = 0;
927
928	if (bp->blk_birth == 0)
929		return (0);
930
931	print_indirect(bp, zb, dnp);
932
933	if (BP_GET_LEVEL(bp) > 0) {
934		uint32_t flags = ARC_WAIT;
935		int i;
936		blkptr_t *cbp;
937		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
938		arc_buf_t *buf;
939		uint64_t fill = 0;
940
941		err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
942		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
943		if (err)
944			return (err);
945
946		/* recursively visit blocks below this */
947		cbp = buf->b_data;
948		for (i = 0; i < epb; i++, cbp++) {
949			zbookmark_t czb;
950
951			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
952			    zb->zb_level - 1,
953			    zb->zb_blkid * epb + i);
954			err = visit_indirect(spa, dnp, cbp, &czb);
955			if (err)
956				break;
957			fill += cbp->blk_fill;
958		}
959		if (!err)
960			ASSERT3U(fill, ==, bp->blk_fill);
961		(void) arc_buf_remove_ref(buf, &buf);
962	}
963
964	return (err);
965}
966
967/*ARGSUSED*/
968static void
969dump_indirect(dnode_t *dn)
970{
971	dnode_phys_t *dnp = dn->dn_phys;
972	int j;
973	zbookmark_t czb;
974
975	(void) printf("Indirect blocks:\n");
976
977	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
978	    dn->dn_object, dnp->dn_nlevels - 1, 0);
979	for (j = 0; j < dnp->dn_nblkptr; j++) {
980		czb.zb_blkid = j;
981		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
982		    &dnp->dn_blkptr[j], &czb);
983	}
984
985	(void) printf("\n");
986}
987
988/*ARGSUSED*/
989static void
990dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
991{
992	dsl_dir_phys_t *dd = data;
993	time_t crtime;
994	char nice[6];
995
996	if (dd == NULL)
997		return;
998
999	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1000
1001	crtime = dd->dd_creation_time;
1002	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1003	(void) printf("\t\thead_dataset_obj = %llu\n",
1004	    (u_longlong_t)dd->dd_head_dataset_obj);
1005	(void) printf("\t\tparent_dir_obj = %llu\n",
1006	    (u_longlong_t)dd->dd_parent_obj);
1007	(void) printf("\t\torigin_obj = %llu\n",
1008	    (u_longlong_t)dd->dd_origin_obj);
1009	(void) printf("\t\tchild_dir_zapobj = %llu\n",
1010	    (u_longlong_t)dd->dd_child_dir_zapobj);
1011	nicenum(dd->dd_used_bytes, nice);
1012	(void) printf("\t\tused_bytes = %s\n", nice);
1013	nicenum(dd->dd_compressed_bytes, nice);
1014	(void) printf("\t\tcompressed_bytes = %s\n", nice);
1015	nicenum(dd->dd_uncompressed_bytes, nice);
1016	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
1017	nicenum(dd->dd_quota, nice);
1018	(void) printf("\t\tquota = %s\n", nice);
1019	nicenum(dd->dd_reserved, nice);
1020	(void) printf("\t\treserved = %s\n", nice);
1021	(void) printf("\t\tprops_zapobj = %llu\n",
1022	    (u_longlong_t)dd->dd_props_zapobj);
1023	(void) printf("\t\tdeleg_zapobj = %llu\n",
1024	    (u_longlong_t)dd->dd_deleg_zapobj);
1025	(void) printf("\t\tflags = %llx\n",
1026	    (u_longlong_t)dd->dd_flags);
1027
1028#define	DO(which) \
1029	nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1030	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1031	DO(HEAD);
1032	DO(SNAP);
1033	DO(CHILD);
1034	DO(CHILD_RSRV);
1035	DO(REFRSRV);
1036#undef DO
1037}
1038
1039/*ARGSUSED*/
1040static void
1041dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1042{
1043	dsl_dataset_phys_t *ds = data;
1044	time_t crtime;
1045	char used[6], compressed[6], uncompressed[6], unique[6];
1046	char blkbuf[BP_SPRINTF_LEN];
1047
1048	if (ds == NULL)
1049		return;
1050
1051	ASSERT(size == sizeof (*ds));
1052	crtime = ds->ds_creation_time;
1053	nicenum(ds->ds_used_bytes, used);
1054	nicenum(ds->ds_compressed_bytes, compressed);
1055	nicenum(ds->ds_uncompressed_bytes, uncompressed);
1056	nicenum(ds->ds_unique_bytes, unique);
1057	sprintf_blkptr(blkbuf, &ds->ds_bp);
1058
1059	(void) printf("\t\tdir_obj = %llu\n",
1060	    (u_longlong_t)ds->ds_dir_obj);
1061	(void) printf("\t\tprev_snap_obj = %llu\n",
1062	    (u_longlong_t)ds->ds_prev_snap_obj);
1063	(void) printf("\t\tprev_snap_txg = %llu\n",
1064	    (u_longlong_t)ds->ds_prev_snap_txg);
1065	(void) printf("\t\tnext_snap_obj = %llu\n",
1066	    (u_longlong_t)ds->ds_next_snap_obj);
1067	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1068	    (u_longlong_t)ds->ds_snapnames_zapobj);
1069	(void) printf("\t\tnum_children = %llu\n",
1070	    (u_longlong_t)ds->ds_num_children);
1071	(void) printf("\t\tuserrefs_obj = %llu\n",
1072	    (u_longlong_t)ds->ds_userrefs_obj);
1073	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1074	(void) printf("\t\tcreation_txg = %llu\n",
1075	    (u_longlong_t)ds->ds_creation_txg);
1076	(void) printf("\t\tdeadlist_obj = %llu\n",
1077	    (u_longlong_t)ds->ds_deadlist_obj);
1078	(void) printf("\t\tused_bytes = %s\n", used);
1079	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1080	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1081	(void) printf("\t\tunique = %s\n", unique);
1082	(void) printf("\t\tfsid_guid = %llu\n",
1083	    (u_longlong_t)ds->ds_fsid_guid);
1084	(void) printf("\t\tguid = %llu\n",
1085	    (u_longlong_t)ds->ds_guid);
1086	(void) printf("\t\tflags = %llx\n",
1087	    (u_longlong_t)ds->ds_flags);
1088	(void) printf("\t\tnext_clones_obj = %llu\n",
1089	    (u_longlong_t)ds->ds_next_clones_obj);
1090	(void) printf("\t\tprops_obj = %llu\n",
1091	    (u_longlong_t)ds->ds_props_obj);
1092	(void) printf("\t\tbp = %s\n", blkbuf);
1093}
1094
1095static void
1096dump_bplist(objset_t *mos, uint64_t object, char *name)
1097{
1098	bplist_t bpl = { 0 };
1099	blkptr_t blk, *bp = &blk;
1100	uint64_t itor = 0;
1101	char bytes[6];
1102	char comp[6];
1103	char uncomp[6];
1104
1105	if (dump_opt['d'] < 3)
1106		return;
1107
1108	bplist_init(&bpl);
1109	VERIFY(0 == bplist_open(&bpl, mos, object));
1110	if (bplist_empty(&bpl)) {
1111		bplist_close(&bpl);
1112		bplist_fini(&bpl);
1113		return;
1114	}
1115
1116	nicenum(bpl.bpl_phys->bpl_bytes, bytes);
1117	if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
1118		nicenum(bpl.bpl_phys->bpl_comp, comp);
1119		nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
1120		(void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
1121		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
1122		    bytes, comp, uncomp);
1123	} else {
1124		(void) printf("\n    %s: %llu entries, %s\n",
1125		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
1126	}
1127
1128	if (dump_opt['d'] < 5) {
1129		bplist_close(&bpl);
1130		bplist_fini(&bpl);
1131		return;
1132	}
1133
1134	(void) printf("\n");
1135
1136	while (bplist_iterate(&bpl, &itor, bp) == 0) {
1137		char blkbuf[BP_SPRINTF_LEN];
1138
1139		ASSERT(bp->blk_birth != 0);
1140		sprintf_blkptr_compact(blkbuf, bp);
1141		(void) printf("\tItem %3llu: %s\n",
1142		    (u_longlong_t)itor - 1, blkbuf);
1143	}
1144
1145	bplist_close(&bpl);
1146	bplist_fini(&bpl);
1147}
1148
1149static avl_tree_t idx_tree;
1150static avl_tree_t domain_tree;
1151static boolean_t fuid_table_loaded;
1152
1153static void
1154fuid_table_destroy()
1155{
1156	if (fuid_table_loaded) {
1157		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1158		fuid_table_loaded = B_FALSE;
1159	}
1160}
1161
1162/*
1163 * print uid or gid information.
1164 * For normal POSIX id just the id is printed in decimal format.
1165 * For CIFS files with FUID the fuid is printed in hex followed by
1166 * the doman-rid string.
1167 */
1168static void
1169print_idstr(uint64_t id, const char *id_type)
1170{
1171	if (FUID_INDEX(id)) {
1172		char *domain;
1173
1174		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1175		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1176		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1177	} else {
1178		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1179	}
1180
1181}
1182
1183static void
1184dump_uidgid(objset_t *os, znode_phys_t *zp)
1185{
1186	uint32_t uid_idx, gid_idx;
1187
1188	uid_idx = FUID_INDEX(zp->zp_uid);
1189	gid_idx = FUID_INDEX(zp->zp_gid);
1190
1191	/* Load domain table, if not already loaded */
1192	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1193		uint64_t fuid_obj;
1194
1195		/* first find the fuid object.  It lives in the master node */
1196		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1197		    8, 1, &fuid_obj) == 0);
1198		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1199		(void) zfs_fuid_table_load(os, fuid_obj,
1200		    &idx_tree, &domain_tree);
1201		fuid_table_loaded = B_TRUE;
1202	}
1203
1204	print_idstr(zp->zp_uid, "uid");
1205	print_idstr(zp->zp_gid, "gid");
1206}
1207
1208/*ARGSUSED*/
1209static void
1210dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1211{
1212	znode_phys_t *zp = data;
1213	time_t z_crtime, z_atime, z_mtime, z_ctime;
1214	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1215	int error;
1216
1217	ASSERT(size >= sizeof (znode_phys_t));
1218
1219	error = zfs_obj_to_path(os, object, path, sizeof (path));
1220	if (error != 0) {
1221		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1222		    (u_longlong_t)object);
1223	}
1224
1225	if (dump_opt['d'] < 3) {
1226		(void) printf("\t%s\n", path);
1227		return;
1228	}
1229
1230	z_crtime = (time_t)zp->zp_crtime[0];
1231	z_atime = (time_t)zp->zp_atime[0];
1232	z_mtime = (time_t)zp->zp_mtime[0];
1233	z_ctime = (time_t)zp->zp_ctime[0];
1234
1235	(void) printf("\tpath	%s\n", path);
1236	dump_uidgid(os, zp);
1237	(void) printf("\tatime	%s", ctime(&z_atime));
1238	(void) printf("\tmtime	%s", ctime(&z_mtime));
1239	(void) printf("\tctime	%s", ctime(&z_ctime));
1240	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1241	(void) printf("\tgen	%llu\n", (u_longlong_t)zp->zp_gen);
1242	(void) printf("\tmode	%llo\n", (u_longlong_t)zp->zp_mode);
1243	(void) printf("\tsize	%llu\n", (u_longlong_t)zp->zp_size);
1244	(void) printf("\tparent	%llu\n", (u_longlong_t)zp->zp_parent);
1245	(void) printf("\tlinks	%llu\n", (u_longlong_t)zp->zp_links);
1246	(void) printf("\txattr	%llu\n", (u_longlong_t)zp->zp_xattr);
1247	(void) printf("\trdev	0x%016llx\n", (u_longlong_t)zp->zp_rdev);
1248}
1249
1250/*ARGSUSED*/
1251static void
1252dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
1253{
1254}
1255
1256/*ARGSUSED*/
1257static void
1258dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
1259{
1260}
1261
1262static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
1263	dump_none,		/* unallocated			*/
1264	dump_zap,		/* object directory		*/
1265	dump_uint64,		/* object array			*/
1266	dump_none,		/* packed nvlist		*/
1267	dump_packed_nvlist,	/* packed nvlist size		*/
1268	dump_none,		/* bplist			*/
1269	dump_none,		/* bplist header		*/
1270	dump_none,		/* SPA space map header		*/
1271	dump_none,		/* SPA space map		*/
1272	dump_none,		/* ZIL intent log		*/
1273	dump_dnode,		/* DMU dnode			*/
1274	dump_dmu_objset,	/* DMU objset			*/
1275	dump_dsl_dir,		/* DSL directory		*/
1276	dump_zap,		/* DSL directory child map	*/
1277	dump_zap,		/* DSL dataset snap map		*/
1278	dump_zap,		/* DSL props			*/
1279	dump_dsl_dataset,	/* DSL dataset			*/
1280	dump_znode,		/* ZFS znode			*/
1281	dump_acl,		/* ZFS V0 ACL			*/
1282	dump_uint8,		/* ZFS plain file		*/
1283	dump_zpldir,		/* ZFS directory		*/
1284	dump_zap,		/* ZFS master node		*/
1285	dump_zap,		/* ZFS delete queue		*/
1286	dump_uint8,		/* zvol object			*/
1287	dump_zap,		/* zvol prop			*/
1288	dump_uint8,		/* other uint8[]		*/
1289	dump_uint64,		/* other uint64[]		*/
1290	dump_zap,		/* other ZAP			*/
1291	dump_zap,		/* persistent error log		*/
1292	dump_uint8,		/* SPA history			*/
1293	dump_uint64,		/* SPA history offsets		*/
1294	dump_zap,		/* Pool properties		*/
1295	dump_zap,		/* DSL permissions		*/
1296	dump_acl,		/* ZFS ACL			*/
1297	dump_uint8,		/* ZFS SYSACL			*/
1298	dump_none,		/* FUID nvlist			*/
1299	dump_packed_nvlist,	/* FUID nvlist size		*/
1300	dump_zap,		/* DSL dataset next clones	*/
1301	dump_zap,		/* DSL scrub queue		*/
1302	dump_zap,		/* ZFS user/group used		*/
1303	dump_zap,		/* ZFS user/group quota		*/
1304	dump_zap,		/* snapshot refcount tags	*/
1305	dump_none,		/* DDT ZAP object		*/
1306	dump_zap,		/* DDT statistics		*/
1307	dump_unknown		/* Unknown type, must be last	*/
1308};
1309
1310static void
1311dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1312{
1313	dmu_buf_t *db = NULL;
1314	dmu_object_info_t doi;
1315	dnode_t *dn;
1316	void *bonus = NULL;
1317	size_t bsize = 0;
1318	char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], fill[7];
1319	char aux[50];
1320	int error;
1321
1322	if (*print_header) {
1323		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1324		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1325		    "%full", "type");
1326		*print_header = 0;
1327	}
1328
1329	if (object == 0) {
1330		dn = os->os_meta_dnode;
1331	} else {
1332		error = dmu_bonus_hold(os, object, FTAG, &db);
1333		if (error)
1334			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1335			    object, error);
1336		bonus = db->db_data;
1337		bsize = db->db_size;
1338		dn = ((dmu_buf_impl_t *)db)->db_dnode;
1339	}
1340	dmu_object_info_from_dnode(dn, &doi);
1341
1342	nicenum(doi.doi_metadata_block_size, iblk);
1343	nicenum(doi.doi_data_block_size, dblk);
1344	nicenum(doi.doi_max_offset, lsize);
1345	nicenum(doi.doi_physical_blocks_512 << 9, asize);
1346	nicenum(doi.doi_bonus_size, bonus_size);
1347	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1348	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1349	    doi.doi_max_offset);
1350
1351	aux[0] = '\0';
1352
1353	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1354		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1355		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1356	}
1357
1358	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1359		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1360		    ZDB_COMPRESS_NAME(doi.doi_compress));
1361	}
1362
1363	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1364	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1365	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1366
1367	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1368		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1369		    "", "", "", "", "", bonus_size, "bonus",
1370		    ZDB_OT_NAME(doi.doi_bonus_type));
1371	}
1372
1373	if (verbosity >= 4) {
1374		(void) printf("\tdnode flags: %s%s\n",
1375		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1376		    "USED_BYTES " : "",
1377		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1378		    "USERUSED_ACCOUNTED " : "");
1379		(void) printf("\tdnode maxblkid: %llu\n",
1380		    (longlong_t)dn->dn_phys->dn_maxblkid);
1381
1382		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1383		    bonus, bsize);
1384		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1385		*print_header = 1;
1386	}
1387
1388	if (verbosity >= 5)
1389		dump_indirect(dn);
1390
1391	if (verbosity >= 5) {
1392		/*
1393		 * Report the list of segments that comprise the object.
1394		 */
1395		uint64_t start = 0;
1396		uint64_t end;
1397		uint64_t blkfill = 1;
1398		int minlvl = 1;
1399
1400		if (dn->dn_type == DMU_OT_DNODE) {
1401			minlvl = 0;
1402			blkfill = DNODES_PER_BLOCK;
1403		}
1404
1405		for (;;) {
1406			char segsize[6];
1407			error = dnode_next_offset(dn,
1408			    0, &start, minlvl, blkfill, 0);
1409			if (error)
1410				break;
1411			end = start;
1412			error = dnode_next_offset(dn,
1413			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1414			nicenum(end - start, segsize);
1415			(void) printf("\t\tsegment [%016llx, %016llx)"
1416			    " size %5s\n", (u_longlong_t)start,
1417			    (u_longlong_t)end, segsize);
1418			if (error)
1419				break;
1420			start = end;
1421		}
1422	}
1423
1424	if (db != NULL)
1425		dmu_buf_rele(db, FTAG);
1426}
1427
1428static char *objset_types[DMU_OST_NUMTYPES] = {
1429	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1430
1431static void
1432dump_dir(objset_t *os)
1433{
1434	dmu_objset_stats_t dds;
1435	uint64_t object, object_count;
1436	uint64_t refdbytes, usedobjs, scratch;
1437	char numbuf[8];
1438	char blkbuf[BP_SPRINTF_LEN + 20];
1439	char osname[MAXNAMELEN];
1440	char *type = "UNKNOWN";
1441	int verbosity = dump_opt['d'];
1442	int print_header = 1;
1443	int i, error;
1444
1445	dmu_objset_fast_stat(os, &dds);
1446
1447	if (dds.dds_type < DMU_OST_NUMTYPES)
1448		type = objset_types[dds.dds_type];
1449
1450	if (dds.dds_type == DMU_OST_META) {
1451		dds.dds_creation_txg = TXG_INITIAL;
1452		usedobjs = os->os_rootbp->blk_fill;
1453		refdbytes = os->os_spa->spa_dsl_pool->
1454		    dp_mos_dir->dd_phys->dd_used_bytes;
1455	} else {
1456		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1457	}
1458
1459	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1460
1461	nicenum(refdbytes, numbuf);
1462
1463	if (verbosity >= 4) {
1464		(void) sprintf(blkbuf, ", rootbp ");
1465		(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
1466	} else {
1467		blkbuf[0] = '\0';
1468	}
1469
1470	dmu_objset_name(os, osname);
1471
1472	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1473	    "%s, %llu objects%s\n",
1474	    osname, type, (u_longlong_t)dmu_objset_id(os),
1475	    (u_longlong_t)dds.dds_creation_txg,
1476	    numbuf, (u_longlong_t)usedobjs, blkbuf);
1477
1478	if (zopt_objects != 0) {
1479		for (i = 0; i < zopt_objects; i++)
1480			dump_object(os, zopt_object[i], verbosity,
1481			    &print_header);
1482		(void) printf("\n");
1483		return;
1484	}
1485
1486	if (dump_opt['i'] != 0 || verbosity >= 2)
1487		dump_intent_log(dmu_objset_zil(os));
1488
1489	if (dmu_objset_ds(os) != NULL)
1490		dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
1491		    dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
1492
1493	if (verbosity < 2)
1494		return;
1495
1496	if (os->os_rootbp->blk_birth == 0)
1497		return;
1498
1499	dump_object(os, 0, verbosity, &print_header);
1500	object_count = 0;
1501	if (os->os_userused_dnode &&
1502	    os->os_userused_dnode->dn_type != 0) {
1503		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1504		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1505	}
1506
1507	object = 0;
1508	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1509		dump_object(os, object, verbosity, &print_header);
1510		object_count++;
1511	}
1512
1513	ASSERT3U(object_count, ==, usedobjs);
1514
1515	(void) printf("\n");
1516
1517	if (error != ESRCH) {
1518		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1519		abort();
1520	}
1521}
1522
1523static void
1524dump_uberblock(uberblock_t *ub)
1525{
1526	time_t timestamp = ub->ub_timestamp;
1527
1528	(void) printf("\nUberblock:\n");
1529	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1530	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1531	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1532	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1533	(void) printf("\ttimestamp = %llu UTC = %s",
1534	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1535	if (dump_opt['u'] >= 3) {
1536		char blkbuf[BP_SPRINTF_LEN];
1537		sprintf_blkptr(blkbuf, &ub->ub_rootbp);
1538		(void) printf("\trootbp = %s\n", blkbuf);
1539	}
1540	(void) printf("\n");
1541}
1542
1543static void
1544dump_config(spa_t *spa)
1545{
1546	dmu_buf_t *db;
1547	size_t nvsize = 0;
1548	int error = 0;
1549
1550
1551	error = dmu_bonus_hold(spa->spa_meta_objset,
1552	    spa->spa_config_object, FTAG, &db);
1553
1554	if (error == 0) {
1555		nvsize = *(uint64_t *)db->db_data;
1556		dmu_buf_rele(db, FTAG);
1557
1558		(void) printf("\nMOS Configuration:\n");
1559		dump_packed_nvlist(spa->spa_meta_objset,
1560		    spa->spa_config_object, (void *)&nvsize, 1);
1561	} else {
1562		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1563		    (u_longlong_t)spa->spa_config_object, error);
1564	}
1565}
1566
1567static void
1568dump_cachefile(const char *cachefile)
1569{
1570	int fd;
1571	struct stat64 statbuf;
1572	char *buf;
1573	nvlist_t *config;
1574
1575	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1576		(void) printf("cannot open '%s': %s\n", cachefile,
1577		    strerror(errno));
1578		exit(1);
1579	}
1580
1581	if (fstat64(fd, &statbuf) != 0) {
1582		(void) printf("failed to stat '%s': %s\n", cachefile,
1583		    strerror(errno));
1584		exit(1);
1585	}
1586
1587	if ((buf = malloc(statbuf.st_size)) == NULL) {
1588		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
1589		    (u_longlong_t)statbuf.st_size);
1590		exit(1);
1591	}
1592
1593	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1594		(void) fprintf(stderr, "failed to read %llu bytes\n",
1595		    (u_longlong_t)statbuf.st_size);
1596		exit(1);
1597	}
1598
1599	(void) close(fd);
1600
1601	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1602		(void) fprintf(stderr, "failed to unpack nvlist\n");
1603		exit(1);
1604	}
1605
1606	free(buf);
1607
1608	dump_nvlist(config, 0);
1609
1610	nvlist_free(config);
1611}
1612
1613static void
1614dump_label(const char *dev)
1615{
1616	int fd;
1617	vdev_label_t label;
1618	char *buf = label.vl_vdev_phys.vp_nvlist;
1619	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1620	struct stat64 statbuf;
1621	uint64_t psize;
1622	int l;
1623
1624	if ((fd = open64(dev, O_RDONLY)) < 0) {
1625		(void) printf("cannot open '%s': %s\n", dev, strerror(errno));
1626		exit(1);
1627	}
1628
1629	if (fstat64(fd, &statbuf) != 0) {
1630		(void) printf("failed to stat '%s': %s\n", dev,
1631		    strerror(errno));
1632		exit(1);
1633	}
1634
1635	psize = statbuf.st_size;
1636	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1637
1638	for (l = 0; l < VDEV_LABELS; l++) {
1639
1640		nvlist_t *config = NULL;
1641
1642		(void) printf("--------------------------------------------\n");
1643		(void) printf("LABEL %d\n", l);
1644		(void) printf("--------------------------------------------\n");
1645
1646		if (pread64(fd, &label, sizeof (label),
1647		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1648			(void) printf("failed to read label %d\n", l);
1649			continue;
1650		}
1651
1652		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1653			(void) printf("failed to unpack label %d\n", l);
1654			continue;
1655		}
1656		dump_nvlist(config, 4);
1657		nvlist_free(config);
1658	}
1659}
1660
1661/*ARGSUSED*/
1662static int
1663dump_one_dir(char *dsname, void *arg)
1664{
1665	int error;
1666	objset_t *os;
1667
1668	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
1669	if (error) {
1670		(void) printf("Could not open %s, error %d\n", dsname, error);
1671		return (0);
1672	}
1673	dump_dir(os);
1674	dmu_objset_disown(os, FTAG);
1675	fuid_table_destroy();
1676	return (0);
1677}
1678
/*
 * Block statistics: running totals for one (level, type) cell, updated
 * by zdb_count_block() for each block pointer it counts.
 */
typedef struct zdb_blkstats {
	uint64_t	zb_asize;	/* sum of BP_GET_ASIZE() */
	uint64_t	zb_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zb_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zb_count;	/* number of block pointers */
} zdb_blkstats_t;
1688
/*
 * Extended object types, numbered directly after the DMU object types,
 * used to report deferred frees and dedup auto-ditto blocks, plus a
 * grand-total slot.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 2)

/* Display names for the extended types, indexed by (type - DMU_OT_NUMTYPES). */
static char *zdb_ot_extname[] = {
	"deferred free",	/* ZDB_OT_DEFERRED */
	"dedup ditto",		/* ZDB_OT_DITTO */
	"Total",		/* ZDB_OT_TOTAL */
};
1701
1702#define	ZB_TOTAL	DN_MAX_LEVELS
1703
1704typedef struct zdb_cb {
1705	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
1706	uint64_t	zcb_dedup_asize;
1707	uint64_t	zcb_dedup_blocks;
1708	uint64_t	zcb_errors[256];
1709	int		zcb_readfails;
1710	int		zcb_haderrors;
1711} zdb_cb_t;
1712
1713static void
1714zdb_count_block(spa_t *spa, zilog_t *zilog, zdb_cb_t *zcb, const blkptr_t *bp,
1715    dmu_object_type_t type)
1716{
1717	uint64_t refcnt = 0;
1718
1719	ASSERT(type < ZDB_OT_TOTAL);
1720
1721	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
1722		return;
1723
1724	for (int i = 0; i < 4; i++) {
1725		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1726		int t = (i & 1) ? type : ZDB_OT_TOTAL;
1727		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1728
1729		zb->zb_asize += BP_GET_ASIZE(bp);
1730		zb->zb_lsize += BP_GET_LSIZE(bp);
1731		zb->zb_psize += BP_GET_PSIZE(bp);
1732		zb->zb_count++;
1733	}
1734
1735	if (dump_opt['L'])
1736		return;
1737
1738	if (BP_GET_DEDUP(bp)) {
1739		ddt_t *ddt;
1740		ddt_entry_t *dde;
1741
1742		ddt = ddt_select(spa, bp);
1743		ddt_enter(ddt);
1744		dde = ddt_lookup(ddt, bp, B_FALSE);
1745
1746		if (dde == NULL) {
1747			refcnt = 0;
1748		} else {
1749			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
1750			ddt_phys_decref(ddp);
1751			refcnt = ddp->ddp_refcnt;
1752			if (ddt_phys_total_refcnt(dde) == 0)
1753				ddt_remove(ddt, dde);
1754		}
1755		ddt_exit(ddt);
1756	}
1757
1758	VERIFY3U(zio_wait(zio_claim(NULL, spa,
1759	    refcnt ? 0 : spa_first_txg(spa),
1760	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
1761}
1762
1763static int
1764zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
1765    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1766{
1767	zdb_cb_t *zcb = arg;
1768	char blkbuf[BP_SPRINTF_LEN];
1769	dmu_object_type_t type;
1770	boolean_t is_metadata;
1771
1772	if (bp == NULL)
1773		return (0);
1774
1775	type = BP_GET_TYPE(bp);
1776
1777	zdb_count_block(spa, zilog, zcb, bp, type);
1778
1779	is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
1780
1781	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
1782		int ioerr;
1783		size_t size = BP_GET_PSIZE(bp);
1784		void *data = malloc(size);
1785		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
1786
1787		/* If it's an intent log block, failure is expected. */
1788		if (zb->zb_level == ZB_ZIL_LEVEL)
1789			flags |= ZIO_FLAG_SPECULATIVE;
1790
1791		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
1792		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
1793
1794		free(data);
1795
1796		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
1797			zcb->zcb_haderrors = 1;
1798			zcb->zcb_errors[ioerr]++;
1799
1800			if (dump_opt['b'] >= 2)
1801				sprintf_blkptr(blkbuf, bp);
1802			else
1803				blkbuf[0] = '\0';
1804
1805			(void) printf("zdb_blkptr_cb: "
1806			    "Got error %d reading "
1807			    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
1808			    ioerr,
1809			    (u_longlong_t)zb->zb_objset,
1810			    (u_longlong_t)zb->zb_object,
1811			    (u_longlong_t)zb->zb_level,
1812			    (u_longlong_t)zb->zb_blkid,
1813			    blkbuf);
1814		}
1815	}
1816
1817	zcb->zcb_readfails = 0;
1818
1819	if (dump_opt['b'] >= 4) {
1820		sprintf_blkptr(blkbuf, bp);
1821		(void) printf("objset %llu object %llu "
1822		    "level %lld offset 0x%llx %s\n",
1823		    (u_longlong_t)zb->zb_objset,
1824		    (u_longlong_t)zb->zb_object,
1825		    (longlong_t)zb->zb_level,
1826		    (u_longlong_t)blkid2offset(dnp, bp, zb),
1827		    blkbuf);
1828	}
1829
1830	return (0);
1831}
1832
1833static void
1834zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
1835{
1836	vdev_t *vd = sm->sm_ppd;
1837
1838	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
1839	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
1840}
1841
1842/* ARGSUSED */
1843static void
1844zdb_space_map_load(space_map_t *sm)
1845{
1846}
1847
1848static void
1849zdb_space_map_unload(space_map_t *sm)
1850{
1851	space_map_vacate(sm, zdb_leak, sm);
1852}
1853
1854/* ARGSUSED */
1855static void
1856zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
1857{
1858}
1859
1860static space_map_ops_t zdb_space_map_ops = {
1861	zdb_space_map_load,
1862	zdb_space_map_unload,
1863	NULL,	/* alloc */
1864	zdb_space_map_claim,
1865	NULL,	/* free */
1866	NULL	/* maxsize */
1867};
1868
1869static void
1870zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
1871{
1872	ddt_bookmark_t ddb = { 0 };
1873	ddt_entry_t dde;
1874	int error;
1875
1876	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
1877		blkptr_t blk;
1878		ddt_phys_t *ddp = dde.dde_phys;
1879
1880		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
1881			return;
1882
1883		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
1884
1885		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1886			if (ddp->ddp_phys_birth == 0)
1887				continue;
1888			ddt_bp_create(ddb.ddb_checksum,
1889			    &dde.dde_key, ddp, &blk);
1890			if (p == DDT_PHYS_DITTO) {
1891				zdb_count_block(spa, NULL, zcb, &blk,
1892				    ZDB_OT_DITTO);
1893			} else {
1894				zcb->zcb_dedup_asize +=
1895				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
1896				zcb->zcb_dedup_blocks++;
1897			}
1898		}
1899		if (!dump_opt['L']) {
1900			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
1901			ddt_enter(ddt);
1902			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
1903			ddt_exit(ddt);
1904		}
1905	}
1906
1907	ASSERT(error == ENOENT);
1908}
1909
1910static void
1911zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
1912{
1913	if (!dump_opt['L']) {
1914		vdev_t *rvd = spa->spa_root_vdev;
1915		for (int c = 0; c < rvd->vdev_children; c++) {
1916			vdev_t *vd = rvd->vdev_child[c];
1917			for (int m = 0; m < vd->vdev_ms_count; m++) {
1918				metaslab_t *msp = vd->vdev_ms[m];
1919				mutex_enter(&msp->ms_lock);
1920				space_map_unload(&msp->ms_map);
1921				VERIFY(space_map_load(&msp->ms_map,
1922				    &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
1923				    spa->spa_meta_objset) == 0);
1924				msp->ms_map.sm_ppd = vd;
1925				mutex_exit(&msp->ms_lock);
1926			}
1927		}
1928	}
1929
1930	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
1931
1932	zdb_ddt_leak_init(spa, zcb);
1933
1934	spa_config_exit(spa, SCL_CONFIG, FTAG);
1935}
1936
1937static void
1938zdb_leak_fini(spa_t *spa)
1939{
1940	if (!dump_opt['L']) {
1941		vdev_t *rvd = spa->spa_root_vdev;
1942		for (int c = 0; c < rvd->vdev_children; c++) {
1943			vdev_t *vd = rvd->vdev_child[c];
1944			for (int m = 0; m < vd->vdev_ms_count; m++) {
1945				metaslab_t *msp = vd->vdev_ms[m];
1946				mutex_enter(&msp->ms_lock);
1947				space_map_unload(&msp->ms_map);
1948				mutex_exit(&msp->ms_lock);
1949			}
1950		}
1951	}
1952}
1953
1954static int
1955dump_block_stats(spa_t *spa)
1956{
1957	zdb_cb_t zcb = { 0 };
1958	zdb_blkstats_t *zb, *tzb;
1959	uint64_t norm_alloc, norm_space, total_alloc, total_found;
1960	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA;
1961	int leaks = 0;
1962
1963	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
1964	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
1965	    (dump_opt['c'] == 1) ? "metadata " : "",
1966	    dump_opt['c'] ? "checksums " : "",
1967	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
1968	    !dump_opt['L'] ? "nothing leaked " : "");
1969
1970	/*
1971	 * Load all space maps as SM_ALLOC maps, then traverse the pool
1972	 * claiming each block we discover.  If the pool is perfectly
1973	 * consistent, the space maps will be empty when we're done.
1974	 * Anything left over is a leak; any block we can't claim (because
1975	 * it's not part of any space map) is a double allocation,
1976	 * reference to a freed block, or an unclaimed log block.
1977	 */
1978	zdb_leak_init(spa, &zcb);
1979
1980	/*
1981	 * If there's a deferred-free bplist, process that first.
1982	 */
1983	if (spa->spa_deferred_bplist_obj != 0) {
1984		bplist_t *bpl = &spa->spa_deferred_bplist;
1985		blkptr_t blk;
1986		uint64_t itor = 0;
1987
1988		VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
1989		    spa->spa_deferred_bplist_obj));
1990
1991		while (bplist_iterate(bpl, &itor, &blk) == 0) {
1992			if (dump_opt['b'] >= 4) {
1993				char blkbuf[BP_SPRINTF_LEN];
1994				sprintf_blkptr(blkbuf, &blk);
1995				(void) printf("[%s] %s\n",
1996				    "deferred free", blkbuf);
1997			}
1998			zdb_count_block(spa, NULL, &zcb, &blk, ZDB_OT_DEFERRED);
1999		}
2000
2001		bplist_close(bpl);
2002	}
2003
2004	if (dump_opt['c'] > 1)
2005		flags |= TRAVERSE_PREFETCH_DATA;
2006
2007	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2008
2009	if (zcb.zcb_haderrors) {
2010		(void) printf("\nError counts:\n\n");
2011		(void) printf("\t%5s  %s\n", "errno", "count");
2012		for (int e = 0; e < 256; e++) {
2013			if (zcb.zcb_errors[e] != 0) {
2014				(void) printf("\t%5d  %llu\n",
2015				    e, (u_longlong_t)zcb.zcb_errors[e]);
2016			}
2017		}
2018	}
2019
2020	/*
2021	 * Report any leaked segments.
2022	 */
2023	zdb_leak_fini(spa);
2024
2025	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2026
2027	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2028	norm_space = metaslab_class_get_space(spa_normal_class(spa));
2029
2030	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2031	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2032
2033	if (total_found == total_alloc) {
2034		if (!dump_opt['L'])
2035			(void) printf("\n\tNo leaks (block sum matches space"
2036			    " maps exactly)\n");
2037	} else {
2038		(void) printf("block traversal size %llu != alloc %llu "
2039		    "(%s %lld)\n",
2040		    (u_longlong_t)total_found,
2041		    (u_longlong_t)total_alloc,
2042		    (dump_opt['L']) ? "unreachable" : "leaked",
2043		    (longlong_t)(total_alloc - total_found));
2044		leaks = 1;
2045	}
2046
2047	if (tzb->zb_count == 0)
2048		return (2);
2049
2050	(void) printf("\n");
2051	(void) printf("\tbp count:      %10llu\n",
2052	    (u_longlong_t)tzb->zb_count);
2053	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
2054	    (u_longlong_t)tzb->zb_lsize,
2055	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
2056	(void) printf("\tbp physical:   %10llu      avg:"
2057	    " %6llu     compression: %6.2f\n",
2058	    (u_longlong_t)tzb->zb_psize,
2059	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
2060	    (double)tzb->zb_lsize / tzb->zb_psize);
2061	(void) printf("\tbp allocated:  %10llu      avg:"
2062	    " %6llu     compression: %6.2f\n",
2063	    (u_longlong_t)tzb->zb_asize,
2064	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
2065	    (double)tzb->zb_lsize / tzb->zb_asize);
2066	(void) printf("\tbp deduped:    %10llu    ref>1:"
2067	    " %6llu   deduplication: %6.2f\n",
2068	    (u_longlong_t)zcb.zcb_dedup_asize,
2069	    (u_longlong_t)zcb.zcb_dedup_blocks,
2070	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
2071	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
2072	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
2073
2074	if (dump_opt['b'] >= 2) {
2075		int l, t, level;
2076		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2077		    "\t  avg\t comp\t%%Total\tType\n");
2078
2079		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
2080			char csize[6], lsize[6], psize[6], asize[6], avg[6];
2081			char *typename;
2082
2083			if (t < DMU_OT_NUMTYPES)
2084				typename = dmu_ot[t].ot_name;
2085			else
2086				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
2087
2088			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
2089				(void) printf("%6s\t%5s\t%5s\t%5s"
2090				    "\t%5s\t%5s\t%6s\t%s\n",
2091				    "-",
2092				    "-",
2093				    "-",
2094				    "-",
2095				    "-",
2096				    "-",
2097				    "-",
2098				    typename);
2099				continue;
2100			}
2101
2102			for (l = ZB_TOTAL - 1; l >= -1; l--) {
2103				level = (l == -1 ? ZB_TOTAL : l);
2104				zb = &zcb.zcb_type[level][t];
2105
2106				if (zb->zb_asize == 0)
2107					continue;
2108
2109				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
2110					continue;
2111
2112				if (level == 0 && zb->zb_asize ==
2113				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
2114					continue;
2115
2116				nicenum(zb->zb_count, csize);
2117				nicenum(zb->zb_lsize, lsize);
2118				nicenum(zb->zb_psize, psize);
2119				nicenum(zb->zb_asize, asize);
2120				nicenum(zb->zb_asize / zb->zb_count, avg);
2121
2122				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
2123				    "\t%5.2f\t%6.2f\t",
2124				    csize, lsize, psize, asize, avg,
2125				    (double)zb->zb_lsize / zb->zb_psize,
2126				    100.0 * zb->zb_asize / tzb->zb_asize);
2127
2128				if (level == ZB_TOTAL)
2129					(void) printf("%s\n", typename);
2130				else
2131					(void) printf("    L%d %s\n",
2132					    level, typename);
2133			}
2134		}
2135	}
2136
2137	(void) printf("\n");
2138
2139	if (leaks)
2140		return (2);
2141
2142	if (zcb.zcb_haderrors)
2143		return (3);
2144
2145	return (0);
2146}
2147
/*
 * One node of the AVL tree that zdb_ddt_add_cb() builds for the simulated
 * dedup table (-S).  Each node accumulates the totals of every block
 * pointer whose dedup key compares equal.
 */
typedef struct zdb_ddt_entry {
	ddt_key_t	zdde_key;	/* key produced by ddt_key_fill() */
	uint64_t	zdde_ref_blocks; /* number of block pointers seen */
	uint64_t	zdde_ref_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zdde_ref_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zdde_ref_dsize;	/* sum of bp_get_dsize_sync() */
	avl_node_t	zdde_node;	/* AVL tree linkage */
} zdb_ddt_entry_t;
2156
2157/* ARGSUSED */
2158static int
2159zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2160    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2161{
2162	avl_tree_t *t = arg;
2163	avl_index_t where;
2164	zdb_ddt_entry_t *zdde, zdde_search;
2165
2166	if (bp == NULL)
2167		return (0);
2168
2169	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2170		(void) printf("traversing objset %llu, %llu objects, "
2171		    "%lu blocks so far\n",
2172		    (u_longlong_t)zb->zb_objset,
2173		    (u_longlong_t)bp->blk_fill,
2174		    avl_numnodes(t));
2175	}
2176
2177	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2178	    BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
2179		return (0);
2180
2181	ddt_key_fill(&zdde_search.zdde_key, bp);
2182
2183	zdde = avl_find(t, &zdde_search, &where);
2184
2185	if (zdde == NULL) {
2186		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2187		zdde->zdde_key = zdde_search.zdde_key;
2188		avl_insert(t, zdde, where);
2189	}
2190
2191	zdde->zdde_ref_blocks += 1;
2192	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2193	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2194	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2195
2196	return (0);
2197}
2198
2199static void
2200dump_simulated_ddt(spa_t *spa)
2201{
2202	avl_tree_t t;
2203	void *cookie = NULL;
2204	zdb_ddt_entry_t *zdde;
2205	ddt_histogram_t ddh_total = { 0 };
2206	ddt_stat_t dds_total = { 0 };
2207
2208	avl_create(&t, ddt_entry_compare,
2209	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2210
2211	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2212
2213	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2214	    zdb_ddt_add_cb, &t);
2215
2216	spa_config_exit(spa, SCL_CONFIG, FTAG);
2217
2218	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2219		ddt_stat_t dds;
2220		uint64_t refcnt = zdde->zdde_ref_blocks;
2221		ASSERT(refcnt != 0);
2222
2223		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2224		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2225		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2226		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2227
2228		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2229		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2230		dds.dds_ref_psize = zdde->zdde_ref_psize;
2231		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2232
2233		ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
2234
2235		umem_free(zdde, sizeof (*zdde));
2236	}
2237
2238	avl_destroy(&t);
2239
2240	ddt_histogram_stat(&dds_total, &ddh_total);
2241
2242	(void) printf("Simulated DDT histogram:\n");
2243
2244	dump_ddt_histogram(&ddh_total);
2245
2246	dump_dedup_ratio(&dds_total);
2247}
2248
2249static void
2250dump_zpool(spa_t *spa)
2251{
2252	dsl_pool_t *dp = spa_get_dsl(spa);
2253	int rc = 0;
2254
2255	if (dump_opt['S']) {
2256		dump_simulated_ddt(spa);
2257		return;
2258	}
2259
2260	if (!dump_opt['e'] && dump_opt['C'] > 1) {
2261		(void) printf("\nCached configuration:\n");
2262		dump_nvlist(spa->spa_config, 8);
2263	}
2264
2265	if (dump_opt['C'])
2266		dump_config(spa);
2267
2268	if (dump_opt['u'])
2269		dump_uberblock(&spa->spa_uberblock);
2270
2271	if (dump_opt['D'])
2272		dump_all_ddts(spa);
2273
2274	if (dump_opt['d'] > 2 || dump_opt['m'])
2275		dump_metaslabs(spa);
2276
2277	if (dump_opt['d'] || dump_opt['i']) {
2278		dump_dir(dp->dp_meta_objset);
2279		if (dump_opt['d'] >= 3) {
2280			dump_bplist(dp->dp_meta_objset,
2281			    spa->spa_deferred_bplist_obj, "Deferred frees");
2282			dump_dtl(spa->spa_root_vdev, 0);
2283		}
2284		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
2285		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2286	}
2287	if (dump_opt['b'] || dump_opt['c'])
2288		rc = dump_block_stats(spa);
2289
2290	if (dump_opt['s'])
2291		show_pool_stats(spa);
2292
2293	if (dump_opt['h'])
2294		dump_history(spa);
2295
2296	if (rc != 0)
2297		exit(rc);
2298}
2299
/*
 * Option bits for zdb_read_block() (-R), one bit per flag character.
 */
#define	ZDB_FLAG_CHECKSUM	(1 << 0)
#define	ZDB_FLAG_DECOMPRESS	(1 << 1)
#define	ZDB_FLAG_BSWAP		(1 << 2)
#define	ZDB_FLAG_GBH		(1 << 3)
#define	ZDB_FLAG_INDIRECT	(1 << 4)
#define	ZDB_FLAG_PHYS		(1 << 5)
#define	ZDB_FLAG_RAW		(1 << 6)
#define	ZDB_FLAG_PRINT_BLKPTR	(1 << 7)

/*
 * Maps a flag character (used as an unsigned char index) to its
 * ZDB_FLAG_* bit; 0 means the character is not a valid flag.
 */
int flagbits[256];
2310
2311static void
2312zdb_print_blkptr(blkptr_t *bp, int flags)
2313{
2314	char blkbuf[BP_SPRINTF_LEN];
2315
2316	if (flags & ZDB_FLAG_BSWAP)
2317		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2318
2319	sprintf_blkptr(blkbuf, bp);
2320	(void) printf("%s\n", blkbuf);
2321}
2322
2323static void
2324zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2325{
2326	int i;
2327
2328	for (i = 0; i < nbps; i++)
2329		zdb_print_blkptr(&bp[i], flags);
2330}
2331
2332static void
2333zdb_dump_gbh(void *buf, int flags)
2334{
2335	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2336}
2337
2338static void
2339zdb_dump_block_raw(void *buf, uint64_t size, int flags)
2340{
2341	if (flags & ZDB_FLAG_BSWAP)
2342		byteswap_uint64_array(buf, size);
2343	(void) write(1, buf, size);
2344}
2345
2346static void
2347zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2348{
2349	uint64_t *d = (uint64_t *)buf;
2350	int nwords = size / sizeof (uint64_t);
2351	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2352	int i, j;
2353	char *hdr, *c;
2354
2355
2356	if (do_bswap)
2357		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
2358	else
2359		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
2360
2361	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
2362
2363	for (i = 0; i < nwords; i += 2) {
2364		(void) printf("%06llx:  %016llx  %016llx  ",
2365		    (u_longlong_t)(i * sizeof (uint64_t)),
2366		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
2367		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
2368
2369		c = (char *)&d[i];
2370		for (j = 0; j < 2 * sizeof (uint64_t); j++)
2371			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
2372		(void) printf("\n");
2373	}
2374}
2375
2376/*
2377 * There are two acceptable formats:
2378 *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
2379 *	child[.child]*    - For example: 0.1.1
2380 *
2381 * The second form can be used to specify arbitrary vdevs anywhere
2382 * in the heirarchy.  For example, in a pool with a mirror of
2383 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
2384 */
2385static vdev_t *
2386zdb_vdev_lookup(vdev_t *vdev, char *path)
2387{
2388	char *s, *p, *q;
2389	int i;
2390
2391	if (vdev == NULL)
2392		return (NULL);
2393
2394	/* First, assume the x.x.x.x format */
2395	i = (int)strtoul(path, &s, 10);
2396	if (s == path || (s && *s != '.' && *s != '\0'))
2397		goto name;
2398	if (i < 0 || i >= vdev->vdev_children)
2399		return (NULL);
2400
2401	vdev = vdev->vdev_child[i];
2402	if (*s == '\0')
2403		return (vdev);
2404	return (zdb_vdev_lookup(vdev, s+1));
2405
2406name:
2407	for (i = 0; i < vdev->vdev_children; i++) {
2408		vdev_t *vc = vdev->vdev_child[i];
2409
2410		if (vc->vdev_path == NULL) {
2411			vc = zdb_vdev_lookup(vc, path);
2412			if (vc == NULL)
2413				continue;
2414			else
2415				return (vc);
2416		}
2417
2418		p = strrchr(vc->vdev_path, '/');
2419		p = p ? p + 1 : vc->vdev_path;
2420		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
2421
2422		if (strcmp(vc->vdev_path, path) == 0)
2423			return (vc);
2424		if (strcmp(p, path) == 0)
2425			return (vc);
2426		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
2427			return (vc);
2428	}
2429
2430	return (NULL);
2431}
2432
2433/*
2434 * Read a block from a pool and print it out.  The syntax of the
2435 * block descriptor is:
2436 *
2437 *	pool:vdev_specifier:offset:size[:flags]
2438 *
2439 *	pool           - The name of the pool you wish to read from
2440 *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
2441 *	offset         - offset, in hex, in bytes
2442 *	size           - Amount of data to read, in hex, in bytes
2443 *	flags          - A string of characters specifying options
2444 *		 b: Decode a blkptr at given offset within block
2445 *		*c: Calculate and display checksums
2446 *		 d: Decompress data before dumping
2447 *		 e: Byteswap data before dumping
2448 *		 g: Display data as a gang block header
2449 *		 i: Display as an indirect block
2450 *		 p: Do I/O to physical offset
2451 *		 r: Dump raw data to stdout
2452 *
2453 *              * = not yet implemented
2454 */
2455static void
2456zdb_read_block(char *thing, spa_t *spa)
2457{
2458	blkptr_t blk, *bp = &blk;
2459	dva_t *dva = bp->blk_dva;
2460	int flags = 0;
2461	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
2462	zio_t *zio;
2463	vdev_t *vd;
2464	void *pbuf, *lbuf, *buf;
2465	char *s, *p, *dup, *vdev, *flagstr;
2466	int i, error;
2467
2468	dup = strdup(thing);
2469	s = strtok(dup, ":");
2470	vdev = s ? s : "";
2471	s = strtok(NULL, ":");
2472	offset = strtoull(s ? s : "", NULL, 16);
2473	s = strtok(NULL, ":");
2474	size = strtoull(s ? s : "", NULL, 16);
2475	s = strtok(NULL, ":");
2476	flagstr = s ? s : "";
2477
2478	s = NULL;
2479	if (size == 0)
2480		s = "size must not be zero";
2481	if (!IS_P2ALIGNED(size, DEV_BSIZE))
2482		s = "size must be a multiple of sector size";
2483	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
2484		s = "offset must be a multiple of sector size";
2485	if (s) {
2486		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
2487		free(dup);
2488		return;
2489	}
2490
2491	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
2492		for (i = 0; flagstr[i]; i++) {
2493			int bit = flagbits[(uchar_t)flagstr[i]];
2494
2495			if (bit == 0) {
2496				(void) printf("***Invalid flag: %c\n",
2497				    flagstr[i]);
2498				continue;
2499			}
2500			flags |= bit;
2501
2502			/* If it's not something with an argument, keep going */
2503			if ((bit & (ZDB_FLAG_CHECKSUM |
2504			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
2505				continue;
2506
2507			p = &flagstr[i + 1];
2508			if (bit == ZDB_FLAG_PRINT_BLKPTR)
2509				blkptr_offset = strtoull(p, &p, 16);
2510			if (*p != ':' && *p != '\0') {
2511				(void) printf("***Invalid flag arg: '%s'\n", s);
2512				free(dup);
2513				return;
2514			}
2515		}
2516	}
2517
2518	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
2519	if (vd == NULL) {
2520		(void) printf("***Invalid vdev: %s\n", vdev);
2521		free(dup);
2522		return;
2523	} else {
2524		if (vd->vdev_path)
2525			(void) fprintf(stderr, "Found vdev: %s\n",
2526			    vd->vdev_path);
2527		else
2528			(void) fprintf(stderr, "Found vdev type: %s\n",
2529			    vd->vdev_ops->vdev_op_type);
2530	}
2531
2532	psize = size;
2533	lsize = size;
2534
2535	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2536	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2537
2538	BP_ZERO(bp);
2539
2540	DVA_SET_VDEV(&dva[0], vd->vdev_id);
2541	DVA_SET_OFFSET(&dva[0], offset);
2542	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
2543	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
2544
2545	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
2546
2547	BP_SET_LSIZE(bp, lsize);
2548	BP_SET_PSIZE(bp, psize);
2549	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
2550	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
2551	BP_SET_TYPE(bp, DMU_OT_NONE);
2552	BP_SET_LEVEL(bp, 0);
2553	BP_SET_DEDUP(bp, 0);
2554	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
2555
2556	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2557	zio = zio_root(spa, NULL, NULL, 0);
2558
2559	if (vd == vd->vdev_top) {
2560		/*
2561		 * Treat this as a normal block read.
2562		 */
2563		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
2564		    ZIO_PRIORITY_SYNC_READ,
2565		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
2566	} else {
2567		/*
2568		 * Treat this as a vdev child I/O.
2569		 */
2570		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
2571		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
2572		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
2573		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
2574		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
2575	}
2576
2577	error = zio_wait(zio);
2578	spa_config_exit(spa, SCL_STATE, FTAG);
2579
2580	if (error) {
2581		(void) printf("Read of %s failed, error: %d\n", thing, error);
2582		goto out;
2583	}
2584
2585	if (flags & ZDB_FLAG_DECOMPRESS) {
2586		/*
2587		 * We don't know how the data was compressed, so just try
2588		 * every decompress function at every inflated blocksize.
2589		 */
2590		enum zio_compress c;
2591		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2592		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2593
2594		bcopy(pbuf, pbuf2, psize);
2595
2596		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
2597		    SPA_MAXBLOCKSIZE - psize) == 0);
2598
2599		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
2600		    SPA_MAXBLOCKSIZE - psize) == 0);
2601
2602		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
2603		    lsize -= SPA_MINBLOCKSIZE) {
2604			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
2605				if (zio_decompress_data(c, pbuf, lbuf,
2606				    psize, lsize) == 0 &&
2607				    zio_decompress_data(c, pbuf2, lbuf2,
2608				    psize, lsize) == 0 &&
2609				    bcmp(lbuf, lbuf2, lsize) == 0)
2610					break;
2611			}
2612			if (c != ZIO_COMPRESS_FUNCTIONS)
2613				break;
2614			lsize -= SPA_MINBLOCKSIZE;
2615		}
2616
2617		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
2618		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
2619
2620		if (lsize <= psize) {
2621			(void) printf("Decompress of %s failed\n", thing);
2622			goto out;
2623		}
2624		buf = lbuf;
2625		size = lsize;
2626	} else {
2627		buf = pbuf;
2628		size = psize;
2629	}
2630
2631	if (flags & ZDB_FLAG_PRINT_BLKPTR)
2632		zdb_print_blkptr((blkptr_t *)(void *)
2633		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2634	else if (flags & ZDB_FLAG_RAW)
2635		zdb_dump_block_raw(buf, size, flags);
2636	else if (flags & ZDB_FLAG_INDIRECT)
2637		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2638		    flags);
2639	else if (flags & ZDB_FLAG_GBH)
2640		zdb_dump_gbh(buf, flags);
2641	else
2642		zdb_dump_block(thing, buf, size, flags);
2643
2644out:
2645	umem_free(pbuf, SPA_MAXBLOCKSIZE);
2646	umem_free(lbuf, SPA_MAXBLOCKSIZE);
2647	free(dup);
2648}
2649
2650static boolean_t
2651pool_match(nvlist_t *cfg, char *tgt)
2652{
2653	uint64_t v, guid = strtoull(tgt, NULL, 0);
2654	char *s;
2655
2656	if (guid != 0) {
2657		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
2658			return (v == guid);
2659	} else {
2660		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
2661			return (strcmp(s, tgt) == 0);
2662	}
2663	return (B_FALSE);
2664}
2665
2666static char *
2667find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
2668{
2669	nvlist_t *pools;
2670	nvlist_t *match = NULL;
2671	char *name = NULL;
2672	char *sepp = NULL;
2673	char sep;
2674	int count = 0;
2675
2676	if ((sepp = strpbrk(*target, "/@")) != NULL) {
2677		sep = *sepp;
2678		*sepp = '\0';
2679	}
2680
2681	pools = zpool_find_import_activeok(g_zfs, dirc, dirv);
2682
2683	if (pools != NULL) {
2684		nvpair_t *elem = NULL;
2685		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
2686			verify(nvpair_value_nvlist(elem, configp) == 0);
2687			if (pool_match(*configp, *target)) {
2688				count++;
2689				if (match != NULL) {
2690					/* print previously found config */
2691					if (name != NULL) {
2692						(void) printf("%s\n", name);
2693						dump_nvlist(match, 8);
2694						name = NULL;
2695					}
2696					(void) printf("%s\n",
2697					    nvpair_name(elem));
2698					dump_nvlist(*configp, 8);
2699				} else {
2700					match = *configp;
2701					name = nvpair_name(elem);
2702				}
2703			}
2704		}
2705	}
2706	if (count > 1)
2707		(void) fatal("\tMatched %d pools - use pool GUID "
2708		    "instead of pool name or \n"
2709		    "\tpool name part of a dataset name to select pool", count);
2710
2711	if (sepp)
2712		*sepp = sep;
2713	/*
2714	 * If pool GUID was specified for pool id, replace it with pool name
2715	 */
2716	if (name && (strstr(*target, name) != *target)) {
2717		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
2718
2719		*target = umem_alloc(sz, UMEM_NOFAIL);
2720		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
2721	}
2722
2723	*configp = name ? match : NULL;
2724
2725	return (name);
2726}
2727
2728int
2729main(int argc, char **argv)
2730{
2731	int i, c;
2732	struct rlimit rl = { 1024, 1024 };
2733	spa_t *spa = NULL;
2734	objset_t *os = NULL;
2735	int dump_all = 1;
2736	int verbose = 0;
2737	int error;
2738	char **searchdirs = NULL;
2739	int nsearch = 0;
2740	char *target;
2741	nvlist_t *policy = NULL;
2742	uint64_t max_txg = UINT64_MAX;
2743
2744	(void) setrlimit(RLIMIT_NOFILE, &rl);
2745	(void) enable_extended_FILE_stdio(-1, -1);
2746
2747	dprintf_setup(&argc, argv);
2748
2749	while ((c = getopt(argc, argv, "bcdhilmsuCDRSLevp:t:U:")) != -1) {
2750		switch (c) {
2751		case 'b':
2752		case 'c':
2753		case 'd':
2754		case 'h':
2755		case 'i':
2756		case 'l':
2757		case 'm':
2758		case 's':
2759		case 'u':
2760		case 'C':
2761		case 'D':
2762		case 'R':
2763		case 'S':
2764			dump_opt[c]++;
2765			dump_all = 0;
2766			break;
2767		case 'L':
2768		case 'e':
2769			dump_opt[c]++;
2770			break;
2771		case 'v':
2772			verbose++;
2773			break;
2774		case 'p':
2775			if (searchdirs == NULL) {
2776				searchdirs = umem_alloc(sizeof (char *),
2777				    UMEM_NOFAIL);
2778			} else {
2779				char **tmp = umem_alloc((nsearch + 1) *
2780				    sizeof (char *), UMEM_NOFAIL);
2781				bcopy(searchdirs, tmp, nsearch *
2782				    sizeof (char *));
2783				umem_free(searchdirs,
2784				    nsearch * sizeof (char *));
2785				searchdirs = tmp;
2786			}
2787			searchdirs[nsearch++] = optarg;
2788			break;
2789		case 't':
2790			max_txg = strtoull(optarg, NULL, 0);
2791			if (max_txg < TXG_INITIAL) {
2792				(void) fprintf(stderr, "incorrect txg "
2793				    "specified: %s\n", optarg);
2794				usage();
2795			}
2796			break;
2797		case 'U':
2798			spa_config_path = optarg;
2799			break;
2800		default:
2801			usage();
2802			break;
2803		}
2804	}
2805
2806	if (!dump_opt['e'] && searchdirs != NULL) {
2807		(void) fprintf(stderr, "-p option requires use of -e\n");
2808		usage();
2809	}
2810
2811	kernel_init(FREAD);
2812	g_zfs = libzfs_init();
2813	ASSERT(g_zfs != NULL);
2814
2815	if (dump_all)
2816		verbose = MAX(verbose, 1);
2817
2818	for (c = 0; c < 256; c++) {
2819		if (dump_all && !strchr("elLRS", c))
2820			dump_opt[c] = 1;
2821		if (dump_opt[c])
2822			dump_opt[c] += verbose;
2823	}
2824
2825	argc -= optind;
2826	argv += optind;
2827
2828	if (argc < 2 && dump_opt['R'])
2829		usage();
2830	if (argc < 1) {
2831		if (!dump_opt['e'] && dump_opt['C']) {
2832			dump_cachefile(spa_config_path);
2833			return (0);
2834		}
2835		usage();
2836	}
2837
2838	if (dump_opt['l']) {
2839		dump_label(argv[0]);
2840		return (0);
2841	}
2842
2843	error = 0;
2844	target = argv[0];
2845
2846	if (dump_opt['e']) {
2847		nvlist_t *cfg = NULL;
2848		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
2849
2850		error = ENOENT;
2851		if (name) {
2852			if (dump_opt['C'] > 1) {
2853				(void) printf("\nConfiguration for import:\n");
2854				dump_nvlist(cfg, 8);
2855			}
2856			if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 ||
2857			    nvlist_add_uint64(policy,
2858			    ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
2859			    nvlist_add_nvlist(cfg,
2860			    ZPOOL_REWIND_POLICY, policy) != 0) {
2861				fatal("can't open '%s': %s",
2862				    target, strerror(ENOMEM));
2863			}
2864			if ((error = spa_import(name, cfg, NULL)) != 0)
2865				error = spa_import_verbatim(name, cfg, NULL);
2866			nvlist_free(policy);
2867		}
2868	}
2869
2870	if (error == 0) {
2871		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
2872			error = spa_open(target, &spa, FTAG);
2873			if (error) {
2874				/*
2875				 * If we're missing the log device then
2876				 * try opening the pool after clearing the
2877				 * log state.
2878				 */
2879				mutex_enter(&spa_namespace_lock);
2880				if ((spa = spa_lookup(target)) != NULL &&
2881				    spa->spa_log_state == SPA_LOG_MISSING) {
2882					spa->spa_log_state = SPA_LOG_CLEAR;
2883					error = 0;
2884				}
2885				mutex_exit(&spa_namespace_lock);
2886
2887				if (!error)
2888					error = spa_open(target, &spa, FTAG);
2889			}
2890		} else {
2891			error = dmu_objset_own(target, DMU_OST_ANY,
2892			    B_TRUE, FTAG, &os);
2893		}
2894	}
2895	if (error)
2896		fatal("can't open '%s': %s", target, strerror(error));
2897
2898	argv++;
2899	argc--;
2900	if (!dump_opt['R']) {
2901		if (argc > 0) {
2902			zopt_objects = argc;
2903			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
2904			for (i = 0; i < zopt_objects; i++) {
2905				errno = 0;
2906				zopt_object[i] = strtoull(argv[i], NULL, 0);
2907				if (zopt_object[i] == 0 && errno != 0)
2908					fatal("bad number %s: %s",
2909					    argv[i], strerror(errno));
2910			}
2911		}
2912		(os != NULL) ? dump_dir(os) : dump_zpool(spa);
2913	} else {
2914		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
2915		flagbits['c'] = ZDB_FLAG_CHECKSUM;
2916		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
2917		flagbits['e'] = ZDB_FLAG_BSWAP;
2918		flagbits['g'] = ZDB_FLAG_GBH;
2919		flagbits['i'] = ZDB_FLAG_INDIRECT;
2920		flagbits['p'] = ZDB_FLAG_PHYS;
2921		flagbits['r'] = ZDB_FLAG_RAW;
2922
2923		for (i = 0; i < argc; i++)
2924			zdb_read_block(argv[i], spa);
2925	}
2926
2927	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
2928
2929	fuid_table_destroy();
2930
2931	libzfs_fini(g_zfs);
2932	kernel_fini();
2933
2934	return (0);
2935}
2936