zdb.c revision f6d08325130f87525523d7797367a61969bf61b5
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 * Copyright 2017 Nexenta Systems, Inc.
27 */
28
29#include <stdio.h>
30#include <unistd.h>
31#include <stdio_ext.h>
32#include <stdlib.h>
33#include <ctype.h>
34#include <sys/zfs_context.h>
35#include <sys/spa.h>
36#include <sys/spa_impl.h>
37#include <sys/dmu.h>
38#include <sys/zap.h>
39#include <sys/fs/zfs.h>
40#include <sys/zfs_znode.h>
41#include <sys/zfs_sa.h>
42#include <sys/sa.h>
43#include <sys/sa_impl.h>
44#include <sys/vdev.h>
45#include <sys/vdev_impl.h>
46#include <sys/metaslab_impl.h>
47#include <sys/dmu_objset.h>
48#include <sys/dsl_dir.h>
49#include <sys/dsl_dataset.h>
50#include <sys/dsl_pool.h>
51#include <sys/dbuf.h>
52#include <sys/zil.h>
53#include <sys/zil_impl.h>
54#include <sys/stat.h>
55#include <sys/resource.h>
56#include <sys/dmu_traverse.h>
57#include <sys/zio_checksum.h>
58#include <sys/zio_compress.h>
59#include <sys/zfs_fuid.h>
60#include <sys/arc.h>
61#include <sys/ddt.h>
62#include <sys/zfeature.h>
63#include <sys/abd.h>
64#include <zfs_comutil.h>
65#undef verify
66#include <libzfs.h>
67
/*
 * Bounds-checked name lookups.  Each macro compares its index against the
 * size of the corresponding table before indexing it, and yields "UNKNOWN"
 * when the index is out of range.
 */
#define	ZDB_COMPRESS_NAME(idx)						\
	((idx) < ZIO_COMPRESS_FUNCTIONS ?				\
	zio_compress_table[(idx)].ci_name : "UNKNOWN")

#define	ZDB_CHECKSUM_NAME(idx)						\
	((idx) < ZIO_CHECKSUM_FUNCTIONS ?				\
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")

/*
 * Object type name: indices below DMU_OT_NUMTYPES come from dmu_ot[];
 * other indices accepted by DMU_OT_IS_VALID() are named after their
 * byteswap class; anything else is "UNKNOWN".
 */
#define	ZDB_OT_NAME(idx)						\
	((idx) < DMU_OT_NUMTYPES ? dmu_ot[(idx)].ot_name :		\
	(DMU_OT_IS_VALID(idx) ?						\
	dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN"))

/*
 * Fold an object type into the range [0, DMU_OT_NUMTYPES]: indices below
 * DMU_OT_NUMTYPES map to themselves, the two DMU_OTN_ZAP_* types map to
 * DMU_OT_ZAP_OTHER, and everything else maps to DMU_OT_NUMTYPES.
 */
#define	ZDB_OT_TYPE(idx)						\
	((idx) < DMU_OT_NUMTYPES ? (idx) :				\
	(((idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA) ?	\
	DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES))
78
79#ifndef lint
80extern int reference_tracking_enable;
81extern boolean_t zfs_recover;
82extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
83extern int zfs_vdev_async_read_max_active;
84#else
85int reference_tracking_enable;
86boolean_t zfs_recover;
87uint64_t zfs_arc_max, zfs_arc_meta_limit;
88int zfs_vdev_async_read_max_active;
89#endif
90
91const char cmdname[] = "zdb";
92uint8_t dump_opt[256];
93
94typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
95
96extern void dump_intent_log(zilog_t *);
97uint64_t *zopt_object = NULL;
98int zopt_objects = 0;
99libzfs_handle_t *g_zfs;
100uint64_t max_inflight = 1000;
101
102static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
103
/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */

/*
 * Returns the string to be used as the $UMEM_DEBUG setting.  Declared with
 * an explicit (void) parameter list so the definition is a real prototype,
 * matching _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
113
/*
 * Returns the string to be used as the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging[] = "fail,contents";

	return (umem_logging);
}
119
120static void
121usage(void)
122{
123	(void) fprintf(stderr,
124	    "Usage:\t%s [-AbcdDFGhiLMPsvX] [-e [-V] [-p <path> ...]] "
125	    "[-I <inflight I/Os>]\n"
126	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
127	    "\t\t[<poolname> [<object> ...]]\n"
128	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> "
129	    "[<object> ...]\n"
130	    "\t%s -C [-A] [-U <cache>]\n"
131	    "\t%s -l [-Aqu] <device>\n"
132	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
133	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
134	    "\t%s -O <dataset> <path>\n"
135	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
136	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
137	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
138	    "<poolname>\n\n",
139	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
140	    cmdname);
141
142	(void) fprintf(stderr, "    Dataset name must include at least one "
143	    "separator character '/' or '@'\n");
144	(void) fprintf(stderr, "    If dataset name is specified, only that "
145	    "dataset is dumped\n");
146	(void) fprintf(stderr, "    If object numbers are specified, only "
147	    "those objects are dumped\n\n");
148	(void) fprintf(stderr, "    Options to control amount of output:\n");
149	(void) fprintf(stderr, "        -b block statistics\n");
150	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
151	    "all data) blocks\n");
152	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
153	(void) fprintf(stderr, "        -d dataset(s)\n");
154	(void) fprintf(stderr, "        -D dedup statistics\n");
155	(void) fprintf(stderr, "        -h pool history\n");
156	(void) fprintf(stderr, "        -i intent logs\n");
157	(void) fprintf(stderr, "        -l read label contents\n");
158	(void) fprintf(stderr, "        -L disable leak tracking (do not "
159	    "load spacemaps)\n");
160	(void) fprintf(stderr, "        -m metaslabs\n");
161	(void) fprintf(stderr, "        -M metaslab groups\n");
162	(void) fprintf(stderr, "        -O perform object lookups by path\n");
163	(void) fprintf(stderr, "        -R read and display block from a "
164	    "device\n");
165	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
166	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
167	(void) fprintf(stderr, "        -v verbose (applies to all "
168	    "others)\n\n");
169	(void) fprintf(stderr, "    Below options are intended for use "
170	    "with other options:\n");
171	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
172	    "panic recovery (-AA) or both (-AAA)\n");
173	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
174	    "has altroot/not in a cachefile\n");
175	(void) fprintf(stderr, "        -F attempt automatic rewind within "
176	    "safe range of transaction groups\n");
177	(void) fprintf(stderr, "        -G dump zfs_dbgmsg buffer before "
178	    "exiting\n");
179	(void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
180	    "specify the maximum number of "
181	    "checksumming I/Os [default is 200]\n");
182	(void) fprintf(stderr, "        -o <variable>=<value> set global "
183	    "variable to an unsigned 32-bit integer value\n");
184	(void) fprintf(stderr, "        -p <path> -- use one or more with "
185	    "-e to specify path to vdev dir\n");
186	(void) fprintf(stderr, "        -P print numbers in parseable form\n");
187	(void) fprintf(stderr, "        -q don't print label contents\n");
188	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
189	    "searching for uberblocks\n");
190	(void) fprintf(stderr, "        -u uberblock\n");
191	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
192	    "cachefile\n");
193	(void) fprintf(stderr, "        -V do verbatim import\n");
194	(void) fprintf(stderr, "        -x <dumpdir> -- "
195	    "dump all read blocks into specified directory\n");
196	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
197	    "work with dataset)\n\n");
198	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
199	    "to make only that option verbose\n");
200	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
201	exit(1);
202}
203
204static void
205dump_debug_buffer()
206{
207	if (dump_opt['G']) {
208		(void) printf("\n");
209		zfs_dbgmsg_print("zdb");
210	}
211}
212
213/*
214 * Called for usage errors that are discovered after a call to spa_open(),
215 * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
216 */
217
218static void
219fatal(const char *fmt, ...)
220{
221	va_list ap;
222
223	va_start(ap, fmt);
224	(void) fprintf(stderr, "%s: ", cmdname);
225	(void) vfprintf(stderr, fmt, ap);
226	va_end(ap);
227	(void) fprintf(stderr, "\n");
228
229	dump_debug_buffer();
230
231	exit(1);
232}
233
234/* ARGSUSED */
235static void
236dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
237{
238	nvlist_t *nv;
239	size_t nvsize = *(uint64_t *)data;
240	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
241
242	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
243
244	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
245
246	umem_free(packed, nvsize);
247
248	dump_nvlist(nv, 8);
249
250	nvlist_free(nv);
251}
252
253/* ARGSUSED */
254static void
255dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
256{
257	spa_history_phys_t *shp = data;
258
259	if (shp == NULL)
260		return;
261
262	(void) printf("\t\tpool_create_len = %llu\n",
263	    (u_longlong_t)shp->sh_pool_create_len);
264	(void) printf("\t\tphys_max_off = %llu\n",
265	    (u_longlong_t)shp->sh_phys_max_off);
266	(void) printf("\t\tbof = %llu\n",
267	    (u_longlong_t)shp->sh_bof);
268	(void) printf("\t\teof = %llu\n",
269	    (u_longlong_t)shp->sh_eof);
270	(void) printf("\t\trecords_lost = %llu\n",
271	    (u_longlong_t)shp->sh_records_lost);
272}
273
274static void
275zdb_nicenum(uint64_t num, char *buf)
276{
277	if (dump_opt['P'])
278		(void) sprintf(buf, "%llu", (longlong_t)num);
279	else
280		nicenum(num, buf);
281}
282
283const char histo_stars[] = "****************************************";
284const int histo_width = sizeof (histo_stars) - 1;
285
286static void
287dump_histogram(const uint64_t *histo, int size, int offset)
288{
289	int i;
290	int minidx = size - 1;
291	int maxidx = 0;
292	uint64_t max = 0;
293
294	for (i = 0; i < size; i++) {
295		if (histo[i] > max)
296			max = histo[i];
297		if (histo[i] > 0 && i > maxidx)
298			maxidx = i;
299		if (histo[i] > 0 && i < minidx)
300			minidx = i;
301	}
302
303	if (max < histo_width)
304		max = histo_width;
305
306	for (i = minidx; i <= maxidx; i++) {
307		(void) printf("\t\t\t%3u: %6llu %s\n",
308		    i + offset, (u_longlong_t)histo[i],
309		    &histo_stars[(max - histo[i]) * histo_width / max]);
310	}
311}
312
313static void
314dump_zap_stats(objset_t *os, uint64_t object)
315{
316	int error;
317	zap_stats_t zs;
318
319	error = zap_get_stats(os, object, &zs);
320	if (error)
321		return;
322
323	if (zs.zs_ptrtbl_len == 0) {
324		ASSERT(zs.zs_num_blocks == 1);
325		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
326		    (u_longlong_t)zs.zs_blocksize,
327		    (u_longlong_t)zs.zs_num_entries);
328		return;
329	}
330
331	(void) printf("\tFat ZAP stats:\n");
332
333	(void) printf("\t\tPointer table:\n");
334	(void) printf("\t\t\t%llu elements\n",
335	    (u_longlong_t)zs.zs_ptrtbl_len);
336	(void) printf("\t\t\tzt_blk: %llu\n",
337	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
338	(void) printf("\t\t\tzt_numblks: %llu\n",
339	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
340	(void) printf("\t\t\tzt_shift: %llu\n",
341	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
342	(void) printf("\t\t\tzt_blks_copied: %llu\n",
343	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
344	(void) printf("\t\t\tzt_nextblk: %llu\n",
345	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
346
347	(void) printf("\t\tZAP entries: %llu\n",
348	    (u_longlong_t)zs.zs_num_entries);
349	(void) printf("\t\tLeaf blocks: %llu\n",
350	    (u_longlong_t)zs.zs_num_leafs);
351	(void) printf("\t\tTotal blocks: %llu\n",
352	    (u_longlong_t)zs.zs_num_blocks);
353	(void) printf("\t\tzap_block_type: 0x%llx\n",
354	    (u_longlong_t)zs.zs_block_type);
355	(void) printf("\t\tzap_magic: 0x%llx\n",
356	    (u_longlong_t)zs.zs_magic);
357	(void) printf("\t\tzap_salt: 0x%llx\n",
358	    (u_longlong_t)zs.zs_salt);
359
360	(void) printf("\t\tLeafs with 2^n pointers:\n");
361	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
362
363	(void) printf("\t\tBlocks with n*5 entries:\n");
364	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
365
366	(void) printf("\t\tBlocks n/10 full:\n");
367	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
368
369	(void) printf("\t\tEntries with n chunks:\n");
370	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
371
372	(void) printf("\t\tBuckets with n entries:\n");
373	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
374}
375
376/*ARGSUSED*/
377static void
378dump_none(objset_t *os, uint64_t object, void *data, size_t size)
379{
380}
381
382/*ARGSUSED*/
383static void
384dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
385{
386	(void) printf("\tUNKNOWN OBJECT TYPE\n");
387}
388
389/*ARGSUSED*/
390void
391dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
392{
393}
394
395/*ARGSUSED*/
396static void
397dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
398{
399}
400
401/*ARGSUSED*/
402static void
403dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
404{
405	zap_cursor_t zc;
406	zap_attribute_t attr;
407	void *prop;
408	int i;
409
410	dump_zap_stats(os, object);
411	(void) printf("\n");
412
413	for (zap_cursor_init(&zc, os, object);
414	    zap_cursor_retrieve(&zc, &attr) == 0;
415	    zap_cursor_advance(&zc)) {
416		(void) printf("\t\t%s = ", attr.za_name);
417		if (attr.za_num_integers == 0) {
418			(void) printf("\n");
419			continue;
420		}
421		prop = umem_zalloc(attr.za_num_integers *
422		    attr.za_integer_length, UMEM_NOFAIL);
423		(void) zap_lookup(os, object, attr.za_name,
424		    attr.za_integer_length, attr.za_num_integers, prop);
425		if (attr.za_integer_length == 1) {
426			(void) printf("%s", (char *)prop);
427		} else {
428			for (i = 0; i < attr.za_num_integers; i++) {
429				switch (attr.za_integer_length) {
430				case 2:
431					(void) printf("%u ",
432					    ((uint16_t *)prop)[i]);
433					break;
434				case 4:
435					(void) printf("%u ",
436					    ((uint32_t *)prop)[i]);
437					break;
438				case 8:
439					(void) printf("%lld ",
440					    (u_longlong_t)((int64_t *)prop)[i]);
441					break;
442				}
443			}
444		}
445		(void) printf("\n");
446		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
447	}
448	zap_cursor_fini(&zc);
449}
450
451static void
452dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
453{
454	bpobj_phys_t *bpop = data;
455	char bytes[32], comp[32], uncomp[32];
456
457	if (bpop == NULL)
458		return;
459
460	zdb_nicenum(bpop->bpo_bytes, bytes);
461	zdb_nicenum(bpop->bpo_comp, comp);
462	zdb_nicenum(bpop->bpo_uncomp, uncomp);
463
464	(void) printf("\t\tnum_blkptrs = %llu\n",
465	    (u_longlong_t)bpop->bpo_num_blkptrs);
466	(void) printf("\t\tbytes = %s\n", bytes);
467	if (size >= BPOBJ_SIZE_V1) {
468		(void) printf("\t\tcomp = %s\n", comp);
469		(void) printf("\t\tuncomp = %s\n", uncomp);
470	}
471	if (size >= sizeof (*bpop)) {
472		(void) printf("\t\tsubobjs = %llu\n",
473		    (u_longlong_t)bpop->bpo_subobjs);
474		(void) printf("\t\tnum_subobjs = %llu\n",
475		    (u_longlong_t)bpop->bpo_num_subobjs);
476	}
477
478	if (dump_opt['d'] < 5)
479		return;
480
481	for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) {
482		char blkbuf[BP_SPRINTF_LEN];
483		blkptr_t bp;
484
485		int err = dmu_read(os, object,
486		    i * sizeof (bp), sizeof (bp), &bp, 0);
487		if (err != 0) {
488			(void) printf("got error %u from dmu_read\n", err);
489			break;
490		}
491		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
492		(void) printf("\t%s\n", blkbuf);
493	}
494}
495
496/* ARGSUSED */
497static void
498dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
499{
500	dmu_object_info_t doi;
501
502	VERIFY0(dmu_object_info(os, object, &doi));
503	uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
504
505	int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
506	if (err != 0) {
507		(void) printf("got error %u from dmu_read\n", err);
508		kmem_free(subobjs, doi.doi_max_offset);
509		return;
510	}
511
512	int64_t last_nonzero = -1;
513	for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) {
514		if (subobjs[i] != 0)
515			last_nonzero = i;
516	}
517
518	for (int64_t i = 0; i <= last_nonzero; i++) {
519		(void) printf("\t%llu\n", (longlong_t)subobjs[i]);
520	}
521	kmem_free(subobjs, doi.doi_max_offset);
522}
523
524/*ARGSUSED*/
525static void
526dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
527{
528	dump_zap_stats(os, object);
529	/* contents are printed elsewhere, properly decoded */
530}
531
532/*ARGSUSED*/
533static void
534dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
535{
536	zap_cursor_t zc;
537	zap_attribute_t attr;
538
539	dump_zap_stats(os, object);
540	(void) printf("\n");
541
542	for (zap_cursor_init(&zc, os, object);
543	    zap_cursor_retrieve(&zc, &attr) == 0;
544	    zap_cursor_advance(&zc)) {
545		(void) printf("\t\t%s = ", attr.za_name);
546		if (attr.za_num_integers == 0) {
547			(void) printf("\n");
548			continue;
549		}
550		(void) printf(" %llx : [%d:%d:%d]\n",
551		    (u_longlong_t)attr.za_first_integer,
552		    (int)ATTR_LENGTH(attr.za_first_integer),
553		    (int)ATTR_BSWAP(attr.za_first_integer),
554		    (int)ATTR_NUM(attr.za_first_integer));
555	}
556	zap_cursor_fini(&zc);
557}
558
559/*ARGSUSED*/
560static void
561dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
562{
563	zap_cursor_t zc;
564	zap_attribute_t attr;
565	uint16_t *layout_attrs;
566	int i;
567
568	dump_zap_stats(os, object);
569	(void) printf("\n");
570
571	for (zap_cursor_init(&zc, os, object);
572	    zap_cursor_retrieve(&zc, &attr) == 0;
573	    zap_cursor_advance(&zc)) {
574		(void) printf("\t\t%s = [", attr.za_name);
575		if (attr.za_num_integers == 0) {
576			(void) printf("\n");
577			continue;
578		}
579
580		VERIFY(attr.za_integer_length == 2);
581		layout_attrs = umem_zalloc(attr.za_num_integers *
582		    attr.za_integer_length, UMEM_NOFAIL);
583
584		VERIFY(zap_lookup(os, object, attr.za_name,
585		    attr.za_integer_length,
586		    attr.za_num_integers, layout_attrs) == 0);
587
588		for (i = 0; i != attr.za_num_integers; i++)
589			(void) printf(" %d ", (int)layout_attrs[i]);
590		(void) printf("]\n");
591		umem_free(layout_attrs,
592		    attr.za_num_integers * attr.za_integer_length);
593	}
594	zap_cursor_fini(&zc);
595}
596
597/*ARGSUSED*/
598static void
599dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
600{
601	zap_cursor_t zc;
602	zap_attribute_t attr;
603	const char *typenames[] = {
604		/* 0 */ "not specified",
605		/* 1 */ "FIFO",
606		/* 2 */ "Character Device",
607		/* 3 */ "3 (invalid)",
608		/* 4 */ "Directory",
609		/* 5 */ "5 (invalid)",
610		/* 6 */ "Block Device",
611		/* 7 */ "7 (invalid)",
612		/* 8 */ "Regular File",
613		/* 9 */ "9 (invalid)",
614		/* 10 */ "Symbolic Link",
615		/* 11 */ "11 (invalid)",
616		/* 12 */ "Socket",
617		/* 13 */ "Door",
618		/* 14 */ "Event Port",
619		/* 15 */ "15 (invalid)",
620	};
621
622	dump_zap_stats(os, object);
623	(void) printf("\n");
624
625	for (zap_cursor_init(&zc, os, object);
626	    zap_cursor_retrieve(&zc, &attr) == 0;
627	    zap_cursor_advance(&zc)) {
628		(void) printf("\t\t%s = %lld (type: %s)\n",
629		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
630		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
631	}
632	zap_cursor_fini(&zc);
633}
634
635int
636get_dtl_refcount(vdev_t *vd)
637{
638	int refcount = 0;
639
640	if (vd->vdev_ops->vdev_op_leaf) {
641		space_map_t *sm = vd->vdev_dtl_sm;
642
643		if (sm != NULL &&
644		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
645			return (1);
646		return (0);
647	}
648
649	for (int c = 0; c < vd->vdev_children; c++)
650		refcount += get_dtl_refcount(vd->vdev_child[c]);
651	return (refcount);
652}
653
654int
655get_metaslab_refcount(vdev_t *vd)
656{
657	int refcount = 0;
658
659	if (vd->vdev_top == vd && !vd->vdev_removing) {
660		for (int m = 0; m < vd->vdev_ms_count; m++) {
661			space_map_t *sm = vd->vdev_ms[m]->ms_sm;
662
663			if (sm != NULL &&
664			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
665				refcount++;
666		}
667	}
668	for (int c = 0; c < vd->vdev_children; c++)
669		refcount += get_metaslab_refcount(vd->vdev_child[c]);
670
671	return (refcount);
672}
673
674static int
675verify_spacemap_refcounts(spa_t *spa)
676{
677	uint64_t expected_refcount = 0;
678	uint64_t actual_refcount;
679
680	(void) feature_get_refcount(spa,
681	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
682	    &expected_refcount);
683	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
684	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
685
686	if (expected_refcount != actual_refcount) {
687		(void) printf("space map refcount mismatch: expected %lld != "
688		    "actual %lld\n",
689		    (longlong_t)expected_refcount,
690		    (longlong_t)actual_refcount);
691		return (2);
692	}
693	return (0);
694}
695
696static void
697dump_spacemap(objset_t *os, space_map_t *sm)
698{
699	uint64_t alloc, offset, entry;
700	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
701			    "INVALID", "INVALID", "INVALID", "INVALID" };
702
703	if (sm == NULL)
704		return;
705
706	/*
707	 * Print out the freelist entries in both encoded and decoded form.
708	 */
709	alloc = 0;
710	for (offset = 0; offset < space_map_length(sm);
711	    offset += sizeof (entry)) {
712		uint8_t mapshift = sm->sm_shift;
713
714		VERIFY0(dmu_read(os, space_map_object(sm), offset,
715		    sizeof (entry), &entry, DMU_READ_PREFETCH));
716		if (SM_DEBUG_DECODE(entry)) {
717
718			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
719			    (u_longlong_t)(offset / sizeof (entry)),
720			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
721			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
722			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
723		} else {
724			(void) printf("\t    [%6llu]    %c  range:"
725			    " %010llx-%010llx  size: %06llx\n",
726			    (u_longlong_t)(offset / sizeof (entry)),
727			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
728			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
729			    mapshift) + sm->sm_start),
730			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
731			    mapshift) + sm->sm_start +
732			    (SM_RUN_DECODE(entry) << mapshift)),
733			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
734			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
735				alloc += SM_RUN_DECODE(entry) << mapshift;
736			else
737				alloc -= SM_RUN_DECODE(entry) << mapshift;
738		}
739	}
740	if (alloc != space_map_allocated(sm)) {
741		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
742		    "with space map summary (%llu)\n",
743		    (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
744	}
745}
746
747static void
748dump_metaslab_stats(metaslab_t *msp)
749{
750	char maxbuf[32];
751	range_tree_t *rt = msp->ms_tree;
752	avl_tree_t *t = &msp->ms_size_tree;
753	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
754
755	zdb_nicenum(metaslab_block_maxsize(msp), maxbuf);
756
757	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
758	    "segments", avl_numnodes(t), "maxsize", maxbuf,
759	    "freepct", free_pct);
760	(void) printf("\tIn-memory histogram:\n");
761	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
762}
763
764static void
765dump_metaslab(metaslab_t *msp)
766{
767	vdev_t *vd = msp->ms_group->mg_vd;
768	spa_t *spa = vd->vdev_spa;
769	space_map_t *sm = msp->ms_sm;
770	char freebuf[32];
771
772	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf);
773
774	(void) printf(
775	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
776	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
777	    (u_longlong_t)space_map_object(sm), freebuf);
778
779	if (dump_opt['m'] > 2 && !dump_opt['L']) {
780		mutex_enter(&msp->ms_lock);
781		metaslab_load_wait(msp);
782		if (!msp->ms_loaded) {
783			VERIFY0(metaslab_load(msp));
784			range_tree_stat_verify(msp->ms_tree);
785		}
786		dump_metaslab_stats(msp);
787		metaslab_unload(msp);
788		mutex_exit(&msp->ms_lock);
789	}
790
791	if (dump_opt['m'] > 1 && sm != NULL &&
792	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
793		/*
794		 * The space map histogram represents free space in chunks
795		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
796		 */
797		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
798		    (u_longlong_t)msp->ms_fragmentation);
799		dump_histogram(sm->sm_phys->smp_histogram,
800		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
801	}
802
803	if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
804		ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
805
806		mutex_enter(&msp->ms_lock);
807		dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
808		mutex_exit(&msp->ms_lock);
809	}
810}
811
812static void
813print_vdev_metaslab_header(vdev_t *vd)
814{
815	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
816	    (u_longlong_t)vd->vdev_id,
817	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
818	    "offset", "spacemap", "free");
819	(void) printf("\t%15s   %19s   %15s   %10s\n",
820	    "---------------", "-------------------",
821	    "---------------", "-------------");
822}
823
824static void
825dump_metaslab_groups(spa_t *spa)
826{
827	vdev_t *rvd = spa->spa_root_vdev;
828	metaslab_class_t *mc = spa_normal_class(spa);
829	uint64_t fragmentation;
830
831	metaslab_class_histogram_verify(mc);
832
833	for (int c = 0; c < rvd->vdev_children; c++) {
834		vdev_t *tvd = rvd->vdev_child[c];
835		metaslab_group_t *mg = tvd->vdev_mg;
836
837		if (mg->mg_class != mc)
838			continue;
839
840		metaslab_group_histogram_verify(mg);
841		mg->mg_fragmentation = metaslab_group_fragmentation(mg);
842
843		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
844		    "fragmentation",
845		    (u_longlong_t)tvd->vdev_id,
846		    (u_longlong_t)tvd->vdev_ms_count);
847		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
848			(void) printf("%3s\n", "-");
849		} else {
850			(void) printf("%3llu%%\n",
851			    (u_longlong_t)mg->mg_fragmentation);
852		}
853		dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
854	}
855
856	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
857	fragmentation = metaslab_class_fragmentation(mc);
858	if (fragmentation == ZFS_FRAG_INVALID)
859		(void) printf("\t%3s\n", "-");
860	else
861		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
862	dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
863}
864
865static void
866dump_metaslabs(spa_t *spa)
867{
868	vdev_t *vd, *rvd = spa->spa_root_vdev;
869	uint64_t m, c = 0, children = rvd->vdev_children;
870
871	(void) printf("\nMetaslabs:\n");
872
873	if (!dump_opt['d'] && zopt_objects > 0) {
874		c = zopt_object[0];
875
876		if (c >= children)
877			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
878
879		if (zopt_objects > 1) {
880			vd = rvd->vdev_child[c];
881			print_vdev_metaslab_header(vd);
882
883			for (m = 1; m < zopt_objects; m++) {
884				if (zopt_object[m] < vd->vdev_ms_count)
885					dump_metaslab(
886					    vd->vdev_ms[zopt_object[m]]);
887				else
888					(void) fprintf(stderr, "bad metaslab "
889					    "number %llu\n",
890					    (u_longlong_t)zopt_object[m]);
891			}
892			(void) printf("\n");
893			return;
894		}
895		children = c + 1;
896	}
897	for (; c < children; c++) {
898		vd = rvd->vdev_child[c];
899		print_vdev_metaslab_header(vd);
900
901		for (m = 0; m < vd->vdev_ms_count; m++)
902			dump_metaslab(vd->vdev_ms[m]);
903		(void) printf("\n");
904	}
905}
906
907static void
908dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
909{
910	const ddt_phys_t *ddp = dde->dde_phys;
911	const ddt_key_t *ddk = &dde->dde_key;
912	char *types[4] = { "ditto", "single", "double", "triple" };
913	char blkbuf[BP_SPRINTF_LEN];
914	blkptr_t blk;
915
916	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
917		if (ddp->ddp_phys_birth == 0)
918			continue;
919		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
920		snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
921		(void) printf("index %llx refcnt %llu %s %s\n",
922		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
923		    types[p], blkbuf);
924	}
925}
926
927static void
928dump_dedup_ratio(const ddt_stat_t *dds)
929{
930	double rL, rP, rD, D, dedup, compress, copies;
931
932	if (dds->dds_blocks == 0)
933		return;
934
935	rL = (double)dds->dds_ref_lsize;
936	rP = (double)dds->dds_ref_psize;
937	rD = (double)dds->dds_ref_dsize;
938	D = (double)dds->dds_dsize;
939
940	dedup = rD / D;
941	compress = rL / rP;
942	copies = rD / rP;
943
944	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
945	    "dedup * compress / copies = %.2f\n\n",
946	    dedup, compress, copies, dedup * compress / copies);
947}
948
949static void
950dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
951{
952	char name[DDT_NAMELEN];
953	ddt_entry_t dde;
954	uint64_t walk = 0;
955	dmu_object_info_t doi;
956	uint64_t count, dspace, mspace;
957	int error;
958
959	error = ddt_object_info(ddt, type, class, &doi);
960
961	if (error == ENOENT)
962		return;
963	ASSERT(error == 0);
964
965	if ((count = ddt_object_count(ddt, type, class)) == 0)
966		return;
967
968	dspace = doi.doi_physical_blocks_512 << 9;
969	mspace = doi.doi_fill_count * doi.doi_data_block_size;
970
971	ddt_object_name(ddt, type, class, name);
972
973	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
974	    name,
975	    (u_longlong_t)count,
976	    (u_longlong_t)(dspace / count),
977	    (u_longlong_t)(mspace / count));
978
979	if (dump_opt['D'] < 3)
980		return;
981
982	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
983
984	if (dump_opt['D'] < 4)
985		return;
986
987	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
988		return;
989
990	(void) printf("%s contents:\n\n", name);
991
992	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
993		dump_dde(ddt, &dde, walk);
994
995	ASSERT(error == ENOENT);
996
997	(void) printf("\n");
998}
999
1000static void
1001dump_all_ddts(spa_t *spa)
1002{
1003	ddt_histogram_t ddh_total = { 0 };
1004	ddt_stat_t dds_total = { 0 };
1005
1006	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
1007		ddt_t *ddt = spa->spa_ddt[c];
1008		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
1009			for (enum ddt_class class = 0; class < DDT_CLASSES;
1010			    class++) {
1011				dump_ddt(ddt, type, class);
1012			}
1013		}
1014	}
1015
1016	ddt_get_dedup_stats(spa, &dds_total);
1017
1018	if (dds_total.dds_blocks == 0) {
1019		(void) printf("All DDTs are empty\n");
1020		return;
1021	}
1022
1023	(void) printf("\n");
1024
1025	if (dump_opt['D'] > 1) {
1026		(void) printf("DDT histogram (aggregated over all DDTs):\n");
1027		ddt_get_dedup_histogram(spa, &ddh_total);
1028		zpool_dump_ddt(&dds_total, &ddh_total);
1029	}
1030
1031	dump_dedup_ratio(&dds_total);
1032}
1033
1034static void
1035dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
1036{
1037	char *prefix = arg;
1038
1039	(void) printf("%s [%llu,%llu) length %llu\n",
1040	    prefix,
1041	    (u_longlong_t)start,
1042	    (u_longlong_t)(start + size),
1043	    (u_longlong_t)(size));
1044}
1045
1046static void
1047dump_dtl(vdev_t *vd, int indent)
1048{
1049	spa_t *spa = vd->vdev_spa;
1050	boolean_t required;
1051	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
1052	char prefix[256];
1053
1054	spa_vdev_state_enter(spa, SCL_NONE);
1055	required = vdev_dtl_required(vd);
1056	(void) spa_vdev_state_exit(spa, NULL, 0);
1057
1058	if (indent == 0)
1059		(void) printf("\nDirty time logs:\n\n");
1060
1061	(void) printf("\t%*s%s [%s]\n", indent, "",
1062	    vd->vdev_path ? vd->vdev_path :
1063	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
1064	    required ? "DTL-required" : "DTL-expendable");
1065
1066	for (int t = 0; t < DTL_TYPES; t++) {
1067		range_tree_t *rt = vd->vdev_dtl[t];
1068		if (range_tree_space(rt) == 0)
1069			continue;
1070		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
1071		    indent + 2, "", name[t]);
1072		mutex_enter(rt->rt_lock);
1073		range_tree_walk(rt, dump_dtl_seg, prefix);
1074		mutex_exit(rt->rt_lock);
1075		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
1076			dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
1077	}
1078
1079	for (int c = 0; c < vd->vdev_children; c++)
1080		dump_dtl(vd->vdev_child[c], indent + 4);
1081}
1082
1083static void
1084dump_history(spa_t *spa)
1085{
1086	nvlist_t **events = NULL;
1087	uint64_t resid, len, off = 0;
1088	uint_t num = 0;
1089	int error;
1090	time_t tsec;
1091	struct tm t;
1092	char tbuf[30];
1093	char internalstr[MAXPATHLEN];
1094
1095	char *buf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
1096	do {
1097		len = SPA_MAXBLOCKSIZE;
1098
1099		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
1100			(void) fprintf(stderr, "Unable to read history: "
1101			    "error %d\n", error);
1102			umem_free(buf, SPA_MAXBLOCKSIZE);
1103			return;
1104		}
1105
1106		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
1107			break;
1108
1109		off -= resid;
1110	} while (len != 0);
1111	umem_free(buf, SPA_MAXBLOCKSIZE);
1112
1113	(void) printf("\nHistory:\n");
1114	for (int i = 0; i < num; i++) {
1115		uint64_t time, txg, ievent;
1116		char *cmd, *intstr;
1117		boolean_t printed = B_FALSE;
1118
1119		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
1120		    &time) != 0)
1121			goto next;
1122		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
1123		    &cmd) != 0) {
1124			if (nvlist_lookup_uint64(events[i],
1125			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
1126				goto next;
1127			verify(nvlist_lookup_uint64(events[i],
1128			    ZPOOL_HIST_TXG, &txg) == 0);
1129			verify(nvlist_lookup_string(events[i],
1130			    ZPOOL_HIST_INT_STR, &intstr) == 0);
1131			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
1132				goto next;
1133
1134			(void) snprintf(internalstr,
1135			    sizeof (internalstr),
1136			    "[internal %s txg:%lld] %s",
1137			    zfs_history_event_names[ievent], txg,
1138			    intstr);
1139			cmd = internalstr;
1140		}
1141		tsec = time;
1142		(void) localtime_r(&tsec, &t);
1143		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
1144		(void) printf("%s %s\n", tbuf, cmd);
1145		printed = B_TRUE;
1146
1147next:
1148		if (dump_opt['h'] > 1) {
1149			if (!printed)
1150				(void) printf("unrecognized record:\n");
1151			dump_nvlist(events[i], 2);
1152		}
1153	}
1154}
1155
/*
 * Object viewer for DMU dnode objects.  Has an empty body: nothing is
 * printed for this object type.
 */
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1161
1162static uint64_t
1163blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
1164    const zbookmark_phys_t *zb)
1165{
1166	if (dnp == NULL) {
1167		ASSERT(zb->zb_level < 0);
1168		if (zb->zb_object == 0)
1169			return (zb->zb_blkid);
1170		return (zb->zb_blkid * BP_GET_LSIZE(bp));
1171	}
1172
1173	ASSERT(zb->zb_level >= 0);
1174
1175	return ((zb->zb_blkid <<
1176	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
1177	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
1178}
1179
1180static void
1181snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
1182{
1183	const dva_t *dva = bp->blk_dva;
1184	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
1185
1186	if (dump_opt['b'] >= 6) {
1187		snprintf_blkptr(blkbuf, buflen, bp);
1188		return;
1189	}
1190
1191	if (BP_IS_EMBEDDED(bp)) {
1192		(void) sprintf(blkbuf,
1193		    "EMBEDDED et=%u %llxL/%llxP B=%llu",
1194		    (int)BPE_GET_ETYPE(bp),
1195		    (u_longlong_t)BPE_GET_LSIZE(bp),
1196		    (u_longlong_t)BPE_GET_PSIZE(bp),
1197		    (u_longlong_t)bp->blk_birth);
1198		return;
1199	}
1200
1201	blkbuf[0] = '\0';
1202	for (int i = 0; i < ndvas; i++)
1203		(void) snprintf(blkbuf + strlen(blkbuf),
1204		    buflen - strlen(blkbuf), "%llu:%llx:%llx ",
1205		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
1206		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
1207		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
1208
1209	if (BP_IS_HOLE(bp)) {
1210		(void) snprintf(blkbuf + strlen(blkbuf),
1211		    buflen - strlen(blkbuf),
1212		    "%llxL B=%llu",
1213		    (u_longlong_t)BP_GET_LSIZE(bp),
1214		    (u_longlong_t)bp->blk_birth);
1215	} else {
1216		(void) snprintf(blkbuf + strlen(blkbuf),
1217		    buflen - strlen(blkbuf),
1218		    "%llxL/%llxP F=%llu B=%llu/%llu",
1219		    (u_longlong_t)BP_GET_LSIZE(bp),
1220		    (u_longlong_t)BP_GET_PSIZE(bp),
1221		    (u_longlong_t)BP_GET_FILL(bp),
1222		    (u_longlong_t)bp->blk_birth,
1223		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
1224	}
1225}
1226
1227static void
1228print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
1229    const dnode_phys_t *dnp)
1230{
1231	char blkbuf[BP_SPRINTF_LEN];
1232	int l;
1233
1234	if (!BP_IS_EMBEDDED(bp)) {
1235		ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1236		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1237	}
1238
1239	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
1240
1241	ASSERT(zb->zb_level >= 0);
1242
1243	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
1244		if (l == zb->zb_level) {
1245			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
1246		} else {
1247			(void) printf(" ");
1248		}
1249	}
1250
1251	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1252	(void) printf("%s\n", blkbuf);
1253}
1254
1255static int
1256visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
1257    blkptr_t *bp, const zbookmark_phys_t *zb)
1258{
1259	int err = 0;
1260
1261	if (bp->blk_birth == 0)
1262		return (0);
1263
1264	print_indirect(bp, zb, dnp);
1265
1266	if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
1267		arc_flags_t flags = ARC_FLAG_WAIT;
1268		int i;
1269		blkptr_t *cbp;
1270		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
1271		arc_buf_t *buf;
1272		uint64_t fill = 0;
1273
1274		err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
1275		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
1276		if (err)
1277			return (err);
1278		ASSERT(buf->b_data);
1279
1280		/* recursively visit blocks below this */
1281		cbp = buf->b_data;
1282		for (i = 0; i < epb; i++, cbp++) {
1283			zbookmark_phys_t czb;
1284
1285			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
1286			    zb->zb_level - 1,
1287			    zb->zb_blkid * epb + i);
1288			err = visit_indirect(spa, dnp, cbp, &czb);
1289			if (err)
1290				break;
1291			fill += BP_GET_FILL(cbp);
1292		}
1293		if (!err)
1294			ASSERT3U(fill, ==, BP_GET_FILL(bp));
1295		arc_buf_destroy(buf, &buf);
1296	}
1297
1298	return (err);
1299}
1300
1301/*ARGSUSED*/
1302static void
1303dump_indirect(dnode_t *dn)
1304{
1305	dnode_phys_t *dnp = dn->dn_phys;
1306	int j;
1307	zbookmark_phys_t czb;
1308
1309	(void) printf("Indirect blocks:\n");
1310
1311	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1312	    dn->dn_object, dnp->dn_nlevels - 1, 0);
1313	for (j = 0; j < dnp->dn_nblkptr; j++) {
1314		czb.zb_blkid = j;
1315		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1316		    &dnp->dn_blkptr[j], &czb);
1317	}
1318
1319	(void) printf("\n");
1320}
1321
1322/*ARGSUSED*/
1323static void
1324dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1325{
1326	dsl_dir_phys_t *dd = data;
1327	time_t crtime;
1328	char nice[32];
1329
1330	if (dd == NULL)
1331		return;
1332
1333	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1334
1335	crtime = dd->dd_creation_time;
1336	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1337	(void) printf("\t\thead_dataset_obj = %llu\n",
1338	    (u_longlong_t)dd->dd_head_dataset_obj);
1339	(void) printf("\t\tparent_dir_obj = %llu\n",
1340	    (u_longlong_t)dd->dd_parent_obj);
1341	(void) printf("\t\torigin_obj = %llu\n",
1342	    (u_longlong_t)dd->dd_origin_obj);
1343	(void) printf("\t\tchild_dir_zapobj = %llu\n",
1344	    (u_longlong_t)dd->dd_child_dir_zapobj);
1345	zdb_nicenum(dd->dd_used_bytes, nice);
1346	(void) printf("\t\tused_bytes = %s\n", nice);
1347	zdb_nicenum(dd->dd_compressed_bytes, nice);
1348	(void) printf("\t\tcompressed_bytes = %s\n", nice);
1349	zdb_nicenum(dd->dd_uncompressed_bytes, nice);
1350	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
1351	zdb_nicenum(dd->dd_quota, nice);
1352	(void) printf("\t\tquota = %s\n", nice);
1353	zdb_nicenum(dd->dd_reserved, nice);
1354	(void) printf("\t\treserved = %s\n", nice);
1355	(void) printf("\t\tprops_zapobj = %llu\n",
1356	    (u_longlong_t)dd->dd_props_zapobj);
1357	(void) printf("\t\tdeleg_zapobj = %llu\n",
1358	    (u_longlong_t)dd->dd_deleg_zapobj);
1359	(void) printf("\t\tflags = %llx\n",
1360	    (u_longlong_t)dd->dd_flags);
1361
1362#define	DO(which) \
1363	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1364	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1365	DO(HEAD);
1366	DO(SNAP);
1367	DO(CHILD);
1368	DO(CHILD_RSRV);
1369	DO(REFRSRV);
1370#undef DO
1371}
1372
1373/*ARGSUSED*/
1374static void
1375dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1376{
1377	dsl_dataset_phys_t *ds = data;
1378	time_t crtime;
1379	char used[32], compressed[32], uncompressed[32], unique[32];
1380	char blkbuf[BP_SPRINTF_LEN];
1381
1382	if (ds == NULL)
1383		return;
1384
1385	ASSERT(size == sizeof (*ds));
1386	crtime = ds->ds_creation_time;
1387	zdb_nicenum(ds->ds_referenced_bytes, used);
1388	zdb_nicenum(ds->ds_compressed_bytes, compressed);
1389	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
1390	zdb_nicenum(ds->ds_unique_bytes, unique);
1391	snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
1392
1393	(void) printf("\t\tdir_obj = %llu\n",
1394	    (u_longlong_t)ds->ds_dir_obj);
1395	(void) printf("\t\tprev_snap_obj = %llu\n",
1396	    (u_longlong_t)ds->ds_prev_snap_obj);
1397	(void) printf("\t\tprev_snap_txg = %llu\n",
1398	    (u_longlong_t)ds->ds_prev_snap_txg);
1399	(void) printf("\t\tnext_snap_obj = %llu\n",
1400	    (u_longlong_t)ds->ds_next_snap_obj);
1401	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1402	    (u_longlong_t)ds->ds_snapnames_zapobj);
1403	(void) printf("\t\tnum_children = %llu\n",
1404	    (u_longlong_t)ds->ds_num_children);
1405	(void) printf("\t\tuserrefs_obj = %llu\n",
1406	    (u_longlong_t)ds->ds_userrefs_obj);
1407	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1408	(void) printf("\t\tcreation_txg = %llu\n",
1409	    (u_longlong_t)ds->ds_creation_txg);
1410	(void) printf("\t\tdeadlist_obj = %llu\n",
1411	    (u_longlong_t)ds->ds_deadlist_obj);
1412	(void) printf("\t\tused_bytes = %s\n", used);
1413	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1414	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1415	(void) printf("\t\tunique = %s\n", unique);
1416	(void) printf("\t\tfsid_guid = %llu\n",
1417	    (u_longlong_t)ds->ds_fsid_guid);
1418	(void) printf("\t\tguid = %llu\n",
1419	    (u_longlong_t)ds->ds_guid);
1420	(void) printf("\t\tflags = %llx\n",
1421	    (u_longlong_t)ds->ds_flags);
1422	(void) printf("\t\tnext_clones_obj = %llu\n",
1423	    (u_longlong_t)ds->ds_next_clones_obj);
1424	(void) printf("\t\tprops_obj = %llu\n",
1425	    (u_longlong_t)ds->ds_props_obj);
1426	(void) printf("\t\tbp = %s\n", blkbuf);
1427}
1428
1429/* ARGSUSED */
1430static int
1431dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1432{
1433	char blkbuf[BP_SPRINTF_LEN];
1434
1435	if (bp->blk_birth != 0) {
1436		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
1437		(void) printf("\t%s\n", blkbuf);
1438	}
1439	return (0);
1440}
1441
1442static void
1443dump_bptree(objset_t *os, uint64_t obj, char *name)
1444{
1445	char bytes[32];
1446	bptree_phys_t *bt;
1447	dmu_buf_t *db;
1448
1449	if (dump_opt['d'] < 3)
1450		return;
1451
1452	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1453	bt = db->db_data;
1454	zdb_nicenum(bt->bt_bytes, bytes);
1455	(void) printf("\n    %s: %llu datasets, %s\n",
1456	    name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1457	dmu_buf_rele(db, FTAG);
1458
1459	if (dump_opt['d'] < 5)
1460		return;
1461
1462	(void) printf("\n");
1463
1464	(void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1465}
1466
1467/* ARGSUSED */
1468static int
1469dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1470{
1471	char blkbuf[BP_SPRINTF_LEN];
1472
1473	ASSERT(bp->blk_birth != 0);
1474	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1475	(void) printf("\t%s\n", blkbuf);
1476	return (0);
1477}
1478
1479static void
1480dump_full_bpobj(bpobj_t *bpo, char *name, int indent)
1481{
1482	char bytes[32];
1483	char comp[32];
1484	char uncomp[32];
1485
1486	if (dump_opt['d'] < 3)
1487		return;
1488
1489	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
1490	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1491		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
1492		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
1493		(void) printf("    %*s: object %llu, %llu local blkptrs, "
1494		    "%llu subobjs in object %llu, %s (%s/%s comp)\n",
1495		    indent * 8, name,
1496		    (u_longlong_t)bpo->bpo_object,
1497		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1498		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1499		    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
1500		    bytes, comp, uncomp);
1501
1502		for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1503			uint64_t subobj;
1504			bpobj_t subbpo;
1505			int error;
1506			VERIFY0(dmu_read(bpo->bpo_os,
1507			    bpo->bpo_phys->bpo_subobjs,
1508			    i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1509			error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1510			if (error != 0) {
1511				(void) printf("ERROR %u while trying to open "
1512				    "subobj id %llu\n",
1513				    error, (u_longlong_t)subobj);
1514				continue;
1515			}
1516			dump_full_bpobj(&subbpo, "subobj", indent + 1);
1517			bpobj_close(&subbpo);
1518		}
1519	} else {
1520		(void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
1521		    indent * 8, name,
1522		    (u_longlong_t)bpo->bpo_object,
1523		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1524		    bytes);
1525	}
1526
1527	if (dump_opt['d'] < 5)
1528		return;
1529
1530
1531	if (indent == 0) {
1532		(void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1533		(void) printf("\n");
1534	}
1535}
1536
1537static void
1538dump_deadlist(dsl_deadlist_t *dl)
1539{
1540	dsl_deadlist_entry_t *dle;
1541	uint64_t unused;
1542	char bytes[32];
1543	char comp[32];
1544	char uncomp[32];
1545
1546	if (dump_opt['d'] < 3)
1547		return;
1548
1549	if (dl->dl_oldfmt) {
1550		dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
1551		return;
1552	}
1553
1554	zdb_nicenum(dl->dl_phys->dl_used, bytes);
1555	zdb_nicenum(dl->dl_phys->dl_comp, comp);
1556	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
1557	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
1558	    bytes, comp, uncomp);
1559
1560	if (dump_opt['d'] < 4)
1561		return;
1562
1563	(void) printf("\n");
1564
1565	/* force the tree to be loaded */
1566	dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
1567
1568	for (dle = avl_first(&dl->dl_tree); dle;
1569	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
1570		if (dump_opt['d'] >= 5) {
1571			char buf[128];
1572			(void) snprintf(buf, sizeof (buf),
1573			    "mintxg %llu -> obj %llu",
1574			    (longlong_t)dle->dle_mintxg,
1575			    (longlong_t)dle->dle_bpobj.bpo_object);
1576
1577			dump_full_bpobj(&dle->dle_bpobj, buf, 0);
1578		} else {
1579			(void) printf("mintxg %llu -> obj %llu\n",
1580			    (longlong_t)dle->dle_mintxg,
1581			    (longlong_t)dle->dle_bpobj.bpo_object);
1582
1583		}
1584	}
1585}
1586
/*
 * File-scope state shared by the helpers below.
 *
 * idx_tree and domain_tree hold the FUID tables that dump_uidgid() loads
 * on demand; fuid_table_loaded records whether they are currently loaded.
 * sa_os is the objset currently held through open_objset() (NULL when
 * there is none), and sa_attr_table is the attribute table filled in by
 * sa_setup() for that objset.
 */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
static objset_t *sa_os = NULL;
static sa_attr_type_t *sa_attr_table = NULL;
1592
1593static int
1594open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
1595{
1596	int err;
1597	uint64_t sa_attrs = 0;
1598	uint64_t version = 0;
1599
1600	VERIFY3P(sa_os, ==, NULL);
1601	err = dmu_objset_own(path, type, B_TRUE, tag, osp);
1602	if (err != 0) {
1603		(void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
1604		    strerror(err));
1605		return (err);
1606	}
1607
1608	if (dmu_objset_type(*osp) == DMU_OST_ZFS) {
1609		(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1610		    8, 1, &version);
1611		if (version >= ZPL_VERSION_SA) {
1612			(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1613			    8, 1, &sa_attrs);
1614		}
1615		err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
1616		    &sa_attr_table);
1617		if (err != 0) {
1618			(void) fprintf(stderr, "sa_setup failed: %s\n",
1619			    strerror(err));
1620			dmu_objset_disown(*osp, tag);
1621			*osp = NULL;
1622		}
1623	}
1624	sa_os = *osp;
1625
1626	return (0);
1627}
1628
1629static void
1630close_objset(objset_t *os, void *tag)
1631{
1632	VERIFY3P(os, ==, sa_os);
1633	if (os->os_sa != NULL)
1634		sa_tear_down(os);
1635	dmu_objset_disown(os, tag);
1636	sa_attr_table = NULL;
1637	sa_os = NULL;
1638}
1639
1640static void
1641fuid_table_destroy()
1642{
1643	if (fuid_table_loaded) {
1644		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1645		fuid_table_loaded = B_FALSE;
1646	}
1647}
1648
1649/*
1650 * print uid or gid information.
1651 * For normal POSIX id just the id is printed in decimal format.
1652 * For CIFS files with FUID the fuid is printed in hex followed by
1653 * the domain-rid string.
1654 */
1655static void
1656print_idstr(uint64_t id, const char *id_type)
1657{
1658	if (FUID_INDEX(id)) {
1659		char *domain;
1660
1661		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1662		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1663		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1664	} else {
1665		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1666	}
1667
1668}
1669
1670static void
1671dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1672{
1673	uint32_t uid_idx, gid_idx;
1674
1675	uid_idx = FUID_INDEX(uid);
1676	gid_idx = FUID_INDEX(gid);
1677
1678	/* Load domain table, if not already loaded */
1679	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1680		uint64_t fuid_obj;
1681
1682		/* first find the fuid object.  It lives in the master node */
1683		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1684		    8, 1, &fuid_obj) == 0);
1685		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1686		(void) zfs_fuid_table_load(os, fuid_obj,
1687		    &idx_tree, &domain_tree);
1688		fuid_table_loaded = B_TRUE;
1689	}
1690
1691	print_idstr(uid, "uid");
1692	print_idstr(gid, "gid");
1693}
1694
1695/*ARGSUSED*/
1696static void
1697dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1698{
1699	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1700	sa_handle_t *hdl;
1701	uint64_t xattr, rdev, gen;
1702	uint64_t uid, gid, mode, fsize, parent, links;
1703	uint64_t pflags;
1704	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1705	time_t z_crtime, z_atime, z_mtime, z_ctime;
1706	sa_bulk_attr_t bulk[12];
1707	int idx = 0;
1708	int error;
1709
1710	VERIFY3P(os, ==, sa_os);
1711	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1712		(void) printf("Failed to get handle for SA znode\n");
1713		return;
1714	}
1715
1716	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1717	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1718	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1719	    &links, 8);
1720	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1721	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1722	    &mode, 8);
1723	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1724	    NULL, &parent, 8);
1725	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1726	    &fsize, 8);
1727	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1728	    acctm, 16);
1729	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1730	    modtm, 16);
1731	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1732	    crtm, 16);
1733	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1734	    chgtm, 16);
1735	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
1736	    &pflags, 8);
1737
1738	if (sa_bulk_lookup(hdl, bulk, idx)) {
1739		(void) sa_handle_destroy(hdl);
1740		return;
1741	}
1742
1743	z_crtime = (time_t)crtm[0];
1744	z_atime = (time_t)acctm[0];
1745	z_mtime = (time_t)modtm[0];
1746	z_ctime = (time_t)chgtm[0];
1747
1748	if (dump_opt['d'] > 4) {
1749		error = zfs_obj_to_path(os, object, path, sizeof (path));
1750		if (error != 0) {
1751			(void) snprintf(path, sizeof (path),
1752			    "\?\?\?<object#%llu>", (u_longlong_t)object);
1753		}
1754		(void) printf("\tpath	%s\n", path);
1755	}
1756	dump_uidgid(os, uid, gid);
1757	(void) printf("\tatime	%s", ctime(&z_atime));
1758	(void) printf("\tmtime	%s", ctime(&z_mtime));
1759	(void) printf("\tctime	%s", ctime(&z_ctime));
1760	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1761	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
1762	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
1763	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
1764	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
1765	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
1766	(void) printf("\tpflags	%llx\n", (u_longlong_t)pflags);
1767	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1768	    sizeof (uint64_t)) == 0)
1769		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
1770	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1771	    sizeof (uint64_t)) == 0)
1772		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
1773	sa_handle_destroy(hdl);
1774}
1775
/*
 * Object viewer for ZFS ACL objects.  Has an empty body: nothing is
 * printed for this object type.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1781
/*
 * Object viewer for DMU objset objects.  Has an empty body: nothing is
 * printed for this object type.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1787
/*
 * Table of per-type viewer functions.  dump_object() indexes it with
 * ZDB_OT_TYPE() of an object's bonus type and of its data type and calls
 * the entry found there.  There is one slot more than DMU_OT_NUMTYPES;
 * the extra, final slot is dump_unknown.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
	dump_none,		/* unallocated			*/
	dump_zap,		/* object directory		*/
	dump_uint64,		/* object array			*/
	dump_none,		/* packed nvlist		*/
	dump_packed_nvlist,	/* packed nvlist size		*/
	dump_none,		/* bpobj			*/
	dump_bpobj,		/* bpobj header			*/
	dump_none,		/* SPA space map header		*/
	dump_none,		/* SPA space map		*/
	dump_none,		/* ZIL intent log		*/
	dump_dnode,		/* DMU dnode			*/
	dump_dmu_objset,	/* DMU objset			*/
	dump_dsl_dir,		/* DSL directory		*/
	dump_zap,		/* DSL directory child map	*/
	dump_zap,		/* DSL dataset snap map		*/
	dump_zap,		/* DSL props			*/
	dump_dsl_dataset,	/* DSL dataset			*/
	dump_znode,		/* ZFS znode			*/
	dump_acl,		/* ZFS V0 ACL			*/
	dump_uint8,		/* ZFS plain file		*/
	dump_zpldir,		/* ZFS directory		*/
	dump_zap,		/* ZFS master node		*/
	dump_zap,		/* ZFS delete queue		*/
	dump_uint8,		/* zvol object			*/
	dump_zap,		/* zvol prop			*/
	dump_uint8,		/* other uint8[]		*/
	dump_uint64,		/* other uint64[]		*/
	dump_zap,		/* other ZAP			*/
	dump_zap,		/* persistent error log		*/
	dump_uint8,		/* SPA history			*/
	dump_history_offsets,	/* SPA history offsets		*/
	dump_zap,		/* Pool properties		*/
	dump_zap,		/* DSL permissions		*/
	dump_acl,		/* ZFS ACL			*/
	dump_uint8,		/* ZFS SYSACL			*/
	dump_none,		/* FUID nvlist			*/
	dump_packed_nvlist,	/* FUID nvlist size		*/
	dump_zap,		/* DSL dataset next clones	*/
	dump_zap,		/* DSL scrub queue		*/
	dump_zap,		/* ZFS user/group used		*/
	dump_zap,		/* ZFS user/group quota		*/
	dump_zap,		/* snapshot refcount tags	*/
	dump_ddt_zap,		/* DDT ZAP object		*/
	dump_zap,		/* DDT statistics		*/
	dump_znode,		/* SA object			*/
	dump_zap,		/* SA Master Node		*/
	dump_sa_attrs,		/* SA attribute registration	*/
	dump_sa_layouts,	/* SA attribute layouts		*/
	dump_zap,		/* DSL scrub translations	*/
	dump_none,		/* fake dedup BP		*/
	dump_zap,		/* deadlist			*/
	dump_none,		/* deadlist hdr			*/
	dump_zap,		/* dsl clones			*/
	dump_bpobj_subobjs,	/* bpobj subobjs		*/
	dump_unknown,		/* Unknown type, must be last	*/
};
1845
1846static void
1847dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1848{
1849	dmu_buf_t *db = NULL;
1850	dmu_object_info_t doi;
1851	dnode_t *dn;
1852	void *bonus = NULL;
1853	size_t bsize = 0;
1854	char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
1855	char bonus_size[32];
1856	char aux[50];
1857	int error;
1858
1859	if (*print_header) {
1860		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1861		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1862		    "%full", "type");
1863		*print_header = 0;
1864	}
1865
1866	if (object == 0) {
1867		dn = DMU_META_DNODE(os);
1868	} else {
1869		error = dmu_bonus_hold(os, object, FTAG, &db);
1870		if (error)
1871			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1872			    object, error);
1873		bonus = db->db_data;
1874		bsize = db->db_size;
1875		dn = DB_DNODE((dmu_buf_impl_t *)db);
1876	}
1877	dmu_object_info_from_dnode(dn, &doi);
1878
1879	zdb_nicenum(doi.doi_metadata_block_size, iblk);
1880	zdb_nicenum(doi.doi_data_block_size, dblk);
1881	zdb_nicenum(doi.doi_max_offset, lsize);
1882	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
1883	zdb_nicenum(doi.doi_bonus_size, bonus_size);
1884	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1885	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1886	    doi.doi_max_offset);
1887
1888	aux[0] = '\0';
1889
1890	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1891		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1892		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1893	}
1894
1895	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1896		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1897		    ZDB_COMPRESS_NAME(doi.doi_compress));
1898	}
1899
1900	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1901	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1902	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1903
1904	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1905		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1906		    "", "", "", "", "", bonus_size, "bonus",
1907		    ZDB_OT_NAME(doi.doi_bonus_type));
1908	}
1909
1910	if (verbosity >= 4) {
1911		(void) printf("\tdnode flags: %s%s%s\n",
1912		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1913		    "USED_BYTES " : "",
1914		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1915		    "USERUSED_ACCOUNTED " : "",
1916		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
1917		    "SPILL_BLKPTR" : "");
1918		(void) printf("\tdnode maxblkid: %llu\n",
1919		    (longlong_t)dn->dn_phys->dn_maxblkid);
1920
1921		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1922		    bonus, bsize);
1923		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1924		*print_header = 1;
1925	}
1926
1927	if (verbosity >= 5)
1928		dump_indirect(dn);
1929
1930	if (verbosity >= 5) {
1931		/*
1932		 * Report the list of segments that comprise the object.
1933		 */
1934		uint64_t start = 0;
1935		uint64_t end;
1936		uint64_t blkfill = 1;
1937		int minlvl = 1;
1938
1939		if (dn->dn_type == DMU_OT_DNODE) {
1940			minlvl = 0;
1941			blkfill = DNODES_PER_BLOCK;
1942		}
1943
1944		for (;;) {
1945			char segsize[32];
1946			error = dnode_next_offset(dn,
1947			    0, &start, minlvl, blkfill, 0);
1948			if (error)
1949				break;
1950			end = start;
1951			error = dnode_next_offset(dn,
1952			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1953			zdb_nicenum(end - start, segsize);
1954			(void) printf("\t\tsegment [%016llx, %016llx)"
1955			    " size %5s\n", (u_longlong_t)start,
1956			    (u_longlong_t)end, segsize);
1957			if (error)
1958				break;
1959			start = end;
1960		}
1961	}
1962
1963	if (db != NULL)
1964		dmu_buf_rele(db, FTAG);
1965}
1966
/*
 * Printable names of the objset types.  dump_dir() indexes this with
 * dds_type after checking that it is below DMU_OST_NUMTYPES.
 */
static char *objset_types[DMU_OST_NUMTYPES] = {
	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1969
/*
 * Print a summary line for objset 'os' and, depending on the options,
 * its contents.
 *
 * If specific objects were requested (zopt_objects != 0) only those are
 * dumped.  Otherwise the intent log is dumped (with dump_opt['i'] set or
 * verbosity >= 2), then the deadlist if the objset has a dataset, and,
 * at verbosity >= 2, every object in the objset.
 *
 * Aborts if the object walk ends with an error other than ESRCH.
 */
static void
dump_dir(objset_t *os)
{
	dmu_objset_stats_t dds;
	uint64_t object, object_count;
	uint64_t refdbytes, usedobjs, scratch;
	char numbuf[32];
	char blkbuf[BP_SPRINTF_LEN + 20];
	char osname[ZFS_MAX_DATASET_NAME_LEN];
	char *type = "UNKNOWN";
	int verbosity = dump_opt['d'];
	int print_header = 1;
	int i, error;

	/* The stats are fetched while holding the pool config. */
	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
	dmu_objset_fast_stat(os, &dds);
	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);

	if (dds.dds_type < DMU_OST_NUMTYPES)
		type = objset_types[dds.dds_type];

	/*
	 * For the meta objset the numbers come from the root block
	 * pointer and the MOS directory instead of dmu_objset_space().
	 */
	if (dds.dds_type == DMU_OST_META) {
		dds.dds_creation_txg = TXG_INITIAL;
		usedobjs = BP_GET_FILL(os->os_rootbp);
		refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
		    dd_used_bytes;
	} else {
		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
	}

	ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));

	zdb_nicenum(refdbytes, numbuf);

	/* The root block pointer is shown only at verbosity >= 4. */
	if (verbosity >= 4) {
		(void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
		(void) snprintf_blkptr(blkbuf + strlen(blkbuf),
		    sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
	} else {
		blkbuf[0] = '\0';
	}

	dmu_objset_name(os, osname);

	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
	    "%s, %llu objects%s\n",
	    osname, type, (u_longlong_t)dmu_objset_id(os),
	    (u_longlong_t)dds.dds_creation_txg,
	    numbuf, (u_longlong_t)usedobjs, blkbuf);

	/* Explicitly requested objects: dump just those and stop. */
	if (zopt_objects != 0) {
		for (i = 0; i < zopt_objects; i++)
			dump_object(os, zopt_object[i], verbosity,
			    &print_header);
		(void) printf("\n");
		return;
	}

	if (dump_opt['i'] != 0 || verbosity >= 2)
		dump_intent_log(dmu_objset_zil(os));

	if (dmu_objset_ds(os) != NULL)
		dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);

	if (verbosity < 2)
		return;

	if (BP_IS_HOLE(os->os_rootbp))
		return;

	/*
	 * The meta dnode and the user/group accounting objects are dumped
	 * before the walk and are not included in object_count.
	 */
	dump_object(os, 0, verbosity, &print_header);
	object_count = 0;
	if (DMU_USERUSED_DNODE(os) != NULL &&
	    DMU_USERUSED_DNODE(os)->dn_type != 0) {
		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
	}

	object = 0;
	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
		dump_object(os, object, verbosity, &print_header);
		object_count++;
	}

	ASSERT3U(object_count, ==, usedobjs);

	(void) printf("\n");

	/* ESRCH is the only accepted way for the walk to end. */
	if (error != ESRCH) {
		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
		abort();
	}
}
2063
2064static void
2065dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
2066{
2067	time_t timestamp = ub->ub_timestamp;
2068
2069	(void) printf(header ? header : "");
2070	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
2071	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
2072	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
2073	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
2074	(void) printf("\ttimestamp = %llu UTC = %s",
2075	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
2076	if (dump_opt['u'] >= 3) {
2077		char blkbuf[BP_SPRINTF_LEN];
2078		snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
2079		(void) printf("\trootbp = %s\n", blkbuf);
2080	}
2081	(void) printf(footer ? footer : "");
2082}
2083
2084static void
2085dump_config(spa_t *spa)
2086{
2087	dmu_buf_t *db;
2088	size_t nvsize = 0;
2089	int error = 0;
2090
2091
2092	error = dmu_bonus_hold(spa->spa_meta_objset,
2093	    spa->spa_config_object, FTAG, &db);
2094
2095	if (error == 0) {
2096		nvsize = *(uint64_t *)db->db_data;
2097		dmu_buf_rele(db, FTAG);
2098
2099		(void) printf("\nMOS Configuration:\n");
2100		dump_packed_nvlist(spa->spa_meta_objset,
2101		    spa->spa_config_object, (void *)&nvsize, 1);
2102	} else {
2103		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
2104		    (u_longlong_t)spa->spa_config_object, error);
2105	}
2106}
2107
2108static void
2109dump_cachefile(const char *cachefile)
2110{
2111	int fd;
2112	struct stat64 statbuf;
2113	char *buf;
2114	nvlist_t *config;
2115
2116	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
2117		(void) printf("cannot open '%s': %s\n", cachefile,
2118		    strerror(errno));
2119		exit(1);
2120	}
2121
2122	if (fstat64(fd, &statbuf) != 0) {
2123		(void) printf("failed to stat '%s': %s\n", cachefile,
2124		    strerror(errno));
2125		exit(1);
2126	}
2127
2128	if ((buf = malloc(statbuf.st_size)) == NULL) {
2129		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
2130		    (u_longlong_t)statbuf.st_size);
2131		exit(1);
2132	}
2133
2134	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
2135		(void) fprintf(stderr, "failed to read %llu bytes\n",
2136		    (u_longlong_t)statbuf.st_size);
2137		exit(1);
2138	}
2139
2140	(void) close(fd);
2141
2142	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
2143		(void) fprintf(stderr, "failed to unpack nvlist\n");
2144		exit(1);
2145	}
2146
2147	free(buf);
2148
2149	dump_nvlist(config, 0);
2150
2151	nvlist_free(config);
2152}
2153
2154#define	ZDB_MAX_UB_HEADER_SIZE 32
2155
2156static void
2157dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
2158{
2159	vdev_t vd;
2160	vdev_t *vdp = &vd;
2161	char header[ZDB_MAX_UB_HEADER_SIZE];
2162
2163	vd.vdev_ashift = ashift;
2164	vdp->vdev_top = vdp;
2165
2166	for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
2167		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
2168		uberblock_t *ub = (void *)((char *)lbl + uoff);
2169
2170		if (uberblock_verify(ub))
2171			continue;
2172		(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
2173		    "Uberblock[%d]\n", i);
2174		dump_uberblock(ub, header, "");
2175	}
2176}
2177
2178static char curpath[PATH_MAX];
2179
2180/*
2181 * Iterate through the path components, recursively passing
2182 * current one's obj and remaining path until we find the obj
2183 * for the last one.
2184 */
2185static int
2186dump_path_impl(objset_t *os, uint64_t obj, char *name)
2187{
2188	int err;
2189	int header = 1;
2190	uint64_t child_obj;
2191	char *s;
2192	dmu_buf_t *db;
2193	dmu_object_info_t doi;
2194
2195	if ((s = strchr(name, '/')) != NULL)
2196		*s = '\0';
2197	err = zap_lookup(os, obj, name, 8, 1, &child_obj);
2198
2199	(void) strlcat(curpath, name, sizeof (curpath));
2200
2201	if (err != 0) {
2202		(void) fprintf(stderr, "failed to lookup %s: %s\n",
2203		    curpath, strerror(err));
2204		return (err);
2205	}
2206
2207	child_obj = ZFS_DIRENT_OBJ(child_obj);
2208	err = sa_buf_hold(os, child_obj, FTAG, &db);
2209	if (err != 0) {
2210		(void) fprintf(stderr,
2211		    "failed to get SA dbuf for obj %llu: %s\n",
2212		    (u_longlong_t)child_obj, strerror(err));
2213		return (EINVAL);
2214	}
2215	dmu_object_info_from_db(db, &doi);
2216	sa_buf_rele(db, FTAG);
2217
2218	if (doi.doi_bonus_type != DMU_OT_SA &&
2219	    doi.doi_bonus_type != DMU_OT_ZNODE) {
2220		(void) fprintf(stderr, "invalid bonus type %d for obj %llu\n",
2221		    doi.doi_bonus_type, (u_longlong_t)child_obj);
2222		return (EINVAL);
2223	}
2224
2225	if (dump_opt['v'] > 6) {
2226		(void) printf("obj=%llu %s type=%d bonustype=%d\n",
2227		    (u_longlong_t)child_obj, curpath, doi.doi_type,
2228		    doi.doi_bonus_type);
2229	}
2230
2231	(void) strlcat(curpath, "/", sizeof (curpath));
2232
2233	switch (doi.doi_type) {
2234	case DMU_OT_DIRECTORY_CONTENTS:
2235		if (s != NULL && *(s + 1) != '\0')
2236			return (dump_path_impl(os, child_obj, s + 1));
2237		/*FALLTHROUGH*/
2238	case DMU_OT_PLAIN_FILE_CONTENTS:
2239		dump_object(os, child_obj, dump_opt['v'], &header);
2240		return (0);
2241	default:
2242		(void) fprintf(stderr, "object %llu has non-file/directory "
2243		    "type %d\n", (u_longlong_t)obj, doi.doi_type);
2244		break;
2245	}
2246
2247	return (EINVAL);
2248}
2249
2250/*
2251 * Dump the blocks for the object specified by path inside the dataset.
2252 */
2253static int
2254dump_path(char *ds, char *path)
2255{
2256	int err;
2257	objset_t *os;
2258	uint64_t root_obj;
2259
2260	err = open_objset(ds, DMU_OST_ZFS, FTAG, &os);
2261	if (err != 0)
2262		return (err);
2263
2264	err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
2265	if (err != 0) {
2266		(void) fprintf(stderr, "can't lookup root znode: %s\n",
2267		    strerror(err));
2268		dmu_objset_disown(os, FTAG);
2269		return (EINVAL);
2270	}
2271
2272	(void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
2273
2274	err = dump_path_impl(os, root_obj, path);
2275
2276	close_objset(os, FTAG);
2277	return (err);
2278}
2279
2280static int
2281dump_label(const char *dev)
2282{
2283	int fd;
2284	vdev_label_t label;
2285	char path[MAXPATHLEN];
2286	char *buf = label.vl_vdev_phys.vp_nvlist;
2287	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
2288	struct stat64 statbuf;
2289	uint64_t psize, ashift;
2290	boolean_t label_found = B_FALSE;
2291
2292	(void) strlcpy(path, dev, sizeof (path));
2293	if (dev[0] == '/') {
2294		if (strncmp(dev, ZFS_DISK_ROOTD,
2295		    strlen(ZFS_DISK_ROOTD)) == 0) {
2296			(void) snprintf(path, sizeof (path), "%s%s",
2297			    ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD));
2298		}
2299	} else if (stat64(path, &statbuf) != 0) {
2300		char *s;
2301
2302		(void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD,
2303		    dev);
2304		if ((s = strrchr(dev, 's')) == NULL || !isdigit(*(s + 1)))
2305			(void) strlcat(path, "s0", sizeof (path));
2306	}
2307
2308	if (stat64(path, &statbuf) != 0) {
2309		(void) printf("failed to stat '%s': %s\n", path,
2310		    strerror(errno));
2311		exit(1);
2312	}
2313
2314	if (S_ISBLK(statbuf.st_mode)) {
2315		(void) printf("cannot use '%s': character device required\n",
2316		    path);
2317		exit(1);
2318	}
2319
2320	if ((fd = open64(path, O_RDONLY)) < 0) {
2321		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
2322		exit(1);
2323	}
2324
2325	psize = statbuf.st_size;
2326	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
2327
2328	for (int l = 0; l < VDEV_LABELS; l++) {
2329		nvlist_t *config = NULL;
2330
2331		if (!dump_opt['q']) {
2332			(void) printf("------------------------------------\n");
2333			(void) printf("LABEL %d\n", l);
2334			(void) printf("------------------------------------\n");
2335		}
2336
2337		if (pread64(fd, &label, sizeof (label),
2338		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
2339			if (!dump_opt['q'])
2340				(void) printf("failed to read label %d\n", l);
2341			continue;
2342		}
2343
2344		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
2345			if (!dump_opt['q'])
2346				(void) printf("failed to unpack label %d\n", l);
2347			ashift = SPA_MINBLOCKSHIFT;
2348		} else {
2349			nvlist_t *vdev_tree = NULL;
2350
2351			if (!dump_opt['q'])
2352				dump_nvlist(config, 4);
2353			if ((nvlist_lookup_nvlist(config,
2354			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
2355			    (nvlist_lookup_uint64(vdev_tree,
2356			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
2357				ashift = SPA_MINBLOCKSHIFT;
2358			nvlist_free(config);
2359			label_found = B_TRUE;
2360		}
2361		if (dump_opt['u'])
2362			dump_label_uberblocks(&label, ashift);
2363	}
2364
2365	(void) close(fd);
2366
2367	return (label_found ? 0 : 2);
2368}
2369
2370static uint64_t dataset_feature_count[SPA_FEATURES];
2371
2372/*ARGSUSED*/
2373static int
2374dump_one_dir(const char *dsname, void *arg)
2375{
2376	int error;
2377	objset_t *os;
2378
2379	error = open_objset(dsname, DMU_OST_ANY, FTAG, &os);
2380	if (error != 0)
2381		return (0);
2382
2383	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
2384		if (!dmu_objset_ds(os)->ds_feature_inuse[f])
2385			continue;
2386		ASSERT(spa_feature_table[f].fi_flags &
2387		    ZFEATURE_FLAG_PER_DATASET);
2388		dataset_feature_count[f]++;
2389	}
2390
2391	dump_dir(os);
2392	close_objset(os, FTAG);
2393	fuid_table_destroy();
2394	return (0);
2395}
2396
/*
 * Block statistics.
 */
/*
 * Number of buckets in zb_psize_histogram.  zdb_count_block() indexes it by
 * physical size in SPA_MINBLOCKSIZE units and clamps the index to the last
 * bucket, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1.
 */
#define	PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
/*
 * Running totals for one [level][type] cell of zdb_cb_t's zcb_type table.
 * zdb_count_block() updates these for every block pointer it counts.
 */
typedef struct zdb_blkstats {
	uint64_t zb_asize;		/* sum of BP_GET_ASIZE() */
	uint64_t zb_lsize;		/* sum of BP_GET_LSIZE() */
	uint64_t zb_psize;		/* sum of BP_GET_PSIZE() */
	uint64_t zb_count;		/* number of block pointers counted */
	uint64_t zb_gangs;		/* sum of BP_COUNT_GANG() */
	/* bps with 2 or 3 DVAs where at least two share a vdev */
	uint64_t zb_ditto_samevdev;
	/* block count per physical-size bucket */
	uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
} zdb_blkstats_t;
2410
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 *
 * These values continue on from DMU_OT_NUMTYPES so they can index the type
 * dimension of zdb_cb_t's zcb_type table alongside the regular DMU types.
 * ZDB_OT_OTHER is used by zdb_blkptr_cb() for types with DMU_OT_NEWTYPE
 * set, and ZDB_OT_TOTAL is the column that accumulates every type.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_OTHER	(DMU_OT_NUMTYPES + 2)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 3)

/*
 * Display names for the extended types, indexed by (type - DMU_OT_NUMTYPES);
 * the entries must stay in the same order as the ZDB_OT_* values above.
 */
static char *zdb_ot_extname[] = {
	"deferred free",
	"dedup ditto",
	"other",
	"Total",
};
2425
/* Row of zcb_type that accumulates the statistics of every block level. */
#define	ZB_TOTAL	DN_MAX_LEVELS

/*
 * State shared by the block-traversal callbacks used by dump_block_stats().
 */
typedef struct zdb_cb {
	/* statistics indexed by [level or ZB_TOTAL][type or ZDB_OT_TOTAL] */
	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
	/* sum over DDT entries of asize * (refcnt - 1) */
	uint64_t	zcb_dedup_asize;
	/* number of non-ditto DDT phys entries seen by zdb_ddt_leak_init() */
	uint64_t	zcb_dedup_blocks;
	/* embedded block pointers counted, per embedded type */
	uint64_t	zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
	/* embedded block pointers, per embedded type and BPE_GET_PSIZE() */
	uint64_t	zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
	    [BPE_PAYLOAD_SIZE];
	/* gethrtime() when the traversal started */
	uint64_t	zcb_start;
	/* gethrtime() of the most recent progress message */
	uint64_t	zcb_lastprint;
	/* allocated size of the normal class, used for the time estimate */
	uint64_t	zcb_totalasize;
	/* read failures, indexed by zio error number */
	uint64_t	zcb_errors[256];
	/* reset to 0 by zdb_blkptr_cb() after each block */
	int		zcb_readfails;
	/* non-zero once a read error or traversal error has been seen */
	int		zcb_haderrors;
	/* pool being traversed; set by zdb_leak_init() */
	spa_t		*zcb_spa;
} zdb_cb_t;
2443
/*
 * Account for one block pointer in the traversal statistics and, unless
 * dump_opt['L'] is set, claim it.
 *
 *	zcb	traversal state whose statistics are updated
 *	zilog	if non-NULL, the bp is first added to this log's bp tree and
 *		is skipped entirely when zil_bp_tree_add() returns non-zero
 *	bp	block pointer to count
 *	type	type column to charge; must be below ZDB_OT_TOTAL
 *
 * A failed claim trips the VERIFY at the end of the function.
 */
static void
zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
    dmu_object_type_t type)
{
	uint64_t refcnt = 0;

	ASSERT(type < ZDB_OT_TOTAL);

	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
		return;

	/*
	 * Update four cells of zcb_type: the bp's own level and the
	 * ZB_TOTAL row, each crossed with the given type and the
	 * ZDB_OT_TOTAL column.
	 */
	for (int i = 0; i < 4; i++) {
		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
		int t = (i & 1) ? type : ZDB_OT_TOTAL;
		int equal;
		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];

		zb->zb_asize += BP_GET_ASIZE(bp);
		zb->zb_lsize += BP_GET_LSIZE(bp);
		zb->zb_psize += BP_GET_PSIZE(bp);
		zb->zb_count++;

		/*
		 * The histogram is only big enough to record blocks up to
		 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
		 * "other", bucket.
		 */
		int idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
		idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
		zb->zb_psize_histogram[idx]++;

		zb->zb_gangs += BP_COUNT_GANG(bp);

		/* Count bps whose copies are not all on distinct vdevs. */
		switch (BP_GET_NDVAS(bp)) {
		case 2:
			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
			    DVA_GET_VDEV(&bp->blk_dva[1]))
				zb->zb_ditto_samevdev++;
			break;
		case 3:
			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
			    DVA_GET_VDEV(&bp->blk_dva[1])) +
			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
			    DVA_GET_VDEV(&bp->blk_dva[2])) +
			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
			    DVA_GET_VDEV(&bp->blk_dva[2]));
			if (equal != 0)
				zb->zb_ditto_samevdev++;
			break;
		}

	}

	/* Embedded bps are tallied separately and are never claimed. */
	if (BP_IS_EMBEDDED(bp)) {
		zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
		zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
		    [BPE_GET_PSIZE(bp)]++;
		return;
	}

	/* With dump_opt['L'] set, nothing is claimed. */
	if (dump_opt['L'])
		return;

	/*
	 * For a dedup bp, drop one reference from the matching DDT entry
	 * (if one is found) and remember how many references remain; an
	 * entry whose total refcount reaches zero is removed.
	 */
	if (BP_GET_DEDUP(bp)) {
		ddt_t *ddt;
		ddt_entry_t *dde;

		ddt = ddt_select(zcb->zcb_spa, bp);
		ddt_enter(ddt);
		dde = ddt_lookup(ddt, bp, B_FALSE);

		if (dde == NULL) {
			refcnt = 0;
		} else {
			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
			ddt_phys_decref(ddp);
			refcnt = ddp->ddp_refcnt;
			if (ddt_phys_total_refcnt(dde) == 0)
				ddt_remove(ddt, dde);
		}
		ddt_exit(ddt);
	}

	/*
	 * Claim the block.  The txg argument is 0 while dedup references
	 * remain (refcnt != 0) and spa_first_txg() otherwise.
	 */
	VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
	    refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
2531
2532static void
2533zdb_blkptr_done(zio_t *zio)
2534{
2535	spa_t *spa = zio->io_spa;
2536	blkptr_t *bp = zio->io_bp;
2537	int ioerr = zio->io_error;
2538	zdb_cb_t *zcb = zio->io_private;
2539	zbookmark_phys_t *zb = &zio->io_bookmark;
2540
2541	abd_free(zio->io_abd);
2542
2543	mutex_enter(&spa->spa_scrub_lock);
2544	spa->spa_scrub_inflight--;
2545	cv_broadcast(&spa->spa_scrub_io_cv);
2546
2547	if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
2548		char blkbuf[BP_SPRINTF_LEN];
2549
2550		zcb->zcb_haderrors = 1;
2551		zcb->zcb_errors[ioerr]++;
2552
2553		if (dump_opt['b'] >= 2)
2554			snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2555		else
2556			blkbuf[0] = '\0';
2557
2558		(void) printf("zdb_blkptr_cb: "
2559		    "Got error %d reading "
2560		    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2561		    ioerr,
2562		    (u_longlong_t)zb->zb_objset,
2563		    (u_longlong_t)zb->zb_object,
2564		    (u_longlong_t)zb->zb_level,
2565		    (u_longlong_t)zb->zb_blkid,
2566		    blkbuf);
2567	}
2568	mutex_exit(&spa->spa_scrub_lock);
2569}
2570
/*
 * traverse_pool() callback used by dump_block_stats().
 *
 * For every non-hole block pointer: count it via zdb_count_block() and,
 * when checksum verification was requested with dump_opt['c'], issue an
 * asynchronous read whose completion is handled by zdb_blkptr_done().
 * Roughly every 100 calls it also considers printing a progress line on
 * stderr.  'arg' is the zdb_cb_t for the traversal.  Always returns 0.
 */
static int
zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
	zdb_cb_t *zcb = arg;
	dmu_object_type_t type;
	boolean_t is_metadata;

	if (bp == NULL)
		return (0);

	/* At dump_opt['b'] >= 5, print every bp that has a birth txg. */
	if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
		char blkbuf[BP_SPRINTF_LEN];
		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
		(void) printf("objset %llu object %llu "
		    "level %lld offset 0x%llx %s\n",
		    (u_longlong_t)zb->zb_objset,
		    (u_longlong_t)zb->zb_object,
		    (longlong_t)zb->zb_level,
		    (u_longlong_t)blkid2offset(dnp, bp, zb),
		    blkbuf);
	}

	if (BP_IS_HOLE(bp))
		return (0);

	type = BP_GET_TYPE(bp);

	/* Types with DMU_OT_NEWTYPE set are all charged to ZDB_OT_OTHER. */
	zdb_count_block(zcb, zilog, bp,
	    (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);

	is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));

	/*
	 * Read the block when dump_opt['c'] > 1, or when dump_opt['c'] is
	 * set and the block is metadata.  Embedded bps are never read.
	 */
	if (!BP_IS_EMBEDDED(bp) &&
	    (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
		size_t size = BP_GET_PSIZE(bp);
		abd_t *abd = abd_alloc(size, B_FALSE);
		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;

		/* If it's an intent log block, failure is expected. */
		if (zb->zb_level == ZB_ZIL_LEVEL)
			flags |= ZIO_FLAG_SPECULATIVE;

		/*
		 * Throttle: wait until the number of outstanding reads is
		 * no more than max_inflight.  zdb_blkptr_done() decrements
		 * the count and broadcasts on the condition variable.
		 */
		mutex_enter(&spa->spa_scrub_lock);
		while (spa->spa_scrub_inflight > max_inflight)
			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
		spa->spa_scrub_inflight++;
		mutex_exit(&spa->spa_scrub_lock);

		/* The buffer is freed by zdb_blkptr_done(). */
		zio_nowait(zio_read(NULL, spa, bp, abd, size,
		    zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
	}

	zcb->zcb_readfails = 0;

	/* only call gethrtime() every 100 blocks */
	static int iters;
	if (++iters > 100)
		iters = 0;
	else
		return (0);

	/*
	 * Print a progress line if more than NANOSEC has elapsed since the
	 * previous one; suppressed when per-block output is enabled.
	 */
	if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
		uint64_t now = gethrtime();
		char buf[10];
		uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
		/*
		 * NOTE(review): this is bytes divided by elapsed time
		 * scaled down by 10^6, which appears to treat bytes per
		 * millisecond as KB/s -- confirm gethrtime() units.
		 */
		int kb_per_sec =
		    1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
		int sec_remaining =
		    (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;

		zfs_nicenum(bytes, buf, sizeof (buf));
		(void) fprintf(stderr,
		    "\r%5s completed (%4dMB/s) "
		    "estimated time remaining: %uhr %02umin %02usec        ",
		    buf, kb_per_sec / 1024,
		    sec_remaining / 60 / 60,
		    sec_remaining / 60 % 60,
		    sec_remaining % 60);

		zcb->zcb_lastprint = now;
	}

	return (0);
}
2656
2657static void
2658zdb_leak(void *arg, uint64_t start, uint64_t size)
2659{
2660	vdev_t *vd = arg;
2661
2662	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
2663	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
2664}
2665
/*
 * Metaslab ops table with no allocator.  zdb_leak_init() installs it on the
 * pool's normal and log classes while the metaslab trees are repurposed for
 * leak detection.
 */
static metaslab_ops_t zdb_metaslab_ops = {
	NULL	/* alloc */
};
2669
2670static void
2671zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2672{
2673	ddt_bookmark_t ddb = { 0 };
2674	ddt_entry_t dde;
2675	int error;
2676
2677	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2678		blkptr_t blk;
2679		ddt_phys_t *ddp = dde.dde_phys;
2680
2681		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2682			return;
2683
2684		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2685
2686		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2687			if (ddp->ddp_phys_birth == 0)
2688				continue;
2689			ddt_bp_create(ddb.ddb_checksum,
2690			    &dde.dde_key, ddp, &blk);
2691			if (p == DDT_PHYS_DITTO) {
2692				zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
2693			} else {
2694				zcb->zcb_dedup_asize +=
2695				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2696				zcb->zcb_dedup_blocks++;
2697			}
2698		}
2699		if (!dump_opt['L']) {
2700			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2701			ddt_enter(ddt);
2702			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2703			ddt_exit(ddt);
2704		}
2705	}
2706
2707	ASSERT(error == ENOENT);
2708}
2709
2710static void
2711zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
2712{
2713	zcb->zcb_spa = spa;
2714
2715	if (!dump_opt['L']) {
2716		vdev_t *rvd = spa->spa_root_vdev;
2717
2718		/*
2719		 * We are going to be changing the meaning of the metaslab's
2720		 * ms_tree.  Ensure that the allocator doesn't try to
2721		 * use the tree.
2722		 */
2723		spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
2724		spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
2725
2726		for (uint64_t c = 0; c < rvd->vdev_children; c++) {
2727			vdev_t *vd = rvd->vdev_child[c];
2728			metaslab_group_t *mg = vd->vdev_mg;
2729			for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
2730				metaslab_t *msp = vd->vdev_ms[m];
2731				ASSERT3P(msp->ms_group, ==, mg);
2732				mutex_enter(&msp->ms_lock);
2733				metaslab_unload(msp);
2734
2735				/*
2736				 * For leak detection, we overload the metaslab
2737				 * ms_tree to contain allocated segments
2738				 * instead of free segments. As a result,
2739				 * we can't use the normal metaslab_load/unload
2740				 * interfaces.
2741				 */
2742				if (msp->ms_sm != NULL) {
2743					(void) fprintf(stderr,
2744					    "\rloading space map for "
2745					    "vdev %llu of %llu, "
2746					    "metaslab %llu of %llu ...",
2747					    (longlong_t)c,
2748					    (longlong_t)rvd->vdev_children,
2749					    (longlong_t)m,
2750					    (longlong_t)vd->vdev_ms_count);
2751
2752					/*
2753					 * We don't want to spend the CPU
2754					 * manipulating the size-ordered
2755					 * tree, so clear the range_tree
2756					 * ops.
2757					 */
2758					msp->ms_tree->rt_ops = NULL;
2759					VERIFY0(space_map_load(msp->ms_sm,
2760					    msp->ms_tree, SM_ALLOC));
2761
2762					if (!msp->ms_loaded) {
2763						msp->ms_loaded = B_TRUE;
2764					}
2765				}
2766				mutex_exit(&msp->ms_lock);
2767			}
2768		}
2769		(void) fprintf(stderr, "\n");
2770	}
2771
2772	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2773
2774	zdb_ddt_leak_init(spa, zcb);
2775
2776	spa_config_exit(spa, SCL_CONFIG, FTAG);
2777}
2778
2779static void
2780zdb_leak_fini(spa_t *spa)
2781{
2782	if (!dump_opt['L']) {
2783		vdev_t *rvd = spa->spa_root_vdev;
2784		for (int c = 0; c < rvd->vdev_children; c++) {
2785			vdev_t *vd = rvd->vdev_child[c];
2786			metaslab_group_t *mg = vd->vdev_mg;
2787			for (int m = 0; m < vd->vdev_ms_count; m++) {
2788				metaslab_t *msp = vd->vdev_ms[m];
2789				ASSERT3P(mg, ==, msp->ms_group);
2790				mutex_enter(&msp->ms_lock);
2791
2792				/*
2793				 * The ms_tree has been overloaded to
2794				 * contain allocated segments. Now that we
2795				 * finished traversing all blocks, any
2796				 * block that remains in the ms_tree
2797				 * represents an allocated block that we
2798				 * did not claim during the traversal.
2799				 * Claimed blocks would have been removed
2800				 * from the ms_tree.
2801				 */
2802				range_tree_vacate(msp->ms_tree, zdb_leak, vd);
2803
2804				if (msp->ms_loaded) {
2805					msp->ms_loaded = B_FALSE;
2806				}
2807
2808				mutex_exit(&msp->ms_lock);
2809			}
2810		}
2811	}
2812}
2813
2814/* ARGSUSED */
2815static int
2816count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2817{
2818	zdb_cb_t *zcb = arg;
2819
2820	if (dump_opt['b'] >= 5) {
2821		char blkbuf[BP_SPRINTF_LEN];
2822		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2823		(void) printf("[%s] %s\n",
2824		    "deferred free", blkbuf);
2825	}
2826	zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
2827	return (0);
2828}
2829
/*
 * Traverse every block in the pool, optionally verifying checksums
 * (dump_opt['c']) and checking for leaks (unless dump_opt['L']), then
 * print summary statistics.
 *
 * Returns 2 if the traversed size does not match the allocated size or if
 * no blocks were counted, 3 if any errors were recorded, and 0 otherwise.
 */
static int
dump_block_stats(spa_t *spa)
{
	zdb_cb_t zcb = { 0 };
	zdb_blkstats_t *zb, *tzb;
	uint64_t norm_alloc, norm_space, total_alloc, total_found;
	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
	boolean_t leaks = B_FALSE;

	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
	    (dump_opt['c'] == 1) ? "metadata " : "",
	    dump_opt['c'] ? "checksums " : "",
	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
	    !dump_opt['L'] ? "nothing leaked " : "");

	/*
	 * Load all space maps as SM_ALLOC maps, then traverse the pool
	 * claiming each block we discover.  If the pool is perfectly
	 * consistent, the space maps will be empty when we're done.
	 * Anything left over is a leak; any block we can't claim (because
	 * it's not part of any space map) is a double allocation,
	 * reference to a freed block, or an unclaimed log block.
	 */
	zdb_leak_init(spa, &zcb);

	/*
	 * If there's a deferred-free bplist, process that first.
	 */
	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
	    count_block_cb, &zcb, NULL);
	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
		(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
		    count_block_cb, &zcb, NULL);
	}
	if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
		    &zcb, NULL));
	}

	/* Data blocks are only prefetched when they will be read. */
	if (dump_opt['c'] > 1)
		flags |= TRAVERSE_PREFETCH_DATA;

	zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
	zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);

	/*
	 * If we've traversed the data blocks then we need to wait for those
	 * I/Os to complete. We leverage "The Godfather" zio to wait on
	 * all async I/Os to complete.
	 */
	if (dump_opt['c']) {
		for (int i = 0; i < max_ncpus; i++) {
			(void) zio_wait(spa->spa_async_zio_root[i]);
			/* Install a fresh root zio in place of the waited one. */
			spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
			    ZIO_FLAG_GODFATHER);
		}
	}

	if (zcb.zcb_haderrors) {
		(void) printf("\nError counts:\n\n");
		(void) printf("\t%5s  %s\n", "errno", "count");
		for (int e = 0; e < 256; e++) {
			if (zcb.zcb_errors[e] != 0) {
				(void) printf("\t%5d  %llu\n",
				    e, (u_longlong_t)zcb.zcb_errors[e]);
			}
		}
	}

	/*
	 * Report any leaked segments.
	 */
	zdb_leak_fini(spa);

	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];

	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
	norm_space = metaslab_class_get_space(spa_normal_class(spa));

	/*
	 * Compare what the traversal found, less the dedup savings, with
	 * what the normal and log classes report as allocated.
	 */
	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;

	if (total_found == total_alloc) {
		if (!dump_opt['L'])
			(void) printf("\n\tNo leaks (block sum matches space"
			    " maps exactly)\n");
	} else {
		(void) printf("block traversal size %llu != alloc %llu "
		    "(%s %lld)\n",
		    (u_longlong_t)total_found,
		    (u_longlong_t)total_alloc,
		    (dump_opt['L']) ? "unreachable" : "leaked",
		    (longlong_t)(total_alloc - total_found));
		leaks = B_TRUE;
	}

	/* Nothing was counted; the averages below would divide by zero. */
	if (tzb->zb_count == 0)
		return (2);

	(void) printf("\n");
	(void) printf("\tbp count:      %10llu\n",
	    (u_longlong_t)tzb->zb_count);
	(void) printf("\tganged count:  %10llu\n",
	    (longlong_t)tzb->zb_gangs);
	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
	    (u_longlong_t)tzb->zb_lsize,
	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
	(void) printf("\tbp physical:   %10llu      avg:"
	    " %6llu     compression: %6.2f\n",
	    (u_longlong_t)tzb->zb_psize,
	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
	    (double)tzb->zb_lsize / tzb->zb_psize);
	(void) printf("\tbp allocated:  %10llu      avg:"
	    " %6llu     compression: %6.2f\n",
	    (u_longlong_t)tzb->zb_asize,
	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
	    (double)tzb->zb_lsize / tzb->zb_asize);
	(void) printf("\tbp deduped:    %10llu    ref>1:"
	    " %6llu   deduplication: %6.2f\n",
	    (u_longlong_t)zcb.zcb_dedup_asize,
	    (u_longlong_t)zcb.zcb_dedup_blocks,
	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);

	/* Per-type summary of embedded block pointers, if any were seen. */
	for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
		if (zcb.zcb_embedded_blocks[i] == 0)
			continue;
		(void) printf("\n");
		(void) printf("\tadditional, non-pointer bps of type %u: "
		    "%10llu\n",
		    i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);

		if (dump_opt['b'] >= 3) {
			(void) printf("\t number of (compressed) bytes:  "
			    "number of bps\n");
			dump_histogram(zcb.zcb_embedded_histogram[i],
			    sizeof (zcb.zcb_embedded_histogram[i]) /
			    sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
		}
	}

	if (tzb->zb_ditto_samevdev != 0) {
		(void) printf("\tDittoed blocks on same vdev: %llu\n",
		    (longlong_t)tzb->zb_ditto_samevdev);
	}

	/* Per-type table; per-level rows are added at dump_opt['b'] >= 3. */
	if (dump_opt['b'] >= 2) {
		int l, t, level;
		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
		    "\t  avg\t comp\t%%Total\tType\n");

		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
			char csize[32], lsize[32], psize[32], asize[32];
			char avg[32], gang[32];
			char *typename;

			if (t < DMU_OT_NUMTYPES)
				typename = dmu_ot[t].ot_name;
			else
				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];

			/* Types with nothing allocated get a row of dashes. */
			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
				(void) printf("%6s\t%5s\t%5s\t%5s"
				    "\t%5s\t%5s\t%6s\t%s\n",
				    "-",
				    "-",
				    "-",
				    "-",
				    "-",
				    "-",
				    "-",
				    typename);
				continue;
			}

			/*
			 * Walk the levels from highest to lowest; the final
			 * pass (l == -1) selects the ZB_TOTAL row.
			 */
			for (l = ZB_TOTAL - 1; l >= -1; l--) {
				level = (l == -1 ? ZB_TOTAL : l);
				zb = &zcb.zcb_type[level][t];

				if (zb->zb_asize == 0)
					continue;

				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
					continue;

				/*
				 * Skip the L0 row when it accounts for the
				 * type's entire allocated size.
				 */
				if (level == 0 && zb->zb_asize ==
				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
					continue;

				zdb_nicenum(zb->zb_count, csize);
				zdb_nicenum(zb->zb_lsize, lsize);
				zdb_nicenum(zb->zb_psize, psize);
				zdb_nicenum(zb->zb_asize, asize);
				zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
				zdb_nicenum(zb->zb_gangs, gang);

				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
				    "\t%5.2f\t%6.2f\t",
				    csize, lsize, psize, asize, avg,
				    (double)zb->zb_lsize / zb->zb_psize,
				    100.0 * zb->zb_asize / tzb->zb_asize);

				if (level == ZB_TOTAL)
					(void) printf("%s\n", typename);
				else
					(void) printf("    L%d %s\n",
					    level, typename);

				if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
					(void) printf("\t number of ganged "
					    "blocks: %s\n", gang);
				}

				if (dump_opt['b'] >= 4) {
					(void) printf("psize "
					    "(in 512-byte sectors): "
					    "number of blocks\n");
					dump_histogram(zb->zb_psize_histogram,
					    PSIZE_HISTO_SIZE, 0);
				}
			}
		}
	}

	(void) printf("\n");

	if (leaks)
		return (2);

	if (zcb.zcb_haderrors)
		return (3);

	return (0);
}
3069
/*
 * One node of the AVL tree built by zdb_ddt_add_cb() for the simulated
 * dedup table: the totals for every traversed block sharing one DDT key.
 */
typedef struct zdb_ddt_entry {
	ddt_key_t	zdde_key;	/* key filled in by ddt_key_fill() */
	uint64_t	zdde_ref_blocks; /* number of blocks with this key */
	uint64_t	zdde_ref_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zdde_ref_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zdde_ref_dsize;	/* sum of bp_get_dsize_sync() */
	avl_node_t	zdde_node;	/* AVL linkage */
} zdb_ddt_entry_t;
3078
3079/* ARGSUSED */
3080static int
3081zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
3082    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
3083{
3084	avl_tree_t *t = arg;
3085	avl_index_t where;
3086	zdb_ddt_entry_t *zdde, zdde_search;
3087
3088	if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
3089		return (0);
3090
3091	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
3092		(void) printf("traversing objset %llu, %llu objects, "
3093		    "%lu blocks so far\n",
3094		    (u_longlong_t)zb->zb_objset,
3095		    (u_longlong_t)BP_GET_FILL(bp),
3096		    avl_numnodes(t));
3097	}
3098
3099	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
3100	    BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
3101		return (0);
3102
3103	ddt_key_fill(&zdde_search.zdde_key, bp);
3104
3105	zdde = avl_find(t, &zdde_search, &where);
3106
3107	if (zdde == NULL) {
3108		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
3109		zdde->zdde_key = zdde_search.zdde_key;
3110		avl_insert(t, zdde, where);
3111	}
3112
3113	zdde->zdde_ref_blocks += 1;
3114	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
3115	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
3116	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
3117
3118	return (0);
3119}
3120
3121static void
3122dump_simulated_ddt(spa_t *spa)
3123{
3124	avl_tree_t t;
3125	void *cookie = NULL;
3126	zdb_ddt_entry_t *zdde;
3127	ddt_histogram_t ddh_total = { 0 };
3128	ddt_stat_t dds_total = { 0 };
3129
3130	avl_create(&t, ddt_entry_compare,
3131	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
3132
3133	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3134
3135	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
3136	    zdb_ddt_add_cb, &t);
3137
3138	spa_config_exit(spa, SCL_CONFIG, FTAG);
3139
3140	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
3141		ddt_stat_t dds;
3142		uint64_t refcnt = zdde->zdde_ref_blocks;
3143		ASSERT(refcnt != 0);
3144
3145		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
3146		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
3147		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
3148		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
3149
3150		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
3151		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
3152		dds.dds_ref_psize = zdde->zdde_ref_psize;
3153		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
3154
3155		ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
3156		    &dds, 0);
3157
3158		umem_free(zdde, sizeof (*zdde));
3159	}
3160
3161	avl_destroy(&t);
3162
3163	ddt_histogram_stat(&dds_total, &ddh_total);
3164
3165	(void) printf("Simulated DDT histogram:\n");
3166
3167	zpool_dump_ddt(&dds_total, &ddh_total);
3168
3169	dump_dedup_ratio(&dds_total);
3170}
3171
3172static void
3173dump_zpool(spa_t *spa)
3174{
3175	dsl_pool_t *dp = spa_get_dsl(spa);
3176	int rc = 0;
3177
3178	if (dump_opt['S']) {
3179		dump_simulated_ddt(spa);
3180		return;
3181	}
3182
3183	if (!dump_opt['e'] && dump_opt['C'] > 1) {
3184		(void) printf("\nCached configuration:\n");
3185		dump_nvlist(spa->spa_config, 8);
3186	}
3187
3188	if (dump_opt['C'])
3189		dump_config(spa);
3190
3191	if (dump_opt['u'])
3192		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
3193
3194	if (dump_opt['D'])
3195		dump_all_ddts(spa);
3196
3197	if (dump_opt['d'] > 2 || dump_opt['m'])
3198		dump_metaslabs(spa);
3199	if (dump_opt['M'])
3200		dump_metaslab_groups(spa);
3201
3202	if (dump_opt['d'] || dump_opt['i']) {
3203		dump_dir(dp->dp_meta_objset);
3204		if (dump_opt['d'] >= 3) {
3205			dump_full_bpobj(&spa->spa_deferred_bpobj,
3206			    "Deferred frees", 0);
3207			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
3208				dump_full_bpobj(
3209				    &spa->spa_dsl_pool->dp_free_bpobj,
3210				    "Pool snapshot frees", 0);
3211			}
3212
3213			if (spa_feature_is_active(spa,
3214			    SPA_FEATURE_ASYNC_DESTROY)) {
3215				dump_bptree(spa->spa_meta_objset,
3216				    spa->spa_dsl_pool->dp_bptree_obj,
3217				    "Pool dataset frees");
3218			}
3219			dump_dtl(spa->spa_root_vdev, 0);
3220		}
3221		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
3222		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
3223
3224		for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
3225			uint64_t refcount;
3226
3227			if (!(spa_feature_table[f].fi_flags &
3228			    ZFEATURE_FLAG_PER_DATASET) ||
3229			    !spa_feature_is_enabled(spa, f)) {
3230				ASSERT0(dataset_feature_count[f]);
3231				continue;
3232			}
3233			(void) feature_get_refcount(spa,
3234			    &spa_feature_table[f], &refcount);
3235			if (dataset_feature_count[f] != refcount) {
3236				(void) printf("%s feature refcount mismatch: "
3237				    "%lld datasets != %lld refcount\n",
3238				    spa_feature_table[f].fi_uname,
3239				    (longlong_t)dataset_feature_count[f],
3240				    (longlong_t)refcount);
3241				rc = 2;
3242			} else {
3243				(void) printf("Verified %s feature refcount "
3244				    "of %llu is correct\n",
3245				    spa_feature_table[f].fi_uname,
3246				    (longlong_t)refcount);
3247			}
3248		}
3249	}
3250	if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
3251		rc = dump_block_stats(spa);
3252
3253	if (rc == 0)
3254		rc = verify_spacemap_refcounts(spa);
3255
3256	if (dump_opt['s'])
3257		show_pool_stats(spa);
3258
3259	if (dump_opt['h'])
3260		dump_history(spa);
3261
3262	if (rc != 0) {
3263		dump_debug_buffer();
3264		exit(rc);
3265	}
3266}
3267
/*
 * Flag bits accepted in the flags field of a "zdb -R" block specifier.
 * Each bit is selected by a single character; the character that main()
 * maps to each bit is noted alongside it.
 */
#define	ZDB_FLAG_CHECKSUM	0x0001	/* 'c' */
#define	ZDB_FLAG_DECOMPRESS	0x0002	/* 'd' */
#define	ZDB_FLAG_BSWAP		0x0004	/* 'e' */
#define	ZDB_FLAG_GBH		0x0008	/* 'g' */
#define	ZDB_FLAG_INDIRECT	0x0010	/* 'i' */
#define	ZDB_FLAG_PHYS		0x0020	/* 'p' */
#define	ZDB_FLAG_RAW		0x0040	/* 'r' */
#define	ZDB_FLAG_PRINT_BLKPTR	0x0080	/* 'b' */

/*
 * Flag character to ZDB_FLAG_* bit, indexed by the character value.
 * An entry of zero means the character is not a valid flag.
 */
int flagbits[256];
3278
3279static void
3280zdb_print_blkptr(blkptr_t *bp, int flags)
3281{
3282	char blkbuf[BP_SPRINTF_LEN];
3283
3284	if (flags & ZDB_FLAG_BSWAP)
3285		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
3286
3287	snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3288	(void) printf("%s\n", blkbuf);
3289}
3290
3291static void
3292zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
3293{
3294	int i;
3295
3296	for (i = 0; i < nbps; i++)
3297		zdb_print_blkptr(&bp[i], flags);
3298}
3299
3300static void
3301zdb_dump_gbh(void *buf, int flags)
3302{
3303	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
3304}
3305
3306static void
3307zdb_dump_block_raw(void *buf, uint64_t size, int flags)
3308{
3309	if (flags & ZDB_FLAG_BSWAP)
3310		byteswap_uint64_array(buf, size);
3311	(void) write(1, buf, size);
3312}
3313
3314static void
3315zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
3316{
3317	uint64_t *d = (uint64_t *)buf;
3318	int nwords = size / sizeof (uint64_t);
3319	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
3320	int i, j;
3321	char *hdr, *c;
3322
3323
3324	if (do_bswap)
3325		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
3326	else
3327		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
3328
3329	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
3330
3331	for (i = 0; i < nwords; i += 2) {
3332		(void) printf("%06llx:  %016llx  %016llx  ",
3333		    (u_longlong_t)(i * sizeof (uint64_t)),
3334		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
3335		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
3336
3337		c = (char *)&d[i];
3338		for (j = 0; j < 2 * sizeof (uint64_t); j++)
3339			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
3340		(void) printf("\n");
3341	}
3342}
3343
3344/*
3345 * There are two acceptable formats:
3346 *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
3347 *	child[.child]*    - For example: 0.1.1
3348 *
3349 * The second form can be used to specify arbitrary vdevs anywhere
3350 * in the heirarchy.  For example, in a pool with a mirror of
3351 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
3352 */
3353static vdev_t *
3354zdb_vdev_lookup(vdev_t *vdev, char *path)
3355{
3356	char *s, *p, *q;
3357	int i;
3358
3359	if (vdev == NULL)
3360		return (NULL);
3361
3362	/* First, assume the x.x.x.x format */
3363	i = (int)strtoul(path, &s, 10);
3364	if (s == path || (s && *s != '.' && *s != '\0'))
3365		goto name;
3366	if (i < 0 || i >= vdev->vdev_children)
3367		return (NULL);
3368
3369	vdev = vdev->vdev_child[i];
3370	if (*s == '\0')
3371		return (vdev);
3372	return (zdb_vdev_lookup(vdev, s+1));
3373
3374name:
3375	for (i = 0; i < vdev->vdev_children; i++) {
3376		vdev_t *vc = vdev->vdev_child[i];
3377
3378		if (vc->vdev_path == NULL) {
3379			vc = zdb_vdev_lookup(vc, path);
3380			if (vc == NULL)
3381				continue;
3382			else
3383				return (vc);
3384		}
3385
3386		p = strrchr(vc->vdev_path, '/');
3387		p = p ? p + 1 : vc->vdev_path;
3388		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
3389
3390		if (strcmp(vc->vdev_path, path) == 0)
3391			return (vc);
3392		if (strcmp(p, path) == 0)
3393			return (vc);
3394		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
3395			return (vc);
3396	}
3397
3398	return (NULL);
3399}
3400
/*
 * Callback wrapper around random_get_pseudo_bytes() with the extra
 * private-data argument that abd_iterate_func() supplies; that argument
 * is ignored.  Fills buf with len pseudo-random bytes and returns the
 * underlying function's result.
 */
/* ARGSUSED */
static int
random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
{
	int rc = random_get_pseudo_bytes(buf, len);

	return (rc);
}
3407
3408/*
3409 * Read a block from a pool and print it out.  The syntax of the
3410 * block descriptor is:
3411 *
3412 *	pool:vdev_specifier:offset:size[:flags]
3413 *
3414 *	pool           - The name of the pool you wish to read from
3415 *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
3416 *	offset         - offset, in hex, in bytes
3417 *	size           - Amount of data to read, in hex, in bytes
3418 *	flags          - A string of characters specifying options
3419 *		 b: Decode a blkptr at given offset within block
3420 *		*c: Calculate and display checksums
3421 *		 d: Decompress data before dumping
3422 *		 e: Byteswap data before dumping
3423 *		 g: Display data as a gang block header
3424 *		 i: Display as an indirect block
3425 *		 p: Do I/O to physical offset
3426 *		 r: Dump raw data to stdout
3427 *
3428 *              * = not yet implemented
3429 */
3430static void
3431zdb_read_block(char *thing, spa_t *spa)
3432{
3433	blkptr_t blk, *bp = &blk;
3434	dva_t *dva = bp->blk_dva;
3435	int flags = 0;
3436	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
3437	zio_t *zio;
3438	vdev_t *vd;
3439	abd_t *pabd;
3440	void *lbuf, *buf;
3441	char *s, *p, *dup, *vdev, *flagstr;
3442	int i, error;
3443
3444	dup = strdup(thing);
3445	s = strtok(dup, ":");
3446	vdev = s ? s : "";
3447	s = strtok(NULL, ":");
3448	offset = strtoull(s ? s : "", NULL, 16);
3449	s = strtok(NULL, ":");
3450	size = strtoull(s ? s : "", NULL, 16);
3451	s = strtok(NULL, ":");
3452	flagstr = s ? s : "";
3453
3454	s = NULL;
3455	if (size == 0)
3456		s = "size must not be zero";
3457	if (!IS_P2ALIGNED(size, DEV_BSIZE))
3458		s = "size must be a multiple of sector size";
3459	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
3460		s = "offset must be a multiple of sector size";
3461	if (s) {
3462		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
3463		free(dup);
3464		return;
3465	}
3466
3467	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
3468		for (i = 0; flagstr[i]; i++) {
3469			int bit = flagbits[(uchar_t)flagstr[i]];
3470
3471			if (bit == 0) {
3472				(void) printf("***Invalid flag: %c\n",
3473				    flagstr[i]);
3474				continue;
3475			}
3476			flags |= bit;
3477
3478			/* If it's not something with an argument, keep going */
3479			if ((bit & (ZDB_FLAG_CHECKSUM |
3480			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
3481				continue;
3482
3483			p = &flagstr[i + 1];
3484			if (bit == ZDB_FLAG_PRINT_BLKPTR)
3485				blkptr_offset = strtoull(p, &p, 16);
3486			if (*p != ':' && *p != '\0') {
3487				(void) printf("***Invalid flag arg: '%s'\n", s);
3488				free(dup);
3489				return;
3490			}
3491		}
3492	}
3493
3494	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
3495	if (vd == NULL) {
3496		(void) printf("***Invalid vdev: %s\n", vdev);
3497		free(dup);
3498		return;
3499	} else {
3500		if (vd->vdev_path)
3501			(void) fprintf(stderr, "Found vdev: %s\n",
3502			    vd->vdev_path);
3503		else
3504			(void) fprintf(stderr, "Found vdev type: %s\n",
3505			    vd->vdev_ops->vdev_op_type);
3506	}
3507
3508	psize = size;
3509	lsize = size;
3510
3511	pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
3512	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3513
3514	BP_ZERO(bp);
3515
3516	DVA_SET_VDEV(&dva[0], vd->vdev_id);
3517	DVA_SET_OFFSET(&dva[0], offset);
3518	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
3519	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
3520
3521	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
3522
3523	BP_SET_LSIZE(bp, lsize);
3524	BP_SET_PSIZE(bp, psize);
3525	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
3526	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
3527	BP_SET_TYPE(bp, DMU_OT_NONE);
3528	BP_SET_LEVEL(bp, 0);
3529	BP_SET_DEDUP(bp, 0);
3530	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
3531
3532	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
3533	zio = zio_root(spa, NULL, NULL, 0);
3534
3535	if (vd == vd->vdev_top) {
3536		/*
3537		 * Treat this as a normal block read.
3538		 */
3539		zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
3540		    ZIO_PRIORITY_SYNC_READ,
3541		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
3542	} else {
3543		/*
3544		 * Treat this as a vdev child I/O.
3545		 */
3546		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
3547		    psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
3548		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
3549		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
3550		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
3551	}
3552
3553	error = zio_wait(zio);
3554	spa_config_exit(spa, SCL_STATE, FTAG);
3555
3556	if (error) {
3557		(void) printf("Read of %s failed, error: %d\n", thing, error);
3558		goto out;
3559	}
3560
3561	if (flags & ZDB_FLAG_DECOMPRESS) {
3562		/*
3563		 * We don't know how the data was compressed, so just try
3564		 * every decompress function at every inflated blocksize.
3565		 */
3566		enum zio_compress c;
3567		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3568		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3569
3570		abd_copy_to_buf(pbuf2, pabd, psize);
3571
3572		VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
3573		    random_get_pseudo_bytes_cb, NULL));
3574
3575		VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
3576		    SPA_MAXBLOCKSIZE - psize));
3577
3578		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
3579		    lsize -= SPA_MINBLOCKSIZE) {
3580			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
3581				if (zio_decompress_data(c, pabd,
3582				    lbuf, psize, lsize) == 0 &&
3583				    zio_decompress_data_buf(c, pbuf2,
3584				    lbuf2, psize, lsize) == 0 &&
3585				    bcmp(lbuf, lbuf2, lsize) == 0)
3586					break;
3587			}
3588			if (c != ZIO_COMPRESS_FUNCTIONS)
3589				break;
3590			lsize -= SPA_MINBLOCKSIZE;
3591		}
3592
3593		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
3594		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
3595
3596		if (lsize <= psize) {
3597			(void) printf("Decompress of %s failed\n", thing);
3598			goto out;
3599		}
3600		buf = lbuf;
3601		size = lsize;
3602	} else {
3603		buf = abd_to_buf(pabd);
3604		size = psize;
3605	}
3606
3607	if (flags & ZDB_FLAG_PRINT_BLKPTR)
3608		zdb_print_blkptr((blkptr_t *)(void *)
3609		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
3610	else if (flags & ZDB_FLAG_RAW)
3611		zdb_dump_block_raw(buf, size, flags);
3612	else if (flags & ZDB_FLAG_INDIRECT)
3613		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
3614		    flags);
3615	else if (flags & ZDB_FLAG_GBH)
3616		zdb_dump_gbh(buf, flags);
3617	else
3618		zdb_dump_block(thing, buf, size, flags);
3619
3620out:
3621	abd_free(pabd);
3622	umem_free(lbuf, SPA_MAXBLOCKSIZE);
3623	free(dup);
3624}
3625
3626static boolean_t
3627pool_match(nvlist_t *cfg, char *tgt)
3628{
3629	uint64_t v, guid = strtoull(tgt, NULL, 0);
3630	char *s;
3631
3632	if (guid != 0) {
3633		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
3634			return (v == guid);
3635	} else {
3636		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
3637			return (strcmp(s, tgt) == 0);
3638	}
3639	return (B_FALSE);
3640}
3641
3642static char *
3643find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
3644{
3645	nvlist_t *pools;
3646	nvlist_t *match = NULL;
3647	char *name = NULL;
3648	char *sepp = NULL;
3649	char sep = '\0';
3650	int count = 0;
3651	importargs_t args = { 0 };
3652
3653	args.paths = dirc;
3654	args.path = dirv;
3655	args.can_be_active = B_TRUE;
3656
3657	if ((sepp = strpbrk(*target, "/@")) != NULL) {
3658		sep = *sepp;
3659		*sepp = '\0';
3660	}
3661
3662	pools = zpool_search_import(g_zfs, &args);
3663
3664	if (pools != NULL) {
3665		nvpair_t *elem = NULL;
3666		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
3667			verify(nvpair_value_nvlist(elem, configp) == 0);
3668			if (pool_match(*configp, *target)) {
3669				count++;
3670				if (match != NULL) {
3671					/* print previously found config */
3672					if (name != NULL) {
3673						(void) printf("%s\n", name);
3674						dump_nvlist(match, 8);
3675						name = NULL;
3676					}
3677					(void) printf("%s\n",
3678					    nvpair_name(elem));
3679					dump_nvlist(*configp, 8);
3680				} else {
3681					match = *configp;
3682					name = nvpair_name(elem);
3683				}
3684			}
3685		}
3686	}
3687	if (count > 1)
3688		(void) fatal("\tMatched %d pools - use pool GUID "
3689		    "instead of pool name or \n"
3690		    "\tpool name part of a dataset name to select pool", count);
3691
3692	if (sepp)
3693		*sepp = sep;
3694	/*
3695	 * If pool GUID was specified for pool id, replace it with pool name
3696	 */
3697	if (name && (strstr(*target, name) != *target)) {
3698		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
3699
3700		*target = umem_alloc(sz, UMEM_NOFAIL);
3701		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
3702	}
3703
3704	*configp = name ? match : NULL;
3705
3706	return (name);
3707}
3708
3709int
3710main(int argc, char **argv)
3711{
3712	int i, c;
3713	struct rlimit rl = { 1024, 1024 };
3714	spa_t *spa = NULL;
3715	objset_t *os = NULL;
3716	int dump_all = 1;
3717	int verbose = 0;
3718	int error = 0;
3719	char **searchdirs = NULL;
3720	int nsearch = 0;
3721	char *target;
3722	nvlist_t *policy = NULL;
3723	uint64_t max_txg = UINT64_MAX;
3724	int flags = ZFS_IMPORT_MISSING_LOG;
3725	int rewind = ZPOOL_NEVER_REWIND;
3726	char *spa_config_path_env;
3727	boolean_t target_is_spa = B_TRUE;
3728
3729	(void) setrlimit(RLIMIT_NOFILE, &rl);
3730	(void) enable_extended_FILE_stdio(-1, -1);
3731
3732	dprintf_setup(&argc, argv);
3733
3734	/*
3735	 * If there is an environment variable SPA_CONFIG_PATH it overrides
3736	 * default spa_config_path setting. If -U flag is specified it will
3737	 * override this environment variable settings once again.
3738	 */
3739	spa_config_path_env = getenv("SPA_CONFIG_PATH");
3740	if (spa_config_path_env != NULL)
3741		spa_config_path = spa_config_path_env;
3742
3743	while ((c = getopt(argc, argv,
3744	    "AbcCdDeFGhiI:lLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
3745		switch (c) {
3746		case 'b':
3747		case 'c':
3748		case 'C':
3749		case 'd':
3750		case 'D':
3751		case 'G':
3752		case 'h':
3753		case 'i':
3754		case 'l':
3755		case 'm':
3756		case 'M':
3757		case 'O':
3758		case 'R':
3759		case 's':
3760		case 'S':
3761		case 'u':
3762			dump_opt[c]++;
3763			dump_all = 0;
3764			break;
3765		case 'A':
3766		case 'e':
3767		case 'F':
3768		case 'L':
3769		case 'P':
3770		case 'q':
3771		case 'X':
3772			dump_opt[c]++;
3773			break;
3774		/* NB: Sort single match options below. */
3775		case 'I':
3776			max_inflight = strtoull(optarg, NULL, 0);
3777			if (max_inflight == 0) {
3778				(void) fprintf(stderr, "maximum number "
3779				    "of inflight I/Os must be greater "
3780				    "than 0\n");
3781				usage();
3782			}
3783			break;
3784		case 'o':
3785			error = set_global_var(optarg);
3786			if (error != 0)
3787				usage();
3788			break;
3789		case 'p':
3790			if (searchdirs == NULL) {
3791				searchdirs = umem_alloc(sizeof (char *),
3792				    UMEM_NOFAIL);
3793			} else {
3794				char **tmp = umem_alloc((nsearch + 1) *
3795				    sizeof (char *), UMEM_NOFAIL);
3796				bcopy(searchdirs, tmp, nsearch *
3797				    sizeof (char *));
3798				umem_free(searchdirs,
3799				    nsearch * sizeof (char *));
3800				searchdirs = tmp;
3801			}
3802			searchdirs[nsearch++] = optarg;
3803			break;
3804		case 't':
3805			max_txg = strtoull(optarg, NULL, 0);
3806			if (max_txg < TXG_INITIAL) {
3807				(void) fprintf(stderr, "incorrect txg "
3808				    "specified: %s\n", optarg);
3809				usage();
3810			}
3811			break;
3812		case 'U':
3813			spa_config_path = optarg;
3814			break;
3815		case 'v':
3816			verbose++;
3817			break;
3818		case 'V':
3819			flags = ZFS_IMPORT_VERBATIM;
3820			break;
3821		case 'x':
3822			vn_dumpdir = optarg;
3823			break;
3824		default:
3825			usage();
3826			break;
3827		}
3828	}
3829
3830	if (!dump_opt['e'] && searchdirs != NULL) {
3831		(void) fprintf(stderr, "-p option requires use of -e\n");
3832		usage();
3833	}
3834
3835	/*
3836	 * ZDB does not typically re-read blocks; therefore limit the ARC
3837	 * to 256 MB, which can be used entirely for metadata.
3838	 */
3839	zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
3840
3841	/*
3842	 * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
3843	 * "zdb -b" uses traversal prefetch which uses async reads.
3844	 * For good performance, let several of them be active at once.
3845	 */
3846	zfs_vdev_async_read_max_active = 10;
3847
3848	/*
3849	 * Disable reference tracking for better performance.
3850	 */
3851	reference_tracking_enable = B_FALSE;
3852
3853	kernel_init(FREAD);
3854	g_zfs = libzfs_init();
3855	ASSERT(g_zfs != NULL);
3856
3857	if (dump_all)
3858		verbose = MAX(verbose, 1);
3859
3860	for (c = 0; c < 256; c++) {
3861		if (dump_all && strchr("AeFlLOPRSX", c) == NULL)
3862			dump_opt[c] = 1;
3863		if (dump_opt[c])
3864			dump_opt[c] += verbose;
3865	}
3866
3867	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
3868	zfs_recover = (dump_opt['A'] > 1);
3869
3870	argc -= optind;
3871	argv += optind;
3872
3873	if (argc < 2 && dump_opt['R'])
3874		usage();
3875	if (argc < 1) {
3876		if (!dump_opt['e'] && dump_opt['C']) {
3877			dump_cachefile(spa_config_path);
3878			return (0);
3879		}
3880		usage();
3881	}
3882
3883	if (dump_opt['l'])
3884		return (dump_label(argv[0]));
3885
3886	if (dump_opt['O']) {
3887		if (argc != 2)
3888			usage();
3889		dump_opt['v'] = verbose + 3;
3890		return (dump_path(argv[0], argv[1]));
3891	}
3892
3893	if (dump_opt['X'] || dump_opt['F'])
3894		rewind = ZPOOL_DO_REWIND |
3895		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
3896
3897	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
3898	    nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
3899	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
3900		fatal("internal error: %s", strerror(ENOMEM));
3901
3902	error = 0;
3903	target = argv[0];
3904
3905	if (dump_opt['e']) {
3906		nvlist_t *cfg = NULL;
3907		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
3908
3909		error = ENOENT;
3910		if (name) {
3911			if (dump_opt['C'] > 1) {
3912				(void) printf("\nConfiguration for import:\n");
3913				dump_nvlist(cfg, 8);
3914			}
3915			if (nvlist_add_nvlist(cfg,
3916			    ZPOOL_REWIND_POLICY, policy) != 0) {
3917				fatal("can't open '%s': %s",
3918				    target, strerror(ENOMEM));
3919			}
3920			error = spa_import(name, cfg, NULL, flags);
3921		}
3922	}
3923
3924	if (strpbrk(target, "/@") != NULL) {
3925		size_t targetlen;
3926
3927		target_is_spa = B_FALSE;
3928		/*
3929		 * Remove any trailing slash.  Later code would get confused
3930		 * by it, but we want to allow it so that "pool/" can
3931		 * indicate that we want to dump the topmost filesystem,
3932		 * rather than the whole pool.
3933		 */
3934		targetlen = strlen(target);
3935		if (targetlen != 0 && target[targetlen - 1] == '/')
3936			target[targetlen - 1] = '\0';
3937	}
3938
3939	if (error == 0) {
3940		if (target_is_spa || dump_opt['R']) {
3941			error = spa_open_rewind(target, &spa, FTAG, policy,
3942			    NULL);
3943			if (error) {
3944				/*
3945				 * If we're missing the log device then
3946				 * try opening the pool after clearing the
3947				 * log state.
3948				 */
3949				mutex_enter(&spa_namespace_lock);
3950				if ((spa = spa_lookup(target)) != NULL &&
3951				    spa->spa_log_state == SPA_LOG_MISSING) {
3952					spa->spa_log_state = SPA_LOG_CLEAR;
3953					error = 0;
3954				}
3955				mutex_exit(&spa_namespace_lock);
3956
3957				if (!error) {
3958					error = spa_open_rewind(target, &spa,
3959					    FTAG, policy, NULL);
3960				}
3961			}
3962		} else {
3963			error = open_objset(target, DMU_OST_ANY, FTAG, &os);
3964		}
3965	}
3966	nvlist_free(policy);
3967
3968	if (error)
3969		fatal("can't open '%s': %s", target, strerror(error));
3970
3971	argv++;
3972	argc--;
3973	if (!dump_opt['R']) {
3974		if (argc > 0) {
3975			zopt_objects = argc;
3976			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
3977			for (i = 0; i < zopt_objects; i++) {
3978				errno = 0;
3979				zopt_object[i] = strtoull(argv[i], NULL, 0);
3980				if (zopt_object[i] == 0 && errno != 0)
3981					fatal("bad number %s: %s",
3982					    argv[i], strerror(errno));
3983			}
3984		}
3985		if (os != NULL) {
3986			dump_dir(os);
3987		} else if (zopt_objects > 0 && !dump_opt['m']) {
3988			dump_dir(spa->spa_meta_objset);
3989		} else {
3990			dump_zpool(spa);
3991		}
3992	} else {
3993		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
3994		flagbits['c'] = ZDB_FLAG_CHECKSUM;
3995		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
3996		flagbits['e'] = ZDB_FLAG_BSWAP;
3997		flagbits['g'] = ZDB_FLAG_GBH;
3998		flagbits['i'] = ZDB_FLAG_INDIRECT;
3999		flagbits['p'] = ZDB_FLAG_PHYS;
4000		flagbits['r'] = ZDB_FLAG_RAW;
4001
4002		for (i = 0; i < argc; i++)
4003			zdb_read_block(argv[i], spa);
4004	}
4005
4006	if (os != NULL)
4007		close_objset(os, FTAG);
4008	else
4009		spa_close(spa, FTAG);
4010
4011	fuid_table_destroy();
4012
4013	dump_debug_buffer();
4014
4015	libzfs_fini(g_zfs);
4016	kernel_fini();
4017
4018	return (0);
4019}
4020