zdb.c revision ad135b5d644628e791c3188a6ecbd9c257961ef8
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 */
26
27#include <stdio.h>
28#include <stdio_ext.h>
29#include <stdlib.h>
30#include <ctype.h>
31#include <sys/zfs_context.h>
32#include <sys/spa.h>
33#include <sys/spa_impl.h>
34#include <sys/dmu.h>
35#include <sys/zap.h>
36#include <sys/fs/zfs.h>
37#include <sys/zfs_znode.h>
38#include <sys/zfs_sa.h>
39#include <sys/sa.h>
40#include <sys/sa_impl.h>
41#include <sys/vdev.h>
42#include <sys/vdev_impl.h>
43#include <sys/metaslab_impl.h>
44#include <sys/dmu_objset.h>
45#include <sys/dsl_dir.h>
46#include <sys/dsl_dataset.h>
47#include <sys/dsl_pool.h>
48#include <sys/dbuf.h>
49#include <sys/zil.h>
50#include <sys/zil_impl.h>
51#include <sys/stat.h>
52#include <sys/resource.h>
53#include <sys/dmu_traverse.h>
54#include <sys/zio_checksum.h>
55#include <sys/zio_compress.h>
56#include <sys/zfs_fuid.h>
57#include <sys/arc.h>
58#include <sys/ddt.h>
59#include <sys/zfeature.h>
60#undef ZFS_MAXNAMELEN
61#undef verify
62#include <libzfs.h>
63
/*
 * Name-lookup helpers.  Each macro checks its index against the size of
 * the corresponding table and yields "UNKNOWN" when the index cannot be
 * resolved.  Note that the argument is evaluated more than once.
 */
#define	ZDB_COMPRESS_NAME(idx)					\
	((idx) < ZIO_COMPRESS_FUNCTIONS ?			\
	zio_compress_table[(idx)].ci_name : "UNKNOWN")

#define	ZDB_CHECKSUM_NAME(idx)					\
	((idx) < ZIO_CHECKSUM_FUNCTIONS ?			\
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")

/*
 * Object types below DMU_OT_NUMTYPES are named from dmu_ot[]; any other
 * type accepted by DMU_OT_IS_VALID() is named from dmu_ot_byteswap[].
 */
#define	ZDB_OT_NAME(idx)					\
	((idx) < DMU_OT_NUMTYPES ? dmu_ot[(idx)].ot_name :	\
	(DMU_OT_IS_VALID(idx) ?					\
	dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN"))

/* Collapse every type at or above DMU_OT_NUMTYPES onto DMU_OT_NUMTYPES. */
#define	ZDB_OT_TYPE(idx)					\
	((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
72
73#ifndef lint
74extern int zfs_recover;
75#else
76int zfs_recover;
77#endif
78
79const char cmdname[] = "zdb";
80uint8_t dump_opt[256];
81
82typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
83
84extern void dump_intent_log(zilog_t *);
85uint64_t *zopt_object = NULL;
86int zopt_objects = 0;
87libzfs_handle_t *g_zfs;
88
89/*
90 * These libumem hooks provide a reasonable set of defaults for the allocator's
91 * debugging facilities.
92 */
/*
 * libumem hook: returns the string used as the default $UMEM_DEBUG setting.
 * Declared with an explicit (void) parameter list so the definition is a
 * proper prototype, matching _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
98
/*
 * libumem hook: returns the string used as the default $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char setting[] = "fail,contents";

	return (setting); /* $UMEM_LOGGING setting */
}
104
105static void
106usage(void)
107{
108	(void) fprintf(stderr,
109	    "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
110	    "poolname [object...]\n"
111	    "       %s [-divPA] [-e -p path...] dataset [object...]\n"
112	    "       %s -m [-LXFPA] [-t txg] [-e [-p path...]] "
113	    "poolname [vdev [metaslab...]]\n"
114	    "       %s -R [-A] [-e [-p path...]] poolname "
115	    "vdev:offset:size[:flags]\n"
116	    "       %s -S [-PA] [-e [-p path...]] poolname\n"
117	    "       %s -l [-uA] device\n"
118	    "       %s -C [-A] [-U config]\n\n",
119	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
120
121	(void) fprintf(stderr, "    Dataset name must include at least one "
122	    "separator character '/' or '@'\n");
123	(void) fprintf(stderr, "    If dataset name is specified, only that "
124	    "dataset is dumped\n");
125	(void) fprintf(stderr, "    If object numbers are specified, only "
126	    "those objects are dumped\n\n");
127	(void) fprintf(stderr, "    Options to control amount of output:\n");
128	(void) fprintf(stderr, "        -u uberblock\n");
129	(void) fprintf(stderr, "        -d dataset(s)\n");
130	(void) fprintf(stderr, "        -i intent logs\n");
131	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
132	(void) fprintf(stderr, "        -h pool history\n");
133	(void) fprintf(stderr, "        -b block statistics\n");
134	(void) fprintf(stderr, "        -m metaslabs\n");
135	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
136	    "all data) blocks\n");
137	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
138	(void) fprintf(stderr, "        -D dedup statistics\n");
139	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
140	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
141	(void) fprintf(stderr, "        -l dump label contents\n");
142	(void) fprintf(stderr, "        -L disable leak tracking (do not "
143	    "load spacemaps)\n");
144	(void) fprintf(stderr, "        -R read and display block from a "
145	    "device\n\n");
146	(void) fprintf(stderr, "    Below options are intended for use "
147	    "with other options (except -l):\n");
148	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
149	    "panic recovery (-AA) or both (-AAA)\n");
150	(void) fprintf(stderr, "        -F attempt automatic rewind within "
151	    "safe range of transaction groups\n");
152	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
153	    "cachefile\n");
154	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
155	    "work with dataset)\n");
156	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
157	    "has altroot/not in a cachefile\n");
158	(void) fprintf(stderr, "        -p <path> -- use one or more with "
159	    "-e to specify path to vdev dir\n");
160	(void) fprintf(stderr, "        -P print numbers in parseable form\n");
161	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
162	    "searching for uberblocks\n");
163	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
164	    "to make only that option verbose\n");
165	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
166	exit(1);
167}
168
169/*
170 * Called for usage errors that are discovered after a call to spa_open(),
171 * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
172 */
173
174static void
175fatal(const char *fmt, ...)
176{
177	va_list ap;
178
179	va_start(ap, fmt);
180	(void) fprintf(stderr, "%s: ", cmdname);
181	(void) vfprintf(stderr, fmt, ap);
182	va_end(ap);
183	(void) fprintf(stderr, "\n");
184
185	exit(1);
186}
187
188/* ARGSUSED */
189static void
190dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
191{
192	nvlist_t *nv;
193	size_t nvsize = *(uint64_t *)data;
194	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
195
196	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
197
198	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
199
200	umem_free(packed, nvsize);
201
202	dump_nvlist(nv, 8);
203
204	nvlist_free(nv);
205}
206
207static void
208zdb_nicenum(uint64_t num, char *buf)
209{
210	if (dump_opt['P'])
211		(void) sprintf(buf, "%llu", (longlong_t)num);
212	else
213		nicenum(num, buf);
214}
215
216const char dump_zap_stars[] = "****************************************";
217const int dump_zap_width = sizeof (dump_zap_stars) - 1;
218
219static void
220dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
221{
222	int i;
223	int minidx = ZAP_HISTOGRAM_SIZE - 1;
224	int maxidx = 0;
225	uint64_t max = 0;
226
227	for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
228		if (histo[i] > max)
229			max = histo[i];
230		if (histo[i] > 0 && i > maxidx)
231			maxidx = i;
232		if (histo[i] > 0 && i < minidx)
233			minidx = i;
234	}
235
236	if (max < dump_zap_width)
237		max = dump_zap_width;
238
239	for (i = minidx; i <= maxidx; i++)
240		(void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
241		    &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
242}
243
244static void
245dump_zap_stats(objset_t *os, uint64_t object)
246{
247	int error;
248	zap_stats_t zs;
249
250	error = zap_get_stats(os, object, &zs);
251	if (error)
252		return;
253
254	if (zs.zs_ptrtbl_len == 0) {
255		ASSERT(zs.zs_num_blocks == 1);
256		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
257		    (u_longlong_t)zs.zs_blocksize,
258		    (u_longlong_t)zs.zs_num_entries);
259		return;
260	}
261
262	(void) printf("\tFat ZAP stats:\n");
263
264	(void) printf("\t\tPointer table:\n");
265	(void) printf("\t\t\t%llu elements\n",
266	    (u_longlong_t)zs.zs_ptrtbl_len);
267	(void) printf("\t\t\tzt_blk: %llu\n",
268	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
269	(void) printf("\t\t\tzt_numblks: %llu\n",
270	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
271	(void) printf("\t\t\tzt_shift: %llu\n",
272	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
273	(void) printf("\t\t\tzt_blks_copied: %llu\n",
274	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
275	(void) printf("\t\t\tzt_nextblk: %llu\n",
276	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
277
278	(void) printf("\t\tZAP entries: %llu\n",
279	    (u_longlong_t)zs.zs_num_entries);
280	(void) printf("\t\tLeaf blocks: %llu\n",
281	    (u_longlong_t)zs.zs_num_leafs);
282	(void) printf("\t\tTotal blocks: %llu\n",
283	    (u_longlong_t)zs.zs_num_blocks);
284	(void) printf("\t\tzap_block_type: 0x%llx\n",
285	    (u_longlong_t)zs.zs_block_type);
286	(void) printf("\t\tzap_magic: 0x%llx\n",
287	    (u_longlong_t)zs.zs_magic);
288	(void) printf("\t\tzap_salt: 0x%llx\n",
289	    (u_longlong_t)zs.zs_salt);
290
291	(void) printf("\t\tLeafs with 2^n pointers:\n");
292	dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
293
294	(void) printf("\t\tBlocks with n*5 entries:\n");
295	dump_zap_histogram(zs.zs_blocks_with_n5_entries);
296
297	(void) printf("\t\tBlocks n/10 full:\n");
298	dump_zap_histogram(zs.zs_blocks_n_tenths_full);
299
300	(void) printf("\t\tEntries with n chunks:\n");
301	dump_zap_histogram(zs.zs_entries_using_n_chunks);
302
303	(void) printf("\t\tBuckets with n entries:\n");
304	dump_zap_histogram(zs.zs_buckets_with_n_entries);
305}
306
307/*ARGSUSED*/
308static void
309dump_none(objset_t *os, uint64_t object, void *data, size_t size)
310{
311}
312
313/*ARGSUSED*/
314static void
315dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
316{
317	(void) printf("\tUNKNOWN OBJECT TYPE\n");
318}
319
320/*ARGSUSED*/
321void
322dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
323{
324}
325
326/*ARGSUSED*/
327static void
328dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
329{
330}
331
332/*ARGSUSED*/
333static void
334dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
335{
336	zap_cursor_t zc;
337	zap_attribute_t attr;
338	void *prop;
339	int i;
340
341	dump_zap_stats(os, object);
342	(void) printf("\n");
343
344	for (zap_cursor_init(&zc, os, object);
345	    zap_cursor_retrieve(&zc, &attr) == 0;
346	    zap_cursor_advance(&zc)) {
347		(void) printf("\t\t%s = ", attr.za_name);
348		if (attr.za_num_integers == 0) {
349			(void) printf("\n");
350			continue;
351		}
352		prop = umem_zalloc(attr.za_num_integers *
353		    attr.za_integer_length, UMEM_NOFAIL);
354		(void) zap_lookup(os, object, attr.za_name,
355		    attr.za_integer_length, attr.za_num_integers, prop);
356		if (attr.za_integer_length == 1) {
357			(void) printf("%s", (char *)prop);
358		} else {
359			for (i = 0; i < attr.za_num_integers; i++) {
360				switch (attr.za_integer_length) {
361				case 2:
362					(void) printf("%u ",
363					    ((uint16_t *)prop)[i]);
364					break;
365				case 4:
366					(void) printf("%u ",
367					    ((uint32_t *)prop)[i]);
368					break;
369				case 8:
370					(void) printf("%lld ",
371					    (u_longlong_t)((int64_t *)prop)[i]);
372					break;
373				}
374			}
375		}
376		(void) printf("\n");
377		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
378	}
379	zap_cursor_fini(&zc);
380}
381
382/*ARGSUSED*/
383static void
384dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
385{
386	dump_zap_stats(os, object);
387	/* contents are printed elsewhere, properly decoded */
388}
389
390/*ARGSUSED*/
391static void
392dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
393{
394	zap_cursor_t zc;
395	zap_attribute_t attr;
396
397	dump_zap_stats(os, object);
398	(void) printf("\n");
399
400	for (zap_cursor_init(&zc, os, object);
401	    zap_cursor_retrieve(&zc, &attr) == 0;
402	    zap_cursor_advance(&zc)) {
403		(void) printf("\t\t%s = ", attr.za_name);
404		if (attr.za_num_integers == 0) {
405			(void) printf("\n");
406			continue;
407		}
408		(void) printf(" %llx : [%d:%d:%d]\n",
409		    (u_longlong_t)attr.za_first_integer,
410		    (int)ATTR_LENGTH(attr.za_first_integer),
411		    (int)ATTR_BSWAP(attr.za_first_integer),
412		    (int)ATTR_NUM(attr.za_first_integer));
413	}
414	zap_cursor_fini(&zc);
415}
416
417/*ARGSUSED*/
418static void
419dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
420{
421	zap_cursor_t zc;
422	zap_attribute_t attr;
423	uint16_t *layout_attrs;
424	int i;
425
426	dump_zap_stats(os, object);
427	(void) printf("\n");
428
429	for (zap_cursor_init(&zc, os, object);
430	    zap_cursor_retrieve(&zc, &attr) == 0;
431	    zap_cursor_advance(&zc)) {
432		(void) printf("\t\t%s = [", attr.za_name);
433		if (attr.za_num_integers == 0) {
434			(void) printf("\n");
435			continue;
436		}
437
438		VERIFY(attr.za_integer_length == 2);
439		layout_attrs = umem_zalloc(attr.za_num_integers *
440		    attr.za_integer_length, UMEM_NOFAIL);
441
442		VERIFY(zap_lookup(os, object, attr.za_name,
443		    attr.za_integer_length,
444		    attr.za_num_integers, layout_attrs) == 0);
445
446		for (i = 0; i != attr.za_num_integers; i++)
447			(void) printf(" %d ", (int)layout_attrs[i]);
448		(void) printf("]\n");
449		umem_free(layout_attrs,
450		    attr.za_num_integers * attr.za_integer_length);
451	}
452	zap_cursor_fini(&zc);
453}
454
455/*ARGSUSED*/
456static void
457dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
458{
459	zap_cursor_t zc;
460	zap_attribute_t attr;
461	const char *typenames[] = {
462		/* 0 */ "not specified",
463		/* 1 */ "FIFO",
464		/* 2 */ "Character Device",
465		/* 3 */ "3 (invalid)",
466		/* 4 */ "Directory",
467		/* 5 */ "5 (invalid)",
468		/* 6 */ "Block Device",
469		/* 7 */ "7 (invalid)",
470		/* 8 */ "Regular File",
471		/* 9 */ "9 (invalid)",
472		/* 10 */ "Symbolic Link",
473		/* 11 */ "11 (invalid)",
474		/* 12 */ "Socket",
475		/* 13 */ "Door",
476		/* 14 */ "Event Port",
477		/* 15 */ "15 (invalid)",
478	};
479
480	dump_zap_stats(os, object);
481	(void) printf("\n");
482
483	for (zap_cursor_init(&zc, os, object);
484	    zap_cursor_retrieve(&zc, &attr) == 0;
485	    zap_cursor_advance(&zc)) {
486		(void) printf("\t\t%s = %lld (type: %s)\n",
487		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
488		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
489	}
490	zap_cursor_fini(&zc);
491}
492
493static void
494dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
495{
496	uint64_t alloc, offset, entry;
497	uint8_t mapshift = sm->sm_shift;
498	uint64_t mapstart = sm->sm_start;
499	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
500			    "INVALID", "INVALID", "INVALID", "INVALID" };
501
502	if (smo->smo_object == 0)
503		return;
504
505	/*
506	 * Print out the freelist entries in both encoded and decoded form.
507	 */
508	alloc = 0;
509	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
510		VERIFY3U(0, ==, dmu_read(os, smo->smo_object, offset,
511		    sizeof (entry), &entry, DMU_READ_PREFETCH));
512		if (SM_DEBUG_DECODE(entry)) {
513			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
514			    (u_longlong_t)(offset / sizeof (entry)),
515			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
516			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
517			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
518		} else {
519			(void) printf("\t    [%6llu]    %c  range:"
520			    " %010llx-%010llx  size: %06llx\n",
521			    (u_longlong_t)(offset / sizeof (entry)),
522			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
523			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
524			    mapshift) + mapstart),
525			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
526			    mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
527			    mapshift)),
528			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
529			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
530				alloc += SM_RUN_DECODE(entry) << mapshift;
531			else
532				alloc -= SM_RUN_DECODE(entry) << mapshift;
533		}
534	}
535	if (alloc != smo->smo_alloc) {
536		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
537		    "with space map summary (%llu)\n",
538		    (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
539	}
540}
541
542static void
543dump_metaslab_stats(metaslab_t *msp)
544{
545	char maxbuf[32];
546	space_map_t *sm = &msp->ms_map;
547	avl_tree_t *t = sm->sm_pp_root;
548	int free_pct = sm->sm_space * 100 / sm->sm_size;
549
550	zdb_nicenum(space_map_maxsize(sm), maxbuf);
551
552	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
553	    "segments", avl_numnodes(t), "maxsize", maxbuf,
554	    "freepct", free_pct);
555}
556
557static void
558dump_metaslab(metaslab_t *msp)
559{
560	vdev_t *vd = msp->ms_group->mg_vd;
561	spa_t *spa = vd->vdev_spa;
562	space_map_t *sm = &msp->ms_map;
563	space_map_obj_t *smo = &msp->ms_smo;
564	char freebuf[32];
565
566	zdb_nicenum(sm->sm_size - smo->smo_alloc, freebuf);
567
568	(void) printf(
569	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
570	    (u_longlong_t)(sm->sm_start / sm->sm_size),
571	    (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
572
573	if (dump_opt['m'] > 1 && !dump_opt['L']) {
574		mutex_enter(&msp->ms_lock);
575		space_map_load_wait(sm);
576		if (!sm->sm_loaded)
577			VERIFY(space_map_load(sm, zfs_metaslab_ops,
578			    SM_FREE, smo, spa->spa_meta_objset) == 0);
579		dump_metaslab_stats(msp);
580		space_map_unload(sm);
581		mutex_exit(&msp->ms_lock);
582	}
583
584	if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
585		ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift));
586
587		mutex_enter(&msp->ms_lock);
588		dump_spacemap(spa->spa_meta_objset, smo, sm);
589		mutex_exit(&msp->ms_lock);
590	}
591}
592
593static void
594print_vdev_metaslab_header(vdev_t *vd)
595{
596	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
597	    (u_longlong_t)vd->vdev_id,
598	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
599	    "offset", "spacemap", "free");
600	(void) printf("\t%15s   %19s   %15s   %10s\n",
601	    "---------------", "-------------------",
602	    "---------------", "-------------");
603}
604
605static void
606dump_metaslabs(spa_t *spa)
607{
608	vdev_t *vd, *rvd = spa->spa_root_vdev;
609	uint64_t m, c = 0, children = rvd->vdev_children;
610
611	(void) printf("\nMetaslabs:\n");
612
613	if (!dump_opt['d'] && zopt_objects > 0) {
614		c = zopt_object[0];
615
616		if (c >= children)
617			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
618
619		if (zopt_objects > 1) {
620			vd = rvd->vdev_child[c];
621			print_vdev_metaslab_header(vd);
622
623			for (m = 1; m < zopt_objects; m++) {
624				if (zopt_object[m] < vd->vdev_ms_count)
625					dump_metaslab(
626					    vd->vdev_ms[zopt_object[m]]);
627				else
628					(void) fprintf(stderr, "bad metaslab "
629					    "number %llu\n",
630					    (u_longlong_t)zopt_object[m]);
631			}
632			(void) printf("\n");
633			return;
634		}
635		children = c + 1;
636	}
637	for (; c < children; c++) {
638		vd = rvd->vdev_child[c];
639		print_vdev_metaslab_header(vd);
640
641		for (m = 0; m < vd->vdev_ms_count; m++)
642			dump_metaslab(vd->vdev_ms[m]);
643		(void) printf("\n");
644	}
645}
646
647static void
648dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
649{
650	const ddt_phys_t *ddp = dde->dde_phys;
651	const ddt_key_t *ddk = &dde->dde_key;
652	char *types[4] = { "ditto", "single", "double", "triple" };
653	char blkbuf[BP_SPRINTF_LEN];
654	blkptr_t blk;
655
656	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
657		if (ddp->ddp_phys_birth == 0)
658			continue;
659		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
660		sprintf_blkptr(blkbuf, &blk);
661		(void) printf("index %llx refcnt %llu %s %s\n",
662		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
663		    types[p], blkbuf);
664	}
665}
666
667static void
668dump_dedup_ratio(const ddt_stat_t *dds)
669{
670	double rL, rP, rD, D, dedup, compress, copies;
671
672	if (dds->dds_blocks == 0)
673		return;
674
675	rL = (double)dds->dds_ref_lsize;
676	rP = (double)dds->dds_ref_psize;
677	rD = (double)dds->dds_ref_dsize;
678	D = (double)dds->dds_dsize;
679
680	dedup = rD / D;
681	compress = rL / rP;
682	copies = rD / rP;
683
684	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
685	    "dedup * compress / copies = %.2f\n\n",
686	    dedup, compress, copies, dedup * compress / copies);
687}
688
689static void
690dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
691{
692	char name[DDT_NAMELEN];
693	ddt_entry_t dde;
694	uint64_t walk = 0;
695	dmu_object_info_t doi;
696	uint64_t count, dspace, mspace;
697	int error;
698
699	error = ddt_object_info(ddt, type, class, &doi);
700
701	if (error == ENOENT)
702		return;
703	ASSERT(error == 0);
704
705	if ((count = ddt_object_count(ddt, type, class)) == 0)
706		return;
707
708	dspace = doi.doi_physical_blocks_512 << 9;
709	mspace = doi.doi_fill_count * doi.doi_data_block_size;
710
711	ddt_object_name(ddt, type, class, name);
712
713	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
714	    name,
715	    (u_longlong_t)count,
716	    (u_longlong_t)(dspace / count),
717	    (u_longlong_t)(mspace / count));
718
719	if (dump_opt['D'] < 3)
720		return;
721
722	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
723
724	if (dump_opt['D'] < 4)
725		return;
726
727	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
728		return;
729
730	(void) printf("%s contents:\n\n", name);
731
732	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
733		dump_dde(ddt, &dde, walk);
734
735	ASSERT(error == ENOENT);
736
737	(void) printf("\n");
738}
739
740static void
741dump_all_ddts(spa_t *spa)
742{
743	ddt_histogram_t ddh_total = { 0 };
744	ddt_stat_t dds_total = { 0 };
745
746	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
747		ddt_t *ddt = spa->spa_ddt[c];
748		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
749			for (enum ddt_class class = 0; class < DDT_CLASSES;
750			    class++) {
751				dump_ddt(ddt, type, class);
752			}
753		}
754	}
755
756	ddt_get_dedup_stats(spa, &dds_total);
757
758	if (dds_total.dds_blocks == 0) {
759		(void) printf("All DDTs are empty\n");
760		return;
761	}
762
763	(void) printf("\n");
764
765	if (dump_opt['D'] > 1) {
766		(void) printf("DDT histogram (aggregated over all DDTs):\n");
767		ddt_get_dedup_histogram(spa, &ddh_total);
768		zpool_dump_ddt(&dds_total, &ddh_total);
769	}
770
771	dump_dedup_ratio(&dds_total);
772}
773
774static void
775dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
776{
777	char *prefix = (void *)sm;
778
779	(void) printf("%s [%llu,%llu) length %llu\n",
780	    prefix,
781	    (u_longlong_t)start,
782	    (u_longlong_t)(start + size),
783	    (u_longlong_t)(size));
784}
785
786static void
787dump_dtl(vdev_t *vd, int indent)
788{
789	spa_t *spa = vd->vdev_spa;
790	boolean_t required;
791	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
792	char prefix[256];
793
794	spa_vdev_state_enter(spa, SCL_NONE);
795	required = vdev_dtl_required(vd);
796	(void) spa_vdev_state_exit(spa, NULL, 0);
797
798	if (indent == 0)
799		(void) printf("\nDirty time logs:\n\n");
800
801	(void) printf("\t%*s%s [%s]\n", indent, "",
802	    vd->vdev_path ? vd->vdev_path :
803	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
804	    required ? "DTL-required" : "DTL-expendable");
805
806	for (int t = 0; t < DTL_TYPES; t++) {
807		space_map_t *sm = &vd->vdev_dtl[t];
808		if (sm->sm_space == 0)
809			continue;
810		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
811		    indent + 2, "", name[t]);
812		mutex_enter(sm->sm_lock);
813		space_map_walk(sm, dump_dtl_seg, (void *)prefix);
814		mutex_exit(sm->sm_lock);
815		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
816			dump_spacemap(spa->spa_meta_objset,
817			    &vd->vdev_dtl_smo, sm);
818	}
819
820	for (int c = 0; c < vd->vdev_children; c++)
821		dump_dtl(vd->vdev_child[c], indent + 4);
822}
823
824static void
825dump_history(spa_t *spa)
826{
827	nvlist_t **events = NULL;
828	char buf[SPA_MAXBLOCKSIZE];
829	uint64_t resid, len, off = 0;
830	uint_t num = 0;
831	int error;
832	time_t tsec;
833	struct tm t;
834	char tbuf[30];
835	char internalstr[MAXPATHLEN];
836
837	do {
838		len = sizeof (buf);
839
840		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
841			(void) fprintf(stderr, "Unable to read history: "
842			    "error %d\n", error);
843			return;
844		}
845
846		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
847			break;
848
849		off -= resid;
850	} while (len != 0);
851
852	(void) printf("\nHistory:\n");
853	for (int i = 0; i < num; i++) {
854		uint64_t time, txg, ievent;
855		char *cmd, *intstr;
856
857		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
858		    &time) != 0)
859			continue;
860		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
861		    &cmd) != 0) {
862			if (nvlist_lookup_uint64(events[i],
863			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
864				continue;
865			verify(nvlist_lookup_uint64(events[i],
866			    ZPOOL_HIST_TXG, &txg) == 0);
867			verify(nvlist_lookup_string(events[i],
868			    ZPOOL_HIST_INT_STR, &intstr) == 0);
869			if (ievent >= LOG_END)
870				continue;
871
872			(void) snprintf(internalstr,
873			    sizeof (internalstr),
874			    "[internal %s txg:%lld] %s",
875			    zfs_history_event_names[ievent], txg,
876			    intstr);
877			cmd = internalstr;
878		}
879		tsec = time;
880		(void) localtime_r(&tsec, &t);
881		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
882		(void) printf("%s %s\n", tbuf, cmd);
883	}
884}
885
886/*ARGSUSED*/
887static void
888dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
889{
890}
891
892static uint64_t
893blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
894{
895	if (dnp == NULL) {
896		ASSERT(zb->zb_level < 0);
897		if (zb->zb_object == 0)
898			return (zb->zb_blkid);
899		return (zb->zb_blkid * BP_GET_LSIZE(bp));
900	}
901
902	ASSERT(zb->zb_level >= 0);
903
904	return ((zb->zb_blkid <<
905	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
906	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
907}
908
909static void
910sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
911{
912	const dva_t *dva = bp->blk_dva;
913	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
914
915	if (dump_opt['b'] >= 5) {
916		sprintf_blkptr(blkbuf, bp);
917		return;
918	}
919
920	blkbuf[0] = '\0';
921
922	for (int i = 0; i < ndvas; i++)
923		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
924		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
925		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
926		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
927
928	(void) sprintf(blkbuf + strlen(blkbuf),
929	    "%llxL/%llxP F=%llu B=%llu/%llu",
930	    (u_longlong_t)BP_GET_LSIZE(bp),
931	    (u_longlong_t)BP_GET_PSIZE(bp),
932	    (u_longlong_t)bp->blk_fill,
933	    (u_longlong_t)bp->blk_birth,
934	    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
935}
936
937static void
938print_indirect(blkptr_t *bp, const zbookmark_t *zb,
939    const dnode_phys_t *dnp)
940{
941	char blkbuf[BP_SPRINTF_LEN];
942	int l;
943
944	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
945	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
946
947	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
948
949	ASSERT(zb->zb_level >= 0);
950
951	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
952		if (l == zb->zb_level) {
953			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
954		} else {
955			(void) printf(" ");
956		}
957	}
958
959	sprintf_blkptr_compact(blkbuf, bp);
960	(void) printf("%s\n", blkbuf);
961}
962
963static int
964visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
965    blkptr_t *bp, const zbookmark_t *zb)
966{
967	int err = 0;
968
969	if (bp->blk_birth == 0)
970		return (0);
971
972	print_indirect(bp, zb, dnp);
973
974	if (BP_GET_LEVEL(bp) > 0) {
975		uint32_t flags = ARC_WAIT;
976		int i;
977		blkptr_t *cbp;
978		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
979		arc_buf_t *buf;
980		uint64_t fill = 0;
981
982		err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
983		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
984		if (err)
985			return (err);
986		ASSERT(buf->b_data);
987
988		/* recursively visit blocks below this */
989		cbp = buf->b_data;
990		for (i = 0; i < epb; i++, cbp++) {
991			zbookmark_t czb;
992
993			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
994			    zb->zb_level - 1,
995			    zb->zb_blkid * epb + i);
996			err = visit_indirect(spa, dnp, cbp, &czb);
997			if (err)
998				break;
999			fill += cbp->blk_fill;
1000		}
1001		if (!err)
1002			ASSERT3U(fill, ==, bp->blk_fill);
1003		(void) arc_buf_remove_ref(buf, &buf);
1004	}
1005
1006	return (err);
1007}
1008
1009/*ARGSUSED*/
1010static void
1011dump_indirect(dnode_t *dn)
1012{
1013	dnode_phys_t *dnp = dn->dn_phys;
1014	int j;
1015	zbookmark_t czb;
1016
1017	(void) printf("Indirect blocks:\n");
1018
1019	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1020	    dn->dn_object, dnp->dn_nlevels - 1, 0);
1021	for (j = 0; j < dnp->dn_nblkptr; j++) {
1022		czb.zb_blkid = j;
1023		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1024		    &dnp->dn_blkptr[j], &czb);
1025	}
1026
1027	(void) printf("\n");
1028}
1029
1030/*ARGSUSED*/
1031static void
1032dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1033{
1034	dsl_dir_phys_t *dd = data;
1035	time_t crtime;
1036	char nice[32];
1037
1038	if (dd == NULL)
1039		return;
1040
1041	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1042
1043	crtime = dd->dd_creation_time;
1044	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1045	(void) printf("\t\thead_dataset_obj = %llu\n",
1046	    (u_longlong_t)dd->dd_head_dataset_obj);
1047	(void) printf("\t\tparent_dir_obj = %llu\n",
1048	    (u_longlong_t)dd->dd_parent_obj);
1049	(void) printf("\t\torigin_obj = %llu\n",
1050	    (u_longlong_t)dd->dd_origin_obj);
1051	(void) printf("\t\tchild_dir_zapobj = %llu\n",
1052	    (u_longlong_t)dd->dd_child_dir_zapobj);
1053	zdb_nicenum(dd->dd_used_bytes, nice);
1054	(void) printf("\t\tused_bytes = %s\n", nice);
1055	zdb_nicenum(dd->dd_compressed_bytes, nice);
1056	(void) printf("\t\tcompressed_bytes = %s\n", nice);
1057	zdb_nicenum(dd->dd_uncompressed_bytes, nice);
1058	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
1059	zdb_nicenum(dd->dd_quota, nice);
1060	(void) printf("\t\tquota = %s\n", nice);
1061	zdb_nicenum(dd->dd_reserved, nice);
1062	(void) printf("\t\treserved = %s\n", nice);
1063	(void) printf("\t\tprops_zapobj = %llu\n",
1064	    (u_longlong_t)dd->dd_props_zapobj);
1065	(void) printf("\t\tdeleg_zapobj = %llu\n",
1066	    (u_longlong_t)dd->dd_deleg_zapobj);
1067	(void) printf("\t\tflags = %llx\n",
1068	    (u_longlong_t)dd->dd_flags);
1069
1070#define	DO(which) \
1071	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1072	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1073	DO(HEAD);
1074	DO(SNAP);
1075	DO(CHILD);
1076	DO(CHILD_RSRV);
1077	DO(REFRSRV);
1078#undef DO
1079}
1080
1081/*ARGSUSED*/
1082static void
1083dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1084{
1085	dsl_dataset_phys_t *ds = data;
1086	time_t crtime;
1087	char used[32], compressed[32], uncompressed[32], unique[32];
1088	char blkbuf[BP_SPRINTF_LEN];
1089
1090	if (ds == NULL)
1091		return;
1092
1093	ASSERT(size == sizeof (*ds));
1094	crtime = ds->ds_creation_time;
1095	zdb_nicenum(ds->ds_referenced_bytes, used);
1096	zdb_nicenum(ds->ds_compressed_bytes, compressed);
1097	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
1098	zdb_nicenum(ds->ds_unique_bytes, unique);
1099	sprintf_blkptr(blkbuf, &ds->ds_bp);
1100
1101	(void) printf("\t\tdir_obj = %llu\n",
1102	    (u_longlong_t)ds->ds_dir_obj);
1103	(void) printf("\t\tprev_snap_obj = %llu\n",
1104	    (u_longlong_t)ds->ds_prev_snap_obj);
1105	(void) printf("\t\tprev_snap_txg = %llu\n",
1106	    (u_longlong_t)ds->ds_prev_snap_txg);
1107	(void) printf("\t\tnext_snap_obj = %llu\n",
1108	    (u_longlong_t)ds->ds_next_snap_obj);
1109	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1110	    (u_longlong_t)ds->ds_snapnames_zapobj);
1111	(void) printf("\t\tnum_children = %llu\n",
1112	    (u_longlong_t)ds->ds_num_children);
1113	(void) printf("\t\tuserrefs_obj = %llu\n",
1114	    (u_longlong_t)ds->ds_userrefs_obj);
1115	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1116	(void) printf("\t\tcreation_txg = %llu\n",
1117	    (u_longlong_t)ds->ds_creation_txg);
1118	(void) printf("\t\tdeadlist_obj = %llu\n",
1119	    (u_longlong_t)ds->ds_deadlist_obj);
1120	(void) printf("\t\tused_bytes = %s\n", used);
1121	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1122	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1123	(void) printf("\t\tunique = %s\n", unique);
1124	(void) printf("\t\tfsid_guid = %llu\n",
1125	    (u_longlong_t)ds->ds_fsid_guid);
1126	(void) printf("\t\tguid = %llu\n",
1127	    (u_longlong_t)ds->ds_guid);
1128	(void) printf("\t\tflags = %llx\n",
1129	    (u_longlong_t)ds->ds_flags);
1130	(void) printf("\t\tnext_clones_obj = %llu\n",
1131	    (u_longlong_t)ds->ds_next_clones_obj);
1132	(void) printf("\t\tprops_obj = %llu\n",
1133	    (u_longlong_t)ds->ds_props_obj);
1134	(void) printf("\t\tbp = %s\n", blkbuf);
1135}
1136
1137/* ARGSUSED */
1138static int
1139dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1140{
1141	char blkbuf[BP_SPRINTF_LEN];
1142
1143	if (bp->blk_birth != 0) {
1144		sprintf_blkptr(blkbuf, bp);
1145		(void) printf("\t%s\n", blkbuf);
1146	}
1147	return (0);
1148}
1149
1150static void
1151dump_bptree(objset_t *os, uint64_t obj, char *name)
1152{
1153	char bytes[32];
1154	bptree_phys_t *bt;
1155	dmu_buf_t *db;
1156
1157	if (dump_opt['d'] < 3)
1158		return;
1159
1160	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1161	bt = db->db_data;
1162	zdb_nicenum(bt->bt_bytes, bytes);
1163	(void) printf("\n    %s: %llu datasets, %s\n",
1164	    name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1165	dmu_buf_rele(db, FTAG);
1166
1167	if (dump_opt['d'] < 5)
1168		return;
1169
1170	(void) printf("\n");
1171
1172	(void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1173}
1174
1175/* ARGSUSED */
1176static int
1177dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1178{
1179	char blkbuf[BP_SPRINTF_LEN];
1180
1181	ASSERT(bp->blk_birth != 0);
1182	sprintf_blkptr_compact(blkbuf, bp);
1183	(void) printf("\t%s\n", blkbuf);
1184	return (0);
1185}
1186
1187static void
1188dump_bpobj(bpobj_t *bpo, char *name)
1189{
1190	char bytes[32];
1191	char comp[32];
1192	char uncomp[32];
1193
1194	if (dump_opt['d'] < 3)
1195		return;
1196
1197	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
1198	if (bpo->bpo_havesubobj) {
1199		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
1200		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
1201		(void) printf("\n    %s: %llu local blkptrs, %llu subobjs, "
1202		    "%s (%s/%s comp)\n",
1203		    name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1204		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1205		    bytes, comp, uncomp);
1206	} else {
1207		(void) printf("\n    %s: %llu blkptrs, %s\n",
1208		    name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, bytes);
1209	}
1210
1211	if (dump_opt['d'] < 5)
1212		return;
1213
1214	(void) printf("\n");
1215
1216	(void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1217}
1218
1219static void
1220dump_deadlist(dsl_deadlist_t *dl)
1221{
1222	dsl_deadlist_entry_t *dle;
1223	char bytes[32];
1224	char comp[32];
1225	char uncomp[32];
1226
1227	if (dump_opt['d'] < 3)
1228		return;
1229
1230	zdb_nicenum(dl->dl_phys->dl_used, bytes);
1231	zdb_nicenum(dl->dl_phys->dl_comp, comp);
1232	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
1233	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
1234	    bytes, comp, uncomp);
1235
1236	if (dump_opt['d'] < 4)
1237		return;
1238
1239	(void) printf("\n");
1240
1241	for (dle = avl_first(&dl->dl_tree); dle;
1242	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
1243		(void) printf("      mintxg %llu -> obj %llu\n",
1244		    (longlong_t)dle->dle_mintxg,
1245		    (longlong_t)dle->dle_bpobj.bpo_object);
1246
1247		if (dump_opt['d'] >= 5)
1248			dump_bpobj(&dle->dle_bpobj, "");
1249	}
1250}
1251
/*
 * Per-objset state cached while dumping znodes.
 *
 * idx_tree and domain_tree hold the FUID domain table; dump_uidgid()
 * loads them on first need and sets fuid_table_loaded, and
 * fuid_table_destroy() tears them down again.
 *
 * sa_attr_table is filled in by sa_setup() in dump_znode(), which then
 * sets sa_loaded; dump_one_dir() clears sa_loaded after each dataset.
 */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
static boolean_t sa_loaded;
sa_attr_type_t *sa_attr_table;
1257
1258static void
1259fuid_table_destroy()
1260{
1261	if (fuid_table_loaded) {
1262		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1263		fuid_table_loaded = B_FALSE;
1264	}
1265}
1266
1267/*
1268 * print uid or gid information.
1269 * For normal POSIX id just the id is printed in decimal format.
1270 * For CIFS files with FUID the fuid is printed in hex followed by
1271 * the doman-rid string.
1272 */
1273static void
1274print_idstr(uint64_t id, const char *id_type)
1275{
1276	if (FUID_INDEX(id)) {
1277		char *domain;
1278
1279		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1280		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1281		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1282	} else {
1283		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1284	}
1285
1286}
1287
1288static void
1289dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1290{
1291	uint32_t uid_idx, gid_idx;
1292
1293	uid_idx = FUID_INDEX(uid);
1294	gid_idx = FUID_INDEX(gid);
1295
1296	/* Load domain table, if not already loaded */
1297	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1298		uint64_t fuid_obj;
1299
1300		/* first find the fuid object.  It lives in the master node */
1301		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1302		    8, 1, &fuid_obj) == 0);
1303		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1304		(void) zfs_fuid_table_load(os, fuid_obj,
1305		    &idx_tree, &domain_tree);
1306		fuid_table_loaded = B_TRUE;
1307	}
1308
1309	print_idstr(uid, "uid");
1310	print_idstr(gid, "gid");
1311}
1312
1313/*ARGSUSED*/
1314static void
1315dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1316{
1317	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1318	sa_handle_t *hdl;
1319	uint64_t xattr, rdev, gen;
1320	uint64_t uid, gid, mode, fsize, parent, links;
1321	uint64_t pflags;
1322	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1323	time_t z_crtime, z_atime, z_mtime, z_ctime;
1324	sa_bulk_attr_t bulk[12];
1325	int idx = 0;
1326	int error;
1327
1328	if (!sa_loaded) {
1329		uint64_t sa_attrs = 0;
1330		uint64_t version;
1331
1332		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1333		    8, 1, &version) == 0);
1334		if (version >= ZPL_VERSION_SA) {
1335			VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1336			    8, 1, &sa_attrs) == 0);
1337		}
1338		if ((error = sa_setup(os, sa_attrs, zfs_attr_table,
1339		    ZPL_END, &sa_attr_table)) != 0) {
1340			(void) printf("sa_setup failed errno %d, can't "
1341			    "display znode contents\n", error);
1342			return;
1343		}
1344		sa_loaded = B_TRUE;
1345	}
1346
1347	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1348		(void) printf("Failed to get handle for SA znode\n");
1349		return;
1350	}
1351
1352	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1353	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1354	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1355	    &links, 8);
1356	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1357	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1358	    &mode, 8);
1359	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1360	    NULL, &parent, 8);
1361	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1362	    &fsize, 8);
1363	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1364	    acctm, 16);
1365	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1366	    modtm, 16);
1367	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1368	    crtm, 16);
1369	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1370	    chgtm, 16);
1371	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
1372	    &pflags, 8);
1373
1374	if (sa_bulk_lookup(hdl, bulk, idx)) {
1375		(void) sa_handle_destroy(hdl);
1376		return;
1377	}
1378
1379	error = zfs_obj_to_path(os, object, path, sizeof (path));
1380	if (error != 0) {
1381		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1382		    (u_longlong_t)object);
1383	}
1384	if (dump_opt['d'] < 3) {
1385		(void) printf("\t%s\n", path);
1386		(void) sa_handle_destroy(hdl);
1387		return;
1388	}
1389
1390	z_crtime = (time_t)crtm[0];
1391	z_atime = (time_t)acctm[0];
1392	z_mtime = (time_t)modtm[0];
1393	z_ctime = (time_t)chgtm[0];
1394
1395	(void) printf("\tpath	%s\n", path);
1396	dump_uidgid(os, uid, gid);
1397	(void) printf("\tatime	%s", ctime(&z_atime));
1398	(void) printf("\tmtime	%s", ctime(&z_mtime));
1399	(void) printf("\tctime	%s", ctime(&z_ctime));
1400	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1401	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
1402	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
1403	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
1404	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
1405	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
1406	(void) printf("\tpflags	%llx\n", (u_longlong_t)pflags);
1407	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1408	    sizeof (uint64_t)) == 0)
1409		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
1410	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1411	    sizeof (uint64_t)) == 0)
1412		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
1413	sa_handle_destroy(hdl);
1414}
1415
/*
 * Object viewer for ACL objects.  Intentionally empty: it exists so the
 * ACL slots of object_viewer[] have a function to call, and prints
 * nothing.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1421
/*
 * Object viewer for DMU objset objects.  Intentionally empty: it exists
 * so the objset slot of object_viewer[] has a function to call, and
 * prints nothing.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1427
/*
 * Table of viewer functions, one per DMU object type, in object-type
 * order.  dump_object() indexes it with ZDB_OT_TYPE() of both an
 * object's bonus type and its own type.  The extra final slot
 * (index DMU_OT_NUMTYPES) holds dump_unknown and must stay last.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
	dump_none,		/* unallocated			*/
	dump_zap,		/* object directory		*/
	dump_uint64,		/* object array			*/
	dump_none,		/* packed nvlist		*/
	dump_packed_nvlist,	/* packed nvlist size		*/
	dump_none,		/* bplist			*/
	dump_none,		/* bplist header		*/
	dump_none,		/* SPA space map header		*/
	dump_none,		/* SPA space map		*/
	dump_none,		/* ZIL intent log		*/
	dump_dnode,		/* DMU dnode			*/
	dump_dmu_objset,	/* DMU objset			*/
	dump_dsl_dir,		/* DSL directory		*/
	dump_zap,		/* DSL directory child map	*/
	dump_zap,		/* DSL dataset snap map		*/
	dump_zap,		/* DSL props			*/
	dump_dsl_dataset,	/* DSL dataset			*/
	dump_znode,		/* ZFS znode			*/
	dump_acl,		/* ZFS V0 ACL			*/
	dump_uint8,		/* ZFS plain file		*/
	dump_zpldir,		/* ZFS directory		*/
	dump_zap,		/* ZFS master node		*/
	dump_zap,		/* ZFS delete queue		*/
	dump_uint8,		/* zvol object			*/
	dump_zap,		/* zvol prop			*/
	dump_uint8,		/* other uint8[]		*/
	dump_uint64,		/* other uint64[]		*/
	dump_zap,		/* other ZAP			*/
	dump_zap,		/* persistent error log		*/
	dump_uint8,		/* SPA history			*/
	dump_uint64,		/* SPA history offsets		*/
	dump_zap,		/* Pool properties		*/
	dump_zap,		/* DSL permissions		*/
	dump_acl,		/* ZFS ACL			*/
	dump_uint8,		/* ZFS SYSACL			*/
	dump_none,		/* FUID nvlist			*/
	dump_packed_nvlist,	/* FUID nvlist size		*/
	dump_zap,		/* DSL dataset next clones	*/
	dump_zap,		/* DSL scrub queue		*/
	dump_zap,		/* ZFS user/group used		*/
	dump_zap,		/* ZFS user/group quota		*/
	dump_zap,		/* snapshot refcount tags	*/
	dump_ddt_zap,		/* DDT ZAP object		*/
	dump_zap,		/* DDT statistics		*/
	dump_znode,		/* SA object			*/
	dump_zap,		/* SA Master Node		*/
	dump_sa_attrs,		/* SA attribute registration	*/
	dump_sa_layouts,	/* SA attribute layouts		*/
	dump_zap,		/* DSL scrub translations	*/
	dump_none,		/* fake dedup BP		*/
	dump_zap,		/* deadlist			*/
	dump_none,		/* deadlist hdr			*/
	dump_zap,		/* dsl clones			*/
	dump_none,		/* bpobj subobjs		*/
	dump_unknown,		/* Unknown type, must be last	*/
};
1485
1486static void
1487dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1488{
1489	dmu_buf_t *db = NULL;
1490	dmu_object_info_t doi;
1491	dnode_t *dn;
1492	void *bonus = NULL;
1493	size_t bsize = 0;
1494	char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
1495	char bonus_size[32];
1496	char aux[50];
1497	int error;
1498
1499	if (*print_header) {
1500		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1501		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1502		    "%full", "type");
1503		*print_header = 0;
1504	}
1505
1506	if (object == 0) {
1507		dn = DMU_META_DNODE(os);
1508	} else {
1509		error = dmu_bonus_hold(os, object, FTAG, &db);
1510		if (error)
1511			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1512			    object, error);
1513		bonus = db->db_data;
1514		bsize = db->db_size;
1515		dn = DB_DNODE((dmu_buf_impl_t *)db);
1516	}
1517	dmu_object_info_from_dnode(dn, &doi);
1518
1519	zdb_nicenum(doi.doi_metadata_block_size, iblk);
1520	zdb_nicenum(doi.doi_data_block_size, dblk);
1521	zdb_nicenum(doi.doi_max_offset, lsize);
1522	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
1523	zdb_nicenum(doi.doi_bonus_size, bonus_size);
1524	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1525	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1526	    doi.doi_max_offset);
1527
1528	aux[0] = '\0';
1529
1530	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1531		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1532		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1533	}
1534
1535	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1536		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1537		    ZDB_COMPRESS_NAME(doi.doi_compress));
1538	}
1539
1540	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1541	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1542	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1543
1544	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1545		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1546		    "", "", "", "", "", bonus_size, "bonus",
1547		    ZDB_OT_NAME(doi.doi_bonus_type));
1548	}
1549
1550	if (verbosity >= 4) {
1551		(void) printf("\tdnode flags: %s%s%s\n",
1552		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1553		    "USED_BYTES " : "",
1554		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1555		    "USERUSED_ACCOUNTED " : "",
1556		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
1557		    "SPILL_BLKPTR" : "");
1558		(void) printf("\tdnode maxblkid: %llu\n",
1559		    (longlong_t)dn->dn_phys->dn_maxblkid);
1560
1561		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1562		    bonus, bsize);
1563		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1564		*print_header = 1;
1565	}
1566
1567	if (verbosity >= 5)
1568		dump_indirect(dn);
1569
1570	if (verbosity >= 5) {
1571		/*
1572		 * Report the list of segments that comprise the object.
1573		 */
1574		uint64_t start = 0;
1575		uint64_t end;
1576		uint64_t blkfill = 1;
1577		int minlvl = 1;
1578
1579		if (dn->dn_type == DMU_OT_DNODE) {
1580			minlvl = 0;
1581			blkfill = DNODES_PER_BLOCK;
1582		}
1583
1584		for (;;) {
1585			char segsize[32];
1586			error = dnode_next_offset(dn,
1587			    0, &start, minlvl, blkfill, 0);
1588			if (error)
1589				break;
1590			end = start;
1591			error = dnode_next_offset(dn,
1592			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1593			zdb_nicenum(end - start, segsize);
1594			(void) printf("\t\tsegment [%016llx, %016llx)"
1595			    " size %5s\n", (u_longlong_t)start,
1596			    (u_longlong_t)end, segsize);
1597			if (error)
1598				break;
1599			start = end;
1600		}
1601	}
1602
1603	if (db != NULL)
1604		dmu_buf_rele(db, FTAG);
1605}
1606
1607static char *objset_types[DMU_OST_NUMTYPES] = {
1608	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1609
1610static void
1611dump_dir(objset_t *os)
1612{
1613	dmu_objset_stats_t dds;
1614	uint64_t object, object_count;
1615	uint64_t refdbytes, usedobjs, scratch;
1616	char numbuf[32];
1617	char blkbuf[BP_SPRINTF_LEN + 20];
1618	char osname[MAXNAMELEN];
1619	char *type = "UNKNOWN";
1620	int verbosity = dump_opt['d'];
1621	int print_header = 1;
1622	int i, error;
1623
1624	dmu_objset_fast_stat(os, &dds);
1625
1626	if (dds.dds_type < DMU_OST_NUMTYPES)
1627		type = objset_types[dds.dds_type];
1628
1629	if (dds.dds_type == DMU_OST_META) {
1630		dds.dds_creation_txg = TXG_INITIAL;
1631		usedobjs = os->os_rootbp->blk_fill;
1632		refdbytes = os->os_spa->spa_dsl_pool->
1633		    dp_mos_dir->dd_phys->dd_used_bytes;
1634	} else {
1635		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1636	}
1637
1638	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1639
1640	zdb_nicenum(refdbytes, numbuf);
1641
1642	if (verbosity >= 4) {
1643		(void) sprintf(blkbuf, ", rootbp ");
1644		(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
1645	} else {
1646		blkbuf[0] = '\0';
1647	}
1648
1649	dmu_objset_name(os, osname);
1650
1651	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1652	    "%s, %llu objects%s\n",
1653	    osname, type, (u_longlong_t)dmu_objset_id(os),
1654	    (u_longlong_t)dds.dds_creation_txg,
1655	    numbuf, (u_longlong_t)usedobjs, blkbuf);
1656
1657	if (zopt_objects != 0) {
1658		for (i = 0; i < zopt_objects; i++)
1659			dump_object(os, zopt_object[i], verbosity,
1660			    &print_header);
1661		(void) printf("\n");
1662		return;
1663	}
1664
1665	if (dump_opt['i'] != 0 || verbosity >= 2)
1666		dump_intent_log(dmu_objset_zil(os));
1667
1668	if (dmu_objset_ds(os) != NULL)
1669		dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
1670
1671	if (verbosity < 2)
1672		return;
1673
1674	if (os->os_rootbp->blk_birth == 0)
1675		return;
1676
1677	dump_object(os, 0, verbosity, &print_header);
1678	object_count = 0;
1679	if (DMU_USERUSED_DNODE(os) != NULL &&
1680	    DMU_USERUSED_DNODE(os)->dn_type != 0) {
1681		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1682		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1683	}
1684
1685	object = 0;
1686	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1687		dump_object(os, object, verbosity, &print_header);
1688		object_count++;
1689	}
1690
1691	ASSERT3U(object_count, ==, usedobjs);
1692
1693	(void) printf("\n");
1694
1695	if (error != ESRCH) {
1696		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1697		abort();
1698	}
1699}
1700
1701static void
1702dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
1703{
1704	time_t timestamp = ub->ub_timestamp;
1705
1706	(void) printf(header ? header : "");
1707	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1708	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1709	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1710	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1711	(void) printf("\ttimestamp = %llu UTC = %s",
1712	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1713	if (dump_opt['u'] >= 3) {
1714		char blkbuf[BP_SPRINTF_LEN];
1715		sprintf_blkptr(blkbuf, &ub->ub_rootbp);
1716		(void) printf("\trootbp = %s\n", blkbuf);
1717	}
1718	(void) printf(footer ? footer : "");
1719}
1720
1721static void
1722dump_config(spa_t *spa)
1723{
1724	dmu_buf_t *db;
1725	size_t nvsize = 0;
1726	int error = 0;
1727
1728
1729	error = dmu_bonus_hold(spa->spa_meta_objset,
1730	    spa->spa_config_object, FTAG, &db);
1731
1732	if (error == 0) {
1733		nvsize = *(uint64_t *)db->db_data;
1734		dmu_buf_rele(db, FTAG);
1735
1736		(void) printf("\nMOS Configuration:\n");
1737		dump_packed_nvlist(spa->spa_meta_objset,
1738		    spa->spa_config_object, (void *)&nvsize, 1);
1739	} else {
1740		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1741		    (u_longlong_t)spa->spa_config_object, error);
1742	}
1743}
1744
1745static void
1746dump_cachefile(const char *cachefile)
1747{
1748	int fd;
1749	struct stat64 statbuf;
1750	char *buf;
1751	nvlist_t *config;
1752
1753	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1754		(void) printf("cannot open '%s': %s\n", cachefile,
1755		    strerror(errno));
1756		exit(1);
1757	}
1758
1759	if (fstat64(fd, &statbuf) != 0) {
1760		(void) printf("failed to stat '%s': %s\n", cachefile,
1761		    strerror(errno));
1762		exit(1);
1763	}
1764
1765	if ((buf = malloc(statbuf.st_size)) == NULL) {
1766		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
1767		    (u_longlong_t)statbuf.st_size);
1768		exit(1);
1769	}
1770
1771	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1772		(void) fprintf(stderr, "failed to read %llu bytes\n",
1773		    (u_longlong_t)statbuf.st_size);
1774		exit(1);
1775	}
1776
1777	(void) close(fd);
1778
1779	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1780		(void) fprintf(stderr, "failed to unpack nvlist\n");
1781		exit(1);
1782	}
1783
1784	free(buf);
1785
1786	dump_nvlist(config, 0);
1787
1788	nvlist_free(config);
1789}
1790
1791#define	ZDB_MAX_UB_HEADER_SIZE 32
1792
1793static void
1794dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
1795{
1796	vdev_t vd;
1797	vdev_t *vdp = &vd;
1798	char header[ZDB_MAX_UB_HEADER_SIZE];
1799
1800	vd.vdev_ashift = ashift;
1801	vdp->vdev_top = vdp;
1802
1803	for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
1804		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
1805		uberblock_t *ub = (void *)((char *)lbl + uoff);
1806
1807		if (uberblock_verify(ub))
1808			continue;
1809		(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
1810		    "Uberblock[%d]\n", i);
1811		dump_uberblock(ub, header, "");
1812	}
1813}
1814
1815static void
1816dump_label(const char *dev)
1817{
1818	int fd;
1819	vdev_label_t label;
1820	char *path, *buf = label.vl_vdev_phys.vp_nvlist;
1821	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1822	struct stat64 statbuf;
1823	uint64_t psize, ashift;
1824	int len = strlen(dev) + 1;
1825
1826	if (strncmp(dev, "/dev/dsk/", 9) == 0) {
1827		len++;
1828		path = malloc(len);
1829		(void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
1830	} else {
1831		path = strdup(dev);
1832	}
1833
1834	if ((fd = open64(path, O_RDONLY)) < 0) {
1835		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
1836		free(path);
1837		exit(1);
1838	}
1839
1840	if (fstat64(fd, &statbuf) != 0) {
1841		(void) printf("failed to stat '%s': %s\n", path,
1842		    strerror(errno));
1843		free(path);
1844		(void) close(fd);
1845		exit(1);
1846	}
1847
1848	if (S_ISBLK(statbuf.st_mode)) {
1849		(void) printf("cannot use '%s': character device required\n",
1850		    path);
1851		free(path);
1852		(void) close(fd);
1853		exit(1);
1854	}
1855
1856	psize = statbuf.st_size;
1857	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1858
1859	for (int l = 0; l < VDEV_LABELS; l++) {
1860		nvlist_t *config = NULL;
1861
1862		(void) printf("--------------------------------------------\n");
1863		(void) printf("LABEL %d\n", l);
1864		(void) printf("--------------------------------------------\n");
1865
1866		if (pread64(fd, &label, sizeof (label),
1867		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1868			(void) printf("failed to read label %d\n", l);
1869			continue;
1870		}
1871
1872		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1873			(void) printf("failed to unpack label %d\n", l);
1874			ashift = SPA_MINBLOCKSHIFT;
1875		} else {
1876			nvlist_t *vdev_tree = NULL;
1877
1878			dump_nvlist(config, 4);
1879			if ((nvlist_lookup_nvlist(config,
1880			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
1881			    (nvlist_lookup_uint64(vdev_tree,
1882			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
1883				ashift = SPA_MINBLOCKSHIFT;
1884			nvlist_free(config);
1885		}
1886		if (dump_opt['u'])
1887			dump_label_uberblocks(&label, ashift);
1888	}
1889
1890	free(path);
1891	(void) close(fd);
1892}
1893
1894/*ARGSUSED*/
1895static int
1896dump_one_dir(const char *dsname, void *arg)
1897{
1898	int error;
1899	objset_t *os;
1900
1901	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
1902	if (error) {
1903		(void) printf("Could not open %s, error %d\n", dsname, error);
1904		return (0);
1905	}
1906	dump_dir(os);
1907	dmu_objset_disown(os, FTAG);
1908	fuid_table_destroy();
1909	sa_loaded = B_FALSE;
1910	return (0);
1911}
1912
/*
 * Block statistics.
 *
 * One accumulator cell: zdb_count_block() adds each counted block's
 * allocated, logical and physical sizes and bumps the block count.
 */
typedef struct zdb_blkstats {
	uint64_t	zb_asize;	/* sum of BP_GET_ASIZE() */
	uint64_t	zb_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zb_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zb_count;	/* number of blocks counted */
} zdb_blkstats_t;
1922
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 *
 * These are numbered immediately after the real DMU object types.
 * ZDB_OT_OTHER is what zdb_blkptr_cb() uses for block pointers whose
 * type has DMU_OT_NEWTYPE set; ZDB_OT_TOTAL is the grand-total slot.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_OTHER	(DMU_OT_NUMTYPES + 2)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 3)

/*
 * Display names for the extended types above, in the same order.
 * NOTE(review): presumably indexed by (type - DMU_OT_NUMTYPES); the
 * lookup is not in this part of the file -- confirm.
 */
static char *zdb_ot_extname[] = {
	"deferred free",
	"dedup ditto",
	"other",
	"Total",
};
1937
/* Level index of the "all levels" row of zcb_type[][]. */
#define	ZB_TOTAL	DN_MAX_LEVELS

/*
 * State shared by the block-traversal callbacks.
 */
typedef struct zdb_cb {
	/* per-[level][type] totals, filled in by zdb_count_block() */
	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
	/* accumulated by zdb_ddt_leak_init() for non-ditto DDT entries */
	uint64_t	zcb_dedup_asize;
	uint64_t	zcb_dedup_blocks;
	/* read-error counts, indexed by the error zio_wait() returned */
	uint64_t	zcb_errors[256];
	/* reset to 0 by zdb_blkptr_cb() on every block it visits */
	int		zcb_readfails;
	/* set to 1 once zdb_blkptr_cb() sees a read error */
	int		zcb_haderrors;
	/* pool being traversed; set by zdb_leak_init() */
	spa_t		*zcb_spa;
} zdb_cb_t;
1949
1950static void
1951zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
1952    dmu_object_type_t type)
1953{
1954	uint64_t refcnt = 0;
1955
1956	ASSERT(type < ZDB_OT_TOTAL);
1957
1958	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
1959		return;
1960
1961	for (int i = 0; i < 4; i++) {
1962		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1963		int t = (i & 1) ? type : ZDB_OT_TOTAL;
1964		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1965
1966		zb->zb_asize += BP_GET_ASIZE(bp);
1967		zb->zb_lsize += BP_GET_LSIZE(bp);
1968		zb->zb_psize += BP_GET_PSIZE(bp);
1969		zb->zb_count++;
1970	}
1971
1972	if (dump_opt['L'])
1973		return;
1974
1975	if (BP_GET_DEDUP(bp)) {
1976		ddt_t *ddt;
1977		ddt_entry_t *dde;
1978
1979		ddt = ddt_select(zcb->zcb_spa, bp);
1980		ddt_enter(ddt);
1981		dde = ddt_lookup(ddt, bp, B_FALSE);
1982
1983		if (dde == NULL) {
1984			refcnt = 0;
1985		} else {
1986			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
1987			ddt_phys_decref(ddp);
1988			refcnt = ddp->ddp_refcnt;
1989			if (ddt_phys_total_refcnt(dde) == 0)
1990				ddt_remove(ddt, dde);
1991		}
1992		ddt_exit(ddt);
1993	}
1994
1995	VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
1996	    refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
1997	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
1998}
1999
2000/* ARGSUSED */
2001static int
2002zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
2003    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2004{
2005	zdb_cb_t *zcb = arg;
2006	char blkbuf[BP_SPRINTF_LEN];
2007	dmu_object_type_t type;
2008	boolean_t is_metadata;
2009
2010	if (bp == NULL)
2011		return (0);
2012
2013	type = BP_GET_TYPE(bp);
2014
2015	zdb_count_block(zcb, zilog, bp,
2016	    (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
2017
2018	is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
2019
2020	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
2021		int ioerr;
2022		size_t size = BP_GET_PSIZE(bp);
2023		void *data = malloc(size);
2024		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
2025
2026		/* If it's an intent log block, failure is expected. */
2027		if (zb->zb_level == ZB_ZIL_LEVEL)
2028			flags |= ZIO_FLAG_SPECULATIVE;
2029
2030		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
2031		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
2032
2033		free(data);
2034
2035		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
2036			zcb->zcb_haderrors = 1;
2037			zcb->zcb_errors[ioerr]++;
2038
2039			if (dump_opt['b'] >= 2)
2040				sprintf_blkptr(blkbuf, bp);
2041			else
2042				blkbuf[0] = '\0';
2043
2044			(void) printf("zdb_blkptr_cb: "
2045			    "Got error %d reading "
2046			    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2047			    ioerr,
2048			    (u_longlong_t)zb->zb_objset,
2049			    (u_longlong_t)zb->zb_object,
2050			    (u_longlong_t)zb->zb_level,
2051			    (u_longlong_t)zb->zb_blkid,
2052			    blkbuf);
2053		}
2054	}
2055
2056	zcb->zcb_readfails = 0;
2057
2058	if (dump_opt['b'] >= 4) {
2059		sprintf_blkptr(blkbuf, bp);
2060		(void) printf("objset %llu object %llu "
2061		    "level %lld offset 0x%llx %s\n",
2062		    (u_longlong_t)zb->zb_objset,
2063		    (u_longlong_t)zb->zb_object,
2064		    (longlong_t)zb->zb_level,
2065		    (u_longlong_t)blkid2offset(dnp, bp, zb),
2066		    blkbuf);
2067	}
2068
2069	return (0);
2070}
2071
2072static void
2073zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
2074{
2075	vdev_t *vd = sm->sm_ppd;
2076
2077	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
2078	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
2079}
2080
/*
 * Load operation for zdb_space_map_ops.  Intentionally empty: zdb has
 * nothing to do when one of its space maps is loaded.
 */
/* ARGSUSED */
static void
zdb_space_map_load(space_map_t *sm)
{
}
2086
2087static void
2088zdb_space_map_unload(space_map_t *sm)
2089{
2090	space_map_vacate(sm, zdb_leak, sm);
2091}
2092
/*
 * Claim operation for zdb_space_map_ops.  Intentionally empty: zdb
 * keeps no extra state for claimed segments.
 */
/* ARGSUSED */
static void
zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
}
2098
/*
 * Space map operations vector that zdb_leak_init() passes to
 * space_map_load() for every metaslab.  Only load, unload and claim
 * are provided; the remaining operations are left NULL.
 */
static space_map_ops_t zdb_space_map_ops = {
	zdb_space_map_load,
	zdb_space_map_unload,
	NULL,	/* alloc */
	zdb_space_map_claim,
	NULL,	/* free */
	NULL	/* maxsize */
};
2107
2108static void
2109zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2110{
2111	ddt_bookmark_t ddb = { 0 };
2112	ddt_entry_t dde;
2113	int error;
2114
2115	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2116		blkptr_t blk;
2117		ddt_phys_t *ddp = dde.dde_phys;
2118
2119		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2120			return;
2121
2122		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2123
2124		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2125			if (ddp->ddp_phys_birth == 0)
2126				continue;
2127			ddt_bp_create(ddb.ddb_checksum,
2128			    &dde.dde_key, ddp, &blk);
2129			if (p == DDT_PHYS_DITTO) {
2130				zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
2131			} else {
2132				zcb->zcb_dedup_asize +=
2133				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2134				zcb->zcb_dedup_blocks++;
2135			}
2136		}
2137		if (!dump_opt['L']) {
2138			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2139			ddt_enter(ddt);
2140			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2141			ddt_exit(ddt);
2142		}
2143	}
2144
2145	ASSERT(error == ENOENT);
2146}
2147
2148static void
2149zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
2150{
2151	zcb->zcb_spa = spa;
2152
2153	if (!dump_opt['L']) {
2154		vdev_t *rvd = spa->spa_root_vdev;
2155		for (int c = 0; c < rvd->vdev_children; c++) {
2156			vdev_t *vd = rvd->vdev_child[c];
2157			for (int m = 0; m < vd->vdev_ms_count; m++) {
2158				metaslab_t *msp = vd->vdev_ms[m];
2159				mutex_enter(&msp->ms_lock);
2160				space_map_unload(&msp->ms_map);
2161				VERIFY(space_map_load(&msp->ms_map,
2162				    &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
2163				    spa->spa_meta_objset) == 0);
2164				msp->ms_map.sm_ppd = vd;
2165				mutex_exit(&msp->ms_lock);
2166			}
2167		}
2168	}
2169
2170	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2171
2172	zdb_ddt_leak_init(spa, zcb);
2173
2174	spa_config_exit(spa, SCL_CONFIG, FTAG);
2175}
2176
2177static void
2178zdb_leak_fini(spa_t *spa)
2179{
2180	if (!dump_opt['L']) {
2181		vdev_t *rvd = spa->spa_root_vdev;
2182		for (int c = 0; c < rvd->vdev_children; c++) {
2183			vdev_t *vd = rvd->vdev_child[c];
2184			for (int m = 0; m < vd->vdev_ms_count; m++) {
2185				metaslab_t *msp = vd->vdev_ms[m];
2186				mutex_enter(&msp->ms_lock);
2187				space_map_unload(&msp->ms_map);
2188				mutex_exit(&msp->ms_lock);
2189			}
2190		}
2191	}
2192}
2193
2194/* ARGSUSED */
2195static int
2196count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2197{
2198	zdb_cb_t *zcb = arg;
2199
2200	if (dump_opt['b'] >= 4) {
2201		char blkbuf[BP_SPRINTF_LEN];
2202		sprintf_blkptr(blkbuf, bp);
2203		(void) printf("[%s] %s\n",
2204		    "deferred free", blkbuf);
2205	}
2206	zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
2207	return (0);
2208}
2209
2210static int
2211dump_block_stats(spa_t *spa)
2212{
2213	zdb_cb_t zcb = { 0 };
2214	zdb_blkstats_t *zb, *tzb;
2215	uint64_t norm_alloc, norm_space, total_alloc, total_found;
2216	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
2217	int leaks = 0;
2218
2219	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
2220	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
2221	    (dump_opt['c'] == 1) ? "metadata " : "",
2222	    dump_opt['c'] ? "checksums " : "",
2223	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
2224	    !dump_opt['L'] ? "nothing leaked " : "");
2225
2226	/*
2227	 * Load all space maps as SM_ALLOC maps, then traverse the pool
2228	 * claiming each block we discover.  If the pool is perfectly
2229	 * consistent, the space maps will be empty when we're done.
2230	 * Anything left over is a leak; any block we can't claim (because
2231	 * it's not part of any space map) is a double allocation,
2232	 * reference to a freed block, or an unclaimed log block.
2233	 */
2234	zdb_leak_init(spa, &zcb);
2235
2236	/*
2237	 * If there's a deferred-free bplist, process that first.
2238	 */
2239	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
2240	    count_block_cb, &zcb, NULL);
2241	(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
2242	    count_block_cb, &zcb, NULL);
2243	if (spa_feature_is_active(spa,
2244	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
2245		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
2246		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
2247		    &zcb, NULL));
2248	}
2249
2250	if (dump_opt['c'] > 1)
2251		flags |= TRAVERSE_PREFETCH_DATA;
2252
2253	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2254
2255	if (zcb.zcb_haderrors) {
2256		(void) printf("\nError counts:\n\n");
2257		(void) printf("\t%5s  %s\n", "errno", "count");
2258		for (int e = 0; e < 256; e++) {
2259			if (zcb.zcb_errors[e] != 0) {
2260				(void) printf("\t%5d  %llu\n",
2261				    e, (u_longlong_t)zcb.zcb_errors[e]);
2262			}
2263		}
2264	}
2265
2266	/*
2267	 * Report any leaked segments.
2268	 */
2269	zdb_leak_fini(spa);
2270
2271	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2272
2273	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2274	norm_space = metaslab_class_get_space(spa_normal_class(spa));
2275
2276	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2277	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2278
2279	if (total_found == total_alloc) {
2280		if (!dump_opt['L'])
2281			(void) printf("\n\tNo leaks (block sum matches space"
2282			    " maps exactly)\n");
2283	} else {
2284		(void) printf("block traversal size %llu != alloc %llu "
2285		    "(%s %lld)\n",
2286		    (u_longlong_t)total_found,
2287		    (u_longlong_t)total_alloc,
2288		    (dump_opt['L']) ? "unreachable" : "leaked",
2289		    (longlong_t)(total_alloc - total_found));
2290		leaks = 1;
2291	}
2292
2293	if (tzb->zb_count == 0)
2294		return (2);
2295
2296	(void) printf("\n");
2297	(void) printf("\tbp count:      %10llu\n",
2298	    (u_longlong_t)tzb->zb_count);
2299	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
2300	    (u_longlong_t)tzb->zb_lsize,
2301	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
2302	(void) printf("\tbp physical:   %10llu      avg:"
2303	    " %6llu     compression: %6.2f\n",
2304	    (u_longlong_t)tzb->zb_psize,
2305	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
2306	    (double)tzb->zb_lsize / tzb->zb_psize);
2307	(void) printf("\tbp allocated:  %10llu      avg:"
2308	    " %6llu     compression: %6.2f\n",
2309	    (u_longlong_t)tzb->zb_asize,
2310	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
2311	    (double)tzb->zb_lsize / tzb->zb_asize);
2312	(void) printf("\tbp deduped:    %10llu    ref>1:"
2313	    " %6llu   deduplication: %6.2f\n",
2314	    (u_longlong_t)zcb.zcb_dedup_asize,
2315	    (u_longlong_t)zcb.zcb_dedup_blocks,
2316	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
2317	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
2318	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
2319
2320	if (dump_opt['b'] >= 2) {
2321		int l, t, level;
2322		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2323		    "\t  avg\t comp\t%%Total\tType\n");
2324
2325		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
2326			char csize[32], lsize[32], psize[32], asize[32];
2327			char avg[32];
2328			char *typename;
2329
2330			if (t < DMU_OT_NUMTYPES)
2331				typename = dmu_ot[t].ot_name;
2332			else
2333				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
2334
2335			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
2336				(void) printf("%6s\t%5s\t%5s\t%5s"
2337				    "\t%5s\t%5s\t%6s\t%s\n",
2338				    "-",
2339				    "-",
2340				    "-",
2341				    "-",
2342				    "-",
2343				    "-",
2344				    "-",
2345				    typename);
2346				continue;
2347			}
2348
2349			for (l = ZB_TOTAL - 1; l >= -1; l--) {
2350				level = (l == -1 ? ZB_TOTAL : l);
2351				zb = &zcb.zcb_type[level][t];
2352
2353				if (zb->zb_asize == 0)
2354					continue;
2355
2356				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
2357					continue;
2358
2359				if (level == 0 && zb->zb_asize ==
2360				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
2361					continue;
2362
2363				zdb_nicenum(zb->zb_count, csize);
2364				zdb_nicenum(zb->zb_lsize, lsize);
2365				zdb_nicenum(zb->zb_psize, psize);
2366				zdb_nicenum(zb->zb_asize, asize);
2367				zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2368
2369				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
2370				    "\t%5.2f\t%6.2f\t",
2371				    csize, lsize, psize, asize, avg,
2372				    (double)zb->zb_lsize / zb->zb_psize,
2373				    100.0 * zb->zb_asize / tzb->zb_asize);
2374
2375				if (level == ZB_TOTAL)
2376					(void) printf("%s\n", typename);
2377				else
2378					(void) printf("    L%d %s\n",
2379					    level, typename);
2380			}
2381		}
2382	}
2383
2384	(void) printf("\n");
2385
2386	if (leaks)
2387		return (2);
2388
2389	if (zcb.zcb_haderrors)
2390		return (3);
2391
2392	return (0);
2393}
2394
2395typedef struct zdb_ddt_entry {
2396	ddt_key_t	zdde_key;
2397	uint64_t	zdde_ref_blocks;
2398	uint64_t	zdde_ref_lsize;
2399	uint64_t	zdde_ref_psize;
2400	uint64_t	zdde_ref_dsize;
2401	avl_node_t	zdde_node;
2402} zdb_ddt_entry_t;
2403
2404/* ARGSUSED */
2405static int
2406zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2407    arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2408{
2409	avl_tree_t *t = arg;
2410	avl_index_t where;
2411	zdb_ddt_entry_t *zdde, zdde_search;
2412
2413	if (bp == NULL)
2414		return (0);
2415
2416	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2417		(void) printf("traversing objset %llu, %llu objects, "
2418		    "%lu blocks so far\n",
2419		    (u_longlong_t)zb->zb_objset,
2420		    (u_longlong_t)bp->blk_fill,
2421		    avl_numnodes(t));
2422	}
2423
2424	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2425	    BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
2426		return (0);
2427
2428	ddt_key_fill(&zdde_search.zdde_key, bp);
2429
2430	zdde = avl_find(t, &zdde_search, &where);
2431
2432	if (zdde == NULL) {
2433		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2434		zdde->zdde_key = zdde_search.zdde_key;
2435		avl_insert(t, zdde, where);
2436	}
2437
2438	zdde->zdde_ref_blocks += 1;
2439	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2440	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2441	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2442
2443	return (0);
2444}
2445
2446static void
2447dump_simulated_ddt(spa_t *spa)
2448{
2449	avl_tree_t t;
2450	void *cookie = NULL;
2451	zdb_ddt_entry_t *zdde;
2452	ddt_histogram_t ddh_total = { 0 };
2453	ddt_stat_t dds_total = { 0 };
2454
2455	avl_create(&t, ddt_entry_compare,
2456	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2457
2458	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2459
2460	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2461	    zdb_ddt_add_cb, &t);
2462
2463	spa_config_exit(spa, SCL_CONFIG, FTAG);
2464
2465	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2466		ddt_stat_t dds;
2467		uint64_t refcnt = zdde->zdde_ref_blocks;
2468		ASSERT(refcnt != 0);
2469
2470		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2471		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2472		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2473		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2474
2475		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2476		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2477		dds.dds_ref_psize = zdde->zdde_ref_psize;
2478		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2479
2480		ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
2481
2482		umem_free(zdde, sizeof (*zdde));
2483	}
2484
2485	avl_destroy(&t);
2486
2487	ddt_histogram_stat(&dds_total, &ddh_total);
2488
2489	(void) printf("Simulated DDT histogram:\n");
2490
2491	zpool_dump_ddt(&dds_total, &ddh_total);
2492
2493	dump_dedup_ratio(&dds_total);
2494}
2495
2496static void
2497dump_zpool(spa_t *spa)
2498{
2499	dsl_pool_t *dp = spa_get_dsl(spa);
2500	int rc = 0;
2501
2502	if (dump_opt['S']) {
2503		dump_simulated_ddt(spa);
2504		return;
2505	}
2506
2507	if (!dump_opt['e'] && dump_opt['C'] > 1) {
2508		(void) printf("\nCached configuration:\n");
2509		dump_nvlist(spa->spa_config, 8);
2510	}
2511
2512	if (dump_opt['C'])
2513		dump_config(spa);
2514
2515	if (dump_opt['u'])
2516		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
2517
2518	if (dump_opt['D'])
2519		dump_all_ddts(spa);
2520
2521	if (dump_opt['d'] > 2 || dump_opt['m'])
2522		dump_metaslabs(spa);
2523
2524	if (dump_opt['d'] || dump_opt['i']) {
2525		dump_dir(dp->dp_meta_objset);
2526		if (dump_opt['d'] >= 3) {
2527			dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees");
2528			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2529				dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
2530				    "Pool snapshot frees");
2531			}
2532
2533			if (spa_feature_is_active(spa,
2534			    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
2535				dump_bptree(spa->spa_meta_objset,
2536				    spa->spa_dsl_pool->dp_bptree_obj,
2537				    "Pool dataset frees");
2538			}
2539			dump_dtl(spa->spa_root_vdev, 0);
2540		}
2541		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
2542		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2543	}
2544	if (dump_opt['b'] || dump_opt['c'])
2545		rc = dump_block_stats(spa);
2546
2547	if (dump_opt['s'])
2548		show_pool_stats(spa);
2549
2550	if (dump_opt['h'])
2551		dump_history(spa);
2552
2553	if (rc != 0)
2554		exit(rc);
2555}
2556
/*
 * Option bits for zdb_read_block().  Each one is selected by a single
 * character in the flags field of a block specifier; main() fills in
 * flagbits[] with the character-to-bit mapping.
 */
#define	ZDB_FLAG_CHECKSUM	(1 << 0)	/* 'c' */
#define	ZDB_FLAG_DECOMPRESS	(1 << 1)	/* 'd' */
#define	ZDB_FLAG_BSWAP		(1 << 2)	/* 'e' */
#define	ZDB_FLAG_GBH		(1 << 3)	/* 'g' */
#define	ZDB_FLAG_INDIRECT	(1 << 4)	/* 'i' */
#define	ZDB_FLAG_PHYS		(1 << 5)	/* 'p' */
#define	ZDB_FLAG_RAW		(1 << 6)	/* 'r' */
#define	ZDB_FLAG_PRINT_BLKPTR	(1 << 7)	/* 'b' */

/* Indexed by flag character; 0 means "not a valid flag". */
int flagbits[256];
2567
2568static void
2569zdb_print_blkptr(blkptr_t *bp, int flags)
2570{
2571	char blkbuf[BP_SPRINTF_LEN];
2572
2573	if (flags & ZDB_FLAG_BSWAP)
2574		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2575
2576	sprintf_blkptr(blkbuf, bp);
2577	(void) printf("%s\n", blkbuf);
2578}
2579
2580static void
2581zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2582{
2583	int i;
2584
2585	for (i = 0; i < nbps; i++)
2586		zdb_print_blkptr(&bp[i], flags);
2587}
2588
2589static void
2590zdb_dump_gbh(void *buf, int flags)
2591{
2592	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2593}
2594
2595static void
2596zdb_dump_block_raw(void *buf, uint64_t size, int flags)
2597{
2598	if (flags & ZDB_FLAG_BSWAP)
2599		byteswap_uint64_array(buf, size);
2600	(void) write(1, buf, size);
2601}
2602
2603static void
2604zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2605{
2606	uint64_t *d = (uint64_t *)buf;
2607	int nwords = size / sizeof (uint64_t);
2608	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2609	int i, j;
2610	char *hdr, *c;
2611
2612
2613	if (do_bswap)
2614		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
2615	else
2616		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
2617
2618	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
2619
2620	for (i = 0; i < nwords; i += 2) {
2621		(void) printf("%06llx:  %016llx  %016llx  ",
2622		    (u_longlong_t)(i * sizeof (uint64_t)),
2623		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
2624		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
2625
2626		c = (char *)&d[i];
2627		for (j = 0; j < 2 * sizeof (uint64_t); j++)
2628			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
2629		(void) printf("\n");
2630	}
2631}
2632
2633/*
2634 * There are two acceptable formats:
2635 *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
2636 *	child[.child]*    - For example: 0.1.1
2637 *
2638 * The second form can be used to specify arbitrary vdevs anywhere
2639 * in the heirarchy.  For example, in a pool with a mirror of
2640 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
2641 */
2642static vdev_t *
2643zdb_vdev_lookup(vdev_t *vdev, char *path)
2644{
2645	char *s, *p, *q;
2646	int i;
2647
2648	if (vdev == NULL)
2649		return (NULL);
2650
2651	/* First, assume the x.x.x.x format */
2652	i = (int)strtoul(path, &s, 10);
2653	if (s == path || (s && *s != '.' && *s != '\0'))
2654		goto name;
2655	if (i < 0 || i >= vdev->vdev_children)
2656		return (NULL);
2657
2658	vdev = vdev->vdev_child[i];
2659	if (*s == '\0')
2660		return (vdev);
2661	return (zdb_vdev_lookup(vdev, s+1));
2662
2663name:
2664	for (i = 0; i < vdev->vdev_children; i++) {
2665		vdev_t *vc = vdev->vdev_child[i];
2666
2667		if (vc->vdev_path == NULL) {
2668			vc = zdb_vdev_lookup(vc, path);
2669			if (vc == NULL)
2670				continue;
2671			else
2672				return (vc);
2673		}
2674
2675		p = strrchr(vc->vdev_path, '/');
2676		p = p ? p + 1 : vc->vdev_path;
2677		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
2678
2679		if (strcmp(vc->vdev_path, path) == 0)
2680			return (vc);
2681		if (strcmp(p, path) == 0)
2682			return (vc);
2683		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
2684			return (vc);
2685	}
2686
2687	return (NULL);
2688}
2689
2690/*
2691 * Read a block from a pool and print it out.  The syntax of the
2692 * block descriptor is:
2693 *
2694 *	pool:vdev_specifier:offset:size[:flags]
2695 *
2696 *	pool           - The name of the pool you wish to read from
2697 *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
2698 *	offset         - offset, in hex, in bytes
2699 *	size           - Amount of data to read, in hex, in bytes
2700 *	flags          - A string of characters specifying options
2701 *		 b: Decode a blkptr at given offset within block
2702 *		*c: Calculate and display checksums
2703 *		 d: Decompress data before dumping
2704 *		 e: Byteswap data before dumping
2705 *		 g: Display data as a gang block header
2706 *		 i: Display as an indirect block
2707 *		 p: Do I/O to physical offset
2708 *		 r: Dump raw data to stdout
2709 *
2710 *              * = not yet implemented
2711 */
2712static void
2713zdb_read_block(char *thing, spa_t *spa)
2714{
2715	blkptr_t blk, *bp = &blk;
2716	dva_t *dva = bp->blk_dva;
2717	int flags = 0;
2718	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
2719	zio_t *zio;
2720	vdev_t *vd;
2721	void *pbuf, *lbuf, *buf;
2722	char *s, *p, *dup, *vdev, *flagstr;
2723	int i, error;
2724
2725	dup = strdup(thing);
2726	s = strtok(dup, ":");
2727	vdev = s ? s : "";
2728	s = strtok(NULL, ":");
2729	offset = strtoull(s ? s : "", NULL, 16);
2730	s = strtok(NULL, ":");
2731	size = strtoull(s ? s : "", NULL, 16);
2732	s = strtok(NULL, ":");
2733	flagstr = s ? s : "";
2734
2735	s = NULL;
2736	if (size == 0)
2737		s = "size must not be zero";
2738	if (!IS_P2ALIGNED(size, DEV_BSIZE))
2739		s = "size must be a multiple of sector size";
2740	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
2741		s = "offset must be a multiple of sector size";
2742	if (s) {
2743		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
2744		free(dup);
2745		return;
2746	}
2747
2748	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
2749		for (i = 0; flagstr[i]; i++) {
2750			int bit = flagbits[(uchar_t)flagstr[i]];
2751
2752			if (bit == 0) {
2753				(void) printf("***Invalid flag: %c\n",
2754				    flagstr[i]);
2755				continue;
2756			}
2757			flags |= bit;
2758
2759			/* If it's not something with an argument, keep going */
2760			if ((bit & (ZDB_FLAG_CHECKSUM |
2761			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
2762				continue;
2763
2764			p = &flagstr[i + 1];
2765			if (bit == ZDB_FLAG_PRINT_BLKPTR)
2766				blkptr_offset = strtoull(p, &p, 16);
2767			if (*p != ':' && *p != '\0') {
2768				(void) printf("***Invalid flag arg: '%s'\n", s);
2769				free(dup);
2770				return;
2771			}
2772		}
2773	}
2774
2775	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
2776	if (vd == NULL) {
2777		(void) printf("***Invalid vdev: %s\n", vdev);
2778		free(dup);
2779		return;
2780	} else {
2781		if (vd->vdev_path)
2782			(void) fprintf(stderr, "Found vdev: %s\n",
2783			    vd->vdev_path);
2784		else
2785			(void) fprintf(stderr, "Found vdev type: %s\n",
2786			    vd->vdev_ops->vdev_op_type);
2787	}
2788
2789	psize = size;
2790	lsize = size;
2791
2792	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2793	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2794
2795	BP_ZERO(bp);
2796
2797	DVA_SET_VDEV(&dva[0], vd->vdev_id);
2798	DVA_SET_OFFSET(&dva[0], offset);
2799	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
2800	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
2801
2802	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
2803
2804	BP_SET_LSIZE(bp, lsize);
2805	BP_SET_PSIZE(bp, psize);
2806	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
2807	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
2808	BP_SET_TYPE(bp, DMU_OT_NONE);
2809	BP_SET_LEVEL(bp, 0);
2810	BP_SET_DEDUP(bp, 0);
2811	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
2812
2813	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2814	zio = zio_root(spa, NULL, NULL, 0);
2815
2816	if (vd == vd->vdev_top) {
2817		/*
2818		 * Treat this as a normal block read.
2819		 */
2820		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
2821		    ZIO_PRIORITY_SYNC_READ,
2822		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
2823	} else {
2824		/*
2825		 * Treat this as a vdev child I/O.
2826		 */
2827		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
2828		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
2829		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
2830		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
2831		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
2832	}
2833
2834	error = zio_wait(zio);
2835	spa_config_exit(spa, SCL_STATE, FTAG);
2836
2837	if (error) {
2838		(void) printf("Read of %s failed, error: %d\n", thing, error);
2839		goto out;
2840	}
2841
2842	if (flags & ZDB_FLAG_DECOMPRESS) {
2843		/*
2844		 * We don't know how the data was compressed, so just try
2845		 * every decompress function at every inflated blocksize.
2846		 */
2847		enum zio_compress c;
2848		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2849		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2850
2851		bcopy(pbuf, pbuf2, psize);
2852
2853		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
2854		    SPA_MAXBLOCKSIZE - psize) == 0);
2855
2856		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
2857		    SPA_MAXBLOCKSIZE - psize) == 0);
2858
2859		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
2860		    lsize -= SPA_MINBLOCKSIZE) {
2861			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
2862				if (zio_decompress_data(c, pbuf, lbuf,
2863				    psize, lsize) == 0 &&
2864				    zio_decompress_data(c, pbuf2, lbuf2,
2865				    psize, lsize) == 0 &&
2866				    bcmp(lbuf, lbuf2, lsize) == 0)
2867					break;
2868			}
2869			if (c != ZIO_COMPRESS_FUNCTIONS)
2870				break;
2871			lsize -= SPA_MINBLOCKSIZE;
2872		}
2873
2874		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
2875		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
2876
2877		if (lsize <= psize) {
2878			(void) printf("Decompress of %s failed\n", thing);
2879			goto out;
2880		}
2881		buf = lbuf;
2882		size = lsize;
2883	} else {
2884		buf = pbuf;
2885		size = psize;
2886	}
2887
2888	if (flags & ZDB_FLAG_PRINT_BLKPTR)
2889		zdb_print_blkptr((blkptr_t *)(void *)
2890		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2891	else if (flags & ZDB_FLAG_RAW)
2892		zdb_dump_block_raw(buf, size, flags);
2893	else if (flags & ZDB_FLAG_INDIRECT)
2894		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2895		    flags);
2896	else if (flags & ZDB_FLAG_GBH)
2897		zdb_dump_gbh(buf, flags);
2898	else
2899		zdb_dump_block(thing, buf, size, flags);
2900
2901out:
2902	umem_free(pbuf, SPA_MAXBLOCKSIZE);
2903	umem_free(lbuf, SPA_MAXBLOCKSIZE);
2904	free(dup);
2905}
2906
2907static boolean_t
2908pool_match(nvlist_t *cfg, char *tgt)
2909{
2910	uint64_t v, guid = strtoull(tgt, NULL, 0);
2911	char *s;
2912
2913	if (guid != 0) {
2914		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
2915			return (v == guid);
2916	} else {
2917		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
2918			return (strcmp(s, tgt) == 0);
2919	}
2920	return (B_FALSE);
2921}
2922
2923static char *
2924find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
2925{
2926	nvlist_t *pools;
2927	nvlist_t *match = NULL;
2928	char *name = NULL;
2929	char *sepp = NULL;
2930	char sep;
2931	int count = 0;
2932	importargs_t args = { 0 };
2933
2934	args.paths = dirc;
2935	args.path = dirv;
2936	args.can_be_active = B_TRUE;
2937
2938	if ((sepp = strpbrk(*target, "/@")) != NULL) {
2939		sep = *sepp;
2940		*sepp = '\0';
2941	}
2942
2943	pools = zpool_search_import(g_zfs, &args);
2944
2945	if (pools != NULL) {
2946		nvpair_t *elem = NULL;
2947		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
2948			verify(nvpair_value_nvlist(elem, configp) == 0);
2949			if (pool_match(*configp, *target)) {
2950				count++;
2951				if (match != NULL) {
2952					/* print previously found config */
2953					if (name != NULL) {
2954						(void) printf("%s\n", name);
2955						dump_nvlist(match, 8);
2956						name = NULL;
2957					}
2958					(void) printf("%s\n",
2959					    nvpair_name(elem));
2960					dump_nvlist(*configp, 8);
2961				} else {
2962					match = *configp;
2963					name = nvpair_name(elem);
2964				}
2965			}
2966		}
2967	}
2968	if (count > 1)
2969		(void) fatal("\tMatched %d pools - use pool GUID "
2970		    "instead of pool name or \n"
2971		    "\tpool name part of a dataset name to select pool", count);
2972
2973	if (sepp)
2974		*sepp = sep;
2975	/*
2976	 * If pool GUID was specified for pool id, replace it with pool name
2977	 */
2978	if (name && (strstr(*target, name) != *target)) {
2979		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
2980
2981		*target = umem_alloc(sz, UMEM_NOFAIL);
2982		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
2983	}
2984
2985	*configp = name ? match : NULL;
2986
2987	return (name);
2988}
2989
2990int
2991main(int argc, char **argv)
2992{
2993	int i, c;
2994	struct rlimit rl = { 1024, 1024 };
2995	spa_t *spa = NULL;
2996	objset_t *os = NULL;
2997	int dump_all = 1;
2998	int verbose = 0;
2999	int error = 0;
3000	char **searchdirs = NULL;
3001	int nsearch = 0;
3002	char *target;
3003	nvlist_t *policy = NULL;
3004	uint64_t max_txg = UINT64_MAX;
3005	int rewind = ZPOOL_NEVER_REWIND;
3006
3007	(void) setrlimit(RLIMIT_NOFILE, &rl);
3008	(void) enable_extended_FILE_stdio(-1, -1);
3009
3010	dprintf_setup(&argc, argv);
3011
3012	while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
3013		switch (c) {
3014		case 'b':
3015		case 'c':
3016		case 'd':
3017		case 'h':
3018		case 'i':
3019		case 'l':
3020		case 'm':
3021		case 's':
3022		case 'u':
3023		case 'C':
3024		case 'D':
3025		case 'R':
3026		case 'S':
3027			dump_opt[c]++;
3028			dump_all = 0;
3029			break;
3030		case 'A':
3031		case 'F':
3032		case 'L':
3033		case 'X':
3034		case 'e':
3035		case 'P':
3036			dump_opt[c]++;
3037			break;
3038		case 'v':
3039			verbose++;
3040			break;
3041		case 'p':
3042			if (searchdirs == NULL) {
3043				searchdirs = umem_alloc(sizeof (char *),
3044				    UMEM_NOFAIL);
3045			} else {
3046				char **tmp = umem_alloc((nsearch + 1) *
3047				    sizeof (char *), UMEM_NOFAIL);
3048				bcopy(searchdirs, tmp, nsearch *
3049				    sizeof (char *));
3050				umem_free(searchdirs,
3051				    nsearch * sizeof (char *));
3052				searchdirs = tmp;
3053			}
3054			searchdirs[nsearch++] = optarg;
3055			break;
3056		case 't':
3057			max_txg = strtoull(optarg, NULL, 0);
3058			if (max_txg < TXG_INITIAL) {
3059				(void) fprintf(stderr, "incorrect txg "
3060				    "specified: %s\n", optarg);
3061				usage();
3062			}
3063			break;
3064		case 'U':
3065			spa_config_path = optarg;
3066			break;
3067		default:
3068			usage();
3069			break;
3070		}
3071	}
3072
3073	if (!dump_opt['e'] && searchdirs != NULL) {
3074		(void) fprintf(stderr, "-p option requires use of -e\n");
3075		usage();
3076	}
3077
3078	kernel_init(FREAD);
3079	g_zfs = libzfs_init();
3080	ASSERT(g_zfs != NULL);
3081
3082	if (dump_all)
3083		verbose = MAX(verbose, 1);
3084
3085	for (c = 0; c < 256; c++) {
3086		if (dump_all && !strchr("elAFLRSXP", c))
3087			dump_opt[c] = 1;
3088		if (dump_opt[c])
3089			dump_opt[c] += verbose;
3090	}
3091
3092	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
3093	zfs_recover = (dump_opt['A'] > 1);
3094
3095	argc -= optind;
3096	argv += optind;
3097
3098	if (argc < 2 && dump_opt['R'])
3099		usage();
3100	if (argc < 1) {
3101		if (!dump_opt['e'] && dump_opt['C']) {
3102			dump_cachefile(spa_config_path);
3103			return (0);
3104		}
3105		usage();
3106	}
3107
3108	if (dump_opt['l']) {
3109		dump_label(argv[0]);
3110		return (0);
3111	}
3112
3113	if (dump_opt['X'] || dump_opt['F'])
3114		rewind = ZPOOL_DO_REWIND |
3115		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
3116
3117	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
3118	    nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
3119	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
3120		fatal("internal error: %s", strerror(ENOMEM));
3121
3122	error = 0;
3123	target = argv[0];
3124
3125	if (dump_opt['e']) {
3126		nvlist_t *cfg = NULL;
3127		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
3128
3129		error = ENOENT;
3130		if (name) {
3131			if (dump_opt['C'] > 1) {
3132				(void) printf("\nConfiguration for import:\n");
3133				dump_nvlist(cfg, 8);
3134			}
3135			if (nvlist_add_nvlist(cfg,
3136			    ZPOOL_REWIND_POLICY, policy) != 0) {
3137				fatal("can't open '%s': %s",
3138				    target, strerror(ENOMEM));
3139			}
3140			if ((error = spa_import(name, cfg, NULL,
3141			    ZFS_IMPORT_MISSING_LOG)) != 0) {
3142				error = spa_import(name, cfg, NULL,
3143				    ZFS_IMPORT_VERBATIM);
3144			}
3145		}
3146	}
3147
3148	if (error == 0) {
3149		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
3150			error = spa_open_rewind(target, &spa, FTAG, policy,
3151			    NULL);
3152			if (error) {
3153				/*
3154				 * If we're missing the log device then
3155				 * try opening the pool after clearing the
3156				 * log state.
3157				 */
3158				mutex_enter(&spa_namespace_lock);
3159				if ((spa = spa_lookup(target)) != NULL &&
3160				    spa->spa_log_state == SPA_LOG_MISSING) {
3161					spa->spa_log_state = SPA_LOG_CLEAR;
3162					error = 0;
3163				}
3164				mutex_exit(&spa_namespace_lock);
3165
3166				if (!error) {
3167					error = spa_open_rewind(target, &spa,
3168					    FTAG, policy, NULL);
3169				}
3170			}
3171		} else {
3172			error = dmu_objset_own(target, DMU_OST_ANY,
3173			    B_TRUE, FTAG, &os);
3174		}
3175	}
3176	nvlist_free(policy);
3177
3178	if (error)
3179		fatal("can't open '%s': %s", target, strerror(error));
3180
3181	argv++;
3182	argc--;
3183	if (!dump_opt['R']) {
3184		if (argc > 0) {
3185			zopt_objects = argc;
3186			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
3187			for (i = 0; i < zopt_objects; i++) {
3188				errno = 0;
3189				zopt_object[i] = strtoull(argv[i], NULL, 0);
3190				if (zopt_object[i] == 0 && errno != 0)
3191					fatal("bad number %s: %s",
3192					    argv[i], strerror(errno));
3193			}
3194		}
3195		(os != NULL) ? dump_dir(os) : dump_zpool(spa);
3196	} else {
3197		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
3198		flagbits['c'] = ZDB_FLAG_CHECKSUM;
3199		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
3200		flagbits['e'] = ZDB_FLAG_BSWAP;
3201		flagbits['g'] = ZDB_FLAG_GBH;
3202		flagbits['i'] = ZDB_FLAG_INDIRECT;
3203		flagbits['p'] = ZDB_FLAG_PHYS;
3204		flagbits['r'] = ZDB_FLAG_RAW;
3205
3206		for (i = 0; i < argc; i++)
3207			zdb_read_block(argv[i], spa);
3208	}
3209
3210	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
3211
3212	fuid_table_destroy();
3213	sa_loaded = B_FALSE;
3214
3215	libzfs_fini(g_zfs);
3216	kernel_fini();
3217
3218	return (0);
3219}
3220