zdb.c revision 90e894e2eeb5dc45bff06b19d393ee7b2d450277
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#include <stdio.h>
26#include <stdio_ext.h>
27#include <stdlib.h>
28#include <ctype.h>
29#include <sys/zfs_context.h>
30#include <sys/spa.h>
31#include <sys/spa_impl.h>
32#include <sys/dmu.h>
33#include <sys/zap.h>
34#include <sys/fs/zfs.h>
35#include <sys/zfs_znode.h>
36#include <sys/zfs_sa.h>
37#include <sys/sa.h>
38#include <sys/sa_impl.h>
39#include <sys/vdev.h>
40#include <sys/vdev_impl.h>
41#include <sys/metaslab_impl.h>
42#include <sys/dmu_objset.h>
43#include <sys/dsl_dir.h>
44#include <sys/dsl_dataset.h>
45#include <sys/dsl_pool.h>
46#include <sys/dbuf.h>
47#include <sys/zil.h>
48#include <sys/zil_impl.h>
49#include <sys/stat.h>
50#include <sys/resource.h>
51#include <sys/dmu_traverse.h>
52#include <sys/zio_checksum.h>
53#include <sys/zio_compress.h>
54#include <sys/zfs_fuid.h>
55#include <sys/arc.h>
56#include <sys/ddt.h>
57#undef ZFS_MAXNAMELEN
58#undef verify
59#include <libzfs.h>
60
/*
 * Range-checked lookups used when printing on-disk identifiers.
 *
 * Each *_NAME macro indexes its table only when idx is below the matching
 * limit (ZIO_COMPRESS_FUNCTIONS, ZIO_CHECKSUM_FUNCTIONS, DMU_OT_NUMTYPES)
 * and yields the literal "UNKNOWN" otherwise.  ZDB_OT_TYPE maps any
 * out-of-range object type to DMU_OT_NUMTYPES.
 *
 * idx is evaluated more than once, so it must not have side effects.
 */
#define	ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
    zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
    zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
    dmu_ot[(idx)].ot_name : "UNKNOWN")
#define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
68
/*
 * zfs_recover is normally defined outside this file and only declared
 * here; when building under lint a local definition is supplied instead.
 */
#ifndef lint
extern int zfs_recover;
#else
int zfs_recover;
#endif
74
/* Program name used as a prefix in usage and error messages. */
const char cmdname[] = "zdb";
/*
 * Per-option counters indexed by option character (e.g. dump_opt['d']);
 * the dump routines compare them against thresholds to pick verbosity.
 */
uint8_t dump_opt[256];

/* Common signature shared by the per-object-type dump_*() viewers. */
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

/* Defined outside this file. */
extern void dump_intent_log(zilog_t *);
/*
 * Numeric operands and their count.  dump_metaslabs() reads entry 0 as a
 * vdev id and the remaining entries as metaslab numbers.
 */
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
/* libzfs handle -- NOTE(review): not referenced in this part of the file. */
libzfs_handle_t *g_zfs;
84
85/*
86 * These libumem hooks provide a reasonable set of defaults for the allocator's
87 * debugging facilities.
88 */
/*
 * libumem hook: returns the default debugging configuration string.
 * Declared with an explicit (void) prototype to match _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
94
/*
 * libumem hook: returns the default logging configuration string.
 */
const char *
_umem_logging_init(void)
{
	/* $UMEM_LOGGING setting */
	static const char umem_logging_default[] = "fail,contents";

	return (umem_logging_default);
}
100
101static void
102usage(void)
103{
104	(void) fprintf(stderr,
105	    "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
106	    "poolname [object...]\n"
107	    "       %s [-divPA] [-e -p path...] dataset [object...]\n"
108	    "       %s -m [-LXFPA] [-t txg] [-e [-p path...]] "
109	    "poolname [vdev [metaslab...]]\n"
110	    "       %s -R [-A] [-e [-p path...]] poolname "
111	    "vdev:offset:size[:flags]\n"
112	    "       %s -S [-PA] [-e [-p path...]] poolname\n"
113	    "       %s -l [-uA] device\n"
114	    "       %s -C [-A] [-U config]\n\n",
115	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
116
117	(void) fprintf(stderr, "    Dataset name must include at least one "
118	    "separator character '/' or '@'\n");
119	(void) fprintf(stderr, "    If dataset name is specified, only that "
120	    "dataset is dumped\n");
121	(void) fprintf(stderr, "    If object numbers are specified, only "
122	    "those objects are dumped\n\n");
123	(void) fprintf(stderr, "    Options to control amount of output:\n");
124	(void) fprintf(stderr, "        -u uberblock\n");
125	(void) fprintf(stderr, "        -d dataset(s)\n");
126	(void) fprintf(stderr, "        -i intent logs\n");
127	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
128	(void) fprintf(stderr, "        -h pool history\n");
129	(void) fprintf(stderr, "        -b block statistics\n");
130	(void) fprintf(stderr, "        -m metaslabs\n");
131	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
132	    "all data) blocks\n");
133	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
134	(void) fprintf(stderr, "        -D dedup statistics\n");
135	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
136	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
137	(void) fprintf(stderr, "        -l dump label contents\n");
138	(void) fprintf(stderr, "        -L disable leak tracking (do not "
139	    "load spacemaps)\n");
140	(void) fprintf(stderr, "        -R read and display block from a "
141	    "device\n\n");
142	(void) fprintf(stderr, "    Below options are intended for use "
143	    "with other options (except -l):\n");
144	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
145	    "panic recovery (-AA) or both (-AAA)\n");
146	(void) fprintf(stderr, "        -F attempt automatic rewind within "
147	    "safe range of transaction groups\n");
148	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
149	    "cachefile\n");
150	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
151	    "work with dataset)\n");
152	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
153	    "has altroot/not in a cachefile\n");
154	(void) fprintf(stderr, "        -p <path> -- use one or more with "
155	    "-e to specify path to vdev dir\n");
156	(void) fprintf(stderr, "        -P print numbers in parseable form\n");
157	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
158	    "searching for uberblocks\n");
159	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
160	    "to make only that option verbose\n");
161	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
162	exit(1);
163}
164
165/*
166 * Called for usage errors that are discovered after a call to spa_open(),
167 * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
168 */
169
170static void
171fatal(const char *fmt, ...)
172{
173	va_list ap;
174
175	va_start(ap, fmt);
176	(void) fprintf(stderr, "%s: ", cmdname);
177	(void) vfprintf(stderr, fmt, ap);
178	va_end(ap);
179	(void) fprintf(stderr, "\n");
180
181	exit(1);
182}
183
184/* ARGSUSED */
185static void
186dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
187{
188	nvlist_t *nv;
189	size_t nvsize = *(uint64_t *)data;
190	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
191
192	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
193
194	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
195
196	umem_free(packed, nvsize);
197
198	dump_nvlist(nv, 8);
199
200	nvlist_free(nv);
201}
202
203static void
204zdb_nicenum(uint64_t num, char *buf)
205{
206	if (dump_opt['P'])
207		(void) sprintf(buf, "%llu", (longlong_t)num);
208	else
209		nicenum(num, buf);
210}
211
212const char dump_zap_stars[] = "****************************************";
213const int dump_zap_width = sizeof (dump_zap_stars) - 1;
214
215static void
216dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
217{
218	int i;
219	int minidx = ZAP_HISTOGRAM_SIZE - 1;
220	int maxidx = 0;
221	uint64_t max = 0;
222
223	for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
224		if (histo[i] > max)
225			max = histo[i];
226		if (histo[i] > 0 && i > maxidx)
227			maxidx = i;
228		if (histo[i] > 0 && i < minidx)
229			minidx = i;
230	}
231
232	if (max < dump_zap_width)
233		max = dump_zap_width;
234
235	for (i = minidx; i <= maxidx; i++)
236		(void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
237		    &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
238}
239
240static void
241dump_zap_stats(objset_t *os, uint64_t object)
242{
243	int error;
244	zap_stats_t zs;
245
246	error = zap_get_stats(os, object, &zs);
247	if (error)
248		return;
249
250	if (zs.zs_ptrtbl_len == 0) {
251		ASSERT(zs.zs_num_blocks == 1);
252		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
253		    (u_longlong_t)zs.zs_blocksize,
254		    (u_longlong_t)zs.zs_num_entries);
255		return;
256	}
257
258	(void) printf("\tFat ZAP stats:\n");
259
260	(void) printf("\t\tPointer table:\n");
261	(void) printf("\t\t\t%llu elements\n",
262	    (u_longlong_t)zs.zs_ptrtbl_len);
263	(void) printf("\t\t\tzt_blk: %llu\n",
264	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
265	(void) printf("\t\t\tzt_numblks: %llu\n",
266	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
267	(void) printf("\t\t\tzt_shift: %llu\n",
268	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
269	(void) printf("\t\t\tzt_blks_copied: %llu\n",
270	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
271	(void) printf("\t\t\tzt_nextblk: %llu\n",
272	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
273
274	(void) printf("\t\tZAP entries: %llu\n",
275	    (u_longlong_t)zs.zs_num_entries);
276	(void) printf("\t\tLeaf blocks: %llu\n",
277	    (u_longlong_t)zs.zs_num_leafs);
278	(void) printf("\t\tTotal blocks: %llu\n",
279	    (u_longlong_t)zs.zs_num_blocks);
280	(void) printf("\t\tzap_block_type: 0x%llx\n",
281	    (u_longlong_t)zs.zs_block_type);
282	(void) printf("\t\tzap_magic: 0x%llx\n",
283	    (u_longlong_t)zs.zs_magic);
284	(void) printf("\t\tzap_salt: 0x%llx\n",
285	    (u_longlong_t)zs.zs_salt);
286
287	(void) printf("\t\tLeafs with 2^n pointers:\n");
288	dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
289
290	(void) printf("\t\tBlocks with n*5 entries:\n");
291	dump_zap_histogram(zs.zs_blocks_with_n5_entries);
292
293	(void) printf("\t\tBlocks n/10 full:\n");
294	dump_zap_histogram(zs.zs_blocks_n_tenths_full);
295
296	(void) printf("\t\tEntries with n chunks:\n");
297	dump_zap_histogram(zs.zs_entries_using_n_chunks);
298
299	(void) printf("\t\tBuckets with n entries:\n");
300	dump_zap_histogram(zs.zs_buckets_with_n_entries);
301}
302
/*
 * Object viewer that prints nothing; all arguments are ignored.
 */
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}
308
/*
 * Object viewer that only reports that the object type is not known;
 * all arguments are ignored.
 */
/*ARGSUSED*/
static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) printf("\tUNKNOWN OBJECT TYPE\n");
}
315
/*
 * Object viewer with an empty body: nothing is printed and all arguments
 * are ignored.  Unlike the neighboring viewers this one is not static.
 */
/*ARGSUSED*/
void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}
321
/*
 * Object viewer with an empty body: nothing is printed and all arguments
 * are ignored.
 */
/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
}
327
328/*ARGSUSED*/
329static void
330dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
331{
332	zap_cursor_t zc;
333	zap_attribute_t attr;
334	void *prop;
335	int i;
336
337	dump_zap_stats(os, object);
338	(void) printf("\n");
339
340	for (zap_cursor_init(&zc, os, object);
341	    zap_cursor_retrieve(&zc, &attr) == 0;
342	    zap_cursor_advance(&zc)) {
343		(void) printf("\t\t%s = ", attr.za_name);
344		if (attr.za_num_integers == 0) {
345			(void) printf("\n");
346			continue;
347		}
348		prop = umem_zalloc(attr.za_num_integers *
349		    attr.za_integer_length, UMEM_NOFAIL);
350		(void) zap_lookup(os, object, attr.za_name,
351		    attr.za_integer_length, attr.za_num_integers, prop);
352		if (attr.za_integer_length == 1) {
353			(void) printf("%s", (char *)prop);
354		} else {
355			for (i = 0; i < attr.za_num_integers; i++) {
356				switch (attr.za_integer_length) {
357				case 2:
358					(void) printf("%u ",
359					    ((uint16_t *)prop)[i]);
360					break;
361				case 4:
362					(void) printf("%u ",
363					    ((uint32_t *)prop)[i]);
364					break;
365				case 8:
366					(void) printf("%lld ",
367					    (u_longlong_t)((int64_t *)prop)[i]);
368					break;
369				}
370			}
371		}
372		(void) printf("\n");
373		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
374	}
375	zap_cursor_fini(&zc);
376}
377
/*
 * Object viewer for DDT ZAP objects: prints only the ZAP statistics.
 * `data` and `size` are unused.
 */
/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
	dump_zap_stats(os, object);
	/* contents are printed elsewhere, properly decoded */
}
385
386/*ARGSUSED*/
387static void
388dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
389{
390	zap_cursor_t zc;
391	zap_attribute_t attr;
392
393	dump_zap_stats(os, object);
394	(void) printf("\n");
395
396	for (zap_cursor_init(&zc, os, object);
397	    zap_cursor_retrieve(&zc, &attr) == 0;
398	    zap_cursor_advance(&zc)) {
399		(void) printf("\t\t%s = ", attr.za_name);
400		if (attr.za_num_integers == 0) {
401			(void) printf("\n");
402			continue;
403		}
404		(void) printf(" %llx : [%d:%d:%d]\n",
405		    (u_longlong_t)attr.za_first_integer,
406		    (int)ATTR_LENGTH(attr.za_first_integer),
407		    (int)ATTR_BSWAP(attr.za_first_integer),
408		    (int)ATTR_NUM(attr.za_first_integer));
409	}
410	zap_cursor_fini(&zc);
411}
412
413/*ARGSUSED*/
414static void
415dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
416{
417	zap_cursor_t zc;
418	zap_attribute_t attr;
419	uint16_t *layout_attrs;
420	int i;
421
422	dump_zap_stats(os, object);
423	(void) printf("\n");
424
425	for (zap_cursor_init(&zc, os, object);
426	    zap_cursor_retrieve(&zc, &attr) == 0;
427	    zap_cursor_advance(&zc)) {
428		(void) printf("\t\t%s = [", attr.za_name);
429		if (attr.za_num_integers == 0) {
430			(void) printf("\n");
431			continue;
432		}
433
434		VERIFY(attr.za_integer_length == 2);
435		layout_attrs = umem_zalloc(attr.za_num_integers *
436		    attr.za_integer_length, UMEM_NOFAIL);
437
438		VERIFY(zap_lookup(os, object, attr.za_name,
439		    attr.za_integer_length,
440		    attr.za_num_integers, layout_attrs) == 0);
441
442		for (i = 0; i != attr.za_num_integers; i++)
443			(void) printf(" %d ", (int)layout_attrs[i]);
444		(void) printf("]\n");
445		umem_free(layout_attrs,
446		    attr.za_num_integers * attr.za_integer_length);
447	}
448	zap_cursor_fini(&zc);
449}
450
451/*ARGSUSED*/
452static void
453dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
454{
455	zap_cursor_t zc;
456	zap_attribute_t attr;
457	const char *typenames[] = {
458		/* 0 */ "not specified",
459		/* 1 */ "FIFO",
460		/* 2 */ "Character Device",
461		/* 3 */ "3 (invalid)",
462		/* 4 */ "Directory",
463		/* 5 */ "5 (invalid)",
464		/* 6 */ "Block Device",
465		/* 7 */ "7 (invalid)",
466		/* 8 */ "Regular File",
467		/* 9 */ "9 (invalid)",
468		/* 10 */ "Symbolic Link",
469		/* 11 */ "11 (invalid)",
470		/* 12 */ "Socket",
471		/* 13 */ "Door",
472		/* 14 */ "Event Port",
473		/* 15 */ "15 (invalid)",
474	};
475
476	dump_zap_stats(os, object);
477	(void) printf("\n");
478
479	for (zap_cursor_init(&zc, os, object);
480	    zap_cursor_retrieve(&zc, &attr) == 0;
481	    zap_cursor_advance(&zc)) {
482		(void) printf("\t\t%s = %lld (type: %s)\n",
483		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
484		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
485	}
486	zap_cursor_fini(&zc);
487}
488
489static void
490dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
491{
492	uint64_t alloc, offset, entry;
493	uint8_t mapshift = sm->sm_shift;
494	uint64_t mapstart = sm->sm_start;
495	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
496			    "INVALID", "INVALID", "INVALID", "INVALID" };
497
498	if (smo->smo_object == 0)
499		return;
500
501	/*
502	 * Print out the freelist entries in both encoded and decoded form.
503	 */
504	alloc = 0;
505	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
506		VERIFY3U(0, ==, dmu_read(os, smo->smo_object, offset,
507		    sizeof (entry), &entry, DMU_READ_PREFETCH));
508		if (SM_DEBUG_DECODE(entry)) {
509			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
510			    (u_longlong_t)(offset / sizeof (entry)),
511			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
512			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
513			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
514		} else {
515			(void) printf("\t    [%6llu]    %c  range:"
516			    " %010llx-%010llx  size: %06llx\n",
517			    (u_longlong_t)(offset / sizeof (entry)),
518			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
519			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
520			    mapshift) + mapstart),
521			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
522			    mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
523			    mapshift)),
524			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
525			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
526				alloc += SM_RUN_DECODE(entry) << mapshift;
527			else
528				alloc -= SM_RUN_DECODE(entry) << mapshift;
529		}
530	}
531	if (alloc != smo->smo_alloc) {
532		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
533		    "with space map summary (%llu)\n",
534		    (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
535	}
536}
537
538static void
539dump_metaslab_stats(metaslab_t *msp)
540{
541	char maxbuf[32];
542	space_map_t *sm = &msp->ms_map;
543	avl_tree_t *t = sm->sm_pp_root;
544	int free_pct = sm->sm_space * 100 / sm->sm_size;
545
546	zdb_nicenum(space_map_maxsize(sm), maxbuf);
547
548	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
549	    "segments", avl_numnodes(t), "maxsize", maxbuf,
550	    "freepct", free_pct);
551}
552
553static void
554dump_metaslab(metaslab_t *msp)
555{
556	vdev_t *vd = msp->ms_group->mg_vd;
557	spa_t *spa = vd->vdev_spa;
558	space_map_t *sm = &msp->ms_map;
559	space_map_obj_t *smo = &msp->ms_smo;
560	char freebuf[32];
561
562	zdb_nicenum(sm->sm_size - smo->smo_alloc, freebuf);
563
564	(void) printf(
565	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
566	    (u_longlong_t)(sm->sm_start / sm->sm_size),
567	    (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
568
569	if (dump_opt['m'] > 1 && !dump_opt['L']) {
570		mutex_enter(&msp->ms_lock);
571		space_map_load_wait(sm);
572		if (!sm->sm_loaded)
573			VERIFY(space_map_load(sm, zfs_metaslab_ops,
574			    SM_FREE, smo, spa->spa_meta_objset) == 0);
575		dump_metaslab_stats(msp);
576		space_map_unload(sm);
577		mutex_exit(&msp->ms_lock);
578	}
579
580	if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
581		ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift));
582
583		mutex_enter(&msp->ms_lock);
584		dump_spacemap(spa->spa_meta_objset, smo, sm);
585		mutex_exit(&msp->ms_lock);
586	}
587}
588
589static void
590print_vdev_metaslab_header(vdev_t *vd)
591{
592	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
593	    (u_longlong_t)vd->vdev_id,
594	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
595	    "offset", "spacemap", "free");
596	(void) printf("\t%15s   %19s   %15s   %10s\n",
597	    "---------------", "-------------------",
598	    "---------------", "-------------");
599}
600
601static void
602dump_metaslabs(spa_t *spa)
603{
604	vdev_t *vd, *rvd = spa->spa_root_vdev;
605	uint64_t m, c = 0, children = rvd->vdev_children;
606
607	(void) printf("\nMetaslabs:\n");
608
609	if (!dump_opt['d'] && zopt_objects > 0) {
610		c = zopt_object[0];
611
612		if (c >= children)
613			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
614
615		if (zopt_objects > 1) {
616			vd = rvd->vdev_child[c];
617			print_vdev_metaslab_header(vd);
618
619			for (m = 1; m < zopt_objects; m++) {
620				if (zopt_object[m] < vd->vdev_ms_count)
621					dump_metaslab(
622					    vd->vdev_ms[zopt_object[m]]);
623				else
624					(void) fprintf(stderr, "bad metaslab "
625					    "number %llu\n",
626					    (u_longlong_t)zopt_object[m]);
627			}
628			(void) printf("\n");
629			return;
630		}
631		children = c + 1;
632	}
633	for (; c < children; c++) {
634		vd = rvd->vdev_child[c];
635		print_vdev_metaslab_header(vd);
636
637		for (m = 0; m < vd->vdev_ms_count; m++)
638			dump_metaslab(vd->vdev_ms[m]);
639		(void) printf("\n");
640	}
641}
642
643static void
644dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
645{
646	const ddt_phys_t *ddp = dde->dde_phys;
647	const ddt_key_t *ddk = &dde->dde_key;
648	char *types[4] = { "ditto", "single", "double", "triple" };
649	char blkbuf[BP_SPRINTF_LEN];
650	blkptr_t blk;
651
652	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
653		if (ddp->ddp_phys_birth == 0)
654			continue;
655		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
656		sprintf_blkptr(blkbuf, &blk);
657		(void) printf("index %llx refcnt %llu %s %s\n",
658		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
659		    types[p], blkbuf);
660	}
661}
662
663static void
664dump_dedup_ratio(const ddt_stat_t *dds)
665{
666	double rL, rP, rD, D, dedup, compress, copies;
667
668	if (dds->dds_blocks == 0)
669		return;
670
671	rL = (double)dds->dds_ref_lsize;
672	rP = (double)dds->dds_ref_psize;
673	rD = (double)dds->dds_ref_dsize;
674	D = (double)dds->dds_dsize;
675
676	dedup = rD / D;
677	compress = rL / rP;
678	copies = rD / rP;
679
680	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
681	    "dedup * compress / copies = %.2f\n\n",
682	    dedup, compress, copies, dedup * compress / copies);
683}
684
685static void
686dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
687{
688	char name[DDT_NAMELEN];
689	ddt_entry_t dde;
690	uint64_t walk = 0;
691	dmu_object_info_t doi;
692	uint64_t count, dspace, mspace;
693	int error;
694
695	error = ddt_object_info(ddt, type, class, &doi);
696
697	if (error == ENOENT)
698		return;
699	ASSERT(error == 0);
700
701	if ((count = ddt_object_count(ddt, type, class)) == 0)
702		return;
703
704	dspace = doi.doi_physical_blocks_512 << 9;
705	mspace = doi.doi_fill_count * doi.doi_data_block_size;
706
707	ddt_object_name(ddt, type, class, name);
708
709	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
710	    name,
711	    (u_longlong_t)count,
712	    (u_longlong_t)(dspace / count),
713	    (u_longlong_t)(mspace / count));
714
715	if (dump_opt['D'] < 3)
716		return;
717
718	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
719
720	if (dump_opt['D'] < 4)
721		return;
722
723	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
724		return;
725
726	(void) printf("%s contents:\n\n", name);
727
728	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
729		dump_dde(ddt, &dde, walk);
730
731	ASSERT(error == ENOENT);
732
733	(void) printf("\n");
734}
735
736static void
737dump_all_ddts(spa_t *spa)
738{
739	ddt_histogram_t ddh_total = { 0 };
740	ddt_stat_t dds_total = { 0 };
741
742	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
743		ddt_t *ddt = spa->spa_ddt[c];
744		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
745			for (enum ddt_class class = 0; class < DDT_CLASSES;
746			    class++) {
747				dump_ddt(ddt, type, class);
748			}
749		}
750	}
751
752	ddt_get_dedup_stats(spa, &dds_total);
753
754	if (dds_total.dds_blocks == 0) {
755		(void) printf("All DDTs are empty\n");
756		return;
757	}
758
759	(void) printf("\n");
760
761	if (dump_opt['D'] > 1) {
762		(void) printf("DDT histogram (aggregated over all DDTs):\n");
763		ddt_get_dedup_histogram(spa, &ddh_total);
764		zpool_dump_ddt(&dds_total, &ddh_total);
765	}
766
767	dump_dedup_ratio(&dds_total);
768}
769
770static void
771dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
772{
773	char *prefix = (void *)sm;
774
775	(void) printf("%s [%llu,%llu) length %llu\n",
776	    prefix,
777	    (u_longlong_t)start,
778	    (u_longlong_t)(start + size),
779	    (u_longlong_t)(size));
780}
781
782static void
783dump_dtl(vdev_t *vd, int indent)
784{
785	spa_t *spa = vd->vdev_spa;
786	boolean_t required;
787	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
788	char prefix[256];
789
790	spa_vdev_state_enter(spa, SCL_NONE);
791	required = vdev_dtl_required(vd);
792	(void) spa_vdev_state_exit(spa, NULL, 0);
793
794	if (indent == 0)
795		(void) printf("\nDirty time logs:\n\n");
796
797	(void) printf("\t%*s%s [%s]\n", indent, "",
798	    vd->vdev_path ? vd->vdev_path :
799	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
800	    required ? "DTL-required" : "DTL-expendable");
801
802	for (int t = 0; t < DTL_TYPES; t++) {
803		space_map_t *sm = &vd->vdev_dtl[t];
804		if (sm->sm_space == 0)
805			continue;
806		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
807		    indent + 2, "", name[t]);
808		mutex_enter(sm->sm_lock);
809		space_map_walk(sm, dump_dtl_seg, (void *)prefix);
810		mutex_exit(sm->sm_lock);
811		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
812			dump_spacemap(spa->spa_meta_objset,
813			    &vd->vdev_dtl_smo, sm);
814	}
815
816	for (int c = 0; c < vd->vdev_children; c++)
817		dump_dtl(vd->vdev_child[c], indent + 4);
818}
819
820static void
821dump_history(spa_t *spa)
822{
823	nvlist_t **events = NULL;
824	char buf[SPA_MAXBLOCKSIZE];
825	uint64_t resid, len, off = 0;
826	uint_t num = 0;
827	int error;
828	time_t tsec;
829	struct tm t;
830	char tbuf[30];
831	char internalstr[MAXPATHLEN];
832
833	do {
834		len = sizeof (buf);
835
836		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
837			(void) fprintf(stderr, "Unable to read history: "
838			    "error %d\n", error);
839			return;
840		}
841
842		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
843			break;
844
845		off -= resid;
846	} while (len != 0);
847
848	(void) printf("\nHistory:\n");
849	for (int i = 0; i < num; i++) {
850		uint64_t time, txg, ievent;
851		char *cmd, *intstr;
852
853		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
854		    &time) != 0)
855			continue;
856		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
857		    &cmd) != 0) {
858			if (nvlist_lookup_uint64(events[i],
859			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
860				continue;
861			verify(nvlist_lookup_uint64(events[i],
862			    ZPOOL_HIST_TXG, &txg) == 0);
863			verify(nvlist_lookup_string(events[i],
864			    ZPOOL_HIST_INT_STR, &intstr) == 0);
865			if (ievent >= LOG_END)
866				continue;
867
868			(void) snprintf(internalstr,
869			    sizeof (internalstr),
870			    "[internal %s txg:%lld] %s",
871			    zfs_history_event_names[ievent], txg,
872			    intstr);
873			cmd = internalstr;
874		}
875		tsec = time;
876		(void) localtime_r(&tsec, &t);
877		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
878		(void) printf("%s %s\n", tbuf, cmd);
879	}
880}
881
/*
 * Object viewer with an empty body: nothing is printed and all arguments
 * are ignored.
 */
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
{
}
887
888static uint64_t
889blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
890{
891	if (dnp == NULL) {
892		ASSERT(zb->zb_level < 0);
893		if (zb->zb_object == 0)
894			return (zb->zb_blkid);
895		return (zb->zb_blkid * BP_GET_LSIZE(bp));
896	}
897
898	ASSERT(zb->zb_level >= 0);
899
900	return ((zb->zb_blkid <<
901	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
902	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
903}
904
905static void
906sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
907{
908	const dva_t *dva = bp->blk_dva;
909	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
910
911	if (dump_opt['b'] >= 5) {
912		sprintf_blkptr(blkbuf, bp);
913		return;
914	}
915
916	blkbuf[0] = '\0';
917
918	for (int i = 0; i < ndvas; i++)
919		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
920		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
921		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
922		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
923
924	(void) sprintf(blkbuf + strlen(blkbuf),
925	    "%llxL/%llxP F=%llu B=%llu/%llu",
926	    (u_longlong_t)BP_GET_LSIZE(bp),
927	    (u_longlong_t)BP_GET_PSIZE(bp),
928	    (u_longlong_t)bp->blk_fill,
929	    (u_longlong_t)bp->blk_birth,
930	    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
931}
932
933static void
934print_indirect(blkptr_t *bp, const zbookmark_t *zb,
935    const dnode_phys_t *dnp)
936{
937	char blkbuf[BP_SPRINTF_LEN];
938	int l;
939
940	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
941	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
942
943	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
944
945	ASSERT(zb->zb_level >= 0);
946
947	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
948		if (l == zb->zb_level) {
949			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
950		} else {
951			(void) printf(" ");
952		}
953	}
954
955	sprintf_blkptr_compact(blkbuf, bp);
956	(void) printf("%s\n", blkbuf);
957}
958
959static int
960visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
961    blkptr_t *bp, const zbookmark_t *zb)
962{
963	int err = 0;
964
965	if (bp->blk_birth == 0)
966		return (0);
967
968	print_indirect(bp, zb, dnp);
969
970	if (BP_GET_LEVEL(bp) > 0) {
971		uint32_t flags = ARC_WAIT;
972		int i;
973		blkptr_t *cbp;
974		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
975		arc_buf_t *buf;
976		uint64_t fill = 0;
977
978		err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
979		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
980		if (err)
981			return (err);
982		ASSERT(buf->b_data);
983
984		/* recursively visit blocks below this */
985		cbp = buf->b_data;
986		for (i = 0; i < epb; i++, cbp++) {
987			zbookmark_t czb;
988
989			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
990			    zb->zb_level - 1,
991			    zb->zb_blkid * epb + i);
992			err = visit_indirect(spa, dnp, cbp, &czb);
993			if (err)
994				break;
995			fill += cbp->blk_fill;
996		}
997		if (!err)
998			ASSERT3U(fill, ==, bp->blk_fill);
999		(void) arc_buf_remove_ref(buf, &buf);
1000	}
1001
1002	return (err);
1003}
1004
1005/*ARGSUSED*/
1006static void
1007dump_indirect(dnode_t *dn)
1008{
1009	dnode_phys_t *dnp = dn->dn_phys;
1010	int j;
1011	zbookmark_t czb;
1012
1013	(void) printf("Indirect blocks:\n");
1014
1015	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1016	    dn->dn_object, dnp->dn_nlevels - 1, 0);
1017	for (j = 0; j < dnp->dn_nblkptr; j++) {
1018		czb.zb_blkid = j;
1019		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1020		    &dnp->dn_blkptr[j], &czb);
1021	}
1022
1023	(void) printf("\n");
1024}
1025
1026/*ARGSUSED*/
1027static void
1028dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1029{
1030	dsl_dir_phys_t *dd = data;
1031	time_t crtime;
1032	char nice[32];
1033
1034	if (dd == NULL)
1035		return;
1036
1037	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1038
1039	crtime = dd->dd_creation_time;
1040	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1041	(void) printf("\t\thead_dataset_obj = %llu\n",
1042	    (u_longlong_t)dd->dd_head_dataset_obj);
1043	(void) printf("\t\tparent_dir_obj = %llu\n",
1044	    (u_longlong_t)dd->dd_parent_obj);
1045	(void) printf("\t\torigin_obj = %llu\n",
1046	    (u_longlong_t)dd->dd_origin_obj);
1047	(void) printf("\t\tchild_dir_zapobj = %llu\n",
1048	    (u_longlong_t)dd->dd_child_dir_zapobj);
1049	zdb_nicenum(dd->dd_used_bytes, nice);
1050	(void) printf("\t\tused_bytes = %s\n", nice);
1051	zdb_nicenum(dd->dd_compressed_bytes, nice);
1052	(void) printf("\t\tcompressed_bytes = %s\n", nice);
1053	zdb_nicenum(dd->dd_uncompressed_bytes, nice);
1054	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
1055	zdb_nicenum(dd->dd_quota, nice);
1056	(void) printf("\t\tquota = %s\n", nice);
1057	zdb_nicenum(dd->dd_reserved, nice);
1058	(void) printf("\t\treserved = %s\n", nice);
1059	(void) printf("\t\tprops_zapobj = %llu\n",
1060	    (u_longlong_t)dd->dd_props_zapobj);
1061	(void) printf("\t\tdeleg_zapobj = %llu\n",
1062	    (u_longlong_t)dd->dd_deleg_zapobj);
1063	(void) printf("\t\tflags = %llx\n",
1064	    (u_longlong_t)dd->dd_flags);
1065
1066#define	DO(which) \
1067	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1068	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1069	DO(HEAD);
1070	DO(SNAP);
1071	DO(CHILD);
1072	DO(CHILD_RSRV);
1073	DO(REFRSRV);
1074#undef DO
1075}
1076
1077/*ARGSUSED*/
1078static void
1079dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1080{
1081	dsl_dataset_phys_t *ds = data;
1082	time_t crtime;
1083	char used[32], compressed[32], uncompressed[32], unique[32];
1084	char blkbuf[BP_SPRINTF_LEN];
1085
1086	if (ds == NULL)
1087		return;
1088
1089	ASSERT(size == sizeof (*ds));
1090	crtime = ds->ds_creation_time;
1091	zdb_nicenum(ds->ds_used_bytes, used);
1092	zdb_nicenum(ds->ds_compressed_bytes, compressed);
1093	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
1094	zdb_nicenum(ds->ds_unique_bytes, unique);
1095	sprintf_blkptr(blkbuf, &ds->ds_bp);
1096
1097	(void) printf("\t\tdir_obj = %llu\n",
1098	    (u_longlong_t)ds->ds_dir_obj);
1099	(void) printf("\t\tprev_snap_obj = %llu\n",
1100	    (u_longlong_t)ds->ds_prev_snap_obj);
1101	(void) printf("\t\tprev_snap_txg = %llu\n",
1102	    (u_longlong_t)ds->ds_prev_snap_txg);
1103	(void) printf("\t\tnext_snap_obj = %llu\n",
1104	    (u_longlong_t)ds->ds_next_snap_obj);
1105	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1106	    (u_longlong_t)ds->ds_snapnames_zapobj);
1107	(void) printf("\t\tnum_children = %llu\n",
1108	    (u_longlong_t)ds->ds_num_children);
1109	(void) printf("\t\tuserrefs_obj = %llu\n",
1110	    (u_longlong_t)ds->ds_userrefs_obj);
1111	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1112	(void) printf("\t\tcreation_txg = %llu\n",
1113	    (u_longlong_t)ds->ds_creation_txg);
1114	(void) printf("\t\tdeadlist_obj = %llu\n",
1115	    (u_longlong_t)ds->ds_deadlist_obj);
1116	(void) printf("\t\tused_bytes = %s\n", used);
1117	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1118	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1119	(void) printf("\t\tunique = %s\n", unique);
1120	(void) printf("\t\tfsid_guid = %llu\n",
1121	    (u_longlong_t)ds->ds_fsid_guid);
1122	(void) printf("\t\tguid = %llu\n",
1123	    (u_longlong_t)ds->ds_guid);
1124	(void) printf("\t\tflags = %llx\n",
1125	    (u_longlong_t)ds->ds_flags);
1126	(void) printf("\t\tnext_clones_obj = %llu\n",
1127	    (u_longlong_t)ds->ds_next_clones_obj);
1128	(void) printf("\t\tprops_obj = %llu\n",
1129	    (u_longlong_t)ds->ds_props_obj);
1130	(void) printf("\t\tbp = %s\n", blkbuf);
1131}
1132
1133/* ARGSUSED */
1134static int
1135dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1136{
1137	char blkbuf[BP_SPRINTF_LEN];
1138
1139	ASSERT(bp->blk_birth != 0);
1140	sprintf_blkptr_compact(blkbuf, bp);
1141	(void) printf("\t%s\n", blkbuf);
1142	return (0);
1143}
1144
1145static void
1146dump_bpobj(bpobj_t *bpo, char *name)
1147{
1148	char bytes[32];
1149	char comp[32];
1150	char uncomp[32];
1151
1152	if (dump_opt['d'] < 3)
1153		return;
1154
1155	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
1156	if (bpo->bpo_havesubobj) {
1157		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
1158		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
1159		(void) printf("\n    %s: %llu local blkptrs, %llu subobjs, "
1160		    "%s (%s/%s comp)\n",
1161		    name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1162		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1163		    bytes, comp, uncomp);
1164	} else {
1165		(void) printf("\n    %s: %llu blkptrs, %s\n",
1166		    name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, bytes);
1167	}
1168
1169	if (dump_opt['d'] < 5)
1170		return;
1171
1172	(void) printf("\n");
1173
1174	(void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1175}
1176
1177static void
1178dump_deadlist(dsl_deadlist_t *dl)
1179{
1180	dsl_deadlist_entry_t *dle;
1181	char bytes[32];
1182	char comp[32];
1183	char uncomp[32];
1184
1185	if (dump_opt['d'] < 3)
1186		return;
1187
1188	zdb_nicenum(dl->dl_phys->dl_used, bytes);
1189	zdb_nicenum(dl->dl_phys->dl_comp, comp);
1190	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
1191	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
1192	    bytes, comp, uncomp);
1193
1194	if (dump_opt['d'] < 4)
1195		return;
1196
1197	(void) printf("\n");
1198
1199	for (dle = avl_first(&dl->dl_tree); dle;
1200	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
1201		(void) printf("      mintxg %llu -> obj %llu\n",
1202		    (longlong_t)dle->dle_mintxg,
1203		    (longlong_t)dle->dle_bpobj.bpo_object);
1204
1205		if (dump_opt['d'] >= 5)
1206			dump_bpobj(&dle->dle_bpobj, "");
1207	}
1208}
1209
/*
 * State cached across znode dumps within one dataset.
 *
 * idx_tree and domain_tree are the AVL trees handed to the zfs_fuid_*()
 * routines; fuid_table_loaded records whether dump_uidgid() has populated
 * them (fuid_table_destroy() tears them down and clears the flag).
 * sa_loaded records whether dump_znode() has run sa_setup(), which fills
 * in sa_attr_table; dump_one_dir() clears it after each dataset.
 */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
static boolean_t sa_loaded;
sa_attr_type_t *sa_attr_table;
1215
1216static void
1217fuid_table_destroy()
1218{
1219	if (fuid_table_loaded) {
1220		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1221		fuid_table_loaded = B_FALSE;
1222	}
1223}
1224
1225/*
1226 * print uid or gid information.
1227 * For normal POSIX id just the id is printed in decimal format.
1228 * For CIFS files with FUID the fuid is printed in hex followed by
1229 * the doman-rid string.
1230 */
1231static void
1232print_idstr(uint64_t id, const char *id_type)
1233{
1234	if (FUID_INDEX(id)) {
1235		char *domain;
1236
1237		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1238		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1239		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1240	} else {
1241		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1242	}
1243
1244}
1245
1246static void
1247dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1248{
1249	uint32_t uid_idx, gid_idx;
1250
1251	uid_idx = FUID_INDEX(uid);
1252	gid_idx = FUID_INDEX(gid);
1253
1254	/* Load domain table, if not already loaded */
1255	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1256		uint64_t fuid_obj;
1257
1258		/* first find the fuid object.  It lives in the master node */
1259		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1260		    8, 1, &fuid_obj) == 0);
1261		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1262		(void) zfs_fuid_table_load(os, fuid_obj,
1263		    &idx_tree, &domain_tree);
1264		fuid_table_loaded = B_TRUE;
1265	}
1266
1267	print_idstr(uid, "uid");
1268	print_idstr(gid, "gid");
1269}
1270
1271/*ARGSUSED*/
1272static void
1273dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1274{
1275	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1276	sa_handle_t *hdl;
1277	uint64_t xattr, rdev, gen;
1278	uint64_t uid, gid, mode, fsize, parent, links;
1279	uint64_t pflags;
1280	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1281	time_t z_crtime, z_atime, z_mtime, z_ctime;
1282	sa_bulk_attr_t bulk[12];
1283	int idx = 0;
1284	int error;
1285
1286	if (!sa_loaded) {
1287		uint64_t sa_attrs = 0;
1288		uint64_t version;
1289
1290		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1291		    8, 1, &version) == 0);
1292		if (version >= ZPL_VERSION_SA) {
1293			VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1294			    8, 1, &sa_attrs) == 0);
1295		}
1296		if ((error = sa_setup(os, sa_attrs, zfs_attr_table,
1297		    ZPL_END, &sa_attr_table)) != 0) {
1298			(void) printf("sa_setup failed errno %d, can't "
1299			    "display znode contents\n", error);
1300			return;
1301		}
1302		sa_loaded = B_TRUE;
1303	}
1304
1305	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1306		(void) printf("Failed to get handle for SA znode\n");
1307		return;
1308	}
1309
1310	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1311	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1312	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1313	    &links, 8);
1314	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1315	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1316	    &mode, 8);
1317	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1318	    NULL, &parent, 8);
1319	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1320	    &fsize, 8);
1321	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1322	    acctm, 16);
1323	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1324	    modtm, 16);
1325	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1326	    crtm, 16);
1327	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1328	    chgtm, 16);
1329	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
1330	    &pflags, 8);
1331
1332	if (sa_bulk_lookup(hdl, bulk, idx)) {
1333		(void) sa_handle_destroy(hdl);
1334		return;
1335	}
1336
1337	error = zfs_obj_to_path(os, object, path, sizeof (path));
1338	if (error != 0) {
1339		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1340		    (u_longlong_t)object);
1341	}
1342	if (dump_opt['d'] < 3) {
1343		(void) printf("\t%s\n", path);
1344		(void) sa_handle_destroy(hdl);
1345		return;
1346	}
1347
1348	z_crtime = (time_t)crtm[0];
1349	z_atime = (time_t)acctm[0];
1350	z_mtime = (time_t)modtm[0];
1351	z_ctime = (time_t)chgtm[0];
1352
1353	(void) printf("\tpath	%s\n", path);
1354	dump_uidgid(os, uid, gid);
1355	(void) printf("\tatime	%s", ctime(&z_atime));
1356	(void) printf("\tmtime	%s", ctime(&z_mtime));
1357	(void) printf("\tctime	%s", ctime(&z_ctime));
1358	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1359	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
1360	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
1361	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
1362	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
1363	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
1364	(void) printf("\tpflags	%llx\n", (u_longlong_t)pflags);
1365	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1366	    sizeof (uint64_t)) == 0)
1367		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
1368	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1369	    sizeof (uint64_t)) == 0)
1370		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
1371	sa_handle_destroy(hdl);
1372}
1373
/*
 * Object viewer installed for the ACL object types in object_viewer[].
 * The body is empty: nothing is printed for these objects.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1379
/*
 * Object viewer installed for the "DMU objset" type in object_viewer[].
 * The body is empty: nothing is printed for these objects.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1385
/*
 * Per-object-type dump routines.  dump_object() indexes this table with
 * ZDB_OT_TYPE() of an object's bonus type and of its data type and calls
 * the entry as viewer(os, object, data, size).  The table has one slot
 * per DMU object type plus a final catch-all slot (dump_unknown), so the
 * order of entries must track the DMU object type numbering.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
	dump_none,		/* unallocated			*/
	dump_zap,		/* object directory		*/
	dump_uint64,		/* object array			*/
	dump_none,		/* packed nvlist		*/
	dump_packed_nvlist,	/* packed nvlist size		*/
	dump_none,		/* bplist			*/
	dump_none,		/* bplist header		*/
	dump_none,		/* SPA space map header		*/
	dump_none,		/* SPA space map		*/
	dump_none,		/* ZIL intent log		*/
	dump_dnode,		/* DMU dnode			*/
	dump_dmu_objset,	/* DMU objset			*/
	dump_dsl_dir,		/* DSL directory		*/
	dump_zap,		/* DSL directory child map	*/
	dump_zap,		/* DSL dataset snap map		*/
	dump_zap,		/* DSL props			*/
	dump_dsl_dataset,	/* DSL dataset			*/
	dump_znode,		/* ZFS znode			*/
	dump_acl,		/* ZFS V0 ACL			*/
	dump_uint8,		/* ZFS plain file		*/
	dump_zpldir,		/* ZFS directory		*/
	dump_zap,		/* ZFS master node		*/
	dump_zap,		/* ZFS delete queue		*/
	dump_uint8,		/* zvol object			*/
	dump_zap,		/* zvol prop			*/
	dump_uint8,		/* other uint8[]		*/
	dump_uint64,		/* other uint64[]		*/
	dump_zap,		/* other ZAP			*/
	dump_zap,		/* persistent error log		*/
	dump_uint8,		/* SPA history			*/
	dump_uint64,		/* SPA history offsets		*/
	dump_zap,		/* Pool properties		*/
	dump_zap,		/* DSL permissions		*/
	dump_acl,		/* ZFS ACL			*/
	dump_uint8,		/* ZFS SYSACL			*/
	dump_none,		/* FUID nvlist			*/
	dump_packed_nvlist,	/* FUID nvlist size		*/
	dump_zap,		/* DSL dataset next clones	*/
	dump_zap,		/* DSL scrub queue		*/
	dump_zap,		/* ZFS user/group used		*/
	dump_zap,		/* ZFS user/group quota		*/
	dump_zap,		/* snapshot refcount tags	*/
	dump_ddt_zap,		/* DDT ZAP object		*/
	dump_zap,		/* DDT statistics		*/
	dump_znode,		/* SA object			*/
	dump_zap,		/* SA Master Node		*/
	dump_sa_attrs,		/* SA attribute registration	*/
	dump_sa_layouts,	/* SA attribute layouts		*/
	dump_zap,		/* DSL scrub translations	*/
	dump_none,		/* fake dedup BP		*/
	dump_zap,		/* deadlist			*/
	dump_none,		/* deadlist hdr			*/
	dump_zap,		/* dsl clones			*/
	dump_none,		/* bpobj subobjs		*/
	dump_unknown,		/* Unknown type, must be last	*/
};
1443
1444static void
1445dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1446{
1447	dmu_buf_t *db = NULL;
1448	dmu_object_info_t doi;
1449	dnode_t *dn;
1450	void *bonus = NULL;
1451	size_t bsize = 0;
1452	char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
1453	char bonus_size[32];
1454	char aux[50];
1455	int error;
1456
1457	if (*print_header) {
1458		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1459		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1460		    "%full", "type");
1461		*print_header = 0;
1462	}
1463
1464	if (object == 0) {
1465		dn = DMU_META_DNODE(os);
1466	} else {
1467		error = dmu_bonus_hold(os, object, FTAG, &db);
1468		if (error)
1469			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1470			    object, error);
1471		bonus = db->db_data;
1472		bsize = db->db_size;
1473		dn = DB_DNODE((dmu_buf_impl_t *)db);
1474	}
1475	dmu_object_info_from_dnode(dn, &doi);
1476
1477	zdb_nicenum(doi.doi_metadata_block_size, iblk);
1478	zdb_nicenum(doi.doi_data_block_size, dblk);
1479	zdb_nicenum(doi.doi_max_offset, lsize);
1480	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
1481	zdb_nicenum(doi.doi_bonus_size, bonus_size);
1482	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1483	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1484	    doi.doi_max_offset);
1485
1486	aux[0] = '\0';
1487
1488	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1489		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1490		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1491	}
1492
1493	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1494		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1495		    ZDB_COMPRESS_NAME(doi.doi_compress));
1496	}
1497
1498	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1499	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1500	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1501
1502	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1503		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1504		    "", "", "", "", "", bonus_size, "bonus",
1505		    ZDB_OT_NAME(doi.doi_bonus_type));
1506	}
1507
1508	if (verbosity >= 4) {
1509		(void) printf("\tdnode flags: %s%s%s\n",
1510		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1511		    "USED_BYTES " : "",
1512		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1513		    "USERUSED_ACCOUNTED " : "",
1514		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
1515		    "SPILL_BLKPTR" : "");
1516		(void) printf("\tdnode maxblkid: %llu\n",
1517		    (longlong_t)dn->dn_phys->dn_maxblkid);
1518
1519		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1520		    bonus, bsize);
1521		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1522		*print_header = 1;
1523	}
1524
1525	if (verbosity >= 5)
1526		dump_indirect(dn);
1527
1528	if (verbosity >= 5) {
1529		/*
1530		 * Report the list of segments that comprise the object.
1531		 */
1532		uint64_t start = 0;
1533		uint64_t end;
1534		uint64_t blkfill = 1;
1535		int minlvl = 1;
1536
1537		if (dn->dn_type == DMU_OT_DNODE) {
1538			minlvl = 0;
1539			blkfill = DNODES_PER_BLOCK;
1540		}
1541
1542		for (;;) {
1543			char segsize[32];
1544			error = dnode_next_offset(dn,
1545			    0, &start, minlvl, blkfill, 0);
1546			if (error)
1547				break;
1548			end = start;
1549			error = dnode_next_offset(dn,
1550			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1551			zdb_nicenum(end - start, segsize);
1552			(void) printf("\t\tsegment [%016llx, %016llx)"
1553			    " size %5s\n", (u_longlong_t)start,
1554			    (u_longlong_t)end, segsize);
1555			if (error)
1556				break;
1557			start = end;
1558		}
1559	}
1560
1561	if (db != NULL)
1562		dmu_buf_rele(db, FTAG);
1563}
1564
/*
 * Printable names for objset types; dump_dir() indexes this with
 * dds_type after checking it is below DMU_OST_NUMTYPES.
 */
static char *objset_types[DMU_OST_NUMTYPES] = {
	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1567
1568static void
1569dump_dir(objset_t *os)
1570{
1571	dmu_objset_stats_t dds;
1572	uint64_t object, object_count;
1573	uint64_t refdbytes, usedobjs, scratch;
1574	char numbuf[32];
1575	char blkbuf[BP_SPRINTF_LEN + 20];
1576	char osname[MAXNAMELEN];
1577	char *type = "UNKNOWN";
1578	int verbosity = dump_opt['d'];
1579	int print_header = 1;
1580	int i, error;
1581
1582	dmu_objset_fast_stat(os, &dds);
1583
1584	if (dds.dds_type < DMU_OST_NUMTYPES)
1585		type = objset_types[dds.dds_type];
1586
1587	if (dds.dds_type == DMU_OST_META) {
1588		dds.dds_creation_txg = TXG_INITIAL;
1589		usedobjs = os->os_rootbp->blk_fill;
1590		refdbytes = os->os_spa->spa_dsl_pool->
1591		    dp_mos_dir->dd_phys->dd_used_bytes;
1592	} else {
1593		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1594	}
1595
1596	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1597
1598	zdb_nicenum(refdbytes, numbuf);
1599
1600	if (verbosity >= 4) {
1601		(void) sprintf(blkbuf, ", rootbp ");
1602		(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
1603	} else {
1604		blkbuf[0] = '\0';
1605	}
1606
1607	dmu_objset_name(os, osname);
1608
1609	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1610	    "%s, %llu objects%s\n",
1611	    osname, type, (u_longlong_t)dmu_objset_id(os),
1612	    (u_longlong_t)dds.dds_creation_txg,
1613	    numbuf, (u_longlong_t)usedobjs, blkbuf);
1614
1615	if (zopt_objects != 0) {
1616		for (i = 0; i < zopt_objects; i++)
1617			dump_object(os, zopt_object[i], verbosity,
1618			    &print_header);
1619		(void) printf("\n");
1620		return;
1621	}
1622
1623	if (dump_opt['i'] != 0 || verbosity >= 2)
1624		dump_intent_log(dmu_objset_zil(os));
1625
1626	if (dmu_objset_ds(os) != NULL)
1627		dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
1628
1629	if (verbosity < 2)
1630		return;
1631
1632	if (os->os_rootbp->blk_birth == 0)
1633		return;
1634
1635	dump_object(os, 0, verbosity, &print_header);
1636	object_count = 0;
1637	if (DMU_USERUSED_DNODE(os) != NULL &&
1638	    DMU_USERUSED_DNODE(os)->dn_type != 0) {
1639		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1640		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1641	}
1642
1643	object = 0;
1644	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1645		dump_object(os, object, verbosity, &print_header);
1646		object_count++;
1647	}
1648
1649	ASSERT3U(object_count, ==, usedobjs);
1650
1651	(void) printf("\n");
1652
1653	if (error != ESRCH) {
1654		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1655		abort();
1656	}
1657}
1658
1659static void
1660dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
1661{
1662	time_t timestamp = ub->ub_timestamp;
1663
1664	(void) printf(header ? header : "");
1665	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1666	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1667	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1668	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1669	(void) printf("\ttimestamp = %llu UTC = %s",
1670	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1671	if (dump_opt['u'] >= 3) {
1672		char blkbuf[BP_SPRINTF_LEN];
1673		sprintf_blkptr(blkbuf, &ub->ub_rootbp);
1674		(void) printf("\trootbp = %s\n", blkbuf);
1675	}
1676	(void) printf(footer ? footer : "");
1677}
1678
1679static void
1680dump_config(spa_t *spa)
1681{
1682	dmu_buf_t *db;
1683	size_t nvsize = 0;
1684	int error = 0;
1685
1686
1687	error = dmu_bonus_hold(spa->spa_meta_objset,
1688	    spa->spa_config_object, FTAG, &db);
1689
1690	if (error == 0) {
1691		nvsize = *(uint64_t *)db->db_data;
1692		dmu_buf_rele(db, FTAG);
1693
1694		(void) printf("\nMOS Configuration:\n");
1695		dump_packed_nvlist(spa->spa_meta_objset,
1696		    spa->spa_config_object, (void *)&nvsize, 1);
1697	} else {
1698		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1699		    (u_longlong_t)spa->spa_config_object, error);
1700	}
1701}
1702
1703static void
1704dump_cachefile(const char *cachefile)
1705{
1706	int fd;
1707	struct stat64 statbuf;
1708	char *buf;
1709	nvlist_t *config;
1710
1711	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1712		(void) printf("cannot open '%s': %s\n", cachefile,
1713		    strerror(errno));
1714		exit(1);
1715	}
1716
1717	if (fstat64(fd, &statbuf) != 0) {
1718		(void) printf("failed to stat '%s': %s\n", cachefile,
1719		    strerror(errno));
1720		exit(1);
1721	}
1722
1723	if ((buf = malloc(statbuf.st_size)) == NULL) {
1724		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
1725		    (u_longlong_t)statbuf.st_size);
1726		exit(1);
1727	}
1728
1729	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1730		(void) fprintf(stderr, "failed to read %llu bytes\n",
1731		    (u_longlong_t)statbuf.st_size);
1732		exit(1);
1733	}
1734
1735	(void) close(fd);
1736
1737	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1738		(void) fprintf(stderr, "failed to unpack nvlist\n");
1739		exit(1);
1740	}
1741
1742	free(buf);
1743
1744	dump_nvlist(config, 0);
1745
1746	nvlist_free(config);
1747}
1748
1749#define	ZDB_MAX_UB_HEADER_SIZE 32
1750
1751static void
1752dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
1753{
1754	vdev_t vd;
1755	vdev_t *vdp = &vd;
1756	char header[ZDB_MAX_UB_HEADER_SIZE];
1757
1758	vd.vdev_ashift = ashift;
1759	vdp->vdev_top = vdp;
1760
1761	for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
1762		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
1763		uberblock_t *ub = (void *)((char *)lbl + uoff);
1764
1765		if (uberblock_verify(ub))
1766			continue;
1767		(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
1768		    "Uberblock[%d]\n", i);
1769		dump_uberblock(ub, header, "");
1770	}
1771}
1772
1773static void
1774dump_label(const char *dev)
1775{
1776	int fd;
1777	vdev_label_t label;
1778	char *path, *buf = label.vl_vdev_phys.vp_nvlist;
1779	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1780	struct stat64 statbuf;
1781	uint64_t psize, ashift;
1782	int len = strlen(dev) + 1;
1783
1784	if (strncmp(dev, "/dev/dsk/", 9) == 0) {
1785		len++;
1786		path = malloc(len);
1787		(void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
1788	} else {
1789		path = strdup(dev);
1790	}
1791
1792	if ((fd = open64(path, O_RDONLY)) < 0) {
1793		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
1794		free(path);
1795		exit(1);
1796	}
1797
1798	if (fstat64(fd, &statbuf) != 0) {
1799		(void) printf("failed to stat '%s': %s\n", path,
1800		    strerror(errno));
1801		free(path);
1802		(void) close(fd);
1803		exit(1);
1804	}
1805
1806	if (S_ISBLK(statbuf.st_mode)) {
1807		(void) printf("cannot use '%s': character device required\n",
1808		    path);
1809		free(path);
1810		(void) close(fd);
1811		exit(1);
1812	}
1813
1814	psize = statbuf.st_size;
1815	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1816
1817	for (int l = 0; l < VDEV_LABELS; l++) {
1818		nvlist_t *config = NULL;
1819
1820		(void) printf("--------------------------------------------\n");
1821		(void) printf("LABEL %d\n", l);
1822		(void) printf("--------------------------------------------\n");
1823
1824		if (pread64(fd, &label, sizeof (label),
1825		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1826			(void) printf("failed to read label %d\n", l);
1827			continue;
1828		}
1829
1830		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1831			(void) printf("failed to unpack label %d\n", l);
1832			ashift = SPA_MINBLOCKSHIFT;
1833		} else {
1834			nvlist_t *vdev_tree = NULL;
1835
1836			dump_nvlist(config, 4);
1837			if ((nvlist_lookup_nvlist(config,
1838			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
1839			    (nvlist_lookup_uint64(vdev_tree,
1840			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
1841				ashift = SPA_MINBLOCKSHIFT;
1842			nvlist_free(config);
1843		}
1844		if (dump_opt['u'])
1845			dump_label_uberblocks(&label, ashift);
1846	}
1847
1848	free(path);
1849	(void) close(fd);
1850}
1851
1852/*ARGSUSED*/
1853static int
1854dump_one_dir(const char *dsname, void *arg)
1855{
1856	int error;
1857	objset_t *os;
1858
1859	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
1860	if (error) {
1861		(void) printf("Could not open %s, error %d\n", dsname, error);
1862		return (0);
1863	}
1864	dump_dir(os);
1865	dmu_objset_disown(os, FTAG);
1866	fuid_table_destroy();
1867	sa_loaded = B_FALSE;
1868	return (0);
1869}
1870
/*
 * Block statistics.
 *
 * One accumulator of block sizes and counts.  zdb_count_block() adds
 * each block pointer's BP_GET_ASIZE/LSIZE/PSIZE to the matching field
 * and bumps zb_count.
 */
typedef struct zdb_blkstats {
	uint64_t	zb_asize;	/* sum of BP_GET_ASIZE() */
	uint64_t	zb_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zb_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zb_count;	/* number of blocks counted */
} zdb_blkstats_t;
1880
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 * They are numbered directly after the real DMU object types, with
 * ZDB_OT_TOTAL as the final, all-types slot.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 2)

/* Names for the extended types above, in the same order. */
static char *zdb_ot_extname[] = {
	"deferred free",
	"dedup ditto",
	"Total",
};
1893
/* Pseudo-level used as the "all levels" row of zcb_type[][]. */
#define	ZB_TOTAL	DN_MAX_LEVELS

/*
 * State carried through a block traversal and handed to the callbacks
 * (zdb_blkptr_cb(), count_block_cb()) as their 'arg'.
 */
typedef struct zdb_cb {
	/* per-[level][type] totals; last row/column are the grand totals */
	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
	/* accumulated by zdb_ddt_leak_init() for non-ditto DDT entries */
	uint64_t	zcb_dedup_asize;
	uint64_t	zcb_dedup_blocks;
	/* read failures, indexed by the error zio_wait() returned */
	uint64_t	zcb_errors[256];
	int		zcb_readfails;	/* reset to 0 by zdb_blkptr_cb() */
	int		zcb_haderrors;	/* set once any read has failed */
	spa_t		*zcb_spa;	/* pool; set by zdb_leak_init() */
} zdb_cb_t;
1905
1906static void
1907zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
1908    dmu_object_type_t type)
1909{
1910	uint64_t refcnt = 0;
1911
1912	ASSERT(type < ZDB_OT_TOTAL);
1913
1914	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
1915		return;
1916
1917	for (int i = 0; i < 4; i++) {
1918		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1919		int t = (i & 1) ? type : ZDB_OT_TOTAL;
1920		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1921
1922		zb->zb_asize += BP_GET_ASIZE(bp);
1923		zb->zb_lsize += BP_GET_LSIZE(bp);
1924		zb->zb_psize += BP_GET_PSIZE(bp);
1925		zb->zb_count++;
1926	}
1927
1928	if (dump_opt['L'])
1929		return;
1930
1931	if (BP_GET_DEDUP(bp)) {
1932		ddt_t *ddt;
1933		ddt_entry_t *dde;
1934
1935		ddt = ddt_select(zcb->zcb_spa, bp);
1936		ddt_enter(ddt);
1937		dde = ddt_lookup(ddt, bp, B_FALSE);
1938
1939		if (dde == NULL) {
1940			refcnt = 0;
1941		} else {
1942			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
1943			ddt_phys_decref(ddp);
1944			refcnt = ddp->ddp_refcnt;
1945			if (ddt_phys_total_refcnt(dde) == 0)
1946				ddt_remove(ddt, dde);
1947		}
1948		ddt_exit(ddt);
1949	}
1950
1951	VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
1952	    refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
1953	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
1954}
1955
1956/* ARGSUSED */
1957static int
1958zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
1959    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1960{
1961	zdb_cb_t *zcb = arg;
1962	char blkbuf[BP_SPRINTF_LEN];
1963	dmu_object_type_t type;
1964	boolean_t is_metadata;
1965
1966	if (bp == NULL)
1967		return (0);
1968
1969	type = BP_GET_TYPE(bp);
1970
1971	zdb_count_block(zcb, zilog, bp, type);
1972
1973	is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
1974
1975	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
1976		int ioerr;
1977		size_t size = BP_GET_PSIZE(bp);
1978		void *data = malloc(size);
1979		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
1980
1981		/* If it's an intent log block, failure is expected. */
1982		if (zb->zb_level == ZB_ZIL_LEVEL)
1983			flags |= ZIO_FLAG_SPECULATIVE;
1984
1985		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
1986		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
1987
1988		free(data);
1989
1990		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
1991			zcb->zcb_haderrors = 1;
1992			zcb->zcb_errors[ioerr]++;
1993
1994			if (dump_opt['b'] >= 2)
1995				sprintf_blkptr(blkbuf, bp);
1996			else
1997				blkbuf[0] = '\0';
1998
1999			(void) printf("zdb_blkptr_cb: "
2000			    "Got error %d reading "
2001			    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2002			    ioerr,
2003			    (u_longlong_t)zb->zb_objset,
2004			    (u_longlong_t)zb->zb_object,
2005			    (u_longlong_t)zb->zb_level,
2006			    (u_longlong_t)zb->zb_blkid,
2007			    blkbuf);
2008		}
2009	}
2010
2011	zcb->zcb_readfails = 0;
2012
2013	if (dump_opt['b'] >= 4) {
2014		sprintf_blkptr(blkbuf, bp);
2015		(void) printf("objset %llu object %llu "
2016		    "level %lld offset 0x%llx %s\n",
2017		    (u_longlong_t)zb->zb_objset,
2018		    (u_longlong_t)zb->zb_object,
2019		    (longlong_t)zb->zb_level,
2020		    (u_longlong_t)blkid2offset(dnp, bp, zb),
2021		    blkbuf);
2022	}
2023
2024	return (0);
2025}
2026
2027static void
2028zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
2029{
2030	vdev_t *vd = sm->sm_ppd;
2031
2032	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
2033	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
2034}
2035
/*
 * First entry of zdb_space_map_ops.  Intentionally empty: zdb has
 * nothing to do at this point.
 */
/* ARGSUSED */
static void
zdb_space_map_load(space_map_t *sm)
{
}
2041
/*
 * Second entry of zdb_space_map_ops.  Vacates the map with zdb_leak() as
 * the callback, so whatever the map still contains is presumably
 * reported as leaked space -- confirm against space_map_vacate().
 */
static void
zdb_space_map_unload(space_map_t *sm)
{
	space_map_vacate(sm, zdb_leak, sm);
}
2047
/*
 * Claim entry of zdb_space_map_ops.  Intentionally empty: zdb keeps no
 * extra state when a segment is claimed.
 */
/* ARGSUSED */
static void
zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
}
2053
/*
 * Space map callbacks installed by zdb_leak_init() when it reloads each
 * metaslab's map.  Only the unload hook does real work; the alloc, free
 * and maxsize slots are left NULL.
 */
static space_map_ops_t zdb_space_map_ops = {
	zdb_space_map_load,
	zdb_space_map_unload,
	NULL,	/* alloc */
	zdb_space_map_claim,
	NULL,	/* free */
	NULL	/* maxsize */
};
2062
2063static void
2064zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2065{
2066	ddt_bookmark_t ddb = { 0 };
2067	ddt_entry_t dde;
2068	int error;
2069
2070	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2071		blkptr_t blk;
2072		ddt_phys_t *ddp = dde.dde_phys;
2073
2074		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2075			return;
2076
2077		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2078
2079		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2080			if (ddp->ddp_phys_birth == 0)
2081				continue;
2082			ddt_bp_create(ddb.ddb_checksum,
2083			    &dde.dde_key, ddp, &blk);
2084			if (p == DDT_PHYS_DITTO) {
2085				zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
2086			} else {
2087				zcb->zcb_dedup_asize +=
2088				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2089				zcb->zcb_dedup_blocks++;
2090			}
2091		}
2092		if (!dump_opt['L']) {
2093			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2094			ddt_enter(ddt);
2095			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2096			ddt_exit(ddt);
2097		}
2098	}
2099
2100	ASSERT(error == ENOENT);
2101}
2102
2103static void
2104zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
2105{
2106	zcb->zcb_spa = spa;
2107
2108	if (!dump_opt['L']) {
2109		vdev_t *rvd = spa->spa_root_vdev;
2110		for (int c = 0; c < rvd->vdev_children; c++) {
2111			vdev_t *vd = rvd->vdev_child[c];
2112			for (int m = 0; m < vd->vdev_ms_count; m++) {
2113				metaslab_t *msp = vd->vdev_ms[m];
2114				mutex_enter(&msp->ms_lock);
2115				space_map_unload(&msp->ms_map);
2116				VERIFY(space_map_load(&msp->ms_map,
2117				    &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
2118				    spa->spa_meta_objset) == 0);
2119				msp->ms_map.sm_ppd = vd;
2120				mutex_exit(&msp->ms_lock);
2121			}
2122		}
2123	}
2124
2125	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2126
2127	zdb_ddt_leak_init(spa, zcb);
2128
2129	spa_config_exit(spa, SCL_CONFIG, FTAG);
2130}
2131
2132static void
2133zdb_leak_fini(spa_t *spa)
2134{
2135	if (!dump_opt['L']) {
2136		vdev_t *rvd = spa->spa_root_vdev;
2137		for (int c = 0; c < rvd->vdev_children; c++) {
2138			vdev_t *vd = rvd->vdev_child[c];
2139			for (int m = 0; m < vd->vdev_ms_count; m++) {
2140				metaslab_t *msp = vd->vdev_ms[m];
2141				mutex_enter(&msp->ms_lock);
2142				space_map_unload(&msp->ms_map);
2143				mutex_exit(&msp->ms_lock);
2144			}
2145		}
2146	}
2147}
2148
2149/* ARGSUSED */
2150static int
2151count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2152{
2153	zdb_cb_t *zcb = arg;
2154
2155	if (dump_opt['b'] >= 4) {
2156		char blkbuf[BP_SPRINTF_LEN];
2157		sprintf_blkptr(blkbuf, bp);
2158		(void) printf("[%s] %s\n",
2159		    "deferred free", blkbuf);
2160	}
2161	zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
2162	return (0);
2163}
2164
2165static int
2166dump_block_stats(spa_t *spa)
2167{
2168	zdb_cb_t zcb = { 0 };
2169	zdb_blkstats_t *zb, *tzb;
2170	uint64_t norm_alloc, norm_space, total_alloc, total_found;
2171	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
2172	int leaks = 0;
2173
2174	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
2175	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
2176	    (dump_opt['c'] == 1) ? "metadata " : "",
2177	    dump_opt['c'] ? "checksums " : "",
2178	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
2179	    !dump_opt['L'] ? "nothing leaked " : "");
2180
2181	/*
2182	 * Load all space maps as SM_ALLOC maps, then traverse the pool
2183	 * claiming each block we discover.  If the pool is perfectly
2184	 * consistent, the space maps will be empty when we're done.
2185	 * Anything left over is a leak; any block we can't claim (because
2186	 * it's not part of any space map) is a double allocation,
2187	 * reference to a freed block, or an unclaimed log block.
2188	 */
2189	zdb_leak_init(spa, &zcb);
2190
2191	/*
2192	 * If there's a deferred-free bplist, process that first.
2193	 */
2194	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
2195	    count_block_cb, &zcb, NULL);
2196	(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
2197	    count_block_cb, &zcb, NULL);
2198
2199	if (dump_opt['c'] > 1)
2200		flags |= TRAVERSE_PREFETCH_DATA;
2201
2202	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2203
2204	if (zcb.zcb_haderrors) {
2205		(void) printf("\nError counts:\n\n");
2206		(void) printf("\t%5s  %s\n", "errno", "count");
2207		for (int e = 0; e < 256; e++) {
2208			if (zcb.zcb_errors[e] != 0) {
2209				(void) printf("\t%5d  %llu\n",
2210				    e, (u_longlong_t)zcb.zcb_errors[e]);
2211			}
2212		}
2213	}
2214
2215	/*
2216	 * Report any leaked segments.
2217	 */
2218	zdb_leak_fini(spa);
2219
2220	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2221
2222	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2223	norm_space = metaslab_class_get_space(spa_normal_class(spa));
2224
2225	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2226	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2227
2228	if (total_found == total_alloc) {
2229		if (!dump_opt['L'])
2230			(void) printf("\n\tNo leaks (block sum matches space"
2231			    " maps exactly)\n");
2232	} else {
2233		(void) printf("block traversal size %llu != alloc %llu "
2234		    "(%s %lld)\n",
2235		    (u_longlong_t)total_found,
2236		    (u_longlong_t)total_alloc,
2237		    (dump_opt['L']) ? "unreachable" : "leaked",
2238		    (longlong_t)(total_alloc - total_found));
2239		leaks = 1;
2240	}
2241
2242	if (tzb->zb_count == 0)
2243		return (2);
2244
2245	(void) printf("\n");
2246	(void) printf("\tbp count:      %10llu\n",
2247	    (u_longlong_t)tzb->zb_count);
2248	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
2249	    (u_longlong_t)tzb->zb_lsize,
2250	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
2251	(void) printf("\tbp physical:   %10llu      avg:"
2252	    " %6llu     compression: %6.2f\n",
2253	    (u_longlong_t)tzb->zb_psize,
2254	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
2255	    (double)tzb->zb_lsize / tzb->zb_psize);
2256	(void) printf("\tbp allocated:  %10llu      avg:"
2257	    " %6llu     compression: %6.2f\n",
2258	    (u_longlong_t)tzb->zb_asize,
2259	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
2260	    (double)tzb->zb_lsize / tzb->zb_asize);
2261	(void) printf("\tbp deduped:    %10llu    ref>1:"
2262	    " %6llu   deduplication: %6.2f\n",
2263	    (u_longlong_t)zcb.zcb_dedup_asize,
2264	    (u_longlong_t)zcb.zcb_dedup_blocks,
2265	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
2266	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
2267	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
2268
2269	if (dump_opt['b'] >= 2) {
2270		int l, t, level;
2271		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2272		    "\t  avg\t comp\t%%Total\tType\n");
2273
2274		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
2275			char csize[32], lsize[32], psize[32], asize[32];
2276			char avg[32];
2277			char *typename;
2278
2279			if (t < DMU_OT_NUMTYPES)
2280				typename = dmu_ot[t].ot_name;
2281			else
2282				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
2283
2284			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
2285				(void) printf("%6s\t%5s\t%5s\t%5s"
2286				    "\t%5s\t%5s\t%6s\t%s\n",
2287				    "-",
2288				    "-",
2289				    "-",
2290				    "-",
2291				    "-",
2292				    "-",
2293				    "-",
2294				    typename);
2295				continue;
2296			}
2297
2298			for (l = ZB_TOTAL - 1; l >= -1; l--) {
2299				level = (l == -1 ? ZB_TOTAL : l);
2300				zb = &zcb.zcb_type[level][t];
2301
2302				if (zb->zb_asize == 0)
2303					continue;
2304
2305				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
2306					continue;
2307
2308				if (level == 0 && zb->zb_asize ==
2309				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
2310					continue;
2311
2312				zdb_nicenum(zb->zb_count, csize);
2313				zdb_nicenum(zb->zb_lsize, lsize);
2314				zdb_nicenum(zb->zb_psize, psize);
2315				zdb_nicenum(zb->zb_asize, asize);
2316				zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2317
2318				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
2319				    "\t%5.2f\t%6.2f\t",
2320				    csize, lsize, psize, asize, avg,
2321				    (double)zb->zb_lsize / zb->zb_psize,
2322				    100.0 * zb->zb_asize / tzb->zb_asize);
2323
2324				if (level == ZB_TOTAL)
2325					(void) printf("%s\n", typename);
2326				else
2327					(void) printf("    L%d %s\n",
2328					    level, typename);
2329			}
2330		}
2331	}
2332
2333	(void) printf("\n");
2334
2335	if (leaks)
2336		return (2);
2337
2338	if (zcb.zcb_haderrors)
2339		return (3);
2340
2341	return (0);
2342}
2343
/*
 * One node of the AVL tree built by zdb_ddt_add_cb() for the simulated
 * dedup table: a DDT key plus running totals over every block pointer
 * seen with that key.
 */
typedef struct zdb_ddt_entry {
	ddt_key_t	zdde_key;	/* filled by ddt_key_fill() */
	uint64_t	zdde_ref_blocks; /* number of bps seen with this key */
	uint64_t	zdde_ref_lsize;	/* sum of BP_GET_LSIZE() over them */
	uint64_t	zdde_ref_psize;	/* sum of BP_GET_PSIZE() over them */
	uint64_t	zdde_ref_dsize;	/* sum of bp_get_dsize_sync() */
	avl_node_t	zdde_node;	/* linkage in the AVL tree */
} zdb_ddt_entry_t;
2352
2353/* ARGSUSED */
2354static int
2355zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2356    arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2357{
2358	avl_tree_t *t = arg;
2359	avl_index_t where;
2360	zdb_ddt_entry_t *zdde, zdde_search;
2361
2362	if (bp == NULL)
2363		return (0);
2364
2365	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2366		(void) printf("traversing objset %llu, %llu objects, "
2367		    "%lu blocks so far\n",
2368		    (u_longlong_t)zb->zb_objset,
2369		    (u_longlong_t)bp->blk_fill,
2370		    avl_numnodes(t));
2371	}
2372
2373	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2374	    BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
2375		return (0);
2376
2377	ddt_key_fill(&zdde_search.zdde_key, bp);
2378
2379	zdde = avl_find(t, &zdde_search, &where);
2380
2381	if (zdde == NULL) {
2382		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2383		zdde->zdde_key = zdde_search.zdde_key;
2384		avl_insert(t, zdde, where);
2385	}
2386
2387	zdde->zdde_ref_blocks += 1;
2388	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2389	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2390	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2391
2392	return (0);
2393}
2394
2395static void
2396dump_simulated_ddt(spa_t *spa)
2397{
2398	avl_tree_t t;
2399	void *cookie = NULL;
2400	zdb_ddt_entry_t *zdde;
2401	ddt_histogram_t ddh_total = { 0 };
2402	ddt_stat_t dds_total = { 0 };
2403
2404	avl_create(&t, ddt_entry_compare,
2405	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2406
2407	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2408
2409	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2410	    zdb_ddt_add_cb, &t);
2411
2412	spa_config_exit(spa, SCL_CONFIG, FTAG);
2413
2414	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2415		ddt_stat_t dds;
2416		uint64_t refcnt = zdde->zdde_ref_blocks;
2417		ASSERT(refcnt != 0);
2418
2419		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2420		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2421		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2422		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2423
2424		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2425		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2426		dds.dds_ref_psize = zdde->zdde_ref_psize;
2427		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2428
2429		ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
2430
2431		umem_free(zdde, sizeof (*zdde));
2432	}
2433
2434	avl_destroy(&t);
2435
2436	ddt_histogram_stat(&dds_total, &ddh_total);
2437
2438	(void) printf("Simulated DDT histogram:\n");
2439
2440	zpool_dump_ddt(&dds_total, &ddh_total);
2441
2442	dump_dedup_ratio(&dds_total);
2443}
2444
2445static void
2446dump_zpool(spa_t *spa)
2447{
2448	dsl_pool_t *dp = spa_get_dsl(spa);
2449	int rc = 0;
2450
2451	if (dump_opt['S']) {
2452		dump_simulated_ddt(spa);
2453		return;
2454	}
2455
2456	if (!dump_opt['e'] && dump_opt['C'] > 1) {
2457		(void) printf("\nCached configuration:\n");
2458		dump_nvlist(spa->spa_config, 8);
2459	}
2460
2461	if (dump_opt['C'])
2462		dump_config(spa);
2463
2464	if (dump_opt['u'])
2465		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
2466
2467	if (dump_opt['D'])
2468		dump_all_ddts(spa);
2469
2470	if (dump_opt['d'] > 2 || dump_opt['m'])
2471		dump_metaslabs(spa);
2472
2473	if (dump_opt['d'] || dump_opt['i']) {
2474		dump_dir(dp->dp_meta_objset);
2475		if (dump_opt['d'] >= 3) {
2476			dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees");
2477			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2478				dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
2479				    "Pool frees");
2480			}
2481			dump_dtl(spa->spa_root_vdev, 0);
2482		}
2483		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
2484		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2485	}
2486	if (dump_opt['b'] || dump_opt['c'])
2487		rc = dump_block_stats(spa);
2488
2489	if (dump_opt['s'])
2490		show_pool_stats(spa);
2491
2492	if (dump_opt['h'])
2493		dump_history(spa);
2494
2495	if (rc != 0)
2496		exit(rc);
2497}
2498
/*
 * Option bits for zdb_read_block().  Each bit is selected by a single
 * flag character; flagbits[] maps the character to its bit, with zero
 * meaning "not a valid flag".
 */
#define	ZDB_FLAG_CHECKSUM	(1 << 0)
#define	ZDB_FLAG_DECOMPRESS	(1 << 1)
#define	ZDB_FLAG_BSWAP		(1 << 2)
#define	ZDB_FLAG_GBH		(1 << 3)
#define	ZDB_FLAG_INDIRECT	(1 << 4)
#define	ZDB_FLAG_PHYS		(1 << 5)
#define	ZDB_FLAG_RAW		(1 << 6)
#define	ZDB_FLAG_PRINT_BLKPTR	(1 << 7)

int flagbits[256];
2509
2510static void
2511zdb_print_blkptr(blkptr_t *bp, int flags)
2512{
2513	char blkbuf[BP_SPRINTF_LEN];
2514
2515	if (flags & ZDB_FLAG_BSWAP)
2516		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2517
2518	sprintf_blkptr(blkbuf, bp);
2519	(void) printf("%s\n", blkbuf);
2520}
2521
2522static void
2523zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2524{
2525	int i;
2526
2527	for (i = 0; i < nbps; i++)
2528		zdb_print_blkptr(&bp[i], flags);
2529}
2530
2531static void
2532zdb_dump_gbh(void *buf, int flags)
2533{
2534	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2535}
2536
2537static void
2538zdb_dump_block_raw(void *buf, uint64_t size, int flags)
2539{
2540	if (flags & ZDB_FLAG_BSWAP)
2541		byteswap_uint64_array(buf, size);
2542	(void) write(1, buf, size);
2543}
2544
2545static void
2546zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2547{
2548	uint64_t *d = (uint64_t *)buf;
2549	int nwords = size / sizeof (uint64_t);
2550	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2551	int i, j;
2552	char *hdr, *c;
2553
2554
2555	if (do_bswap)
2556		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
2557	else
2558		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
2559
2560	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
2561
2562	for (i = 0; i < nwords; i += 2) {
2563		(void) printf("%06llx:  %016llx  %016llx  ",
2564		    (u_longlong_t)(i * sizeof (uint64_t)),
2565		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
2566		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
2567
2568		c = (char *)&d[i];
2569		for (j = 0; j < 2 * sizeof (uint64_t); j++)
2570			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
2571		(void) printf("\n");
2572	}
2573}
2574
2575/*
2576 * There are two acceptable formats:
2577 *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
2578 *	child[.child]*    - For example: 0.1.1
2579 *
2580 * The second form can be used to specify arbitrary vdevs anywhere
2581 * in the heirarchy.  For example, in a pool with a mirror of
2582 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
2583 */
2584static vdev_t *
2585zdb_vdev_lookup(vdev_t *vdev, char *path)
2586{
2587	char *s, *p, *q;
2588	int i;
2589
2590	if (vdev == NULL)
2591		return (NULL);
2592
2593	/* First, assume the x.x.x.x format */
2594	i = (int)strtoul(path, &s, 10);
2595	if (s == path || (s && *s != '.' && *s != '\0'))
2596		goto name;
2597	if (i < 0 || i >= vdev->vdev_children)
2598		return (NULL);
2599
2600	vdev = vdev->vdev_child[i];
2601	if (*s == '\0')
2602		return (vdev);
2603	return (zdb_vdev_lookup(vdev, s+1));
2604
2605name:
2606	for (i = 0; i < vdev->vdev_children; i++) {
2607		vdev_t *vc = vdev->vdev_child[i];
2608
2609		if (vc->vdev_path == NULL) {
2610			vc = zdb_vdev_lookup(vc, path);
2611			if (vc == NULL)
2612				continue;
2613			else
2614				return (vc);
2615		}
2616
2617		p = strrchr(vc->vdev_path, '/');
2618		p = p ? p + 1 : vc->vdev_path;
2619		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
2620
2621		if (strcmp(vc->vdev_path, path) == 0)
2622			return (vc);
2623		if (strcmp(p, path) == 0)
2624			return (vc);
2625		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
2626			return (vc);
2627	}
2628
2629	return (NULL);
2630}
2631
2632/*
2633 * Read a block from a pool and print it out.  The syntax of the
2634 * block descriptor is:
2635 *
2636 *	pool:vdev_specifier:offset:size[:flags]
2637 *
2638 *	pool           - The name of the pool you wish to read from
2639 *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
2640 *	offset         - offset, in hex, in bytes
2641 *	size           - Amount of data to read, in hex, in bytes
2642 *	flags          - A string of characters specifying options
2643 *		 b: Decode a blkptr at given offset within block
2644 *		*c: Calculate and display checksums
2645 *		 d: Decompress data before dumping
2646 *		 e: Byteswap data before dumping
2647 *		 g: Display data as a gang block header
2648 *		 i: Display as an indirect block
2649 *		 p: Do I/O to physical offset
2650 *		 r: Dump raw data to stdout
2651 *
2652 *              * = not yet implemented
2653 */
2654static void
2655zdb_read_block(char *thing, spa_t *spa)
2656{
2657	blkptr_t blk, *bp = &blk;
2658	dva_t *dva = bp->blk_dva;
2659	int flags = 0;
2660	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
2661	zio_t *zio;
2662	vdev_t *vd;
2663	void *pbuf, *lbuf, *buf;
2664	char *s, *p, *dup, *vdev, *flagstr;
2665	int i, error;
2666
2667	dup = strdup(thing);
2668	s = strtok(dup, ":");
2669	vdev = s ? s : "";
2670	s = strtok(NULL, ":");
2671	offset = strtoull(s ? s : "", NULL, 16);
2672	s = strtok(NULL, ":");
2673	size = strtoull(s ? s : "", NULL, 16);
2674	s = strtok(NULL, ":");
2675	flagstr = s ? s : "";
2676
2677	s = NULL;
2678	if (size == 0)
2679		s = "size must not be zero";
2680	if (!IS_P2ALIGNED(size, DEV_BSIZE))
2681		s = "size must be a multiple of sector size";
2682	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
2683		s = "offset must be a multiple of sector size";
2684	if (s) {
2685		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
2686		free(dup);
2687		return;
2688	}
2689
2690	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
2691		for (i = 0; flagstr[i]; i++) {
2692			int bit = flagbits[(uchar_t)flagstr[i]];
2693
2694			if (bit == 0) {
2695				(void) printf("***Invalid flag: %c\n",
2696				    flagstr[i]);
2697				continue;
2698			}
2699			flags |= bit;
2700
2701			/* If it's not something with an argument, keep going */
2702			if ((bit & (ZDB_FLAG_CHECKSUM |
2703			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
2704				continue;
2705
2706			p = &flagstr[i + 1];
2707			if (bit == ZDB_FLAG_PRINT_BLKPTR)
2708				blkptr_offset = strtoull(p, &p, 16);
2709			if (*p != ':' && *p != '\0') {
2710				(void) printf("***Invalid flag arg: '%s'\n", s);
2711				free(dup);
2712				return;
2713			}
2714		}
2715	}
2716
2717	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
2718	if (vd == NULL) {
2719		(void) printf("***Invalid vdev: %s\n", vdev);
2720		free(dup);
2721		return;
2722	} else {
2723		if (vd->vdev_path)
2724			(void) fprintf(stderr, "Found vdev: %s\n",
2725			    vd->vdev_path);
2726		else
2727			(void) fprintf(stderr, "Found vdev type: %s\n",
2728			    vd->vdev_ops->vdev_op_type);
2729	}
2730
2731	psize = size;
2732	lsize = size;
2733
2734	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2735	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2736
2737	BP_ZERO(bp);
2738
2739	DVA_SET_VDEV(&dva[0], vd->vdev_id);
2740	DVA_SET_OFFSET(&dva[0], offset);
2741	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
2742	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
2743
2744	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
2745
2746	BP_SET_LSIZE(bp, lsize);
2747	BP_SET_PSIZE(bp, psize);
2748	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
2749	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
2750	BP_SET_TYPE(bp, DMU_OT_NONE);
2751	BP_SET_LEVEL(bp, 0);
2752	BP_SET_DEDUP(bp, 0);
2753	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
2754
2755	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2756	zio = zio_root(spa, NULL, NULL, 0);
2757
2758	if (vd == vd->vdev_top) {
2759		/*
2760		 * Treat this as a normal block read.
2761		 */
2762		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
2763		    ZIO_PRIORITY_SYNC_READ,
2764		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
2765	} else {
2766		/*
2767		 * Treat this as a vdev child I/O.
2768		 */
2769		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
2770		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
2771		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
2772		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
2773		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
2774	}
2775
2776	error = zio_wait(zio);
2777	spa_config_exit(spa, SCL_STATE, FTAG);
2778
2779	if (error) {
2780		(void) printf("Read of %s failed, error: %d\n", thing, error);
2781		goto out;
2782	}
2783
2784	if (flags & ZDB_FLAG_DECOMPRESS) {
2785		/*
2786		 * We don't know how the data was compressed, so just try
2787		 * every decompress function at every inflated blocksize.
2788		 */
2789		enum zio_compress c;
2790		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2791		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2792
2793		bcopy(pbuf, pbuf2, psize);
2794
2795		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
2796		    SPA_MAXBLOCKSIZE - psize) == 0);
2797
2798		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
2799		    SPA_MAXBLOCKSIZE - psize) == 0);
2800
2801		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
2802		    lsize -= SPA_MINBLOCKSIZE) {
2803			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
2804				if (zio_decompress_data(c, pbuf, lbuf,
2805				    psize, lsize) == 0 &&
2806				    zio_decompress_data(c, pbuf2, lbuf2,
2807				    psize, lsize) == 0 &&
2808				    bcmp(lbuf, lbuf2, lsize) == 0)
2809					break;
2810			}
2811			if (c != ZIO_COMPRESS_FUNCTIONS)
2812				break;
2813			lsize -= SPA_MINBLOCKSIZE;
2814		}
2815
2816		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
2817		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
2818
2819		if (lsize <= psize) {
2820			(void) printf("Decompress of %s failed\n", thing);
2821			goto out;
2822		}
2823		buf = lbuf;
2824		size = lsize;
2825	} else {
2826		buf = pbuf;
2827		size = psize;
2828	}
2829
2830	if (flags & ZDB_FLAG_PRINT_BLKPTR)
2831		zdb_print_blkptr((blkptr_t *)(void *)
2832		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2833	else if (flags & ZDB_FLAG_RAW)
2834		zdb_dump_block_raw(buf, size, flags);
2835	else if (flags & ZDB_FLAG_INDIRECT)
2836		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2837		    flags);
2838	else if (flags & ZDB_FLAG_GBH)
2839		zdb_dump_gbh(buf, flags);
2840	else
2841		zdb_dump_block(thing, buf, size, flags);
2842
2843out:
2844	umem_free(pbuf, SPA_MAXBLOCKSIZE);
2845	umem_free(lbuf, SPA_MAXBLOCKSIZE);
2846	free(dup);
2847}
2848
2849static boolean_t
2850pool_match(nvlist_t *cfg, char *tgt)
2851{
2852	uint64_t v, guid = strtoull(tgt, NULL, 0);
2853	char *s;
2854
2855	if (guid != 0) {
2856		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
2857			return (v == guid);
2858	} else {
2859		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
2860			return (strcmp(s, tgt) == 0);
2861	}
2862	return (B_FALSE);
2863}
2864
2865static char *
2866find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
2867{
2868	nvlist_t *pools;
2869	nvlist_t *match = NULL;
2870	char *name = NULL;
2871	char *sepp = NULL;
2872	char sep;
2873	int count = 0;
2874	importargs_t args = { 0 };
2875
2876	args.paths = dirc;
2877	args.path = dirv;
2878	args.can_be_active = B_TRUE;
2879
2880	if ((sepp = strpbrk(*target, "/@")) != NULL) {
2881		sep = *sepp;
2882		*sepp = '\0';
2883	}
2884
2885	pools = zpool_search_import(g_zfs, &args);
2886
2887	if (pools != NULL) {
2888		nvpair_t *elem = NULL;
2889		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
2890			verify(nvpair_value_nvlist(elem, configp) == 0);
2891			if (pool_match(*configp, *target)) {
2892				count++;
2893				if (match != NULL) {
2894					/* print previously found config */
2895					if (name != NULL) {
2896						(void) printf("%s\n", name);
2897						dump_nvlist(match, 8);
2898						name = NULL;
2899					}
2900					(void) printf("%s\n",
2901					    nvpair_name(elem));
2902					dump_nvlist(*configp, 8);
2903				} else {
2904					match = *configp;
2905					name = nvpair_name(elem);
2906				}
2907			}
2908		}
2909	}
2910	if (count > 1)
2911		(void) fatal("\tMatched %d pools - use pool GUID "
2912		    "instead of pool name or \n"
2913		    "\tpool name part of a dataset name to select pool", count);
2914
2915	if (sepp)
2916		*sepp = sep;
2917	/*
2918	 * If pool GUID was specified for pool id, replace it with pool name
2919	 */
2920	if (name && (strstr(*target, name) != *target)) {
2921		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
2922
2923		*target = umem_alloc(sz, UMEM_NOFAIL);
2924		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
2925	}
2926
2927	*configp = name ? match : NULL;
2928
2929	return (name);
2930}
2931
2932int
2933main(int argc, char **argv)
2934{
2935	int i, c;
2936	struct rlimit rl = { 1024, 1024 };
2937	spa_t *spa = NULL;
2938	objset_t *os = NULL;
2939	int dump_all = 1;
2940	int verbose = 0;
2941	int error = 0;
2942	char **searchdirs = NULL;
2943	int nsearch = 0;
2944	char *target;
2945	nvlist_t *policy = NULL;
2946	uint64_t max_txg = UINT64_MAX;
2947	int rewind = ZPOOL_NEVER_REWIND;
2948
2949	(void) setrlimit(RLIMIT_NOFILE, &rl);
2950	(void) enable_extended_FILE_stdio(-1, -1);
2951
2952	dprintf_setup(&argc, argv);
2953
2954	while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
2955		switch (c) {
2956		case 'b':
2957		case 'c':
2958		case 'd':
2959		case 'h':
2960		case 'i':
2961		case 'l':
2962		case 'm':
2963		case 's':
2964		case 'u':
2965		case 'C':
2966		case 'D':
2967		case 'R':
2968		case 'S':
2969			dump_opt[c]++;
2970			dump_all = 0;
2971			break;
2972		case 'A':
2973		case 'F':
2974		case 'L':
2975		case 'X':
2976		case 'e':
2977		case 'P':
2978			dump_opt[c]++;
2979			break;
2980		case 'v':
2981			verbose++;
2982			break;
2983		case 'p':
2984			if (searchdirs == NULL) {
2985				searchdirs = umem_alloc(sizeof (char *),
2986				    UMEM_NOFAIL);
2987			} else {
2988				char **tmp = umem_alloc((nsearch + 1) *
2989				    sizeof (char *), UMEM_NOFAIL);
2990				bcopy(searchdirs, tmp, nsearch *
2991				    sizeof (char *));
2992				umem_free(searchdirs,
2993				    nsearch * sizeof (char *));
2994				searchdirs = tmp;
2995			}
2996			searchdirs[nsearch++] = optarg;
2997			break;
2998		case 't':
2999			max_txg = strtoull(optarg, NULL, 0);
3000			if (max_txg < TXG_INITIAL) {
3001				(void) fprintf(stderr, "incorrect txg "
3002				    "specified: %s\n", optarg);
3003				usage();
3004			}
3005			break;
3006		case 'U':
3007			spa_config_path = optarg;
3008			break;
3009		default:
3010			usage();
3011			break;
3012		}
3013	}
3014
3015	if (!dump_opt['e'] && searchdirs != NULL) {
3016		(void) fprintf(stderr, "-p option requires use of -e\n");
3017		usage();
3018	}
3019
3020	kernel_init(FREAD);
3021	g_zfs = libzfs_init();
3022	ASSERT(g_zfs != NULL);
3023
3024	if (dump_all)
3025		verbose = MAX(verbose, 1);
3026
3027	for (c = 0; c < 256; c++) {
3028		if (dump_all && !strchr("elAFLRSXP", c))
3029			dump_opt[c] = 1;
3030		if (dump_opt[c])
3031			dump_opt[c] += verbose;
3032	}
3033
3034	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
3035	zfs_recover = (dump_opt['A'] > 1);
3036
3037	argc -= optind;
3038	argv += optind;
3039
3040	if (argc < 2 && dump_opt['R'])
3041		usage();
3042	if (argc < 1) {
3043		if (!dump_opt['e'] && dump_opt['C']) {
3044			dump_cachefile(spa_config_path);
3045			return (0);
3046		}
3047		usage();
3048	}
3049
3050	if (dump_opt['l']) {
3051		dump_label(argv[0]);
3052		return (0);
3053	}
3054
3055	if (dump_opt['X'] || dump_opt['F'])
3056		rewind = ZPOOL_DO_REWIND |
3057		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
3058
3059	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
3060	    nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
3061	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
3062		fatal("internal error: %s", strerror(ENOMEM));
3063
3064	error = 0;
3065	target = argv[0];
3066
3067	if (dump_opt['e']) {
3068		nvlist_t *cfg = NULL;
3069		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
3070
3071		error = ENOENT;
3072		if (name) {
3073			if (dump_opt['C'] > 1) {
3074				(void) printf("\nConfiguration for import:\n");
3075				dump_nvlist(cfg, 8);
3076			}
3077			if (nvlist_add_nvlist(cfg,
3078			    ZPOOL_REWIND_POLICY, policy) != 0) {
3079				fatal("can't open '%s': %s",
3080				    target, strerror(ENOMEM));
3081			}
3082			if ((error = spa_import(name, cfg, NULL,
3083			    ZFS_IMPORT_MISSING_LOG)) != 0) {
3084				error = spa_import(name, cfg, NULL,
3085				    ZFS_IMPORT_VERBATIM);
3086			}
3087		}
3088	}
3089
3090	if (error == 0) {
3091		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
3092			error = spa_open_rewind(target, &spa, FTAG, policy,
3093			    NULL);
3094			if (error) {
3095				/*
3096				 * If we're missing the log device then
3097				 * try opening the pool after clearing the
3098				 * log state.
3099				 */
3100				mutex_enter(&spa_namespace_lock);
3101				if ((spa = spa_lookup(target)) != NULL &&
3102				    spa->spa_log_state == SPA_LOG_MISSING) {
3103					spa->spa_log_state = SPA_LOG_CLEAR;
3104					error = 0;
3105				}
3106				mutex_exit(&spa_namespace_lock);
3107
3108				if (!error) {
3109					error = spa_open_rewind(target, &spa,
3110					    FTAG, policy, NULL);
3111				}
3112			}
3113		} else {
3114			error = dmu_objset_own(target, DMU_OST_ANY,
3115			    B_TRUE, FTAG, &os);
3116		}
3117	}
3118	nvlist_free(policy);
3119
3120	if (error)
3121		fatal("can't open '%s': %s", target, strerror(error));
3122
3123	argv++;
3124	argc--;
3125	if (!dump_opt['R']) {
3126		if (argc > 0) {
3127			zopt_objects = argc;
3128			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
3129			for (i = 0; i < zopt_objects; i++) {
3130				errno = 0;
3131				zopt_object[i] = strtoull(argv[i], NULL, 0);
3132				if (zopt_object[i] == 0 && errno != 0)
3133					fatal("bad number %s: %s",
3134					    argv[i], strerror(errno));
3135			}
3136		}
3137		(os != NULL) ? dump_dir(os) : dump_zpool(spa);
3138	} else {
3139		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
3140		flagbits['c'] = ZDB_FLAG_CHECKSUM;
3141		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
3142		flagbits['e'] = ZDB_FLAG_BSWAP;
3143		flagbits['g'] = ZDB_FLAG_GBH;
3144		flagbits['i'] = ZDB_FLAG_INDIRECT;
3145		flagbits['p'] = ZDB_FLAG_PHYS;
3146		flagbits['r'] = ZDB_FLAG_RAW;
3147
3148		for (i = 0; i < argc; i++)
3149			zdb_read_block(argv[i], spa);
3150	}
3151
3152	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
3153
3154	fuid_table_destroy();
3155	sa_loaded = B_FALSE;
3156
3157	libzfs_fini(g_zfs);
3158	kernel_fini();
3159
3160	return (0);
3161}
3162