zdb.c revision bc3975b5da87263594446fb360ece9c65e722fca
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <stdio.h>
27#include <stdio_ext.h>
28#include <stdlib.h>
29#include <ctype.h>
30#include <sys/zfs_context.h>
31#include <sys/spa.h>
32#include <sys/spa_impl.h>
33#include <sys/dmu.h>
34#include <sys/zap.h>
35#include <sys/fs/zfs.h>
36#include <sys/zfs_znode.h>
37#include <sys/vdev.h>
38#include <sys/vdev_impl.h>
39#include <sys/metaslab_impl.h>
40#include <sys/dmu_objset.h>
41#include <sys/dsl_dir.h>
42#include <sys/dsl_dataset.h>
43#include <sys/dsl_pool.h>
44#include <sys/dbuf.h>
45#include <sys/zil.h>
46#include <sys/zil_impl.h>
47#include <sys/stat.h>
48#include <sys/resource.h>
49#include <sys/dmu_traverse.h>
50#include <sys/zio_checksum.h>
51#include <sys/zio_compress.h>
52#include <sys/zfs_fuid.h>
53#include <sys/arc.h>
54#include <sys/ddt.h>
55#undef ZFS_MAXNAMELEN
56#undef verify
57#include <libzfs.h>
58
/*
 * Bounds-checked name lookups.  Each of the three *_NAME macros indexes its
 * table only when idx is below the table's element count; otherwise it
 * yields the literal "UNKNOWN".  Note that idx is evaluated twice, so it
 * must not have side effects.
 */
#define	ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
    zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
    zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
    dmu_ot[(idx)].ot_name : "UNKNOWN")
/* Collapses every out-of-range object type onto DMU_OT_NUMTYPES. */
#define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
66
/*
 * zfs_recover is defined outside this file; when building under lint a
 * local definition is supplied instead.
 */
#ifndef lint
extern int zfs_recover;
#else
int zfs_recover;
#endif

/* Program name used as the prefix of usage() and fatal() messages. */
const char cmdname[] = "zdb";
/*
 * Per-option levels, indexed by the option character (e.g. dump_opt['d']).
 * The dump routines compare these against numeric thresholds; presumably
 * each occurrence of an option on the command line raises its level --
 * confirm against the option parser.
 */
uint8_t dump_opt[256];

/* Signature shared by the per-object-type dump_*() viewers in this file. */
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

/* Defined outside this file. */
extern void dump_intent_log(zilog_t *);
/*
 * Numeric arguments and their count.  dump_metaslabs() reads them as a vdev
 * id followed by metaslab numbers; other consumers are not visible here.
 */
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
/* libzfs library handle -- NOTE(review): initialization is not visible here. */
libzfs_handle_t *g_zfs;
82
83/*
84 * These libumem hooks provide a reasonable set of defaults for the allocator's
85 * debugging facilities.
86 */
/*
 * Returns the default allocator debugging configuration, expressed in the
 * same syntax as the $UMEM_DEBUG environment variable.
 *
 * Declared with an explicit (void) parameter list so that it is a proper
 * prototype, matching _umem_logging_init() below.
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
92
/*
 * Returns the default allocator logging configuration, expressed in the
 * same syntax as the $UMEM_LOGGING environment variable.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging_defaults[] = "fail,contents";

	return (umem_logging_defaults);
}
98
99static void
100usage(void)
101{
102	(void) fprintf(stderr,
103	    "Usage: %s [-CumdibcsvhL] poolname [object...]\n"
104	    "       %s [-div] dataset [object...]\n"
105	    "       %s -m [-L] poolname [vdev [metaslab...]]\n"
106	    "       %s -R poolname vdev:offset:size[:flags]\n"
107	    "       %s -S poolname\n"
108	    "       %s -l [-u] device\n"
109	    "       %s -C\n\n",
110	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
111
112	(void) fprintf(stderr, "    Dataset name must include at least one "
113	    "separator character '/' or '@'\n");
114	(void) fprintf(stderr, "    If dataset name is specified, only that "
115	    "dataset is dumped\n");
116	(void) fprintf(stderr, "    If object numbers are specified, only "
117	    "those objects are dumped\n\n");
118	(void) fprintf(stderr, "    Options to control amount of output:\n");
119	(void) fprintf(stderr, "        -u uberblock\n");
120	(void) fprintf(stderr, "        -d dataset(s)\n");
121	(void) fprintf(stderr, "        -i intent logs\n");
122	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
123	(void) fprintf(stderr, "        -h pool history\n");
124	(void) fprintf(stderr, "        -b block statistics\n");
125	(void) fprintf(stderr, "        -m metaslabs\n");
126	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
127	    "all data) blocks\n");
128	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
129	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
130	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
131	(void) fprintf(stderr, "        -l dump label contents\n");
132	(void) fprintf(stderr, "        -L disable leak tracking (do not "
133	    "load spacemaps)\n");
134	(void) fprintf(stderr, "        -R read and display block from a "
135	    "device\n\n");
136	(void) fprintf(stderr, "    Below options are intended for use "
137	    "with other options (except -l):\n");
138	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
139	    "panic recovery (-AA) or both (-AAA)\n");
140	(void) fprintf(stderr, "        -F attempt automatic rewind within "
141	    "safe range of transaction groups\n");
142	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
143	    "cachefile\n");
144	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
145	    "work with dataset)\n");
146	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
147	    "has altroot/not in a cachefile\n");
148	(void) fprintf(stderr, "        -p <path> -- use one or more with "
149	    "-e to specify path to vdev dir\n");
150	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
151	    "searching for uberblocks\n");
152	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
153	    "to make only that option verbose\n");
154	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
155	exit(1);
156}
157
158/*
159 * Called for usage errors that are discovered after a call to spa_open(),
160 * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
161 */
162
163static void
164fatal(const char *fmt, ...)
165{
166	va_list ap;
167
168	va_start(ap, fmt);
169	(void) fprintf(stderr, "%s: ", cmdname);
170	(void) vfprintf(stderr, fmt, ap);
171	va_end(ap);
172	(void) fprintf(stderr, "\n");
173
174	exit(1);
175}
176
177/* ARGSUSED */
178static void
179dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
180{
181	nvlist_t *nv;
182	size_t nvsize = *(uint64_t *)data;
183	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
184
185	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
186
187	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
188
189	umem_free(packed, nvsize);
190
191	dump_nvlist(nv, 8);
192
193	nvlist_free(nv);
194}
195
196const char dump_zap_stars[] = "****************************************";
197const int dump_zap_width = sizeof (dump_zap_stars) - 1;
198
199static void
200dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
201{
202	int i;
203	int minidx = ZAP_HISTOGRAM_SIZE - 1;
204	int maxidx = 0;
205	uint64_t max = 0;
206
207	for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
208		if (histo[i] > max)
209			max = histo[i];
210		if (histo[i] > 0 && i > maxidx)
211			maxidx = i;
212		if (histo[i] > 0 && i < minidx)
213			minidx = i;
214	}
215
216	if (max < dump_zap_width)
217		max = dump_zap_width;
218
219	for (i = minidx; i <= maxidx; i++)
220		(void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
221		    &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
222}
223
224static void
225dump_zap_stats(objset_t *os, uint64_t object)
226{
227	int error;
228	zap_stats_t zs;
229
230	error = zap_get_stats(os, object, &zs);
231	if (error)
232		return;
233
234	if (zs.zs_ptrtbl_len == 0) {
235		ASSERT(zs.zs_num_blocks == 1);
236		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
237		    (u_longlong_t)zs.zs_blocksize,
238		    (u_longlong_t)zs.zs_num_entries);
239		return;
240	}
241
242	(void) printf("\tFat ZAP stats:\n");
243
244	(void) printf("\t\tPointer table:\n");
245	(void) printf("\t\t\t%llu elements\n",
246	    (u_longlong_t)zs.zs_ptrtbl_len);
247	(void) printf("\t\t\tzt_blk: %llu\n",
248	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
249	(void) printf("\t\t\tzt_numblks: %llu\n",
250	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
251	(void) printf("\t\t\tzt_shift: %llu\n",
252	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
253	(void) printf("\t\t\tzt_blks_copied: %llu\n",
254	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
255	(void) printf("\t\t\tzt_nextblk: %llu\n",
256	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
257
258	(void) printf("\t\tZAP entries: %llu\n",
259	    (u_longlong_t)zs.zs_num_entries);
260	(void) printf("\t\tLeaf blocks: %llu\n",
261	    (u_longlong_t)zs.zs_num_leafs);
262	(void) printf("\t\tTotal blocks: %llu\n",
263	    (u_longlong_t)zs.zs_num_blocks);
264	(void) printf("\t\tzap_block_type: 0x%llx\n",
265	    (u_longlong_t)zs.zs_block_type);
266	(void) printf("\t\tzap_magic: 0x%llx\n",
267	    (u_longlong_t)zs.zs_magic);
268	(void) printf("\t\tzap_salt: 0x%llx\n",
269	    (u_longlong_t)zs.zs_salt);
270
271	(void) printf("\t\tLeafs with 2^n pointers:\n");
272	dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
273
274	(void) printf("\t\tBlocks with n*5 entries:\n");
275	dump_zap_histogram(zs.zs_blocks_with_n5_entries);
276
277	(void) printf("\t\tBlocks n/10 full:\n");
278	dump_zap_histogram(zs.zs_blocks_n_tenths_full);
279
280	(void) printf("\t\tEntries with n chunks:\n");
281	dump_zap_histogram(zs.zs_entries_using_n_chunks);
282
283	(void) printf("\t\tBuckets with n entries:\n");
284	dump_zap_histogram(zs.zs_buckets_with_n_entries);
285}
286
/*
 * Object viewer that intentionally prints nothing; all arguments are
 * ignored.
 */
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}
292
293/*ARGSUSED*/
294static void
295dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
296{
297	(void) printf("\tUNKNOWN OBJECT TYPE\n");
298}
299
/*
 * Object viewer with an empty body: prints nothing and ignores all of its
 * arguments.  Unlike the neighbouring viewers it has external linkage.
 */
/*ARGSUSED*/
void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}
305
/*
 * Object viewer with an empty body: prints nothing and ignores all of its
 * arguments.
 */
/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
}
311
312/*ARGSUSED*/
313static void
314dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
315{
316	zap_cursor_t zc;
317	zap_attribute_t attr;
318	void *prop;
319	int i;
320
321	dump_zap_stats(os, object);
322	(void) printf("\n");
323
324	for (zap_cursor_init(&zc, os, object);
325	    zap_cursor_retrieve(&zc, &attr) == 0;
326	    zap_cursor_advance(&zc)) {
327		(void) printf("\t\t%s = ", attr.za_name);
328		if (attr.za_num_integers == 0) {
329			(void) printf("\n");
330			continue;
331		}
332		prop = umem_zalloc(attr.za_num_integers *
333		    attr.za_integer_length, UMEM_NOFAIL);
334		(void) zap_lookup(os, object, attr.za_name,
335		    attr.za_integer_length, attr.za_num_integers, prop);
336		if (attr.za_integer_length == 1) {
337			(void) printf("%s", (char *)prop);
338		} else {
339			for (i = 0; i < attr.za_num_integers; i++) {
340				switch (attr.za_integer_length) {
341				case 2:
342					(void) printf("%u ",
343					    ((uint16_t *)prop)[i]);
344					break;
345				case 4:
346					(void) printf("%u ",
347					    ((uint32_t *)prop)[i]);
348					break;
349				case 8:
350					(void) printf("%lld ",
351					    (u_longlong_t)((int64_t *)prop)[i]);
352					break;
353				}
354			}
355		}
356		(void) printf("\n");
357		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
358	}
359	zap_cursor_fini(&zc);
360}
361
/*
 * Object viewer for DDT ZAP objects: prints only the ZAP statistics.  The
 * data and size arguments are unused.
 */
/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
	dump_zap_stats(os, object);
	/* contents are printed elsewhere, properly decoded */
}
369
370/*ARGSUSED*/
371static void
372dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
373{
374	zap_cursor_t zc;
375	zap_attribute_t attr;
376	const char *typenames[] = {
377		/* 0 */ "not specified",
378		/* 1 */ "FIFO",
379		/* 2 */ "Character Device",
380		/* 3 */ "3 (invalid)",
381		/* 4 */ "Directory",
382		/* 5 */ "5 (invalid)",
383		/* 6 */ "Block Device",
384		/* 7 */ "7 (invalid)",
385		/* 8 */ "Regular File",
386		/* 9 */ "9 (invalid)",
387		/* 10 */ "Symbolic Link",
388		/* 11 */ "11 (invalid)",
389		/* 12 */ "Socket",
390		/* 13 */ "Door",
391		/* 14 */ "Event Port",
392		/* 15 */ "15 (invalid)",
393	};
394
395	dump_zap_stats(os, object);
396	(void) printf("\n");
397
398	for (zap_cursor_init(&zc, os, object);
399	    zap_cursor_retrieve(&zc, &attr) == 0;
400	    zap_cursor_advance(&zc)) {
401		(void) printf("\t\t%s = %lld (type: %s)\n",
402		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
403		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
404	}
405	zap_cursor_fini(&zc);
406}
407
408static void
409dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
410{
411	uint64_t alloc, offset, entry;
412	uint8_t mapshift = sm->sm_shift;
413	uint64_t mapstart = sm->sm_start;
414	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
415			    "INVALID", "INVALID", "INVALID", "INVALID" };
416
417	if (smo->smo_object == 0)
418		return;
419
420	/*
421	 * Print out the freelist entries in both encoded and decoded form.
422	 */
423	alloc = 0;
424	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
425		VERIFY(0 == dmu_read(os, smo->smo_object, offset,
426		    sizeof (entry), &entry, DMU_READ_PREFETCH));
427		if (SM_DEBUG_DECODE(entry)) {
428			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
429			    (u_longlong_t)(offset / sizeof (entry)),
430			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
431			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
432			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
433		} else {
434			(void) printf("\t    [%6llu]    %c  range:"
435			    " %010llx-%010llx  size: %06llx\n",
436			    (u_longlong_t)(offset / sizeof (entry)),
437			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
438			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
439			    mapshift) + mapstart),
440			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
441			    mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
442			    mapshift)),
443			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
444			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
445				alloc += SM_RUN_DECODE(entry) << mapshift;
446			else
447				alloc -= SM_RUN_DECODE(entry) << mapshift;
448		}
449	}
450	if (alloc != smo->smo_alloc) {
451		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
452		    "with space map summary (%llu)\n",
453		    (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
454	}
455}
456
457static void
458dump_metaslab_stats(metaslab_t *msp)
459{
460	char maxbuf[5];
461	space_map_t *sm = &msp->ms_map;
462	avl_tree_t *t = sm->sm_pp_root;
463	int free_pct = sm->sm_space * 100 / sm->sm_size;
464
465	nicenum(space_map_maxsize(sm), maxbuf);
466
467	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
468	    "segments", avl_numnodes(t), "maxsize", maxbuf,
469	    "freepct", free_pct);
470}
471
472static void
473dump_metaslab(metaslab_t *msp)
474{
475	vdev_t *vd = msp->ms_group->mg_vd;
476	spa_t *spa = vd->vdev_spa;
477	space_map_t *sm = &msp->ms_map;
478	space_map_obj_t *smo = &msp->ms_smo;
479	char freebuf[5];
480
481	nicenum(sm->sm_size - smo->smo_alloc, freebuf);
482
483	(void) printf(
484	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
485	    (u_longlong_t)(sm->sm_start / sm->sm_size),
486	    (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
487
488	if (dump_opt['m'] > 1 && !dump_opt['L']) {
489		mutex_enter(&msp->ms_lock);
490		space_map_load_wait(sm);
491		if (!sm->sm_loaded)
492			VERIFY(space_map_load(sm, zfs_metaslab_ops,
493			    SM_FREE, smo, spa->spa_meta_objset) == 0);
494		dump_metaslab_stats(msp);
495		space_map_unload(sm);
496		mutex_exit(&msp->ms_lock);
497	}
498
499	if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
500		ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift));
501
502		mutex_enter(&msp->ms_lock);
503		dump_spacemap(spa->spa_meta_objset, smo, sm);
504		mutex_exit(&msp->ms_lock);
505	}
506}
507
508static void
509print_vdev_metaslab_header(vdev_t *vd)
510{
511	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
512	    (u_longlong_t)vd->vdev_id,
513	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
514	    "offset", "spacemap", "free");
515	(void) printf("\t%15s   %19s   %15s   %10s\n",
516	    "---------------", "-------------------",
517	    "---------------", "-------------");
518}
519
520static void
521dump_metaslabs(spa_t *spa)
522{
523	vdev_t *vd, *rvd = spa->spa_root_vdev;
524	uint64_t m, c = 0, children = rvd->vdev_children;
525
526	(void) printf("\nMetaslabs:\n");
527
528	if (!dump_opt['d'] && zopt_objects > 0) {
529		c = zopt_object[0];
530
531		if (c >= children)
532			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
533
534		if (zopt_objects > 1) {
535			vd = rvd->vdev_child[c];
536			print_vdev_metaslab_header(vd);
537
538			for (m = 1; m < zopt_objects; m++) {
539				if (zopt_object[m] < vd->vdev_ms_count)
540					dump_metaslab(
541					    vd->vdev_ms[zopt_object[m]]);
542				else
543					(void) fprintf(stderr, "bad metaslab "
544					    "number %llu\n",
545					    (u_longlong_t)zopt_object[m]);
546			}
547			(void) printf("\n");
548			return;
549		}
550		children = c + 1;
551	}
552	for (; c < children; c++) {
553		vd = rvd->vdev_child[c];
554		print_vdev_metaslab_header(vd);
555
556		for (m = 0; m < vd->vdev_ms_count; m++)
557			dump_metaslab(vd->vdev_ms[m]);
558		(void) printf("\n");
559	}
560}
561
562static void
563dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
564{
565	const ddt_phys_t *ddp = dde->dde_phys;
566	const ddt_key_t *ddk = &dde->dde_key;
567	char *types[4] = { "ditto", "single", "double", "triple" };
568	char blkbuf[BP_SPRINTF_LEN];
569	blkptr_t blk;
570
571	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
572		if (ddp->ddp_phys_birth == 0)
573			continue;
574		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
575		sprintf_blkptr(blkbuf, &blk);
576		(void) printf("index %llx refcnt %llu %s %s\n",
577		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
578		    types[p], blkbuf);
579	}
580}
581
582static void
583dump_dedup_ratio(const ddt_stat_t *dds)
584{
585	double rL, rP, rD, D, dedup, compress, copies;
586
587	if (dds->dds_blocks == 0)
588		return;
589
590	rL = (double)dds->dds_ref_lsize;
591	rP = (double)dds->dds_ref_psize;
592	rD = (double)dds->dds_ref_dsize;
593	D = (double)dds->dds_dsize;
594
595	dedup = rD / D;
596	compress = rL / rP;
597	copies = rD / rP;
598
599	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
600	    "dedup * compress / copies = %.2f\n\n",
601	    dedup, compress, copies, dedup * compress / copies);
602}
603
604static void
605dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
606{
607	char name[DDT_NAMELEN];
608	ddt_entry_t dde;
609	uint64_t walk = 0;
610	dmu_object_info_t doi;
611	uint64_t count, dspace, mspace;
612	int error;
613
614	error = ddt_object_info(ddt, type, class, &doi);
615
616	if (error == ENOENT)
617		return;
618	ASSERT(error == 0);
619
620	count = ddt_object_count(ddt, type, class);
621	dspace = doi.doi_physical_blocks_512 << 9;
622	mspace = doi.doi_fill_count * doi.doi_data_block_size;
623
624	ASSERT(count != 0);	/* we should have destroyed it */
625
626	ddt_object_name(ddt, type, class, name);
627
628	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
629	    name,
630	    (u_longlong_t)count,
631	    (u_longlong_t)(dspace / count),
632	    (u_longlong_t)(mspace / count));
633
634	if (dump_opt['D'] < 3)
635		return;
636
637	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
638
639	if (dump_opt['D'] < 4)
640		return;
641
642	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
643		return;
644
645	(void) printf("%s contents:\n\n", name);
646
647	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
648		dump_dde(ddt, &dde, walk);
649
650	ASSERT(error == ENOENT);
651
652	(void) printf("\n");
653}
654
655static void
656dump_all_ddts(spa_t *spa)
657{
658	ddt_histogram_t ddh_total = { 0 };
659	ddt_stat_t dds_total = { 0 };
660
661	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
662		ddt_t *ddt = spa->spa_ddt[c];
663		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
664			for (enum ddt_class class = 0; class < DDT_CLASSES;
665			    class++) {
666				dump_ddt(ddt, type, class);
667			}
668		}
669	}
670
671	ddt_get_dedup_stats(spa, &dds_total);
672
673	if (dds_total.dds_blocks == 0) {
674		(void) printf("All DDTs are empty\n");
675		return;
676	}
677
678	(void) printf("\n");
679
680	if (dump_opt['D'] > 1) {
681		(void) printf("DDT histogram (aggregated over all DDTs):\n");
682		ddt_get_dedup_histogram(spa, &ddh_total);
683		zpool_dump_ddt(&dds_total, &ddh_total);
684	}
685
686	dump_dedup_ratio(&dds_total);
687}
688
689static void
690dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
691{
692	char *prefix = (void *)sm;
693
694	(void) printf("%s [%llu,%llu) length %llu\n",
695	    prefix,
696	    (u_longlong_t)start,
697	    (u_longlong_t)(start + size),
698	    (u_longlong_t)(size));
699}
700
701static void
702dump_dtl(vdev_t *vd, int indent)
703{
704	spa_t *spa = vd->vdev_spa;
705	boolean_t required;
706	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
707	char prefix[256];
708
709	spa_vdev_state_enter(spa, SCL_NONE);
710	required = vdev_dtl_required(vd);
711	(void) spa_vdev_state_exit(spa, NULL, 0);
712
713	if (indent == 0)
714		(void) printf("\nDirty time logs:\n\n");
715
716	(void) printf("\t%*s%s [%s]\n", indent, "",
717	    vd->vdev_path ? vd->vdev_path :
718	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
719	    required ? "DTL-required" : "DTL-expendable");
720
721	for (int t = 0; t < DTL_TYPES; t++) {
722		space_map_t *sm = &vd->vdev_dtl[t];
723		if (sm->sm_space == 0)
724			continue;
725		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
726		    indent + 2, "", name[t]);
727		mutex_enter(sm->sm_lock);
728		space_map_walk(sm, dump_dtl_seg, (void *)prefix);
729		mutex_exit(sm->sm_lock);
730		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
731			dump_spacemap(spa->spa_meta_objset,
732			    &vd->vdev_dtl_smo, sm);
733	}
734
735	for (int c = 0; c < vd->vdev_children; c++)
736		dump_dtl(vd->vdev_child[c], indent + 4);
737}
738
739static void
740dump_history(spa_t *spa)
741{
742	nvlist_t **events = NULL;
743	char buf[SPA_MAXBLOCKSIZE];
744	uint64_t resid, len, off = 0;
745	uint_t num = 0;
746	int error;
747	time_t tsec;
748	struct tm t;
749	char tbuf[30];
750	char internalstr[MAXPATHLEN];
751
752	do {
753		len = sizeof (buf);
754
755		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
756			(void) fprintf(stderr, "Unable to read history: "
757			    "error %d\n", error);
758			return;
759		}
760
761		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
762			break;
763
764		off -= resid;
765	} while (len != 0);
766
767	(void) printf("\nHistory:\n");
768	for (int i = 0; i < num; i++) {
769		uint64_t time, txg, ievent;
770		char *cmd, *intstr;
771
772		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
773		    &time) != 0)
774			continue;
775		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
776		    &cmd) != 0) {
777			if (nvlist_lookup_uint64(events[i],
778			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
779				continue;
780			verify(nvlist_lookup_uint64(events[i],
781			    ZPOOL_HIST_TXG, &txg) == 0);
782			verify(nvlist_lookup_string(events[i],
783			    ZPOOL_HIST_INT_STR, &intstr) == 0);
784			if (ievent >= LOG_END)
785				continue;
786
787			(void) snprintf(internalstr,
788			    sizeof (internalstr),
789			    "[internal %s txg:%lld] %s",
790			    hist_event_table[ievent], txg,
791			    intstr);
792			cmd = internalstr;
793		}
794		tsec = time;
795		(void) localtime_r(&tsec, &t);
796		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
797		(void) printf("%s %s\n", tbuf, cmd);
798	}
799}
800
/*
 * Object viewer with an empty body: prints nothing and ignores all of its
 * arguments.
 */
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
{
}
806
807static uint64_t
808blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
809{
810	if (dnp == NULL) {
811		ASSERT(zb->zb_level < 0);
812		if (zb->zb_object == 0)
813			return (zb->zb_blkid);
814		return (zb->zb_blkid * BP_GET_LSIZE(bp));
815	}
816
817	ASSERT(zb->zb_level >= 0);
818
819	return ((zb->zb_blkid <<
820	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
821	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
822}
823
824static void
825sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp)
826{
827	dva_t *dva = bp->blk_dva;
828	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
829
830	if (dump_opt['b'] >= 5) {
831		sprintf_blkptr(blkbuf, bp);
832		return;
833	}
834
835	blkbuf[0] = '\0';
836
837	for (int i = 0; i < ndvas; i++)
838		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
839		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
840		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
841		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
842
843	(void) sprintf(blkbuf + strlen(blkbuf),
844	    "%llxL/%llxP F=%llu B=%llu/%llu",
845	    (u_longlong_t)BP_GET_LSIZE(bp),
846	    (u_longlong_t)BP_GET_PSIZE(bp),
847	    (u_longlong_t)bp->blk_fill,
848	    (u_longlong_t)bp->blk_birth,
849	    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
850}
851
852static void
853print_indirect(blkptr_t *bp, const zbookmark_t *zb,
854    const dnode_phys_t *dnp)
855{
856	char blkbuf[BP_SPRINTF_LEN];
857	int l;
858
859	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
860	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
861
862	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
863
864	ASSERT(zb->zb_level >= 0);
865
866	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
867		if (l == zb->zb_level) {
868			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
869		} else {
870			(void) printf(" ");
871		}
872	}
873
874	sprintf_blkptr_compact(blkbuf, bp);
875	(void) printf("%s\n", blkbuf);
876}
877
878static int
879visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
880    blkptr_t *bp, const zbookmark_t *zb)
881{
882	int err = 0;
883
884	if (bp->blk_birth == 0)
885		return (0);
886
887	print_indirect(bp, zb, dnp);
888
889	if (BP_GET_LEVEL(bp) > 0) {
890		uint32_t flags = ARC_WAIT;
891		int i;
892		blkptr_t *cbp;
893		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
894		arc_buf_t *buf;
895		uint64_t fill = 0;
896
897		err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
898		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
899		if (err)
900			return (err);
901
902		/* recursively visit blocks below this */
903		cbp = buf->b_data;
904		for (i = 0; i < epb; i++, cbp++) {
905			zbookmark_t czb;
906
907			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
908			    zb->zb_level - 1,
909			    zb->zb_blkid * epb + i);
910			err = visit_indirect(spa, dnp, cbp, &czb);
911			if (err)
912				break;
913			fill += cbp->blk_fill;
914		}
915		if (!err)
916			ASSERT3U(fill, ==, bp->blk_fill);
917		(void) arc_buf_remove_ref(buf, &buf);
918	}
919
920	return (err);
921}
922
923/*ARGSUSED*/
924static void
925dump_indirect(dnode_t *dn)
926{
927	dnode_phys_t *dnp = dn->dn_phys;
928	int j;
929	zbookmark_t czb;
930
931	(void) printf("Indirect blocks:\n");
932
933	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
934	    dn->dn_object, dnp->dn_nlevels - 1, 0);
935	for (j = 0; j < dnp->dn_nblkptr; j++) {
936		czb.zb_blkid = j;
937		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
938		    &dnp->dn_blkptr[j], &czb);
939	}
940
941	(void) printf("\n");
942}
943
944/*ARGSUSED*/
945static void
946dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
947{
948	dsl_dir_phys_t *dd = data;
949	time_t crtime;
950	char nice[6];
951
952	if (dd == NULL)
953		return;
954
955	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
956
957	crtime = dd->dd_creation_time;
958	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
959	(void) printf("\t\thead_dataset_obj = %llu\n",
960	    (u_longlong_t)dd->dd_head_dataset_obj);
961	(void) printf("\t\tparent_dir_obj = %llu\n",
962	    (u_longlong_t)dd->dd_parent_obj);
963	(void) printf("\t\torigin_obj = %llu\n",
964	    (u_longlong_t)dd->dd_origin_obj);
965	(void) printf("\t\tchild_dir_zapobj = %llu\n",
966	    (u_longlong_t)dd->dd_child_dir_zapobj);
967	nicenum(dd->dd_used_bytes, nice);
968	(void) printf("\t\tused_bytes = %s\n", nice);
969	nicenum(dd->dd_compressed_bytes, nice);
970	(void) printf("\t\tcompressed_bytes = %s\n", nice);
971	nicenum(dd->dd_uncompressed_bytes, nice);
972	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
973	nicenum(dd->dd_quota, nice);
974	(void) printf("\t\tquota = %s\n", nice);
975	nicenum(dd->dd_reserved, nice);
976	(void) printf("\t\treserved = %s\n", nice);
977	(void) printf("\t\tprops_zapobj = %llu\n",
978	    (u_longlong_t)dd->dd_props_zapobj);
979	(void) printf("\t\tdeleg_zapobj = %llu\n",
980	    (u_longlong_t)dd->dd_deleg_zapobj);
981	(void) printf("\t\tflags = %llx\n",
982	    (u_longlong_t)dd->dd_flags);
983
984#define	DO(which) \
985	nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
986	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
987	DO(HEAD);
988	DO(SNAP);
989	DO(CHILD);
990	DO(CHILD_RSRV);
991	DO(REFRSRV);
992#undef DO
993}
994
995/*ARGSUSED*/
996static void
997dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
998{
999	dsl_dataset_phys_t *ds = data;
1000	time_t crtime;
1001	char used[6], compressed[6], uncompressed[6], unique[6];
1002	char blkbuf[BP_SPRINTF_LEN];
1003
1004	if (ds == NULL)
1005		return;
1006
1007	ASSERT(size == sizeof (*ds));
1008	crtime = ds->ds_creation_time;
1009	nicenum(ds->ds_used_bytes, used);
1010	nicenum(ds->ds_compressed_bytes, compressed);
1011	nicenum(ds->ds_uncompressed_bytes, uncompressed);
1012	nicenum(ds->ds_unique_bytes, unique);
1013	sprintf_blkptr(blkbuf, &ds->ds_bp);
1014
1015	(void) printf("\t\tdir_obj = %llu\n",
1016	    (u_longlong_t)ds->ds_dir_obj);
1017	(void) printf("\t\tprev_snap_obj = %llu\n",
1018	    (u_longlong_t)ds->ds_prev_snap_obj);
1019	(void) printf("\t\tprev_snap_txg = %llu\n",
1020	    (u_longlong_t)ds->ds_prev_snap_txg);
1021	(void) printf("\t\tnext_snap_obj = %llu\n",
1022	    (u_longlong_t)ds->ds_next_snap_obj);
1023	(void) printf("\t\tsnapnames_zapobj = %llu\n",
1024	    (u_longlong_t)ds->ds_snapnames_zapobj);
1025	(void) printf("\t\tnum_children = %llu\n",
1026	    (u_longlong_t)ds->ds_num_children);
1027	(void) printf("\t\tuserrefs_obj = %llu\n",
1028	    (u_longlong_t)ds->ds_userrefs_obj);
1029	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
1030	(void) printf("\t\tcreation_txg = %llu\n",
1031	    (u_longlong_t)ds->ds_creation_txg);
1032	(void) printf("\t\tdeadlist_obj = %llu\n",
1033	    (u_longlong_t)ds->ds_deadlist_obj);
1034	(void) printf("\t\tused_bytes = %s\n", used);
1035	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
1036	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1037	(void) printf("\t\tunique = %s\n", unique);
1038	(void) printf("\t\tfsid_guid = %llu\n",
1039	    (u_longlong_t)ds->ds_fsid_guid);
1040	(void) printf("\t\tguid = %llu\n",
1041	    (u_longlong_t)ds->ds_guid);
1042	(void) printf("\t\tflags = %llx\n",
1043	    (u_longlong_t)ds->ds_flags);
1044	(void) printf("\t\tnext_clones_obj = %llu\n",
1045	    (u_longlong_t)ds->ds_next_clones_obj);
1046	(void) printf("\t\tprops_obj = %llu\n",
1047	    (u_longlong_t)ds->ds_props_obj);
1048	(void) printf("\t\tbp = %s\n", blkbuf);
1049}
1050
1051static void
1052dump_bplist(objset_t *mos, uint64_t object, char *name)
1053{
1054	bplist_t bpl = { 0 };
1055	blkptr_t blk, *bp = &blk;
1056	uint64_t itor = 0;
1057	char bytes[6];
1058	char comp[6];
1059	char uncomp[6];
1060
1061	if (dump_opt['d'] < 3)
1062		return;
1063
1064	bplist_init(&bpl);
1065	VERIFY(0 == bplist_open(&bpl, mos, object));
1066	if (bplist_empty(&bpl)) {
1067		bplist_close(&bpl);
1068		bplist_fini(&bpl);
1069		return;
1070	}
1071
1072	nicenum(bpl.bpl_phys->bpl_bytes, bytes);
1073	if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
1074		nicenum(bpl.bpl_phys->bpl_comp, comp);
1075		nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
1076		(void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
1077		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
1078		    bytes, comp, uncomp);
1079	} else {
1080		(void) printf("\n    %s: %llu entries, %s\n",
1081		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
1082	}
1083
1084	if (dump_opt['d'] < 5) {
1085		bplist_close(&bpl);
1086		bplist_fini(&bpl);
1087		return;
1088	}
1089
1090	(void) printf("\n");
1091
1092	while (bplist_iterate(&bpl, &itor, bp) == 0) {
1093		char blkbuf[BP_SPRINTF_LEN];
1094
1095		ASSERT(bp->blk_birth != 0);
1096		sprintf_blkptr_compact(blkbuf, bp);
1097		(void) printf("\tItem %3llu: %s\n",
1098		    (u_longlong_t)itor - 1, blkbuf);
1099	}
1100
1101	bplist_close(&bpl);
1102	bplist_fini(&bpl);
1103}
1104
/*
 * FUID lookup state used when printing uids and gids: the two AVL trees
 * handed to the zfs_fuid_* routines, and a flag recording whether they
 * are currently loaded.
 */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
1108
1109static void
1110fuid_table_destroy()
1111{
1112	if (fuid_table_loaded) {
1113		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1114		fuid_table_loaded = B_FALSE;
1115	}
1116}
1117
1118/*
1119 * print uid or gid information.
1120 * For normal POSIX id just the id is printed in decimal format.
1121 * For CIFS files with FUID the fuid is printed in hex followed by
1122 * the doman-rid string.
1123 */
1124static void
1125print_idstr(uint64_t id, const char *id_type)
1126{
1127	if (FUID_INDEX(id)) {
1128		char *domain;
1129
1130		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1131		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
1132		    (u_longlong_t)id, domain, (int)FUID_RID(id));
1133	} else {
1134		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1135	}
1136
1137}
1138
1139static void
1140dump_uidgid(objset_t *os, znode_phys_t *zp)
1141{
1142	uint32_t uid_idx, gid_idx;
1143
1144	uid_idx = FUID_INDEX(zp->zp_uid);
1145	gid_idx = FUID_INDEX(zp->zp_gid);
1146
1147	/* Load domain table, if not already loaded */
1148	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1149		uint64_t fuid_obj;
1150
1151		/* first find the fuid object.  It lives in the master node */
1152		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1153		    8, 1, &fuid_obj) == 0);
1154		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1155		(void) zfs_fuid_table_load(os, fuid_obj,
1156		    &idx_tree, &domain_tree);
1157		fuid_table_loaded = B_TRUE;
1158	}
1159
1160	print_idstr(zp->zp_uid, "uid");
1161	print_idstr(zp->zp_gid, "gid");
1162}
1163
1164/*ARGSUSED*/
1165static void
1166dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1167{
1168	znode_phys_t *zp = data;
1169	time_t z_crtime, z_atime, z_mtime, z_ctime;
1170	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
1171	int error;
1172
1173	ASSERT(size >= sizeof (znode_phys_t));
1174
1175	error = zfs_obj_to_path(os, object, path, sizeof (path));
1176	if (error != 0) {
1177		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1178		    (u_longlong_t)object);
1179	}
1180
1181	if (dump_opt['d'] < 3) {
1182		(void) printf("\t%s\n", path);
1183		return;
1184	}
1185
1186	z_crtime = (time_t)zp->zp_crtime[0];
1187	z_atime = (time_t)zp->zp_atime[0];
1188	z_mtime = (time_t)zp->zp_mtime[0];
1189	z_ctime = (time_t)zp->zp_ctime[0];
1190
1191	(void) printf("\tpath	%s\n", path);
1192	dump_uidgid(os, zp);
1193	(void) printf("\tatime	%s", ctime(&z_atime));
1194	(void) printf("\tmtime	%s", ctime(&z_mtime));
1195	(void) printf("\tctime	%s", ctime(&z_ctime));
1196	(void) printf("\tcrtime	%s", ctime(&z_crtime));
1197	(void) printf("\tgen	%llu\n", (u_longlong_t)zp->zp_gen);
1198	(void) printf("\tmode	%llo\n", (u_longlong_t)zp->zp_mode);
1199	(void) printf("\tsize	%llu\n", (u_longlong_t)zp->zp_size);
1200	(void) printf("\tparent	%llu\n", (u_longlong_t)zp->zp_parent);
1201	(void) printf("\tlinks	%llu\n", (u_longlong_t)zp->zp_links);
1202	(void) printf("\txattr	%llu\n", (u_longlong_t)zp->zp_xattr);
1203	(void) printf("\trdev	0x%016llx\n", (u_longlong_t)zp->zp_rdev);
1204}
1205
/*
 * Object viewer for ACL objects.  Intentionally empty: it prints nothing
 * and exists so the ACL slots of object_viewer[] have a callable entry.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1211
/*
 * Object viewer for DMU objset objects.  Intentionally empty: it prints
 * nothing and only fills the corresponding object_viewer[] slot.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1217
/*
 * Per-object-type dump routines.  dump_object() indexes this table with
 * ZDB_OT_TYPE() twice: once with the bonus type (passing the bonus
 * buffer and its size) and once with the object's own type (passing no
 * buffer).  The table has DMU_OT_NUMTYPES + 1 entries; the trailing one
 * is the viewer for unknown types and must stay last.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
	dump_none,		/* unallocated			*/
	dump_zap,		/* object directory		*/
	dump_uint64,		/* object array			*/
	dump_none,		/* packed nvlist		*/
	dump_packed_nvlist,	/* packed nvlist size		*/
	dump_none,		/* bplist			*/
	dump_none,		/* bplist header		*/
	dump_none,		/* SPA space map header		*/
	dump_none,		/* SPA space map		*/
	dump_none,		/* ZIL intent log		*/
	dump_dnode,		/* DMU dnode			*/
	dump_dmu_objset,	/* DMU objset			*/
	dump_dsl_dir,		/* DSL directory		*/
	dump_zap,		/* DSL directory child map	*/
	dump_zap,		/* DSL dataset snap map		*/
	dump_zap,		/* DSL props			*/
	dump_dsl_dataset,	/* DSL dataset			*/
	dump_znode,		/* ZFS znode			*/
	dump_acl,		/* ZFS V0 ACL			*/
	dump_uint8,		/* ZFS plain file		*/
	dump_zpldir,		/* ZFS directory		*/
	dump_zap,		/* ZFS master node		*/
	dump_zap,		/* ZFS delete queue		*/
	dump_uint8,		/* zvol object			*/
	dump_zap,		/* zvol prop			*/
	dump_uint8,		/* other uint8[]		*/
	dump_uint64,		/* other uint64[]		*/
	dump_zap,		/* other ZAP			*/
	dump_zap,		/* persistent error log		*/
	dump_uint8,		/* SPA history			*/
	dump_uint64,		/* SPA history offsets		*/
	dump_zap,		/* Pool properties		*/
	dump_zap,		/* DSL permissions		*/
	dump_acl,		/* ZFS ACL			*/
	dump_uint8,		/* ZFS SYSACL			*/
	dump_none,		/* FUID nvlist			*/
	dump_packed_nvlist,	/* FUID nvlist size		*/
	dump_zap,		/* DSL dataset next clones	*/
	dump_zap,		/* DSL scrub queue		*/
	dump_zap,		/* ZFS user/group used		*/
	dump_zap,		/* ZFS user/group quota		*/
	dump_zap,		/* snapshot refcount tags	*/
	dump_ddt_zap,		/* DDT ZAP object		*/
	dump_zap,		/* DDT statistics		*/
	dump_unknown		/* Unknown type, must be last	*/
};
1265
1266static void
1267dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1268{
1269	dmu_buf_t *db = NULL;
1270	dmu_object_info_t doi;
1271	dnode_t *dn;
1272	void *bonus = NULL;
1273	size_t bsize = 0;
1274	char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], fill[7];
1275	char aux[50];
1276	int error;
1277
1278	if (*print_header) {
1279		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1280		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1281		    "%full", "type");
1282		*print_header = 0;
1283	}
1284
1285	if (object == 0) {
1286		dn = os->os_meta_dnode;
1287	} else {
1288		error = dmu_bonus_hold(os, object, FTAG, &db);
1289		if (error)
1290			fatal("dmu_bonus_hold(%llu) failed, errno %u",
1291			    object, error);
1292		bonus = db->db_data;
1293		bsize = db->db_size;
1294		dn = ((dmu_buf_impl_t *)db)->db_dnode;
1295	}
1296	dmu_object_info_from_dnode(dn, &doi);
1297
1298	nicenum(doi.doi_metadata_block_size, iblk);
1299	nicenum(doi.doi_data_block_size, dblk);
1300	nicenum(doi.doi_max_offset, lsize);
1301	nicenum(doi.doi_physical_blocks_512 << 9, asize);
1302	nicenum(doi.doi_bonus_size, bonus_size);
1303	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1304	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1305	    doi.doi_max_offset);
1306
1307	aux[0] = '\0';
1308
1309	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1310		(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1311		    ZDB_CHECKSUM_NAME(doi.doi_checksum));
1312	}
1313
1314	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1315		(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1316		    ZDB_COMPRESS_NAME(doi.doi_compress));
1317	}
1318
1319	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1320	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1321	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1322
1323	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1324		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1325		    "", "", "", "", "", bonus_size, "bonus",
1326		    ZDB_OT_NAME(doi.doi_bonus_type));
1327	}
1328
1329	if (verbosity >= 4) {
1330		(void) printf("\tdnode flags: %s%s\n",
1331		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1332		    "USED_BYTES " : "",
1333		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1334		    "USERUSED_ACCOUNTED " : "");
1335		(void) printf("\tdnode maxblkid: %llu\n",
1336		    (longlong_t)dn->dn_phys->dn_maxblkid);
1337
1338		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1339		    bonus, bsize);
1340		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1341		*print_header = 1;
1342	}
1343
1344	if (verbosity >= 5)
1345		dump_indirect(dn);
1346
1347	if (verbosity >= 5) {
1348		/*
1349		 * Report the list of segments that comprise the object.
1350		 */
1351		uint64_t start = 0;
1352		uint64_t end;
1353		uint64_t blkfill = 1;
1354		int minlvl = 1;
1355
1356		if (dn->dn_type == DMU_OT_DNODE) {
1357			minlvl = 0;
1358			blkfill = DNODES_PER_BLOCK;
1359		}
1360
1361		for (;;) {
1362			char segsize[6];
1363			error = dnode_next_offset(dn,
1364			    0, &start, minlvl, blkfill, 0);
1365			if (error)
1366				break;
1367			end = start;
1368			error = dnode_next_offset(dn,
1369			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1370			nicenum(end - start, segsize);
1371			(void) printf("\t\tsegment [%016llx, %016llx)"
1372			    " size %5s\n", (u_longlong_t)start,
1373			    (u_longlong_t)end, segsize);
1374			if (error)
1375				break;
1376			start = end;
1377		}
1378	}
1379
1380	if (db != NULL)
1381		dmu_buf_rele(db, FTAG);
1382}
1383
/*
 * Printable names for objset types; dump_dir() indexes this by dds_type
 * after checking that the value is below DMU_OST_NUMTYPES.
 */
static char *objset_types[DMU_OST_NUMTYPES] = {
	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1386
1387static void
1388dump_dir(objset_t *os)
1389{
1390	dmu_objset_stats_t dds;
1391	uint64_t object, object_count;
1392	uint64_t refdbytes, usedobjs, scratch;
1393	char numbuf[8];
1394	char blkbuf[BP_SPRINTF_LEN + 20];
1395	char osname[MAXNAMELEN];
1396	char *type = "UNKNOWN";
1397	int verbosity = dump_opt['d'];
1398	int print_header = 1;
1399	int i, error;
1400
1401	dmu_objset_fast_stat(os, &dds);
1402
1403	if (dds.dds_type < DMU_OST_NUMTYPES)
1404		type = objset_types[dds.dds_type];
1405
1406	if (dds.dds_type == DMU_OST_META) {
1407		dds.dds_creation_txg = TXG_INITIAL;
1408		usedobjs = os->os_rootbp->blk_fill;
1409		refdbytes = os->os_spa->spa_dsl_pool->
1410		    dp_mos_dir->dd_phys->dd_used_bytes;
1411	} else {
1412		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1413	}
1414
1415	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1416
1417	nicenum(refdbytes, numbuf);
1418
1419	if (verbosity >= 4) {
1420		(void) sprintf(blkbuf, ", rootbp ");
1421		(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
1422	} else {
1423		blkbuf[0] = '\0';
1424	}
1425
1426	dmu_objset_name(os, osname);
1427
1428	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1429	    "%s, %llu objects%s\n",
1430	    osname, type, (u_longlong_t)dmu_objset_id(os),
1431	    (u_longlong_t)dds.dds_creation_txg,
1432	    numbuf, (u_longlong_t)usedobjs, blkbuf);
1433
1434	if (zopt_objects != 0) {
1435		for (i = 0; i < zopt_objects; i++)
1436			dump_object(os, zopt_object[i], verbosity,
1437			    &print_header);
1438		(void) printf("\n");
1439		return;
1440	}
1441
1442	if (dump_opt['i'] != 0 || verbosity >= 2)
1443		dump_intent_log(dmu_objset_zil(os));
1444
1445	if (dmu_objset_ds(os) != NULL)
1446		dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
1447		    dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
1448
1449	if (verbosity < 2)
1450		return;
1451
1452	if (os->os_rootbp->blk_birth == 0)
1453		return;
1454
1455	dump_object(os, 0, verbosity, &print_header);
1456	object_count = 0;
1457	if (os->os_userused_dnode &&
1458	    os->os_userused_dnode->dn_type != 0) {
1459		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1460		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1461	}
1462
1463	object = 0;
1464	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1465		dump_object(os, object, verbosity, &print_header);
1466		object_count++;
1467	}
1468
1469	ASSERT3U(object_count, ==, usedobjs);
1470
1471	(void) printf("\n");
1472
1473	if (error != ESRCH) {
1474		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1475		abort();
1476	}
1477}
1478
1479static void
1480dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
1481{
1482	time_t timestamp = ub->ub_timestamp;
1483
1484	(void) printf(header ? header : "");
1485	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1486	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1487	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1488	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1489	(void) printf("\ttimestamp = %llu UTC = %s",
1490	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1491	if (dump_opt['u'] >= 3) {
1492		char blkbuf[BP_SPRINTF_LEN];
1493		sprintf_blkptr(blkbuf, &ub->ub_rootbp);
1494		(void) printf("\trootbp = %s\n", blkbuf);
1495	}
1496	(void) printf(footer ? footer : "");
1497}
1498
1499static void
1500dump_config(spa_t *spa)
1501{
1502	dmu_buf_t *db;
1503	size_t nvsize = 0;
1504	int error = 0;
1505
1506
1507	error = dmu_bonus_hold(spa->spa_meta_objset,
1508	    spa->spa_config_object, FTAG, &db);
1509
1510	if (error == 0) {
1511		nvsize = *(uint64_t *)db->db_data;
1512		dmu_buf_rele(db, FTAG);
1513
1514		(void) printf("\nMOS Configuration:\n");
1515		dump_packed_nvlist(spa->spa_meta_objset,
1516		    spa->spa_config_object, (void *)&nvsize, 1);
1517	} else {
1518		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1519		    (u_longlong_t)spa->spa_config_object, error);
1520	}
1521}
1522
1523static void
1524dump_cachefile(const char *cachefile)
1525{
1526	int fd;
1527	struct stat64 statbuf;
1528	char *buf;
1529	nvlist_t *config;
1530
1531	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1532		(void) printf("cannot open '%s': %s\n", cachefile,
1533		    strerror(errno));
1534		exit(1);
1535	}
1536
1537	if (fstat64(fd, &statbuf) != 0) {
1538		(void) printf("failed to stat '%s': %s\n", cachefile,
1539		    strerror(errno));
1540		exit(1);
1541	}
1542
1543	if ((buf = malloc(statbuf.st_size)) == NULL) {
1544		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
1545		    (u_longlong_t)statbuf.st_size);
1546		exit(1);
1547	}
1548
1549	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1550		(void) fprintf(stderr, "failed to read %llu bytes\n",
1551		    (u_longlong_t)statbuf.st_size);
1552		exit(1);
1553	}
1554
1555	(void) close(fd);
1556
1557	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1558		(void) fprintf(stderr, "failed to unpack nvlist\n");
1559		exit(1);
1560	}
1561
1562	free(buf);
1563
1564	dump_nvlist(config, 0);
1565
1566	nvlist_free(config);
1567}
1568
1569#define	ZDB_MAX_UB_HEADER_SIZE 32
1570
1571static void
1572dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
1573{
1574	vdev_t vd;
1575	vdev_t *vdp = &vd;
1576	char header[ZDB_MAX_UB_HEADER_SIZE];
1577
1578	vd.vdev_ashift = ashift;
1579	vdp->vdev_top = vdp;
1580
1581	for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
1582		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
1583		uberblock_t *ub = (void *)((char *)lbl + uoff);
1584
1585		if (uberblock_verify(ub))
1586			continue;
1587		(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
1588		    "Uberblock[%d]\n", i);
1589		dump_uberblock(ub, header, "");
1590	}
1591}
1592
1593static void
1594dump_label(const char *dev)
1595{
1596	int fd;
1597	vdev_label_t label;
1598	char *buf = label.vl_vdev_phys.vp_nvlist;
1599	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1600	struct stat64 statbuf;
1601	uint64_t psize, ashift;
1602
1603	if ((fd = open64(dev, O_RDONLY)) < 0) {
1604		(void) printf("cannot open '%s': %s\n", dev, strerror(errno));
1605		exit(1);
1606	}
1607
1608	if (fstat64(fd, &statbuf) != 0) {
1609		(void) printf("failed to stat '%s': %s\n", dev,
1610		    strerror(errno));
1611	}
1612
1613	psize = statbuf.st_size;
1614	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1615
1616	for (int l = 0; l < VDEV_LABELS; l++) {
1617		nvlist_t *config = NULL;
1618
1619		(void) printf("--------------------------------------------\n");
1620		(void) printf("LABEL %d\n", l);
1621		(void) printf("--------------------------------------------\n");
1622
1623		if (pread64(fd, &label, sizeof (label),
1624		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1625			(void) printf("failed to read label %d\n", l);
1626			continue;
1627		}
1628
1629		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1630			(void) printf("failed to unpack label %d\n", l);
1631			ashift = SPA_MINBLOCKSHIFT;
1632		} else {
1633			nvlist_t *vdev_tree = NULL;
1634
1635			dump_nvlist(config, 4);
1636			if ((nvlist_lookup_nvlist(config,
1637			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
1638			    (nvlist_lookup_uint64(vdev_tree,
1639			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
1640				ashift = SPA_MINBLOCKSHIFT;
1641			nvlist_free(config);
1642		}
1643		if (dump_opt['u'])
1644			dump_label_uberblocks(&label, ashift);
1645	}
1646}
1647
1648/*ARGSUSED*/
1649static int
1650dump_one_dir(const char *dsname, void *arg)
1651{
1652	int error;
1653	objset_t *os;
1654
1655	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
1656	if (error) {
1657		(void) printf("Could not open %s, error %d\n", dsname, error);
1658		return (0);
1659	}
1660	dump_dir(os);
1661	dmu_objset_disown(os, FTAG);
1662	fuid_table_destroy();
1663	return (0);
1664}
1665
/*
 * Block statistics.
 * One accumulator; zdb_count_block() adds every counted block pointer's
 * sizes to it and bumps the count.
 */
typedef struct zdb_blkstats {
	uint64_t	zb_asize;	/* sum of BP_GET_ASIZE() */
	uint64_t	zb_lsize;	/* sum of BP_GET_LSIZE() */
	uint64_t	zb_psize;	/* sum of BP_GET_PSIZE() */
	uint64_t	zb_count;	/* number of block pointers counted */
} zdb_blkstats_t;
1675
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 * They are numbered directly after the real DMU object types, and
 * ZDB_OT_TOTAL is the slot holding the sum over all types.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 2)

/*
 * Display names for the extended types, indexed by
 * (type - DMU_OT_NUMTYPES); keep in the same order as the defines above.
 */
static char *zdb_ot_extname[] = {
	"deferred free",
	"dedup ditto",
	"Total",
};
1688
/* Extra "level" index that holds the totals across all block levels. */
#define	ZB_TOTAL	DN_MAX_LEVELS

/*
 * State accumulated while traversing the pool for block statistics.
 */
typedef struct zdb_cb {
	/* stats by [level][type]; row ZB_TOTAL / column ZDB_OT_TOTAL sum */
	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
	/* allocated size accounted to dedup references beyond the first */
	uint64_t	zcb_dedup_asize;
	/* number of non-ditto DDT phys entries visited */
	uint64_t	zcb_dedup_blocks;
	/* read error tally, indexed by the error code from zio_wait() */
	uint64_t	zcb_errors[256];
	/* cleared on every zdb_blkptr_cb() call */
	int		zcb_readfails;
	/* nonzero once any read or traversal error has been seen */
	int		zcb_haderrors;
} zdb_cb_t;
1699
/*
 * Account for block pointer 'bp' of (possibly extended) type 'type' in
 * the statistics of 'zcb' and, unless -L was given, claim it via
 * zio_claim().
 *
 * When 'zilog' is non-NULL the block is first offered to
 * zil_bp_tree_add(); a nonzero return means the block is not counted
 * (presumably it was already seen for this log -- confirm in zil.c).
 */
static void
zdb_count_block(spa_t *spa, zilog_t *zilog, zdb_cb_t *zcb, const blkptr_t *bp,
    dmu_object_type_t type)
{
	uint64_t refcnt = 0;

	ASSERT(type < ZDB_OT_TOTAL);

	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
		return;

	/*
	 * Add the block to four accumulators: (its level, all types),
	 * (its level, its type), (all levels, all types) and
	 * (all levels, its type).  The two low bits of i select the slot.
	 */
	for (int i = 0; i < 4; i++) {
		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
		int t = (i & 1) ? type : ZDB_OT_TOTAL;
		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];

		zb->zb_asize += BP_GET_ASIZE(bp);
		zb->zb_lsize += BP_GET_LSIZE(bp);
		zb->zb_psize += BP_GET_PSIZE(bp);
		zb->zb_count++;
	}

	/* With -L there is no leak detection, so nothing is claimed. */
	if (dump_opt['L'])
		return;

	/*
	 * For a dedup block, drop one reference from its in-core DDT entry
	 * and remember how many remain; the entry is removed once its total
	 * reference count reaches zero.  A block with no DDT entry is
	 * treated as having no remaining references.
	 */
	if (BP_GET_DEDUP(bp)) {
		ddt_t *ddt;
		ddt_entry_t *dde;

		ddt = ddt_select(spa, bp);
		ddt_enter(ddt);
		dde = ddt_lookup(ddt, bp, B_FALSE);

		if (dde == NULL) {
			refcnt = 0;
		} else {
			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
			ddt_phys_decref(ddp);
			refcnt = ddp->ddp_refcnt;
			if (ddt_phys_total_refcnt(dde) == 0)
				ddt_remove(ddt, dde);
		}
		ddt_exit(ddt);
	}

	/*
	 * The claim txg is 0 while dedup references remain and
	 * spa_first_txg() otherwise; the claim itself must succeed.
	 * NOTE(review): presumably a txg of 0 makes the claim a no-op so
	 * shared blocks are only claimed on their last reference -- confirm.
	 */
	VERIFY3U(zio_wait(zio_claim(NULL, spa,
	    refcnt ? 0 : spa_first_txg(spa),
	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
1749
1750static int
1751zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
1752    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1753{
1754	zdb_cb_t *zcb = arg;
1755	char blkbuf[BP_SPRINTF_LEN];
1756	dmu_object_type_t type;
1757	boolean_t is_metadata;
1758
1759	if (bp == NULL)
1760		return (0);
1761
1762	type = BP_GET_TYPE(bp);
1763
1764	zdb_count_block(spa, zilog, zcb, bp, type);
1765
1766	is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
1767
1768	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
1769		int ioerr;
1770		size_t size = BP_GET_PSIZE(bp);
1771		void *data = malloc(size);
1772		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
1773
1774		/* If it's an intent log block, failure is expected. */
1775		if (zb->zb_level == ZB_ZIL_LEVEL)
1776			flags |= ZIO_FLAG_SPECULATIVE;
1777
1778		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
1779		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
1780
1781		free(data);
1782
1783		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
1784			zcb->zcb_haderrors = 1;
1785			zcb->zcb_errors[ioerr]++;
1786
1787			if (dump_opt['b'] >= 2)
1788				sprintf_blkptr(blkbuf, bp);
1789			else
1790				blkbuf[0] = '\0';
1791
1792			(void) printf("zdb_blkptr_cb: "
1793			    "Got error %d reading "
1794			    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
1795			    ioerr,
1796			    (u_longlong_t)zb->zb_objset,
1797			    (u_longlong_t)zb->zb_object,
1798			    (u_longlong_t)zb->zb_level,
1799			    (u_longlong_t)zb->zb_blkid,
1800			    blkbuf);
1801		}
1802	}
1803
1804	zcb->zcb_readfails = 0;
1805
1806	if (dump_opt['b'] >= 4) {
1807		sprintf_blkptr(blkbuf, bp);
1808		(void) printf("objset %llu object %llu "
1809		    "level %lld offset 0x%llx %s\n",
1810		    (u_longlong_t)zb->zb_objset,
1811		    (u_longlong_t)zb->zb_object,
1812		    (longlong_t)zb->zb_level,
1813		    (u_longlong_t)blkid2offset(dnp, bp, zb),
1814		    blkbuf);
1815	}
1816
1817	return (0);
1818}
1819
1820static void
1821zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
1822{
1823	vdev_t *vd = sm->sm_ppd;
1824
1825	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
1826	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
1827}
1828
/*
 * "load" entry of zdb_space_map_ops.  Intentionally empty: zdb needs no
 * extra work when a space map is loaded.
 */
/* ARGSUSED */
static void
zdb_space_map_load(space_map_t *sm)
{
}
1834
/*
 * "unload" entry of zdb_space_map_ops.  Vacates the map, passing
 * zdb_leak() as the per-segment callback so that every segment still in
 * the map at unload time is reported as leaked.
 */
static void
zdb_space_map_unload(space_map_t *sm)
{
	space_map_vacate(sm, zdb_leak, sm);
}
1840
/*
 * "claim" entry of zdb_space_map_ops.  Intentionally empty: zdb does no
 * bookkeeping of its own when a segment is claimed.
 */
/* ARGSUSED */
static void
zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
}
1846
/*
 * Space map callbacks installed by zdb_leak_init() for leak detection.
 * Only load, unload and claim are supplied; the other slots are NULL.
 */
static space_map_ops_t zdb_space_map_ops = {
	zdb_space_map_load,
	zdb_space_map_unload,
	NULL,	/* alloc */
	zdb_space_map_claim,
	NULL,	/* free */
	NULL	/* maxsize */
};
1855
/*
 * Walk the dedup table before the block traversal starts.
 *
 * The walk returns as soon as it reaches an entry of class
 * DDT_CLASS_UNIQUE.  For every earlier entry, each phys slot with a
 * nonzero birth is turned into a block pointer: ditto slots are counted
 * as ZDB_OT_DITTO blocks, all others add (refcnt - 1) times their
 * allocated size to zcb_dedup_asize and bump zcb_dedup_blocks.  Unless
 * -L was given, the entry is then looked up in the in-core DDT with the
 * "add" argument set, which must succeed.
 */
static void
zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
{
	ddt_bookmark_t ddb = { 0 };
	ddt_entry_t dde;
	int error;

	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
		blkptr_t blk;
		ddt_phys_t *ddp = dde.dde_phys;

		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
			return;

		ASSERT(ddt_phys_total_refcnt(&dde) > 1);

		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
			if (ddp->ddp_phys_birth == 0)
				continue;
			ddt_bp_create(ddb.ddb_checksum,
			    &dde.dde_key, ddp, &blk);
			if (p == DDT_PHYS_DITTO) {
				zdb_count_block(spa, NULL, zcb, &blk,
				    ZDB_OT_DITTO);
			} else {
				zcb->zcb_dedup_asize +=
				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
				zcb->zcb_dedup_blocks++;
			}
		}
		/*
		 * NOTE(review): blk holds whatever the last populated phys
		 * slot produced; it would be uninitialized here if every
		 * slot had a zero birth.  The ASSERT above suggests that
		 * cannot happen, but it is compiled out of non-debug builds.
		 */
		if (!dump_opt['L']) {
			ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
			ddt_enter(ddt);
			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
			ddt_exit(ddt);
		}
	}

	ASSERT(error == ENOENT);
}
1896
1897static void
1898zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
1899{
1900	if (!dump_opt['L']) {
1901		vdev_t *rvd = spa->spa_root_vdev;
1902		for (int c = 0; c < rvd->vdev_children; c++) {
1903			vdev_t *vd = rvd->vdev_child[c];
1904			for (int m = 0; m < vd->vdev_ms_count; m++) {
1905				metaslab_t *msp = vd->vdev_ms[m];
1906				mutex_enter(&msp->ms_lock);
1907				space_map_unload(&msp->ms_map);
1908				VERIFY(space_map_load(&msp->ms_map,
1909				    &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
1910				    spa->spa_meta_objset) == 0);
1911				msp->ms_map.sm_ppd = vd;
1912				mutex_exit(&msp->ms_lock);
1913			}
1914		}
1915	}
1916
1917	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
1918
1919	zdb_ddt_leak_init(spa, zcb);
1920
1921	spa_config_exit(spa, SCL_CONFIG, FTAG);
1922}
1923
1924static void
1925zdb_leak_fini(spa_t *spa)
1926{
1927	if (!dump_opt['L']) {
1928		vdev_t *rvd = spa->spa_root_vdev;
1929		for (int c = 0; c < rvd->vdev_children; c++) {
1930			vdev_t *vd = rvd->vdev_child[c];
1931			for (int m = 0; m < vd->vdev_ms_count; m++) {
1932				metaslab_t *msp = vd->vdev_ms[m];
1933				mutex_enter(&msp->ms_lock);
1934				space_map_unload(&msp->ms_map);
1935				mutex_exit(&msp->ms_lock);
1936			}
1937		}
1938	}
1939}
1940
/*
 * Traverse every block in the pool, optionally verifying checksums (-c)
 * and checking for leaks (unless -L), then print summary statistics and,
 * with -bb or more, a per-type breakdown.
 *
 * Returns 2 if no blocks were found or the traversed total does not
 * match the allocated space, 3 if read or traversal errors occurred,
 * and 0 otherwise.
 */
static int
dump_block_stats(spa_t *spa)
{
	zdb_cb_t zcb = { 0 };
	zdb_blkstats_t *zb, *tzb;
	uint64_t norm_alloc, norm_space, total_alloc, total_found;
	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
	int leaks = 0;

	/* Build the banner from whichever checks are actually enabled. */
	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
	    (dump_opt['c'] == 1) ? "metadata " : "",
	    dump_opt['c'] ? "checksums " : "",
	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
	    !dump_opt['L'] ? "nothing leaked " : "");

	/*
	 * Load all space maps as SM_ALLOC maps, then traverse the pool
	 * claiming each block we discover.  If the pool is perfectly
	 * consistent, the space maps will be empty when we're done.
	 * Anything left over is a leak; any block we can't claim (because
	 * it's not part of any space map) is a double allocation,
	 * reference to a freed block, or an unclaimed log block.
	 */
	zdb_leak_init(spa, &zcb);

	/*
	 * If there's a deferred-free bplist, process that first.
	 */
	if (spa->spa_deferred_bplist_obj != 0) {
		bplist_t *bpl = &spa->spa_deferred_bplist;
		blkptr_t blk;
		uint64_t itor = 0;

		VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
		    spa->spa_deferred_bplist_obj));

		while (bplist_iterate(bpl, &itor, &blk) == 0) {
			if (dump_opt['b'] >= 4) {
				char blkbuf[BP_SPRINTF_LEN];
				sprintf_blkptr(blkbuf, &blk);
				(void) printf("[%s] %s\n",
				    "deferred free", blkbuf);
			}
			zdb_count_block(spa, NULL, &zcb, &blk, ZDB_OT_DEFERRED);
		}

		bplist_close(bpl);
	}

	/* -cc reads data blocks too, so prefetch them as well. */
	if (dump_opt['c'] > 1)
		flags |= TRAVERSE_PREFETCH_DATA;

	/* A nonzero traversal result also counts as "had errors". */
	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);

	if (zcb.zcb_haderrors) {
		(void) printf("\nError counts:\n\n");
		(void) printf("\t%5s  %s\n", "errno", "count");
		for (int e = 0; e < 256; e++) {
			if (zcb.zcb_errors[e] != 0) {
				(void) printf("\t%5d  %llu\n",
				    e, (u_longlong_t)zcb.zcb_errors[e]);
			}
		}
	}

	/*
	 * Report any leaked segments.
	 */
	zdb_leak_fini(spa);

	/* Grand total: all levels, all types. */
	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];

	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
	norm_space = metaslab_class_get_space(spa_normal_class(spa));

	/*
	 * Compare what the allocator believes is allocated (normal plus log
	 * class) against what the traversal found, less the space that
	 * dedup accounts for more than once.
	 */
	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;

	if (total_found == total_alloc) {
		if (!dump_opt['L'])
			(void) printf("\n\tNo leaks (block sum matches space"
			    " maps exactly)\n");
	} else {
		(void) printf("block traversal size %llu != alloc %llu "
		    "(%s %lld)\n",
		    (u_longlong_t)total_found,
		    (u_longlong_t)total_alloc,
		    (dump_opt['L']) ? "unreachable" : "leaked",
		    (longlong_t)(total_alloc - total_found));
		leaks = 1;
	}

	/* Nothing was traversed; this also avoids dividing by zero below. */
	if (tzb->zb_count == 0)
		return (2);

	(void) printf("\n");
	(void) printf("\tbp count:      %10llu\n",
	    (u_longlong_t)tzb->zb_count);
	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
	    (u_longlong_t)tzb->zb_lsize,
	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
	(void) printf("\tbp physical:   %10llu      avg:"
	    " %6llu     compression: %6.2f\n",
	    (u_longlong_t)tzb->zb_psize,
	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
	    (double)tzb->zb_lsize / tzb->zb_psize);
	(void) printf("\tbp allocated:  %10llu      avg:"
	    " %6llu     compression: %6.2f\n",
	    (u_longlong_t)tzb->zb_asize,
	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
	    (double)tzb->zb_lsize / tzb->zb_asize);
	(void) printf("\tbp deduped:    %10llu    ref>1:"
	    " %6llu   deduplication: %6.2f\n",
	    (u_longlong_t)zcb.zcb_dedup_asize,
	    (u_longlong_t)zcb.zcb_dedup_blocks,
	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);

	/* Per-type table; -bbb and above also break each type down by level. */
	if (dump_opt['b'] >= 2) {
		int l, t, level;
		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
		    "\t  avg\t comp\t%%Total\tType\n");

		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
			char csize[6], lsize[6], psize[6], asize[6], avg[6];
			char *typename;

			if (t < DMU_OT_NUMTYPES)
				typename = dmu_ot[t].ot_name;
			else
				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];

			/* Types with no allocated space get a row of dashes. */
			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
				(void) printf("%6s\t%5s\t%5s\t%5s"
				    "\t%5s\t%5s\t%6s\t%s\n",
				    "-",
				    "-",
				    "-",
				    "-",
				    "-",
				    "-",
				    "-",
				    typename);
				continue;
			}

			/*
			 * Highest level first; l == -1 is the final pass
			 * that prints the all-levels total for the type.
			 */
			for (l = ZB_TOTAL - 1; l >= -1; l--) {
				level = (l == -1 ? ZB_TOTAL : l);
				zb = &zcb.zcb_type[level][t];

				if (zb->zb_asize == 0)
					continue;

				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
					continue;

				/* Skip L0 when it would just repeat the total. */
				if (level == 0 && zb->zb_asize ==
				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
					continue;

				nicenum(zb->zb_count, csize);
				nicenum(zb->zb_lsize, lsize);
				nicenum(zb->zb_psize, psize);
				nicenum(zb->zb_asize, asize);
				nicenum(zb->zb_asize / zb->zb_count, avg);

				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
				    "\t%5.2f\t%6.2f\t",
				    csize, lsize, psize, asize, avg,
				    (double)zb->zb_lsize / zb->zb_psize,
				    100.0 * zb->zb_asize / tzb->zb_asize);

				if (level == ZB_TOTAL)
					(void) printf("%s\n", typename);
				else
					(void) printf("    L%d %s\n",
					    level, typename);
			}
		}
	}

	(void) printf("\n");

	/* A leak takes precedence over read errors in the return value. */
	if (leaks)
		return (2);

	if (zcb.zcb_haderrors)
		return (3);

	return (0);
}
2134
/*
 * One node of the AVL tree built by zdb_ddt_add_cb(): the running totals
 * for all traversed blocks that share a DDT key.
 */
typedef struct zdb_ddt_entry {
	ddt_key_t	zdde_key;		/* key filled by ddt_key_fill() */
	uint64_t	zdde_ref_blocks;	/* blocks seen with this key */
	uint64_t	zdde_ref_lsize;		/* sum of BP_GET_LSIZE() */
	uint64_t	zdde_ref_psize;		/* sum of BP_GET_PSIZE() */
	uint64_t	zdde_ref_dsize;		/* sum of bp_get_dsize_sync() */
	avl_node_t	zdde_node;		/* AVL linkage */
} zdb_ddt_entry_t;
2143
2144/* ARGSUSED */
2145static int
2146zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2147    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2148{
2149	avl_tree_t *t = arg;
2150	avl_index_t where;
2151	zdb_ddt_entry_t *zdde, zdde_search;
2152
2153	if (bp == NULL)
2154		return (0);
2155
2156	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2157		(void) printf("traversing objset %llu, %llu objects, "
2158		    "%lu blocks so far\n",
2159		    (u_longlong_t)zb->zb_objset,
2160		    (u_longlong_t)bp->blk_fill,
2161		    avl_numnodes(t));
2162	}
2163
2164	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2165	    BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
2166		return (0);
2167
2168	ddt_key_fill(&zdde_search.zdde_key, bp);
2169
2170	zdde = avl_find(t, &zdde_search, &where);
2171
2172	if (zdde == NULL) {
2173		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2174		zdde->zdde_key = zdde_search.zdde_key;
2175		avl_insert(t, zdde, where);
2176	}
2177
2178	zdde->zdde_ref_blocks += 1;
2179	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2180	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2181	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2182
2183	return (0);
2184}
2185
2186static void
2187dump_simulated_ddt(spa_t *spa)
2188{
2189	avl_tree_t t;
2190	void *cookie = NULL;
2191	zdb_ddt_entry_t *zdde;
2192	ddt_histogram_t ddh_total = { 0 };
2193	ddt_stat_t dds_total = { 0 };
2194
2195	avl_create(&t, ddt_entry_compare,
2196	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2197
2198	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2199
2200	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2201	    zdb_ddt_add_cb, &t);
2202
2203	spa_config_exit(spa, SCL_CONFIG, FTAG);
2204
2205	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2206		ddt_stat_t dds;
2207		uint64_t refcnt = zdde->zdde_ref_blocks;
2208		ASSERT(refcnt != 0);
2209
2210		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2211		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2212		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2213		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2214
2215		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2216		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2217		dds.dds_ref_psize = zdde->zdde_ref_psize;
2218		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2219
2220		ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
2221
2222		umem_free(zdde, sizeof (*zdde));
2223	}
2224
2225	avl_destroy(&t);
2226
2227	ddt_histogram_stat(&dds_total, &ddh_total);
2228
2229	(void) printf("Simulated DDT histogram:\n");
2230
2231	zpool_dump_ddt(&dds_total, &ddh_total);
2232
2233	dump_dedup_ratio(&dds_total);
2234}
2235
2236static void
2237dump_zpool(spa_t *spa)
2238{
2239	dsl_pool_t *dp = spa_get_dsl(spa);
2240	int rc = 0;
2241
2242	if (dump_opt['S']) {
2243		dump_simulated_ddt(spa);
2244		return;
2245	}
2246
2247	if (!dump_opt['e'] && dump_opt['C'] > 1) {
2248		(void) printf("\nCached configuration:\n");
2249		dump_nvlist(spa->spa_config, 8);
2250	}
2251
2252	if (dump_opt['C'])
2253		dump_config(spa);
2254
2255	if (dump_opt['u'])
2256		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
2257
2258	if (dump_opt['D'])
2259		dump_all_ddts(spa);
2260
2261	if (dump_opt['d'] > 2 || dump_opt['m'])
2262		dump_metaslabs(spa);
2263
2264	if (dump_opt['d'] || dump_opt['i']) {
2265		dump_dir(dp->dp_meta_objset);
2266		if (dump_opt['d'] >= 3) {
2267			dump_bplist(dp->dp_meta_objset,
2268			    spa->spa_deferred_bplist_obj, "Deferred frees");
2269			dump_dtl(spa->spa_root_vdev, 0);
2270		}
2271		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
2272		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2273	}
2274	if (dump_opt['b'] || dump_opt['c'])
2275		rc = dump_block_stats(spa);
2276
2277	if (dump_opt['s'])
2278		show_pool_stats(spa);
2279
2280	if (dump_opt['h'])
2281		dump_history(spa);
2282
2283	if (rc != 0)
2284		exit(rc);
2285}
2286
/*
 * Option bits for zdb_read_block().  main() maps the flag characters of a
 * block specifier onto these bits through flagbits[]; the character that
 * selects each bit is noted alongside it.
 */
#define	ZDB_FLAG_CHECKSUM	0x0001	/* 'c' */
#define	ZDB_FLAG_DECOMPRESS	0x0002	/* 'd' */
#define	ZDB_FLAG_BSWAP		0x0004	/* 'e' */
#define	ZDB_FLAG_GBH		0x0008	/* 'g' */
#define	ZDB_FLAG_INDIRECT	0x0010	/* 'i' */
#define	ZDB_FLAG_PHYS		0x0020	/* 'p' */
#define	ZDB_FLAG_RAW		0x0040	/* 'r' */
#define	ZDB_FLAG_PRINT_BLKPTR	0x0080	/* 'b' */

/*
 * Indexed by flag character.  An entry of zero means the character is not
 * a recognized flag (zdb_read_block() reports it as invalid).
 */
int flagbits[256];
2297
2298static void
2299zdb_print_blkptr(blkptr_t *bp, int flags)
2300{
2301	char blkbuf[BP_SPRINTF_LEN];
2302
2303	if (flags & ZDB_FLAG_BSWAP)
2304		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2305
2306	sprintf_blkptr(blkbuf, bp);
2307	(void) printf("%s\n", blkbuf);
2308}
2309
2310static void
2311zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2312{
2313	int i;
2314
2315	for (i = 0; i < nbps; i++)
2316		zdb_print_blkptr(&bp[i], flags);
2317}
2318
2319static void
2320zdb_dump_gbh(void *buf, int flags)
2321{
2322	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2323}
2324
2325static void
2326zdb_dump_block_raw(void *buf, uint64_t size, int flags)
2327{
2328	if (flags & ZDB_FLAG_BSWAP)
2329		byteswap_uint64_array(buf, size);
2330	(void) write(1, buf, size);
2331}
2332
2333static void
2334zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2335{
2336	uint64_t *d = (uint64_t *)buf;
2337	int nwords = size / sizeof (uint64_t);
2338	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2339	int i, j;
2340	char *hdr, *c;
2341
2342
2343	if (do_bswap)
2344		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
2345	else
2346		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
2347
2348	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
2349
2350	for (i = 0; i < nwords; i += 2) {
2351		(void) printf("%06llx:  %016llx  %016llx  ",
2352		    (u_longlong_t)(i * sizeof (uint64_t)),
2353		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
2354		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
2355
2356		c = (char *)&d[i];
2357		for (j = 0; j < 2 * sizeof (uint64_t); j++)
2358			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
2359		(void) printf("\n");
2360	}
2361}
2362
2363/*
2364 * There are two acceptable formats:
2365 *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
2366 *	child[.child]*    - For example: 0.1.1
2367 *
2368 * The second form can be used to specify arbitrary vdevs anywhere
2369 * in the heirarchy.  For example, in a pool with a mirror of
2370 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
2371 */
2372static vdev_t *
2373zdb_vdev_lookup(vdev_t *vdev, char *path)
2374{
2375	char *s, *p, *q;
2376	int i;
2377
2378	if (vdev == NULL)
2379		return (NULL);
2380
2381	/* First, assume the x.x.x.x format */
2382	i = (int)strtoul(path, &s, 10);
2383	if (s == path || (s && *s != '.' && *s != '\0'))
2384		goto name;
2385	if (i < 0 || i >= vdev->vdev_children)
2386		return (NULL);
2387
2388	vdev = vdev->vdev_child[i];
2389	if (*s == '\0')
2390		return (vdev);
2391	return (zdb_vdev_lookup(vdev, s+1));
2392
2393name:
2394	for (i = 0; i < vdev->vdev_children; i++) {
2395		vdev_t *vc = vdev->vdev_child[i];
2396
2397		if (vc->vdev_path == NULL) {
2398			vc = zdb_vdev_lookup(vc, path);
2399			if (vc == NULL)
2400				continue;
2401			else
2402				return (vc);
2403		}
2404
2405		p = strrchr(vc->vdev_path, '/');
2406		p = p ? p + 1 : vc->vdev_path;
2407		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
2408
2409		if (strcmp(vc->vdev_path, path) == 0)
2410			return (vc);
2411		if (strcmp(p, path) == 0)
2412			return (vc);
2413		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
2414			return (vc);
2415	}
2416
2417	return (NULL);
2418}
2419
2420/*
2421 * Read a block from a pool and print it out.  The syntax of the
2422 * block descriptor is:
2423 *
2424 *	pool:vdev_specifier:offset:size[:flags]
2425 *
2426 *	pool           - The name of the pool you wish to read from
2427 *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
2428 *	offset         - offset, in hex, in bytes
2429 *	size           - Amount of data to read, in hex, in bytes
2430 *	flags          - A string of characters specifying options
2431 *		 b: Decode a blkptr at given offset within block
2432 *		*c: Calculate and display checksums
2433 *		 d: Decompress data before dumping
2434 *		 e: Byteswap data before dumping
2435 *		 g: Display data as a gang block header
2436 *		 i: Display as an indirect block
2437 *		 p: Do I/O to physical offset
2438 *		 r: Dump raw data to stdout
2439 *
2440 *              * = not yet implemented
2441 */
2442static void
2443zdb_read_block(char *thing, spa_t *spa)
2444{
2445	blkptr_t blk, *bp = &blk;
2446	dva_t *dva = bp->blk_dva;
2447	int flags = 0;
2448	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
2449	zio_t *zio;
2450	vdev_t *vd;
2451	void *pbuf, *lbuf, *buf;
2452	char *s, *p, *dup, *vdev, *flagstr;
2453	int i, error;
2454
2455	dup = strdup(thing);
2456	s = strtok(dup, ":");
2457	vdev = s ? s : "";
2458	s = strtok(NULL, ":");
2459	offset = strtoull(s ? s : "", NULL, 16);
2460	s = strtok(NULL, ":");
2461	size = strtoull(s ? s : "", NULL, 16);
2462	s = strtok(NULL, ":");
2463	flagstr = s ? s : "";
2464
2465	s = NULL;
2466	if (size == 0)
2467		s = "size must not be zero";
2468	if (!IS_P2ALIGNED(size, DEV_BSIZE))
2469		s = "size must be a multiple of sector size";
2470	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
2471		s = "offset must be a multiple of sector size";
2472	if (s) {
2473		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
2474		free(dup);
2475		return;
2476	}
2477
2478	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
2479		for (i = 0; flagstr[i]; i++) {
2480			int bit = flagbits[(uchar_t)flagstr[i]];
2481
2482			if (bit == 0) {
2483				(void) printf("***Invalid flag: %c\n",
2484				    flagstr[i]);
2485				continue;
2486			}
2487			flags |= bit;
2488
2489			/* If it's not something with an argument, keep going */
2490			if ((bit & (ZDB_FLAG_CHECKSUM |
2491			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
2492				continue;
2493
2494			p = &flagstr[i + 1];
2495			if (bit == ZDB_FLAG_PRINT_BLKPTR)
2496				blkptr_offset = strtoull(p, &p, 16);
2497			if (*p != ':' && *p != '\0') {
2498				(void) printf("***Invalid flag arg: '%s'\n", s);
2499				free(dup);
2500				return;
2501			}
2502		}
2503	}
2504
2505	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
2506	if (vd == NULL) {
2507		(void) printf("***Invalid vdev: %s\n", vdev);
2508		free(dup);
2509		return;
2510	} else {
2511		if (vd->vdev_path)
2512			(void) fprintf(stderr, "Found vdev: %s\n",
2513			    vd->vdev_path);
2514		else
2515			(void) fprintf(stderr, "Found vdev type: %s\n",
2516			    vd->vdev_ops->vdev_op_type);
2517	}
2518
2519	psize = size;
2520	lsize = size;
2521
2522	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2523	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2524
2525	BP_ZERO(bp);
2526
2527	DVA_SET_VDEV(&dva[0], vd->vdev_id);
2528	DVA_SET_OFFSET(&dva[0], offset);
2529	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
2530	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
2531
2532	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
2533
2534	BP_SET_LSIZE(bp, lsize);
2535	BP_SET_PSIZE(bp, psize);
2536	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
2537	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
2538	BP_SET_TYPE(bp, DMU_OT_NONE);
2539	BP_SET_LEVEL(bp, 0);
2540	BP_SET_DEDUP(bp, 0);
2541	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
2542
2543	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2544	zio = zio_root(spa, NULL, NULL, 0);
2545
2546	if (vd == vd->vdev_top) {
2547		/*
2548		 * Treat this as a normal block read.
2549		 */
2550		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
2551		    ZIO_PRIORITY_SYNC_READ,
2552		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
2553	} else {
2554		/*
2555		 * Treat this as a vdev child I/O.
2556		 */
2557		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
2558		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
2559		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
2560		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
2561		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
2562	}
2563
2564	error = zio_wait(zio);
2565	spa_config_exit(spa, SCL_STATE, FTAG);
2566
2567	if (error) {
2568		(void) printf("Read of %s failed, error: %d\n", thing, error);
2569		goto out;
2570	}
2571
2572	if (flags & ZDB_FLAG_DECOMPRESS) {
2573		/*
2574		 * We don't know how the data was compressed, so just try
2575		 * every decompress function at every inflated blocksize.
2576		 */
2577		enum zio_compress c;
2578		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2579		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
2580
2581		bcopy(pbuf, pbuf2, psize);
2582
2583		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
2584		    SPA_MAXBLOCKSIZE - psize) == 0);
2585
2586		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
2587		    SPA_MAXBLOCKSIZE - psize) == 0);
2588
2589		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
2590		    lsize -= SPA_MINBLOCKSIZE) {
2591			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
2592				if (zio_decompress_data(c, pbuf, lbuf,
2593				    psize, lsize) == 0 &&
2594				    zio_decompress_data(c, pbuf2, lbuf2,
2595				    psize, lsize) == 0 &&
2596				    bcmp(lbuf, lbuf2, lsize) == 0)
2597					break;
2598			}
2599			if (c != ZIO_COMPRESS_FUNCTIONS)
2600				break;
2601			lsize -= SPA_MINBLOCKSIZE;
2602		}
2603
2604		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
2605		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
2606
2607		if (lsize <= psize) {
2608			(void) printf("Decompress of %s failed\n", thing);
2609			goto out;
2610		}
2611		buf = lbuf;
2612		size = lsize;
2613	} else {
2614		buf = pbuf;
2615		size = psize;
2616	}
2617
2618	if (flags & ZDB_FLAG_PRINT_BLKPTR)
2619		zdb_print_blkptr((blkptr_t *)(void *)
2620		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2621	else if (flags & ZDB_FLAG_RAW)
2622		zdb_dump_block_raw(buf, size, flags);
2623	else if (flags & ZDB_FLAG_INDIRECT)
2624		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2625		    flags);
2626	else if (flags & ZDB_FLAG_GBH)
2627		zdb_dump_gbh(buf, flags);
2628	else
2629		zdb_dump_block(thing, buf, size, flags);
2630
2631out:
2632	umem_free(pbuf, SPA_MAXBLOCKSIZE);
2633	umem_free(lbuf, SPA_MAXBLOCKSIZE);
2634	free(dup);
2635}
2636
2637static boolean_t
2638pool_match(nvlist_t *cfg, char *tgt)
2639{
2640	uint64_t v, guid = strtoull(tgt, NULL, 0);
2641	char *s;
2642
2643	if (guid != 0) {
2644		if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
2645			return (v == guid);
2646	} else {
2647		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
2648			return (strcmp(s, tgt) == 0);
2649	}
2650	return (B_FALSE);
2651}
2652
2653static char *
2654find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
2655{
2656	nvlist_t *pools;
2657	nvlist_t *match = NULL;
2658	char *name = NULL;
2659	char *sepp = NULL;
2660	char sep;
2661	int count = 0;
2662	importargs_t args = { 0 };
2663
2664	args.paths = dirc;
2665	args.path = dirv;
2666	args.can_be_active = B_TRUE;
2667
2668	if ((sepp = strpbrk(*target, "/@")) != NULL) {
2669		sep = *sepp;
2670		*sepp = '\0';
2671	}
2672
2673	pools = zpool_search_import(g_zfs, &args);
2674
2675	if (pools != NULL) {
2676		nvpair_t *elem = NULL;
2677		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
2678			verify(nvpair_value_nvlist(elem, configp) == 0);
2679			if (pool_match(*configp, *target)) {
2680				count++;
2681				if (match != NULL) {
2682					/* print previously found config */
2683					if (name != NULL) {
2684						(void) printf("%s\n", name);
2685						dump_nvlist(match, 8);
2686						name = NULL;
2687					}
2688					(void) printf("%s\n",
2689					    nvpair_name(elem));
2690					dump_nvlist(*configp, 8);
2691				} else {
2692					match = *configp;
2693					name = nvpair_name(elem);
2694				}
2695			}
2696		}
2697	}
2698	if (count > 1)
2699		(void) fatal("\tMatched %d pools - use pool GUID "
2700		    "instead of pool name or \n"
2701		    "\tpool name part of a dataset name to select pool", count);
2702
2703	if (sepp)
2704		*sepp = sep;
2705	/*
2706	 * If pool GUID was specified for pool id, replace it with pool name
2707	 */
2708	if (name && (strstr(*target, name) != *target)) {
2709		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
2710
2711		*target = umem_alloc(sz, UMEM_NOFAIL);
2712		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
2713	}
2714
2715	*configp = name ? match : NULL;
2716
2717	return (name);
2718}
2719
2720int
2721main(int argc, char **argv)
2722{
2723	int i, c;
2724	struct rlimit rl = { 1024, 1024 };
2725	spa_t *spa = NULL;
2726	objset_t *os = NULL;
2727	int dump_all = 1;
2728	int verbose = 0;
2729	int error = 0;
2730	char **searchdirs = NULL;
2731	int nsearch = 0;
2732	char *target;
2733	nvlist_t *policy = NULL;
2734	uint64_t max_txg = UINT64_MAX;
2735	int rewind = ZPOOL_NEVER_REWIND;
2736
2737	(void) setrlimit(RLIMIT_NOFILE, &rl);
2738	(void) enable_extended_FILE_stdio(-1, -1);
2739
2740	dprintf_setup(&argc, argv);
2741
2742	while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:")) != -1) {
2743		switch (c) {
2744		case 'b':
2745		case 'c':
2746		case 'd':
2747		case 'h':
2748		case 'i':
2749		case 'l':
2750		case 'm':
2751		case 's':
2752		case 'u':
2753		case 'C':
2754		case 'D':
2755		case 'R':
2756		case 'S':
2757			dump_opt[c]++;
2758			dump_all = 0;
2759			break;
2760		case 'A':
2761		case 'F':
2762		case 'L':
2763		case 'X':
2764		case 'e':
2765			dump_opt[c]++;
2766			break;
2767		case 'v':
2768			verbose++;
2769			break;
2770		case 'p':
2771			if (searchdirs == NULL) {
2772				searchdirs = umem_alloc(sizeof (char *),
2773				    UMEM_NOFAIL);
2774			} else {
2775				char **tmp = umem_alloc((nsearch + 1) *
2776				    sizeof (char *), UMEM_NOFAIL);
2777				bcopy(searchdirs, tmp, nsearch *
2778				    sizeof (char *));
2779				umem_free(searchdirs,
2780				    nsearch * sizeof (char *));
2781				searchdirs = tmp;
2782			}
2783			searchdirs[nsearch++] = optarg;
2784			break;
2785		case 't':
2786			max_txg = strtoull(optarg, NULL, 0);
2787			if (max_txg < TXG_INITIAL) {
2788				(void) fprintf(stderr, "incorrect txg "
2789				    "specified: %s\n", optarg);
2790				usage();
2791			}
2792			break;
2793		case 'U':
2794			spa_config_path = optarg;
2795			break;
2796		default:
2797			usage();
2798			break;
2799		}
2800	}
2801
2802	if (!dump_opt['e'] && searchdirs != NULL) {
2803		(void) fprintf(stderr, "-p option requires use of -e\n");
2804		usage();
2805	}
2806
2807	kernel_init(FREAD);
2808	g_zfs = libzfs_init();
2809	ASSERT(g_zfs != NULL);
2810
2811	if (dump_all)
2812		verbose = MAX(verbose, 1);
2813
2814	for (c = 0; c < 256; c++) {
2815		if (dump_all && !strchr("elAFLRSX", c))
2816			dump_opt[c] = 1;
2817		if (dump_opt[c])
2818			dump_opt[c] += verbose;
2819	}
2820
2821	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
2822	zfs_recover = (dump_opt['A'] > 1);
2823
2824	argc -= optind;
2825	argv += optind;
2826
2827	if (argc < 2 && dump_opt['R'])
2828		usage();
2829	if (argc < 1) {
2830		if (!dump_opt['e'] && dump_opt['C']) {
2831			dump_cachefile(spa_config_path);
2832			return (0);
2833		}
2834		usage();
2835	}
2836
2837	if (dump_opt['l']) {
2838		dump_label(argv[0]);
2839		return (0);
2840	}
2841
2842	if (dump_opt['X'] || dump_opt['F'])
2843		rewind = ZPOOL_DO_REWIND |
2844		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
2845
2846	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
2847	    nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
2848	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
2849		fatal("internal error: %s", strerror(ENOMEM));
2850
2851	error = 0;
2852	target = argv[0];
2853
2854	if (dump_opt['e']) {
2855		nvlist_t *cfg = NULL;
2856		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
2857
2858		error = ENOENT;
2859		if (name) {
2860			if (dump_opt['C'] > 1) {
2861				(void) printf("\nConfiguration for import:\n");
2862				dump_nvlist(cfg, 8);
2863			}
2864			if (nvlist_add_nvlist(cfg,
2865			    ZPOOL_REWIND_POLICY, policy) != 0) {
2866				fatal("can't open '%s': %s",
2867				    target, strerror(ENOMEM));
2868			}
2869			if ((error = spa_import(name, cfg, NULL)) != 0)
2870				error = spa_import_verbatim(name, cfg, NULL);
2871		}
2872	}
2873
2874	if (error == 0) {
2875		if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
2876			error = spa_open_rewind(target, &spa, FTAG, policy,
2877			    NULL);
2878			if (error) {
2879				/*
2880				 * If we're missing the log device then
2881				 * try opening the pool after clearing the
2882				 * log state.
2883				 */
2884				mutex_enter(&spa_namespace_lock);
2885				if ((spa = spa_lookup(target)) != NULL &&
2886				    spa->spa_log_state == SPA_LOG_MISSING) {
2887					spa->spa_log_state = SPA_LOG_CLEAR;
2888					error = 0;
2889				}
2890				mutex_exit(&spa_namespace_lock);
2891
2892				if (!error) {
2893					error = spa_open_rewind(target, &spa,
2894					    FTAG, policy, NULL);
2895				}
2896			}
2897		} else {
2898			error = dmu_objset_own(target, DMU_OST_ANY,
2899			    B_TRUE, FTAG, &os);
2900		}
2901	}
2902	nvlist_free(policy);
2903
2904	if (error)
2905		fatal("can't open '%s': %s", target, strerror(error));
2906
2907	argv++;
2908	argc--;
2909	if (!dump_opt['R']) {
2910		if (argc > 0) {
2911			zopt_objects = argc;
2912			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
2913			for (i = 0; i < zopt_objects; i++) {
2914				errno = 0;
2915				zopt_object[i] = strtoull(argv[i], NULL, 0);
2916				if (zopt_object[i] == 0 && errno != 0)
2917					fatal("bad number %s: %s",
2918					    argv[i], strerror(errno));
2919			}
2920		}
2921		(os != NULL) ? dump_dir(os) : dump_zpool(spa);
2922	} else {
2923		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
2924		flagbits['c'] = ZDB_FLAG_CHECKSUM;
2925		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
2926		flagbits['e'] = ZDB_FLAG_BSWAP;
2927		flagbits['g'] = ZDB_FLAG_GBH;
2928		flagbits['i'] = ZDB_FLAG_INDIRECT;
2929		flagbits['p'] = ZDB_FLAG_PHYS;
2930		flagbits['r'] = ZDB_FLAG_RAW;
2931
2932		for (i = 0; i < argc; i++)
2933			zdb_read_block(argv[i], spa);
2934	}
2935
2936	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
2937
2938	fuid_table_destroy();
2939
2940	libzfs_fini(g_zfs);
2941	kernel_fini();
2942
2943	return (0);
2944}
2945