1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
25 * Copyright 2020 Joyent, Inc.
26 */
27
28/* Portions Copyright 2010 Robert Milkowski */
29
/*
 * ZFS_MDB lets dmu.h know that we don't have dmu_ot, and we will define our
 * own macros to access the target's dmu_ot.  Therefore it must be defined
 * before including any ZFS headers.  Note that we only define
 * DMU_OT_IS_ENCRYPTED_IMPL() (in terms of mdb_dmu_ot_is_encrypted_impl());
 * we don't define DMU_OT_BYTESWAP_IMPL(), therefore using it will result in
 * a compilation error.  If it is needed in the future, we can implement it
 * similarly to mdb_dmu_ot_is_encrypted_impl().
 */
38#define	ZFS_MDB
39#define	DMU_OT_IS_ENCRYPTED_IMPL(ot) mdb_dmu_ot_is_encrypted_impl(ot)
40
41#include <mdb/mdb_ctf.h>
42#include <sys/zfs_context.h>
43#include <sys/mdb_modapi.h>
44#include <sys/dbuf.h>
45#include <sys/dmu_objset.h>
46#include <sys/dsl_dir.h>
47#include <sys/dsl_pool.h>
48#include <sys/metaslab_impl.h>
49#include <sys/space_map.h>
50#include <sys/list.h>
51#include <sys/vdev_impl.h>
52#include <sys/zap_leaf.h>
53#include <sys/zap_impl.h>
54#include <ctype.h>
55#include <sys/zfs_acl.h>
56#include <sys/sa_impl.h>
57#include <sys/multilist.h>
58#include <sys/btree.h>
59
/*
 * ZFS_OBJ_NAME is the name of the object that symbol lookups in this file
 * are scoped to: "zfs" when _KERNEL is defined, "libzpool.so.1" otherwise.
 */
#ifdef _KERNEL
#define	ZFS_OBJ_NAME	"zfs"
extern int64_t mdb_gethrtime(void);
#else
#define	ZFS_OBJ_NAME	"libzpool.so.1"
#endif

/*
 * String-literal prefix for object-scoped struct type names; for example
 * ZFS_STRUCT "dsl_dir" yields "struct <ZFS_OBJ_NAME>`dsl_dir".
 */
#define	ZFS_STRUCT	"struct " ZFS_OBJ_NAME "`"

/*
 * NOTE(review): defined only for the non-_KERNEL build and not referenced in
 * the visible part of this file; presumably it satisfies an external
 * reference made by the userland ZFS headers -- confirm.
 */
#ifndef _KERNEL
int aok;
#endif
72
/*
 * Option flags.  Each flag occupies its own bit so that any combination can
 * be OR-ed together into a single mask.
 */
enum spa_flags {
	SPA_FLAG_CONFIG			= 0x01,
	SPA_FLAG_VDEVS			= 0x02,
	SPA_FLAG_ERRORS			= 0x04,
	SPA_FLAG_METASLAB_GROUPS	= 0x08,
	SPA_FLAG_METASLABS		= 0x10,
	SPA_FLAG_HISTOGRAMS		= 0x20
};
81
/*
 * Mask of every flag that requires spa_print to call spa_vdevs.
 */
#define	SPA_FLAG_ALL_VDEV	(SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | \
	SPA_FLAG_METASLAB_GROUPS | SPA_FLAG_METASLABS)
88
89static int
90getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
91    const char *member, int len, void *buf)
92{
93	mdb_ctf_id_t id;
94	ulong_t off;
95	char name[64];
96
97	if (idp == NULL) {
98		if (mdb_ctf_lookup_by_name(type, &id) == -1) {
99			mdb_warn("couldn't find type %s", type);
100			return (DCMD_ERR);
101		}
102		idp = &id;
103	} else {
104		type = name;
105		mdb_ctf_type_name(*idp, name, sizeof (name));
106	}
107
108	if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
109		mdb_warn("couldn't find member %s of type %s\n", member, type);
110		return (DCMD_ERR);
111	}
112	if (off % 8 != 0) {
113		mdb_warn("member %s of type %s is unsupported bitfield",
114		    member, type);
115		return (DCMD_ERR);
116	}
117	off /= 8;
118
119	if (mdb_vread(buf, len, addr + off) == -1) {
120		mdb_warn("failed to read %s from %s at %p",
121		    member, type, addr + off);
122		return (DCMD_ERR);
123	}
124	/* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
125
126	return (0);
127}
128
/*
 * Convenience wrappers around getmember().  'member' is stringified to form
 * the member name, and 'dest' supplies both the destination buffer and the
 * number of bytes to read.
 *
 * GETMEMB looks the type up by name; 'structname' must be a string literal
 * because it is concatenated with ZFS_STRUCT.
 */
#define	GETMEMB(addr, structname, member, dest) \
	getmember((addr), ZFS_STRUCT structname, NULL, #member, \
	    sizeof (dest), &(dest))

/* GETMEMBID takes a pointer to an already-resolved CTF type id. */
#define	GETMEMBID(addr, ctfid, member, dest) \
	getmember((addr), NULL, (ctfid), #member, sizeof (dest), &(dest))
135
136static boolean_t
137strisprint(const char *cp)
138{
139	for (; *cp; cp++) {
140		if (!isprint(*cp))
141			return (B_FALSE);
142	}
143	return (B_TRUE);
144}
145
146/*
147 * <addr>::sm_entries <buffer length in bytes>
148 *
149 * Treat the buffer specified by the given address as a buffer that contains
150 * space map entries. Iterate over the specified number of entries and print
151 * them in both encoded and decoded form.
152 */
153/* ARGSUSED */
154static int
155sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
156{
157	uint64_t bufsz = 0;
158	boolean_t preview = B_FALSE;
159
160	if (!(flags & DCMD_ADDRSPEC))
161		return (DCMD_USAGE);
162
163	if (argc < 1) {
164		preview = B_TRUE;
165		bufsz = 2;
166	} else if (argc != 1) {
167		return (DCMD_USAGE);
168	} else {
169		switch (argv[0].a_type) {
170		case MDB_TYPE_STRING:
171			bufsz = mdb_strtoull(argv[0].a_un.a_str);
172			break;
173		case MDB_TYPE_IMMEDIATE:
174			bufsz = argv[0].a_un.a_val;
175			break;
176		default:
177			return (DCMD_USAGE);
178		}
179	}
180
181	char *actions[] = { "ALLOC", "FREE", "INVALID" };
182	for (uintptr_t bufend = addr + bufsz; addr < bufend;
183	    addr += sizeof (uint64_t)) {
184		uint64_t nwords;
185		uint64_t start_addr = addr;
186
187		uint64_t word = 0;
188		if (mdb_vread(&word, sizeof (word), addr) == -1) {
189			mdb_warn("failed to read space map entry %p", addr);
190			return (DCMD_ERR);
191		}
192
193		if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) {
194			(void) mdb_printf("\t    [%6llu] %s: txg %llu, "
195			    "pass %llu\n",
196			    (u_longlong_t)(addr),
197			    actions[SM_DEBUG_ACTION_DECODE(word)],
198			    (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
199			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
200			continue;
201		}
202
203		char entry_type;
204		uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID;
205
206		if (SM_PREFIX_DECODE(word) != SM2_PREFIX) {
207			entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
208			    'A' : 'F';
209			raw_offset = SM_OFFSET_DECODE(word);
210			raw_run = SM_RUN_DECODE(word);
211			nwords = 1;
212		} else {
213			ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX);
214
215			raw_run = SM2_RUN_DECODE(word);
216			vdev_id = SM2_VDEV_DECODE(word);
217
218			/* it is a two-word entry so we read another word */
219			addr += sizeof (uint64_t);
220			if (addr >= bufend) {
221				mdb_warn("buffer ends in the middle of a two "
222				    "word entry\n", addr);
223				return (DCMD_ERR);
224			}
225
226			if (mdb_vread(&word, sizeof (word), addr) == -1) {
227				mdb_warn("failed to read space map entry %p",
228				    addr);
229				return (DCMD_ERR);
230			}
231
232			entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ?
233			    'A' : 'F';
234			raw_offset = SM2_OFFSET_DECODE(word);
235			nwords = 2;
236		}
237
238		(void) mdb_printf("\t    [%6llx]    %c  range:"
239		    " %010llx-%010llx  size: %06llx vdev: %06llu words: %llu\n",
240		    (u_longlong_t)start_addr,
241		    entry_type, (u_longlong_t)raw_offset,
242		    (u_longlong_t)(raw_offset + raw_run),
243		    (u_longlong_t)raw_run,
244		    (u_longlong_t)vdev_id, (u_longlong_t)nwords);
245
246		if (preview)
247			break;
248	}
249	return (DCMD_OK);
250}
251
252static int
253mdb_dsl_dir_name(uintptr_t addr, char *buf)
254{
255	static int gotid;
256	static mdb_ctf_id_t dd_id;
257	uintptr_t dd_parent;
258	char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
259
260	if (!gotid) {
261		if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir",
262		    &dd_id) == -1) {
263			mdb_warn("couldn't find struct dsl_dir");
264			return (DCMD_ERR);
265		}
266		gotid = TRUE;
267	}
268	if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
269	    GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
270		return (DCMD_ERR);
271	}
272
273	if (dd_parent) {
274		if (mdb_dsl_dir_name(dd_parent, buf))
275			return (DCMD_ERR);
276		strcat(buf, "/");
277	}
278
279	if (dd_myname[0])
280		strcat(buf, dd_myname);
281	else
282		strcat(buf, "???");
283
284	return (0);
285}
286
287static int
288objset_name(uintptr_t addr, char *buf)
289{
290	static int gotid;
291	static mdb_ctf_id_t os_id, ds_id;
292	uintptr_t os_dsl_dataset;
293	char ds_snapname[ZFS_MAX_DATASET_NAME_LEN];
294	uintptr_t ds_dir;
295
296	buf[0] = '\0';
297
298	if (!gotid) {
299		if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset",
300		    &os_id) == -1) {
301			mdb_warn("couldn't find struct objset");
302			return (DCMD_ERR);
303		}
304		if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset",
305		    &ds_id) == -1) {
306			mdb_warn("couldn't find struct dsl_dataset");
307			return (DCMD_ERR);
308		}
309
310		gotid = TRUE;
311	}
312
313	if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset))
314		return (DCMD_ERR);
315
316	if (os_dsl_dataset == 0) {
317		strcat(buf, "mos");
318		return (0);
319	}
320
321	if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
322	    GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
323		return (DCMD_ERR);
324	}
325
326	if (ds_dir && mdb_dsl_dir_name(ds_dir, buf))
327		return (DCMD_ERR);
328
329	if (ds_snapname[0]) {
330		strcat(buf, "@");
331		strcat(buf, ds_snapname);
332	}
333	return (0);
334}
335
336static int
337enum_lookup(char *type, int val, const char *prefix, size_t size, char *out)
338{
339	const char *cp;
340	size_t len = strlen(prefix);
341	mdb_ctf_id_t enum_type;
342
343	if (mdb_ctf_lookup_by_name(type, &enum_type) != 0) {
344		mdb_warn("Could not find enum for %s", type);
345		return (-1);
346	}
347
348	if ((cp = mdb_ctf_enum_name(enum_type, val)) != NULL) {
349		if (strncmp(cp, prefix, len) == 0)
350			cp += len;
351		(void) strncpy(out, cp, size);
352	} else {
353		mdb_snprintf(out, size, "? (%d)", val);
354	}
355	return (0);
356}
357
358/* ARGSUSED */
359static int
360zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
361{
362	/*
363	 * This table can be approximately generated by running:
364	 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
365	 */
366	static const char *params[] = {
367		"arc_lotsfree_percent",
368		"arc_pages_pp_reserve",
369		"arc_reduce_dnlc_percent",
370		"arc_swapfs_reserve",
371		"arc_zio_arena_free_shift",
372		"dbuf_cache_hiwater_pct",
373		"dbuf_cache_lowater_pct",
374		"dbuf_cache_max_bytes",
375		"dbuf_cache_max_shift",
376		"ddt_zap_indirect_blockshift",
377		"ddt_zap_leaf_blockshift",
378		"ditto_same_vdev_distance_shift",
379		"dmu_find_threads",
380		"dmu_rescan_dnode_threshold",
381		"dsl_scan_delay_completion",
382		"fzap_default_block_shift",
383		"l2arc_feed_again",
384		"l2arc_feed_min_ms",
385		"l2arc_feed_secs",
386		"l2arc_headroom",
387		"l2arc_headroom_boost",
388		"l2arc_noprefetch",
389		"l2arc_norw",
390		"l2arc_write_boost",
391		"l2arc_write_max",
392		"metaslab_aliquot",
393		"metaslab_bias_enabled",
394		"metaslab_debug_load",
395		"metaslab_debug_unload",
396		"metaslab_df_alloc_threshold",
397		"metaslab_df_free_pct",
398		"metaslab_fragmentation_factor_enabled",
399		"metaslab_force_ganging",
400		"metaslab_lba_weighting_enabled",
401		"metaslab_load_pct",
402		"metaslab_min_alloc_size",
403		"metaslab_ndf_clump_shift",
404		"metaslab_preload_enabled",
405		"metaslab_preload_limit",
406		"metaslab_trace_enabled",
407		"metaslab_trace_max_entries",
408		"metaslab_unload_delay",
409		"metaslabs_per_vdev",
410		"reference_history",
411		"reference_tracking_enable",
412		"send_holes_without_birth_time",
413		"spa_asize_inflation",
414		"spa_load_verify_data",
415		"spa_load_verify_maxinflight",
416		"spa_load_verify_metadata",
417		"spa_max_replication_override",
418		"spa_min_slop",
419		"spa_mode_global",
420		"spa_slop_shift",
421		"space_map_blksz",
422		"vdev_mirror_shift",
423		"zfetch_max_distance",
424		"zfs_abd_chunk_size",
425		"zfs_abd_scatter_enabled",
426		"zfs_arc_average_blocksize",
427		"zfs_arc_evict_batch_limit",
428		"zfs_arc_grow_retry",
429		"zfs_arc_max",
430		"zfs_arc_meta_limit",
431		"zfs_arc_meta_min",
432		"zfs_arc_min",
433		"zfs_arc_p_min_shift",
434		"zfs_arc_shrink_shift",
435		"zfs_async_block_max_blocks",
436		"zfs_ccw_retry_interval",
437		"zfs_commit_timeout_pct",
438		"zfs_compressed_arc_enabled",
439		"zfs_condense_indirect_commit_entry_delay_ticks",
440		"zfs_condense_indirect_vdevs_enable",
441		"zfs_condense_max_obsolete_bytes",
442		"zfs_condense_min_mapping_bytes",
443		"zfs_condense_pct",
444		"zfs_dbgmsg_maxsize",
445		"zfs_deadman_checktime_ms",
446		"zfs_deadman_enabled",
447		"zfs_deadman_synctime_ms",
448		"zfs_dedup_prefetch",
449		"zfs_default_bs",
450		"zfs_default_ibs",
451		"zfs_delay_max_ns",
452		"zfs_delay_min_dirty_percent",
453		"zfs_delay_resolution_ns",
454		"zfs_delay_scale",
455		"zfs_dirty_data_max",
456		"zfs_dirty_data_max_max",
457		"zfs_dirty_data_max_percent",
458		"zfs_dirty_data_sync",
459		"zfs_flags",
460		"zfs_free_bpobj_enabled",
461		"zfs_free_leak_on_eio",
462		"zfs_free_min_time_ms",
463		"zfs_fsync_sync_cnt",
464		"zfs_immediate_write_sz",
465		"zfs_indirect_condense_obsolete_pct",
466		"zfs_lua_check_instrlimit_interval",
467		"zfs_lua_max_instrlimit",
468		"zfs_lua_max_memlimit",
469		"zfs_max_recordsize",
470		"zfs_mdcomp_disable",
471		"zfs_metaslab_condense_block_threshold",
472		"zfs_metaslab_fragmentation_threshold",
473		"zfs_metaslab_segment_weight_enabled",
474		"zfs_metaslab_switch_threshold",
475		"zfs_mg_fragmentation_threshold",
476		"zfs_mg_noalloc_threshold",
477		"zfs_multilist_num_sublists",
478		"zfs_no_scrub_io",
479		"zfs_no_scrub_prefetch",
480		"zfs_nocacheflush",
481		"zfs_nopwrite_enabled",
482		"zfs_object_remap_one_indirect_delay_ticks",
483		"zfs_obsolete_min_time_ms",
484		"zfs_pd_bytes_max",
485		"zfs_per_txg_dirty_frees_percent",
486		"zfs_prefetch_disable",
487		"zfs_read_chunk_size",
488		"zfs_recover",
489		"zfs_recv_queue_length",
490		"zfs_redundant_metadata_most_ditto_level",
491		"zfs_remap_blkptr_enable",
492		"zfs_remove_max_copy_bytes",
493		"zfs_remove_max_segment",
494		"zfs_resilver_delay",
495		"zfs_resilver_min_time_ms",
496		"zfs_scan_idle",
497		"zfs_scan_min_time_ms",
498		"zfs_scrub_delay",
499		"zfs_scrub_limit",
500		"zfs_send_corrupt_data",
501		"zfs_send_queue_length",
502		"zfs_send_set_freerecords_bit",
503		"zfs_sync_pass_deferred_free",
504		"zfs_sync_pass_dont_compress",
505		"zfs_sync_pass_rewrite",
506		"zfs_sync_taskq_batch_pct",
507		"zfs_top_maxinflight",
508		"zfs_txg_timeout",
509		"zfs_vdev_aggregation_limit",
510		"zfs_vdev_async_read_max_active",
511		"zfs_vdev_async_read_min_active",
512		"zfs_vdev_async_write_active_max_dirty_percent",
513		"zfs_vdev_async_write_active_min_dirty_percent",
514		"zfs_vdev_async_write_max_active",
515		"zfs_vdev_async_write_min_active",
516		"zfs_vdev_cache_bshift",
517		"zfs_vdev_cache_max",
518		"zfs_vdev_cache_size",
519		"zfs_vdev_max_active",
520		"zfs_vdev_queue_depth_pct",
521		"zfs_vdev_read_gap_limit",
522		"zfs_vdev_removal_max_active",
523		"zfs_vdev_removal_min_active",
524		"zfs_vdev_scrub_max_active",
525		"zfs_vdev_scrub_min_active",
526		"zfs_vdev_sync_read_max_active",
527		"zfs_vdev_sync_read_min_active",
528		"zfs_vdev_sync_write_max_active",
529		"zfs_vdev_sync_write_min_active",
530		"zfs_vdev_write_gap_limit",
531		"zfs_write_implies_delete_child",
532		"zfs_zil_clean_taskq_maxalloc",
533		"zfs_zil_clean_taskq_minalloc",
534		"zfs_zil_clean_taskq_nthr_pct",
535		"zil_replay_disable",
536		"zil_slog_bulk",
537		"zio_buf_debug_limit",
538		"zio_dva_throttle_enabled",
539		"zio_injection_enabled",
540		"zvol_immediate_write_sz",
541		"zvol_maxphys",
542		"zvol_unmap_enabled",
543		"zvol_unmap_sync_enabled",
544		"zfs_max_dataset_nesting",
545	};
546
547	for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
548		int sz;
549		uint64_t val64;
550		uint32_t *val32p = (uint32_t *)&val64;
551
552		sz = mdb_readvar(&val64, params[i]);
553		if (sz == 4) {
554			mdb_printf("%s = 0x%x\n", params[i], *val32p);
555		} else if (sz == 8) {
556			mdb_printf("%s = 0x%llx\n", params[i], val64);
557		} else {
558			mdb_warn("variable %s not found", params[i]);
559		}
560	}
561
562	return (DCMD_OK);
563}
564
565/* ARGSUSED */
566static int
567dva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
568{
569	dva_t dva;
570	if (mdb_vread(&dva, sizeof (dva_t), addr) == -1) {
571		mdb_warn("failed to read dva_t");
572		return (DCMD_ERR);
573	}
574	mdb_printf("<%llu:%llx:%llx>\n",
575	    (u_longlong_t)DVA_GET_VDEV(&dva),
576	    (u_longlong_t)DVA_GET_OFFSET(&dva),
577	    (u_longlong_t)DVA_GET_ASIZE(&dva));
578
579	return (DCMD_OK);
580}
581
/*
 * Local view of the target's dmu_object_type_info_t holding only the member
 * this file reads.  It is filled in by mdb_ctf_vread(), which is handed both
 * type names, so the member name must not be changed.
 */
typedef struct mdb_dmu_object_type_info {
	boolean_t ot_encrypt;
} mdb_dmu_object_type_info_t;
585
586static boolean_t
587mdb_dmu_ot_is_encrypted_impl(dmu_object_type_t ot)
588{
589	mdb_dmu_object_type_info_t mdoti;
590	GElf_Sym sym;
591	size_t sz = mdb_ctf_sizeof_by_name("dmu_object_type_info_t");
592
593	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "dmu_ot", &sym)) {
594		mdb_warn("failed to find " ZFS_OBJ_NAME "`dmu_ot");
595		return (B_FALSE);
596	}
597
598	if (mdb_ctf_vread(&mdoti, "dmu_object_type_info_t",
599	    "mdb_dmu_object_type_info_t", sym.st_value + sz * ot, 0) != 0) {
600		return (B_FALSE);
601	}
602
603	return (mdoti.ot_encrypt);
604}
605
606/* ARGSUSED */
607static int
608blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
609{
610	char type[80], checksum[80], compress[80];
611	blkptr_t blk, *bp = &blk;
612	char buf[BP_SPRINTF_LEN];
613
614	if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) {
615		mdb_warn("failed to read blkptr_t");
616		return (DCMD_ERR);
617	}
618
619	if (enum_lookup("enum dmu_object_type", BP_GET_TYPE(bp), "DMU_OT_",
620	    sizeof (type), type) == -1 ||
621	    enum_lookup("enum zio_checksum", BP_GET_CHECKSUM(bp),
622	    "ZIO_CHECKSUM_", sizeof (checksum), checksum) == -1 ||
623	    enum_lookup("enum zio_compress", BP_GET_COMPRESS(bp),
624	    "ZIO_COMPRESS_", sizeof (compress), compress) == -1) {
625		mdb_warn("Could not find blkptr enumerated types");
626		return (DCMD_ERR);
627	}
628
629	SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type,
630	    checksum, compress);
631
632	mdb_printf("%s\n", buf);
633
634	return (DCMD_OK);
635}
636
/*
 * Local view of the target's dmu_buf_impl holding only the members the dbuf
 * dcmd prints.  It is filled in by mdb_ctf_vread(), which is handed both
 * type names, so member names and nesting must not be changed.
 */
typedef struct mdb_dmu_buf_impl {
	struct {
		uint64_t db_object;
		uintptr_t db_data;
	} db;
	uintptr_t db_objset;	/* passed to objset_name() */
	uint64_t db_level;
	uint64_t db_blkid;
	struct {
		uint64_t rc_count;	/* printed in the "holds" column */
	} db_holds;
} mdb_dmu_buf_impl_t;
649
650/* ARGSUSED */
651static int
652dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
653{
654	mdb_dmu_buf_impl_t db;
655	char objectname[32];
656	char blkidname[32];
657	char path[ZFS_MAX_DATASET_NAME_LEN];
658	int ptr_width = (int)(sizeof (void *)) * 2;
659
660	if (DCMD_HDRSPEC(flags))
661		mdb_printf("%*s %8s %3s %9s %5s %s\n",
662		    ptr_width, "addr", "object", "lvl", "blkid", "holds", "os");
663
664	if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t",
665	    addr, 0) == -1)
666		return (DCMD_ERR);
667
668	if (db.db.db_object == DMU_META_DNODE_OBJECT)
669		(void) strcpy(objectname, "mdn");
670	else
671		(void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
672		    (u_longlong_t)db.db.db_object);
673
674	if (db.db_blkid == DMU_BONUS_BLKID)
675		(void) strcpy(blkidname, "bonus");
676	else
677		(void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
678		    (u_longlong_t)db.db_blkid);
679
680	if (objset_name(db.db_objset, path)) {
681		return (DCMD_ERR);
682	}
683
684	mdb_printf("%*p %8s %3u %9s %5llu %s\n", ptr_width, addr,
685	    objectname, (int)db.db_level, blkidname,
686	    db.db_holds.rc_count, path);
687
688	return (DCMD_OK);
689}
690
691/* ARGSUSED */
692static int
693dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
694{
695#define	HISTOSZ 32
696	uintptr_t dbp;
697	dmu_buf_impl_t db;
698	dbuf_hash_table_t ht;
699	uint64_t bucket, ndbufs;
700	uint64_t histo[HISTOSZ];
701	uint64_t histo2[HISTOSZ];
702	int i, maxidx;
703
704	if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
705		mdb_warn("failed to read 'dbuf_hash_table'");
706		return (DCMD_ERR);
707	}
708
709	for (i = 0; i < HISTOSZ; i++) {
710		histo[i] = 0;
711		histo2[i] = 0;
712	}
713
714	ndbufs = 0;
715	for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
716		int len;
717
718		if (mdb_vread(&dbp, sizeof (void *),
719		    (uintptr_t)(ht.hash_table+bucket)) == -1) {
720			mdb_warn("failed to read hash bucket %u at %p",
721			    bucket, ht.hash_table+bucket);
722			return (DCMD_ERR);
723		}
724
725		len = 0;
726		while (dbp != 0) {
727			if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
728			    dbp) == -1) {
729				mdb_warn("failed to read dbuf at %p", dbp);
730				return (DCMD_ERR);
731			}
732			dbp = (uintptr_t)db.db_hash_next;
733			for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
734				histo2[i]++;
735			len++;
736			ndbufs++;
737		}
738
739		if (len >= HISTOSZ)
740			len = HISTOSZ-1;
741		histo[len]++;
742	}
743
744	mdb_printf("hash table has %llu buckets, %llu dbufs "
745	    "(avg %llu buckets/dbuf)\n",
746	    ht.hash_table_mask+1, ndbufs,
747	    (ht.hash_table_mask+1)/ndbufs);
748
749	mdb_printf("\n");
750	maxidx = 0;
751	for (i = 0; i < HISTOSZ; i++)
752		if (histo[i] > 0)
753			maxidx = i;
754	mdb_printf("hash chain length	number of buckets\n");
755	for (i = 0; i <= maxidx; i++)
756		mdb_printf("%u			%llu\n", i, histo[i]);
757
758	mdb_printf("\n");
759	maxidx = 0;
760	for (i = 0; i < HISTOSZ; i++)
761		if (histo2[i] > 0)
762			maxidx = i;
763	mdb_printf("hash chain depth	number of dbufs\n");
764	for (i = 0; i <= maxidx; i++)
765		mdb_printf("%u or more		%llu	%llu%%\n",
766		    i, histo2[i], histo2[i]*100/ndbufs);
767
768
769	return (DCMD_OK);
770}
771
772#define	CHAIN_END 0xffff
773/*
774 * ::zap_leaf [-v]
775 *
776 * Print a zap_leaf_phys_t, assumed to be 16k
777 */
778/* ARGSUSED */
779static int
780zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
781{
782	char buf[16*1024];
783	int verbose = B_FALSE;
784	int four = B_FALSE;
785	dmu_buf_t l_dbuf;
786	zap_leaf_t l;
787	zap_leaf_phys_t *zlp = (void *)buf;
788	int i;
789
790	if (mdb_getopts(argc, argv,
791	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
792	    '4', MDB_OPT_SETBITS, TRUE, &four,
793	    NULL) != argc)
794		return (DCMD_USAGE);
795
796	l_dbuf.db_data = zlp;
797	l.l_dbuf = &l_dbuf;
798	l.l_bs = 14; /* assume 16k blocks */
799	if (four)
800		l.l_bs = 12;
801
802	if (!(flags & DCMD_ADDRSPEC)) {
803		return (DCMD_USAGE);
804	}
805
806	if (mdb_vread(buf, sizeof (buf), addr) == -1) {
807		mdb_warn("failed to read zap_leaf_phys_t at %p", addr);
808		return (DCMD_ERR);
809	}
810
811	if (zlp->l_hdr.lh_block_type != ZBT_LEAF ||
812	    zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) {
813		mdb_warn("This does not appear to be a zap_leaf_phys_t");
814		return (DCMD_ERR);
815	}
816
817	mdb_printf("zap_leaf_phys_t at %p:\n", addr);
818	mdb_printf("    lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len);
819	mdb_printf("    lh_prefix = %llx\n", zlp->l_hdr.lh_prefix);
820	mdb_printf("    lh_nentries = %u\n", zlp->l_hdr.lh_nentries);
821	mdb_printf("    lh_nfree = %u\n", zlp->l_hdr.lh_nfree,
822	    zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l)));
823	mdb_printf("    lh_freelist = %u\n", zlp->l_hdr.lh_freelist);
824	mdb_printf("    lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags,
825	    zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ?
826	    "ENTRIES_CDSORTED" : "");
827
828	if (verbose) {
829		mdb_printf(" hash table:\n");
830		for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) {
831			if (zlp->l_hash[i] != CHAIN_END)
832				mdb_printf("    %u: %u\n", i, zlp->l_hash[i]);
833		}
834	}
835
836	mdb_printf(" chunks:\n");
837	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
838		/* LINTED: alignment */
839		zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i);
840		switch (zlc->l_entry.le_type) {
841		case ZAP_CHUNK_FREE:
842			if (verbose) {
843				mdb_printf("    %u: free; lf_next = %u\n",
844				    i, zlc->l_free.lf_next);
845			}
846			break;
847		case ZAP_CHUNK_ENTRY:
848			mdb_printf("    %u: entry\n", i);
849			if (verbose) {
850				mdb_printf("        le_next = %u\n",
851				    zlc->l_entry.le_next);
852			}
853			mdb_printf("        le_name_chunk = %u\n",
854			    zlc->l_entry.le_name_chunk);
855			mdb_printf("        le_name_numints = %u\n",
856			    zlc->l_entry.le_name_numints);
857			mdb_printf("        le_value_chunk = %u\n",
858			    zlc->l_entry.le_value_chunk);
859			mdb_printf("        le_value_intlen = %u\n",
860			    zlc->l_entry.le_value_intlen);
861			mdb_printf("        le_value_numints = %u\n",
862			    zlc->l_entry.le_value_numints);
863			mdb_printf("        le_cd = %u\n",
864			    zlc->l_entry.le_cd);
865			mdb_printf("        le_hash = %llx\n",
866			    zlc->l_entry.le_hash);
867			break;
868		case ZAP_CHUNK_ARRAY:
869			mdb_printf("    %u: array", i);
870			if (strisprint((char *)zlc->l_array.la_array))
871				mdb_printf(" \"%s\"", zlc->l_array.la_array);
872			mdb_printf("\n");
873			if (verbose) {
874				int j;
875				mdb_printf("        ");
876				for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) {
877					mdb_printf("%02x ",
878					    zlc->l_array.la_array[j]);
879				}
880				mdb_printf("\n");
881			}
882			if (zlc->l_array.la_next != CHAIN_END) {
883				mdb_printf("        lf_next = %u\n",
884				    zlc->l_array.la_next);
885			}
886			break;
887		default:
888			mdb_printf("    %u: undefined type %u\n",
889			    zlc->l_entry.le_type);
890		}
891	}
892
893	return (DCMD_OK);
894}
895
/*
 * State handed from the dbufs dcmd to dbufs_cb.  The numeric filters hold
 * DBUFS_UNSET, and osname holds NULL, when the corresponding filter is not
 * in use.
 */
typedef struct dbufs_data {
	mdb_ctf_id_t id;	/* CTF id of the dmu_buf_impl struct */
	uint64_t objset;	/* match on db_objset */
	uint64_t object;	/* match on db.db_object */
	uint64_t level;		/* match on db_level */
	uint64_t blkid;		/* match on db_blkid */
	char *osname;		/* match on the name from objset_name() */
} dbufs_data_t;

/* Sentinel meaning "no filter value was supplied". */
#define	DBUFS_UNSET	(0xbaddcafedeadbeefULL)
906
907/* ARGSUSED */
908static int
909dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
910{
911	dbufs_data_t *data = arg;
912	uintptr_t objset;
913	dmu_buf_t db;
914	uint8_t level;
915	uint64_t blkid;
916	char osname[ZFS_MAX_DATASET_NAME_LEN];
917
918	if (GETMEMBID(addr, &data->id, db_objset, objset) ||
919	    GETMEMBID(addr, &data->id, db, db) ||
920	    GETMEMBID(addr, &data->id, db_level, level) ||
921	    GETMEMBID(addr, &data->id, db_blkid, blkid)) {
922		return (WALK_ERR);
923	}
924
925	if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
926	    (data->osname == NULL || (objset_name(objset, osname) == 0 &&
927	    strcmp(data->osname, osname) == 0)) &&
928	    (data->object == DBUFS_UNSET || data->object == db.db_object) &&
929	    (data->level == DBUFS_UNSET || data->level == level) &&
930	    (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
931		mdb_printf("%#lr\n", addr);
932	}
933	return (WALK_NEXT);
934}
935
936/* ARGSUSED */
937static int
938dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
939{
940	dbufs_data_t data;
941	char *object = NULL;
942	char *blkid = NULL;
943
944	data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
945	data.osname = NULL;
946
947	if (mdb_getopts(argc, argv,
948	    'O', MDB_OPT_UINT64, &data.objset,
949	    'n', MDB_OPT_STR, &data.osname,
950	    'o', MDB_OPT_STR, &object,
951	    'l', MDB_OPT_UINT64, &data.level,
952	    'b', MDB_OPT_STR, &blkid,
953	    NULL) != argc) {
954		return (DCMD_USAGE);
955	}
956
957	if (object) {
958		if (strcmp(object, "mdn") == 0) {
959			data.object = DMU_META_DNODE_OBJECT;
960		} else {
961			data.object = mdb_strtoull(object);
962		}
963	}
964
965	if (blkid) {
966		if (strcmp(blkid, "bonus") == 0) {
967			data.blkid = DMU_BONUS_BLKID;
968		} else {
969			data.blkid = mdb_strtoull(blkid);
970		}
971	}
972
973	if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) {
974		mdb_warn("couldn't find struct dmu_buf_impl_t");
975		return (DCMD_ERR);
976	}
977
978	if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
979		mdb_warn("can't walk dbufs");
980		return (DCMD_ERR);
981	}
982
983	return (DCMD_OK);
984}
985
/* State handed from the abuf_find dcmd to abuf_find_cb. */
typedef struct abuf_find_data {
	dva_t dva;		/* the two DVA words to search for */
	mdb_ctf_id_t id;	/* CTF id of the arc_buf_hdr struct */
} abuf_find_data_t;
990
991/* ARGSUSED */
992static int
993abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
994{
995	abuf_find_data_t *data = arg;
996	dva_t dva;
997
998	if (GETMEMBID(addr, &data->id, b_dva, dva)) {
999		return (WALK_ERR);
1000	}
1001
1002	if (dva.dva_word[0] == data->dva.dva_word[0] &&
1003	    dva.dva_word[1] == data->dva.dva_word[1]) {
1004		mdb_printf("%#lr\n", addr);
1005	}
1006	return (WALK_NEXT);
1007}
1008
/*
 * Local view of the target's arc_state_t holding only the member this file
 * reads.  It is filled in by mdb_ctf_vread(), which is handed both type
 * names, so the member name must not be changed.
 */
typedef struct mdb_arc_state {
	uintptr_t	arcs_list[ARC_BUFC_NUMTYPES];
} mdb_arc_state_t;
1012
1013/* ARGSUSED */
1014static int
1015abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1016{
1017	abuf_find_data_t data;
1018	GElf_Sym sym;
1019	int i, j;
1020	const char *syms[] = {
1021		"ARC_mru",
1022		"ARC_mru_ghost",
1023		"ARC_mfu",
1024		"ARC_mfu_ghost",
1025	};
1026
1027	if (argc != 2)
1028		return (DCMD_USAGE);
1029
1030	for (i = 0; i < 2; i ++) {
1031		switch (argv[i].a_type) {
1032		case MDB_TYPE_STRING:
1033			data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
1034			break;
1035		case MDB_TYPE_IMMEDIATE:
1036			data.dva.dva_word[i] = argv[i].a_un.a_val;
1037			break;
1038		default:
1039			return (DCMD_USAGE);
1040		}
1041	}
1042
1043	if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) {
1044		mdb_warn("couldn't find struct arc_buf_hdr");
1045		return (DCMD_ERR);
1046	}
1047
1048	for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
1049		mdb_arc_state_t mas;
1050
1051		if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) {
1052			mdb_warn("can't find symbol %s", syms[i]);
1053			return (DCMD_ERR);
1054		}
1055
1056		if (mdb_ctf_vread(&mas, "arc_state_t", "mdb_arc_state_t",
1057		    sym.st_value, 0) != 0) {
1058			mdb_warn("can't read arcs_list of %s", syms[i]);
1059			return (DCMD_ERR);
1060		}
1061
1062		for (j = 0; j < ARC_BUFC_NUMTYPES; j++) {
1063			uintptr_t addr = mas.arcs_list[j];
1064
1065			if (addr == 0)
1066				continue;
1067
1068			if (mdb_pwalk("multilist", abuf_find_cb, &data,
1069			    addr) != 0) {
1070				mdb_warn("can't walk %s", syms[i]);
1071				return (DCMD_ERR);
1072			}
1073		}
1074	}
1075
1076	return (DCMD_OK);
1077}
1078
1079
/* Options handed from the dbgmsg dcmd to dbgmsg_cb. */
typedef struct dbgmsg_arg {
	boolean_t da_verbose;	/* -v: also print timestamp and ::whatis */
	boolean_t da_address;	/* -a: prefix each message with its address */
} dbgmsg_arg_t;
1084
1085/* ARGSUSED */
1086static int
1087dbgmsg_cb(uintptr_t addr, const void *unknown, void *arg)
1088{
1089	static mdb_ctf_id_t id;
1090	static boolean_t gotid;
1091	static ulong_t off;
1092
1093	dbgmsg_arg_t *da = arg;
1094	time_t timestamp;
1095	char buf[1024];
1096
1097	if (!gotid) {
1098		if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) ==
1099		    -1) {
1100			mdb_warn("couldn't find struct zfs_dbgmsg");
1101			return (WALK_ERR);
1102		}
1103		gotid = TRUE;
1104		if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) {
1105			mdb_warn("couldn't find zdm_msg");
1106			return (WALK_ERR);
1107		}
1108		off /= 8;
1109	}
1110
1111
1112	if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) {
1113		return (WALK_ERR);
1114	}
1115
1116	if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) {
1117		mdb_warn("failed to read zdm_msg at %p\n", addr + off);
1118		return (DCMD_ERR);
1119	}
1120
1121	if (da->da_address)
1122		mdb_printf("%p ", addr);
1123	if (da->da_verbose)
1124		mdb_printf("%Y ", timestamp);
1125
1126	mdb_printf("%s\n", buf);
1127
1128	if (da->da_verbose)
1129		(void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
1130
1131	return (WALK_NEXT);
1132}
1133
1134/* ARGSUSED */
1135static int
1136dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1137{
1138	GElf_Sym sym;
1139	dbgmsg_arg_t da = { 0 };
1140
1141	if (mdb_getopts(argc, argv,
1142	    'v', MDB_OPT_SETBITS, B_TRUE, &da.da_verbose,
1143	    'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address,
1144	    NULL) != argc)
1145		return (DCMD_USAGE);
1146
1147	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) {
1148		mdb_warn("can't find zfs_dbgmsgs");
1149		return (DCMD_ERR);
1150	}
1151
1152	if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) {
1153		mdb_warn("can't walk zfs_dbgmsgs");
1154		return (DCMD_ERR);
1155	}
1156
1157	return (DCMD_OK);
1158}
1159
1160/*ARGSUSED*/
1161static int
1162arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1163{
1164	kstat_named_t *stats;
1165	GElf_Sym sym;
1166	int nstats, i;
1167	uint_t opt_a = FALSE;
1168	uint_t opt_b = FALSE;
1169	uint_t shift = 0;
1170	const char *suffix;
1171
1172	static const char *bytestats[] = {
1173		"p", "c", "c_min", "c_max", "size", "duplicate_buffers_size",
1174		"arc_meta_used", "arc_meta_limit", "arc_meta_max",
1175		"arc_meta_min", "hdr_size", "data_size", "metadata_size",
1176		"other_size", "anon_size", "anon_evictable_data",
1177		"anon_evictable_metadata", "mru_size", "mru_evictable_data",
1178		"mru_evictable_metadata", "mru_ghost_size",
1179		"mru_ghost_evictable_data", "mru_ghost_evictable_metadata",
1180		"mfu_size", "mfu_evictable_data", "mfu_evictable_metadata",
1181		"mfu_ghost_size", "mfu_ghost_evictable_data",
1182		"mfu_ghost_evictable_metadata", "evict_l2_cached",
1183		"evict_l2_eligible", "evict_l2_ineligible", "l2_read_bytes",
1184		"l2_write_bytes", "l2_size", "l2_asize", "l2_hdr_size",
1185		"compressed_size", "uncompressed_size", "overhead_size",
1186		NULL
1187	};
1188
1189	static const char *extras[] = {
1190		"arc_no_grow", "arc_tempreserve",
1191		NULL
1192	};
1193
1194	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) {
1195		mdb_warn("failed to find 'arc_stats'");
1196		return (DCMD_ERR);
1197	}
1198
1199	stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
1200
1201	if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
1202		mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
1203		return (DCMD_ERR);
1204	}
1205
1206	nstats = sym.st_size / sizeof (kstat_named_t);
1207
1208	/* NB: -a / opt_a are ignored for backwards compatability */
1209	if (mdb_getopts(argc, argv,
1210	    'a', MDB_OPT_SETBITS, TRUE, &opt_a,
1211	    'b', MDB_OPT_SETBITS, TRUE, &opt_b,
1212	    'k', MDB_OPT_SETBITS, 10, &shift,
1213	    'm', MDB_OPT_SETBITS, 20, &shift,
1214	    'g', MDB_OPT_SETBITS, 30, &shift,
1215	    NULL) != argc)
1216		return (DCMD_USAGE);
1217
1218	if (!opt_b && !shift)
1219		shift = 20;
1220
1221	switch (shift) {
1222	case 0:
1223		suffix = "B";
1224		break;
1225	case 10:
1226		suffix = "KB";
1227		break;
1228	case 20:
1229		suffix = "MB";
1230		break;
1231	case 30:
1232		suffix = "GB";
1233		break;
1234	default:
1235		suffix = "XX";
1236	}
1237
1238	for (i = 0; i < nstats; i++) {
1239		int j;
1240		boolean_t bytes = B_FALSE;
1241
1242		for (j = 0; bytestats[j]; j++) {
1243			if (strcmp(stats[i].name, bytestats[j]) == 0) {
1244				bytes = B_TRUE;
1245				break;
1246			}
1247		}
1248
1249		if (bytes) {
1250			mdb_printf("%-25s = %9llu %s\n", stats[i].name,
1251			    stats[i].value.ui64 >> shift, suffix);
1252		} else {
1253			mdb_printf("%-25s = %9llu\n", stats[i].name,
1254			    stats[i].value.ui64);
1255		}
1256	}
1257
1258	for (i = 0; extras[i]; i++) {
1259		uint64_t buf;
1260
1261		if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) {
1262			mdb_warn("failed to find '%s'", extras[i]);
1263			return (DCMD_ERR);
1264		}
1265
1266		if (sym.st_size != sizeof (uint64_t) &&
1267		    sym.st_size != sizeof (uint32_t)) {
1268			mdb_warn("expected scalar for variable '%s'\n",
1269			    extras[i]);
1270			return (DCMD_ERR);
1271		}
1272
1273		if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
1274			mdb_warn("couldn't read '%s'", extras[i]);
1275			return (DCMD_ERR);
1276		}
1277
1278		mdb_printf("%-25s = ", extras[i]);
1279
1280		/* NB: all the 64-bit extras happen to be byte counts */
1281		if (sym.st_size == sizeof (uint64_t))
1282			mdb_printf("%9llu %s\n", buf >> shift, suffix);
1283
1284		if (sym.st_size == sizeof (uint32_t))
1285			mdb_printf("%9d\n", *((uint32_t *)&buf));
1286	}
1287	return (DCMD_OK);
1288}
1289
/*
 * The spa_t members used by spa_print(); filled in from the target's spa_t
 * with mdb_ctf_vread().
 */
typedef struct mdb_spa_print {
	pool_state_t spa_state;
	char spa_name[ZFS_MAX_DATASET_NAME_LEN];
	uintptr_t spa_normal_class;	/* passed to spa_class_histogram() */
} mdb_spa_print_t;
1295
1296
/*
 * Bar material for dump_histogram(): each row prints a suffix of histo_stars,
 * so histo_width (the string length, excluding the terminating NUL) is the
 * longest bar that can be drawn.
 */
const char histo_stars[] = "****************************************";
const int histo_width = sizeof (histo_stars) - 1;
1299
1300static void
1301dump_histogram(const uint64_t *histo, int size, int offset)
1302{
1303	int i;
1304	int minidx = size - 1;
1305	int maxidx = 0;
1306	uint64_t max = 0;
1307
1308	for (i = 0; i < size; i++) {
1309		if (histo[i] > max)
1310			max = histo[i];
1311		if (histo[i] > 0 && i > maxidx)
1312			maxidx = i;
1313		if (histo[i] > 0 && i < minidx)
1314			minidx = i;
1315	}
1316
1317	if (max < histo_width)
1318		max = histo_width;
1319
1320	for (i = minidx; i <= maxidx; i++) {
1321		mdb_printf("%3u: %6llu %s\n",
1322		    i + offset, (u_longlong_t)histo[i],
1323		    &histo_stars[(max - histo[i]) * histo_width / max]);
1324	}
1325}
1326
/*
 * The metaslab_class_t member used by spa_class_histogram(); filled in with
 * mdb_ctf_vread().
 */
typedef struct mdb_metaslab_class {
	uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE];
} mdb_metaslab_class_t;
1330
1331/*
1332 * spa_class_histogram(uintptr_t class_addr)
1333 *
1334 * Prints free space histogram for a device class
1335 *
1336 * Returns DCMD_OK, or DCMD_ERR.
1337 */
1338static int
1339spa_class_histogram(uintptr_t class_addr)
1340{
1341	mdb_metaslab_class_t mc;
1342	if (mdb_ctf_vread(&mc, "metaslab_class_t",
1343	    "mdb_metaslab_class_t", class_addr, 0) == -1)
1344		return (DCMD_ERR);
1345
1346	mdb_inc_indent(4);
1347	dump_histogram(mc.mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1348	mdb_dec_indent(4);
1349	return (DCMD_OK);
1350}
1351
1352/*
1353 * ::spa
1354 *
1355 *	-c	Print configuration information as well
1356 *	-v	Print vdev state
1357 *	-e	Print vdev error stats
1358 *	-m	Print vdev metaslab info
1359 *	-M	print vdev metaslab group info
1360 *	-h	Print histogram info (must be combined with -m or -M)
1361 *
1362 * Print a summarized spa_t.  When given no arguments, prints out a table of all
1363 * active pools on the system.
1364 */
1365/* ARGSUSED */
1366static int
1367spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1368{
1369	const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
1370		"SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
1371	const char *state;
1372	int spa_flags = 0;
1373
1374	if (mdb_getopts(argc, argv,
1375	    'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags,
1376	    'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags,
1377	    'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1378	    'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1379	    'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1380	    'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1381	    NULL) != argc)
1382		return (DCMD_USAGE);
1383
1384	if (!(flags & DCMD_ADDRSPEC)) {
1385		if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
1386			mdb_warn("can't walk spa");
1387			return (DCMD_ERR);
1388		}
1389
1390		return (DCMD_OK);
1391	}
1392
1393	if (flags & DCMD_PIPE_OUT) {
1394		mdb_printf("%#lr\n", addr);
1395		return (DCMD_OK);
1396	}
1397
1398	if (DCMD_HDRSPEC(flags))
1399		mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
1400		    sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
1401
1402	mdb_spa_print_t spa;
1403	if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1)
1404		return (DCMD_ERR);
1405
1406	if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
1407		state = "UNKNOWN";
1408	else
1409		state = statetab[spa.spa_state];
1410
1411	mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
1412	if (spa_flags & SPA_FLAG_HISTOGRAMS)
1413		spa_class_histogram(spa.spa_normal_class);
1414
1415	if (spa_flags & SPA_FLAG_CONFIG) {
1416		mdb_printf("\n");
1417		mdb_inc_indent(4);
1418		if (mdb_call_dcmd("spa_config", addr, flags, 0,
1419		    NULL) != DCMD_OK)
1420			return (DCMD_ERR);
1421		mdb_dec_indent(4);
1422	}
1423
1424	if (spa_flags & SPA_FLAG_ALL_VDEV) {
1425		mdb_arg_t v;
1426		char opts[100] = "-";
1427		int args =
1428		    (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1;
1429
1430		if (spa_flags & SPA_FLAG_ERRORS)
1431			strcat(opts, "e");
1432		if (spa_flags & SPA_FLAG_METASLABS)
1433			strcat(opts, "m");
1434		if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
1435			strcat(opts, "M");
1436		if (spa_flags & SPA_FLAG_HISTOGRAMS)
1437			strcat(opts, "h");
1438
1439		v.a_type = MDB_TYPE_STRING;
1440		v.a_un.a_str = opts;
1441
1442		mdb_printf("\n");
1443		mdb_inc_indent(4);
1444		if (mdb_call_dcmd("spa_vdevs", addr, flags, args,
1445		    &v) != DCMD_OK)
1446			return (DCMD_ERR);
1447		mdb_dec_indent(4);
1448	}
1449
1450	return (DCMD_OK);
1451}
1452
/*
 * The spa_t member used by spa_print_config(); filled in with mdb_ctf_vread().
 */
typedef struct mdb_spa_config_spa {
	uintptr_t spa_config;	/* address handed to ::nvlist */
} mdb_spa_config_spa_t;
1456
1457/*
1458 * ::spa_config
1459 *
1460 * Given a spa_t, print the configuration information stored in spa_config.
1461 * Since it's just an nvlist, format it as an indented list of name=value pairs.
1462 * We simply read the value of spa_config and pass off to ::nvlist.
1463 */
1464/* ARGSUSED */
1465static int
1466spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1467{
1468	mdb_spa_config_spa_t spa;
1469
1470	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1471		return (DCMD_USAGE);
1472
1473	if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t",
1474	    addr, 0) == -1)
1475		return (DCMD_ERR);
1476
1477	if (spa.spa_config == 0) {
1478		mdb_printf("(none)\n");
1479		return (DCMD_OK);
1480	}
1481
1482	return (mdb_call_dcmd("nvlist", spa.spa_config, flags,
1483	    0, NULL));
1484}
1485
/*
 * The range_tree_t members used by this module; filled in with
 * mdb_ctf_vread().
 */
typedef struct mdb_range_tree {
	struct {
		uint64_t bt_num_elems;
		uint64_t bt_num_nodes;	/* scaled by BTREE_LEAF_SIZE for UCMU */
	} rt_root;
	uint64_t rt_space;
	range_seg_type_t rt_type;
	uint8_t		rt_shift;
	uint64_t	rt_start;
} mdb_range_tree_t;
1496
/*
 * The metaslab_group_t members used by this module; filled in with
 * mdb_ctf_vread().
 */
typedef struct mdb_metaslab_group {
	uint64_t mg_fragmentation;	/* ZFS_FRAG_INVALID is shown as "-" */
	uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
	uintptr_t mg_vd;		/* read as a vdev_t by metaslab_trace() */
} mdb_metaslab_group_t;
1502
/*
 * The metaslab_t members used by this module; filled in with mdb_ctf_vread().
 * The uintptr_t members are target addresses: ms_sm is read as a space_map_t
 * and the others are read as range_tree_t by the code below.
 */
typedef struct mdb_metaslab {
	uint64_t ms_id;
	uint64_t ms_start;
	uint64_t ms_size;
	int64_t ms_deferspace;
	uint64_t ms_fragmentation;	/* ZFS_FRAG_INVALID is shown as "-" */
	uint64_t ms_weight;
	uintptr_t ms_allocating[TXG_SIZE];
	uintptr_t ms_checkpointing;
	uintptr_t ms_freeing;
	uintptr_t ms_freed;
	uintptr_t ms_allocatable;
	uintptr_t ms_unflushed_frees;
	uintptr_t ms_unflushed_allocs;
	uintptr_t ms_sm;		/* 0 when there is no space map */
} mdb_metaslab_t;
1519
/*
 * The space_map_phys_t members used by this module; filled in with
 * mdb_ctf_vread().
 */
typedef struct mdb_space_map_phys_t {
	int64_t smp_alloc;
	uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
} mdb_space_map_phys_t;
1524
/*
 * The space_map_t members used by this module; filled in with mdb_ctf_vread().
 */
typedef struct mdb_space_map {
	uint64_t sm_size;
	uint8_t sm_shift;	/* label offset for the space map histogram */
	uintptr_t sm_phys;	/* read as a space_map_phys_t when non-zero */
} mdb_space_map_t;
1530
/*
 * The vdev_t members used by this module; filled in with mdb_ctf_vread().
 */
typedef struct mdb_vdev {
	uint64_t vdev_id;
	uint64_t vdev_state;
	uintptr_t vdev_ops;
	struct {
		uint64_t vs_aux;
		uint64_t vs_ops[VS_ZIO_TYPES];
		uint64_t vs_bytes[VS_ZIO_TYPES];
		uint64_t vs_read_errors;
		uint64_t vs_write_errors;
		uint64_t vs_checksum_errors;
	} vdev_stat;
	uintptr_t vdev_child;		/* array of vdev_children addresses */
	uint64_t vdev_children;
	uint64_t vdev_ms_count;
	uintptr_t vdev_mg;
	uintptr_t vdev_ms;		/* array of vdev_ms_count addresses */
	uintptr_t vdev_path;		/* string address, or 0 */
} mdb_vdev_t;
1550
/*
 * The vdev_ops_t member used by this module; filled in with mdb_ctf_vread().
 */
typedef struct mdb_vdev_ops {
	char vdev_op_type[16];
} mdb_vdev_ops_t;
1554
1555static int
1556metaslab_stats(mdb_vdev_t *vd, int spa_flags)
1557{
1558	mdb_inc_indent(4);
1559	mdb_printf("%<u>%-?s %6s %20s %10s %10s %10s%</u>\n", "ADDR", "ID",
1560	    "OFFSET", "FREE", "FRAG", "UCMU");
1561
1562	uintptr_t *vdev_ms = mdb_alloc(vd->vdev_ms_count * sizeof (vdev_ms),
1563	    UM_SLEEP | UM_GC);
1564	if (mdb_vread(vdev_ms, vd->vdev_ms_count * sizeof (uintptr_t),
1565	    vd->vdev_ms) == -1) {
1566		mdb_warn("failed to read vdev_ms at %p\n", vd->vdev_ms);
1567		return (DCMD_ERR);
1568	}
1569
1570	for (int m = 0; m < vd->vdev_ms_count; m++) {
1571		mdb_metaslab_t ms;
1572		mdb_space_map_t sm = { 0 };
1573		mdb_space_map_phys_t smp = { 0 };
1574		mdb_range_tree_t rt;
1575		uint64_t uallocs, ufrees, raw_free, raw_uchanges_mem;
1576		char free[MDB_NICENUM_BUFLEN];
1577		char uchanges_mem[MDB_NICENUM_BUFLEN];
1578
1579		if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1580		    vdev_ms[m], 0) == -1)
1581			return (DCMD_ERR);
1582
1583		if (ms.ms_sm != 0 &&
1584		    mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t",
1585		    ms.ms_sm, 0) == -1)
1586			return (DCMD_ERR);
1587
1588		if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t",
1589		    ms.ms_unflushed_frees, 0) == -1)
1590			return (DCMD_ERR);
1591		ufrees = rt.rt_space;
1592		raw_uchanges_mem = rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE;
1593
1594		if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t",
1595		    ms.ms_unflushed_allocs, 0) == -1)
1596			return (DCMD_ERR);
1597		uallocs = rt.rt_space;
1598		raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE;
1599		mdb_nicenum(raw_uchanges_mem, uchanges_mem);
1600
1601		raw_free = ms.ms_size;
1602		if (ms.ms_sm != 0 && sm.sm_phys != 0) {
1603			(void) mdb_ctf_vread(&smp, "space_map_phys_t",
1604			    "mdb_space_map_phys_t", sm.sm_phys, 0);
1605			raw_free -= smp.smp_alloc;
1606		}
1607		raw_free += ufrees - uallocs;
1608		mdb_nicenum(raw_free, free);
1609
1610		mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id,
1611		    ms.ms_start, free);
1612		if (ms.ms_fragmentation == ZFS_FRAG_INVALID)
1613			mdb_printf("%9s ", "-");
1614		else
1615			mdb_printf("%9llu%% ", ms.ms_fragmentation);
1616		mdb_printf("%10s\n", uchanges_mem);
1617
1618		if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != 0 &&
1619		    sm.sm_phys != 0) {
1620			dump_histogram(smp.smp_histogram,
1621			    SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift);
1622		}
1623	}
1624	mdb_dec_indent(4);
1625	return (DCMD_OK);
1626}
1627
1628static int
1629metaslab_group_stats(mdb_vdev_t *vd, int spa_flags)
1630{
1631	mdb_metaslab_group_t mg;
1632	if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t",
1633	    vd->vdev_mg, 0) == -1) {
1634		mdb_warn("failed to read vdev_mg at %p\n", vd->vdev_mg);
1635		return (DCMD_ERR);
1636	}
1637
1638	mdb_inc_indent(4);
1639	mdb_printf("%<u>%-?s %7s %9s%</u>\n", "ADDR", "FRAG", "UCMU");
1640
1641	if (mg.mg_fragmentation == ZFS_FRAG_INVALID)
1642		mdb_printf("%0?p %6s\n", vd->vdev_mg, "-");
1643	else
1644		mdb_printf("%0?p %6llu%%", vd->vdev_mg, mg.mg_fragmentation);
1645
1646
1647	uintptr_t *vdev_ms = mdb_alloc(vd->vdev_ms_count * sizeof (vdev_ms),
1648	    UM_SLEEP | UM_GC);
1649	if (mdb_vread(vdev_ms, vd->vdev_ms_count * sizeof (uintptr_t),
1650	    vd->vdev_ms) == -1) {
1651		mdb_warn("failed to read vdev_ms at %p\n", vd->vdev_ms);
1652		return (DCMD_ERR);
1653	}
1654
1655	uint64_t raw_uchanges_mem = 0;
1656	char uchanges_mem[MDB_NICENUM_BUFLEN];
1657	for (int m = 0; m < vd->vdev_ms_count; m++) {
1658		mdb_metaslab_t ms;
1659		mdb_range_tree_t rt;
1660
1661		if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1662		    vdev_ms[m], 0) == -1)
1663			return (DCMD_ERR);
1664
1665		if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t",
1666		    ms.ms_unflushed_frees, 0) == -1)
1667			return (DCMD_ERR);
1668		raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE;
1669
1670		if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t",
1671		    ms.ms_unflushed_allocs, 0) == -1)
1672			return (DCMD_ERR);
1673		raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE;
1674	}
1675	mdb_nicenum(raw_uchanges_mem, uchanges_mem);
1676	mdb_printf("%10s\n", uchanges_mem);
1677
1678	if (spa_flags & SPA_FLAG_HISTOGRAMS)
1679		dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1680	mdb_dec_indent(4);
1681	return (DCMD_OK);
1682}
1683
1684/*
1685 * ::vdev
1686 *
1687 * Print out a summarized vdev_t, in the following form:
1688 *
1689 * ADDR             STATE	AUX            DESC
1690 * fffffffbcde23df0 HEALTHY	-              /dev/dsk/c0t0d0
1691 *
1692 * If '-r' is specified, recursively visit all children.
1693 *
1694 * With '-e', the statistics associated with the vdev are printed as well.
1695 */
1696static int
1697do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive,
1698    int spa_flags)
1699{
1700	mdb_vdev_t vd;
1701	if (mdb_ctf_vread(&vd, "vdev_t", "mdb_vdev_t",
1702	    (uintptr_t)addr, 0) == -1)
1703		return (DCMD_ERR);
1704
1705	if (flags & DCMD_PIPE_OUT) {
1706		mdb_printf("%#lr\n", addr);
1707	} else {
1708		char desc[MAXNAMELEN];
1709		if (vd.vdev_path != 0) {
1710			if (mdb_readstr(desc, sizeof (desc),
1711			    (uintptr_t)vd.vdev_path) == -1) {
1712				mdb_warn("failed to read vdev_path at %p\n",
1713				    vd.vdev_path);
1714				return (DCMD_ERR);
1715			}
1716		} else if (vd.vdev_ops != 0) {
1717			vdev_ops_t ops;
1718			if (mdb_vread(&ops, sizeof (ops),
1719			    (uintptr_t)vd.vdev_ops) == -1) {
1720				mdb_warn("failed to read vdev_ops at %p\n",
1721				    vd.vdev_ops);
1722				return (DCMD_ERR);
1723			}
1724			(void) strcpy(desc, ops.vdev_op_type);
1725		} else {
1726			(void) strcpy(desc, "<unknown>");
1727		}
1728
1729		if (depth == 0 && DCMD_HDRSPEC(flags))
1730			mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1731			    "ADDR", "STATE", "AUX",
1732			    sizeof (uintptr_t) == 4 ? 43 : 35,
1733			    "DESCRIPTION");
1734
1735		mdb_printf("%0?p ", addr);
1736
1737		const char *state, *aux;
1738		switch (vd.vdev_state) {
1739		case VDEV_STATE_CLOSED:
1740			state = "CLOSED";
1741			break;
1742		case VDEV_STATE_OFFLINE:
1743			state = "OFFLINE";
1744			break;
1745		case VDEV_STATE_CANT_OPEN:
1746			state = "CANT_OPEN";
1747			break;
1748		case VDEV_STATE_DEGRADED:
1749			state = "DEGRADED";
1750			break;
1751		case VDEV_STATE_HEALTHY:
1752			state = "HEALTHY";
1753			break;
1754		case VDEV_STATE_REMOVED:
1755			state = "REMOVED";
1756			break;
1757		case VDEV_STATE_FAULTED:
1758			state = "FAULTED";
1759			break;
1760		default:
1761			state = "UNKNOWN";
1762			break;
1763		}
1764
1765		switch (vd.vdev_stat.vs_aux) {
1766		case VDEV_AUX_NONE:
1767			aux = "-";
1768			break;
1769		case VDEV_AUX_OPEN_FAILED:
1770			aux = "OPEN_FAILED";
1771			break;
1772		case VDEV_AUX_CORRUPT_DATA:
1773			aux = "CORRUPT_DATA";
1774			break;
1775		case VDEV_AUX_NO_REPLICAS:
1776			aux = "NO_REPLICAS";
1777			break;
1778		case VDEV_AUX_BAD_GUID_SUM:
1779			aux = "BAD_GUID_SUM";
1780			break;
1781		case VDEV_AUX_TOO_SMALL:
1782			aux = "TOO_SMALL";
1783			break;
1784		case VDEV_AUX_BAD_LABEL:
1785			aux = "BAD_LABEL";
1786			break;
1787		case VDEV_AUX_VERSION_NEWER:
1788			aux = "VERS_NEWER";
1789			break;
1790		case VDEV_AUX_VERSION_OLDER:
1791			aux = "VERS_OLDER";
1792			break;
1793		case VDEV_AUX_UNSUP_FEAT:
1794			aux = "UNSUP_FEAT";
1795			break;
1796		case VDEV_AUX_SPARED:
1797			aux = "SPARED";
1798			break;
1799		case VDEV_AUX_ERR_EXCEEDED:
1800			aux = "ERR_EXCEEDED";
1801			break;
1802		case VDEV_AUX_IO_FAILURE:
1803			aux = "IO_FAILURE";
1804			break;
1805		case VDEV_AUX_BAD_LOG:
1806			aux = "BAD_LOG";
1807			break;
1808		case VDEV_AUX_EXTERNAL:
1809			aux = "EXTERNAL";
1810			break;
1811		case VDEV_AUX_SPLIT_POOL:
1812			aux = "SPLIT_POOL";
1813			break;
1814		case VDEV_AUX_CHILDREN_OFFLINE:
1815			aux = "CHILDREN_OFFLINE";
1816			break;
1817		default:
1818			aux = "UNKNOWN";
1819			break;
1820		}
1821
1822		mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1823
1824		if (spa_flags & SPA_FLAG_ERRORS) {
1825			int i;
1826
1827			mdb_inc_indent(4);
1828			mdb_printf("\n");
1829			mdb_printf("%<u>       %12s %12s %12s %12s "
1830			    "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1831			    "IOCTL");
1832			mdb_printf("OPS     ");
1833			for (i = 1; i < VS_ZIO_TYPES; i++)
1834				mdb_printf("%11#llx%s",
1835				    vd.vdev_stat.vs_ops[i],
1836				    i == VS_ZIO_TYPES - 1 ? "" : "  ");
1837			mdb_printf("\n");
1838			mdb_printf("BYTES   ");
1839			for (i = 1; i < VS_ZIO_TYPES; i++)
1840				mdb_printf("%11#llx%s",
1841				    vd.vdev_stat.vs_bytes[i],
1842				    i == VS_ZIO_TYPES - 1 ? "" : "  ");
1843
1844
1845			mdb_printf("\n");
1846			mdb_printf("EREAD    %10#llx\n",
1847			    vd.vdev_stat.vs_read_errors);
1848			mdb_printf("EWRITE   %10#llx\n",
1849			    vd.vdev_stat.vs_write_errors);
1850			mdb_printf("ECKSUM   %10#llx\n",
1851			    vd.vdev_stat.vs_checksum_errors);
1852			mdb_dec_indent(4);
1853			mdb_printf("\n");
1854		}
1855
1856		if ((spa_flags & SPA_FLAG_METASLAB_GROUPS) &&
1857		    vd.vdev_mg != 0) {
1858			metaslab_group_stats(&vd, spa_flags);
1859		}
1860		if ((spa_flags & SPA_FLAG_METASLABS) && vd.vdev_ms != 0) {
1861			metaslab_stats(&vd, spa_flags);
1862		}
1863	}
1864
1865	uint64_t children = vd.vdev_children;
1866	if (children == 0 || !recursive)
1867		return (DCMD_OK);
1868
1869	uintptr_t *child = mdb_alloc(children * sizeof (child),
1870	    UM_SLEEP | UM_GC);
1871	if (mdb_vread(child, children * sizeof (void *), vd.vdev_child) == -1) {
1872		mdb_warn("failed to read vdev children at %p", vd.vdev_child);
1873		return (DCMD_ERR);
1874	}
1875
1876	for (uint64_t c = 0; c < children; c++) {
1877		if (do_print_vdev(child[c], flags, depth + 2, recursive,
1878		    spa_flags)) {
1879			return (DCMD_ERR);
1880		}
1881	}
1882
1883	return (DCMD_OK);
1884}
1885
1886static int
1887vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1888{
1889	uint64_t depth = 0;
1890	boolean_t recursive = B_FALSE;
1891	int spa_flags = 0;
1892
1893	if (mdb_getopts(argc, argv,
1894	    'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1895	    'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1896	    'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1897	    'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1898	    'r', MDB_OPT_SETBITS, TRUE, &recursive,
1899	    'd', MDB_OPT_UINT64, &depth, NULL) != argc)
1900		return (DCMD_USAGE);
1901
1902	if (!(flags & DCMD_ADDRSPEC)) {
1903		mdb_warn("no vdev_t address given\n");
1904		return (DCMD_ERR);
1905	}
1906
1907	return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags));
1908}
1909
/*
 * The metaslab_alloc_trace_t members used by metaslab_trace(); filled in with
 * mdb_ctf_vread().
 */
typedef struct mdb_metaslab_alloc_trace {
	uintptr_t mat_mg;	/* metaslab group address, or 0 */
	uintptr_t mat_msp;	/* metaslab address, or 0 */
	uint64_t mat_size;
	uint64_t mat_weight;
	uint64_t mat_offset;	/* negative as int64_t: a trace_alloc_type */
	uint32_t mat_dva_id;
	int mat_allocator;
} mdb_metaslab_alloc_trace_t;
1919
1920static void
1921metaslab_print_weight(uint64_t weight)
1922{
1923	char buf[100];
1924
1925	if (WEIGHT_IS_SPACEBASED(weight)) {
1926		mdb_nicenum(
1927		    weight & ~(METASLAB_ACTIVE_MASK | METASLAB_WEIGHT_TYPE),
1928		    buf);
1929	} else {
1930		char size[MDB_NICENUM_BUFLEN];
1931		mdb_nicenum(1ULL << WEIGHT_GET_INDEX(weight), size);
1932		(void) mdb_snprintf(buf, sizeof (buf), "%llu x %s",
1933		    WEIGHT_GET_COUNT(weight), size);
1934	}
1935	mdb_printf("%11s ", buf);
1936}
1937
1938/* ARGSUSED */
1939static int
1940metaslab_weight(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1941{
1942	uint64_t weight = 0;
1943	char active;
1944
1945	if (argc == 0 && (flags & DCMD_ADDRSPEC)) {
1946		if (mdb_vread(&weight, sizeof (uint64_t), addr) == -1) {
1947			mdb_warn("failed to read weight at %p\n", addr);
1948			return (DCMD_ERR);
1949		}
1950	} else if (argc == 1 && !(flags & DCMD_ADDRSPEC)) {
1951		weight = (argv[0].a_type == MDB_TYPE_IMMEDIATE) ?
1952		    argv[0].a_un.a_val : mdb_strtoull(argv[0].a_un.a_str);
1953	} else {
1954		return (DCMD_USAGE);
1955	}
1956
1957	if (DCMD_HDRSPEC(flags)) {
1958		mdb_printf("%<u>%-6s %9s %9s%</u>\n",
1959		    "ACTIVE", "ALGORITHM", "WEIGHT");
1960	}
1961
1962	if (weight & METASLAB_WEIGHT_PRIMARY)
1963		active = 'P';
1964	else if (weight & METASLAB_WEIGHT_SECONDARY)
1965		active = 'S';
1966	else
1967		active = '-';
1968	mdb_printf("%6c %8s ", active,
1969	    WEIGHT_IS_SPACEBASED(weight) ? "SPACE" : "SEGMENT");
1970	metaslab_print_weight(weight);
1971	mdb_printf("\n");
1972
1973	return (DCMD_OK);
1974}
1975
1976/* ARGSUSED */
1977static int
1978metaslab_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1979{
1980	mdb_metaslab_alloc_trace_t mat;
1981	mdb_metaslab_group_t mg = { 0 };
1982	char result_type[100];
1983
1984	if (mdb_ctf_vread(&mat, "metaslab_alloc_trace_t",
1985	    "mdb_metaslab_alloc_trace_t", addr, 0) == -1) {
1986		return (DCMD_ERR);
1987	}
1988
1989	if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
1990		mdb_printf("%<u>%6s %6s %8s %11s %11s %18s %18s%</u>\n",
1991		    "MSID", "DVA", "ASIZE", "ALLOCATOR", "WEIGHT", "RESULT",
1992		    "VDEV");
1993	}
1994
1995	if (mat.mat_msp != 0) {
1996		mdb_metaslab_t ms;
1997
1998		if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1999		    mat.mat_msp, 0) == -1) {
2000			return (DCMD_ERR);
2001		}
2002		mdb_printf("%6llu ", ms.ms_id);
2003	} else {
2004		mdb_printf("%6s ", "-");
2005	}
2006
2007	mdb_printf("%6d %8llx %11llx ", mat.mat_dva_id, mat.mat_size,
2008	    mat.mat_allocator);
2009
2010	metaslab_print_weight(mat.mat_weight);
2011
2012	if ((int64_t)mat.mat_offset < 0) {
2013		if (enum_lookup("enum trace_alloc_type", mat.mat_offset,
2014		    "TRACE_", sizeof (result_type), result_type) == -1) {
2015			mdb_warn("Could not find enum for trace_alloc_type");
2016			return (DCMD_ERR);
2017		}
2018		mdb_printf("%18s ", result_type);
2019	} else {
2020		mdb_printf("%<b>%18llx%</b> ", mat.mat_offset);
2021	}
2022
2023	if (mat.mat_mg != 0 &&
2024	    mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t",
2025	    mat.mat_mg, 0) == -1) {
2026		return (DCMD_ERR);
2027	}
2028
2029	if (mg.mg_vd != 0) {
2030		mdb_vdev_t vdev;
2031		char desc[MAXNAMELEN];
2032
2033		if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t",
2034		    mg.mg_vd, 0) == -1) {
2035			return (DCMD_ERR);
2036		}
2037
2038		if (vdev.vdev_path != 0) {
2039			char path[MAXNAMELEN];
2040
2041			if (mdb_readstr(path, sizeof (path),
2042			    vdev.vdev_path) == -1) {
2043				mdb_warn("failed to read vdev_path at %p\n",
2044				    vdev.vdev_path);
2045				return (DCMD_ERR);
2046			}
2047			char *slash;
2048			if ((slash = strrchr(path, '/')) != NULL) {
2049				strcpy(desc, slash + 1);
2050			} else {
2051				strcpy(desc, path);
2052			}
2053		} else if (vdev.vdev_ops != 0) {
2054			mdb_vdev_ops_t ops;
2055			if (mdb_ctf_vread(&ops, "vdev_ops_t", "mdb_vdev_ops_t",
2056			    vdev.vdev_ops, 0) == -1) {
2057				mdb_warn("failed to read vdev_ops at %p\n",
2058				    vdev.vdev_ops);
2059				return (DCMD_ERR);
2060			}
2061			(void) mdb_snprintf(desc, sizeof (desc),
2062			    "%s-%llu", ops.vdev_op_type, vdev.vdev_id);
2063		} else {
2064			(void) strcpy(desc, "<unknown>");
2065		}
2066		mdb_printf("%18s\n", desc);
2067	}
2068
2069	return (DCMD_OK);
2070}
2071
/*
 * State of the metaslab walker: the children of the root vdev, and the
 * metaslab array of the child currently being visited.
 */
typedef struct metaslab_walk_data {
	uint64_t mw_numvdevs;	/* number of entries in mw_vdevs */
	uintptr_t *mw_vdevs;	/* addresses of the root vdev's children */
	int mw_curvdev;		/* index into mw_vdevs */
	uint64_t mw_nummss;	/* number of entries in mw_mss */
	uintptr_t *mw_mss;	/* current vdev's metaslabs; NULL if unread */
	int mw_curms;		/* index into mw_mss */
} metaslab_walk_data_t;
2080
2081static int
2082metaslab_walk_step(mdb_walk_state_t *wsp)
2083{
2084	metaslab_walk_data_t *mw = wsp->walk_data;
2085	metaslab_t ms;
2086	uintptr_t msp;
2087
2088	if (mw->mw_curvdev >= mw->mw_numvdevs)
2089		return (WALK_DONE);
2090
2091	if (mw->mw_mss == NULL) {
2092		uintptr_t mssp;
2093		uintptr_t vdevp;
2094
2095		ASSERT(mw->mw_curms == 0);
2096		ASSERT(mw->mw_nummss == 0);
2097
2098		vdevp = mw->mw_vdevs[mw->mw_curvdev];
2099		if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) ||
2100		    GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) {
2101			return (WALK_ERR);
2102		}
2103
2104		mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
2105		    UM_SLEEP | UM_GC);
2106		if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
2107		    mssp) == -1) {
2108			mdb_warn("failed to read vdev_ms at %p", mssp);
2109			return (WALK_ERR);
2110		}
2111	}
2112
2113	if (mw->mw_curms >= mw->mw_nummss) {
2114		mw->mw_mss = NULL;
2115		mw->mw_curms = 0;
2116		mw->mw_nummss = 0;
2117		mw->mw_curvdev++;
2118		return (WALK_NEXT);
2119	}
2120
2121	msp = mw->mw_mss[mw->mw_curms];
2122	if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
2123		mdb_warn("failed to read metaslab_t at %p", msp);
2124		return (WALK_ERR);
2125	}
2126
2127	mw->mw_curms++;
2128
2129	return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
2130}
2131
2132static int
2133metaslab_walk_init(mdb_walk_state_t *wsp)
2134{
2135	metaslab_walk_data_t *mw;
2136	uintptr_t root_vdevp;
2137	uintptr_t childp;
2138
2139	if (wsp->walk_addr == 0) {
2140		mdb_warn("must supply address of spa_t\n");
2141		return (WALK_ERR);
2142	}
2143
2144	mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
2145
2146	if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) ||
2147	    GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) ||
2148	    GETMEMB(root_vdevp, "vdev", vdev_child, childp)) {
2149		return (DCMD_ERR);
2150	}
2151
2152	mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
2153	    UM_SLEEP | UM_GC);
2154	if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
2155	    childp) == -1) {
2156		mdb_warn("failed to read root vdev children at %p", childp);
2157		return (DCMD_ERR);
2158	}
2159
2160	wsp->walk_data = mw;
2161
2162	return (WALK_NEXT);
2163}
2164
/*
 * spa_t members read from the target's spa with mdb_ctf_vread().
 */
typedef struct mdb_spa {
	uintptr_t spa_dsl_pool;
	uintptr_t spa_root_vdev;
} mdb_spa_t;
2169
/*
 * dsl_pool member read from the target with mdb_ctf_vread().
 */
typedef struct mdb_dsl_pool {
	uintptr_t dp_root_dir;
} mdb_dsl_pool_t;
2173
/*
 * dsl_dir members read from the target with mdb_ctf_vread().
 */
typedef struct mdb_dsl_dir {
	uintptr_t dd_dbuf;
	int64_t dd_space_towrite[TXG_SIZE];
} mdb_dsl_dir_t;
2178
/*
 * dsl_dir_phys members read from the target with mdb_ctf_vread().
 */
typedef struct mdb_dsl_dir_phys {
	uint64_t dd_used_bytes;
	uint64_t dd_compressed_bytes;
	uint64_t dd_uncompressed_bytes;
} mdb_dsl_dir_phys_t;
2184
/*
 * Totals accumulated by space_cb() over every metaslab it is called for.
 * Each ms_* range-tree member is the sum of rt_space over the metaslab
 * range trees of the same name.
 */
typedef struct space_data {
	uint64_t ms_allocating[TXG_SIZE];
	uint64_t ms_checkpointing;
	uint64_t ms_freeing;
	uint64_t ms_freed;
	uint64_t ms_unflushed_frees;
	uint64_t ms_unflushed_allocs;
	uint64_t ms_allocatable;
	int64_t ms_deferspace;	/* sum of each metaslab's ms_deferspace */
	uint64_t avail;		/* sm_size - smp_alloc + ufrees - uallocs */
} space_data_t;
2196
2197/* ARGSUSED */
2198static int
2199space_cb(uintptr_t addr, const void *unknown, void *arg)
2200{
2201	space_data_t *sd = arg;
2202	mdb_metaslab_t ms;
2203	mdb_range_tree_t rt;
2204	mdb_space_map_t sm = { 0 };
2205	mdb_space_map_phys_t smp = { 0 };
2206	uint64_t uallocs, ufrees;
2207	int i;
2208
2209	if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
2210	    addr, 0) == -1)
2211		return (WALK_ERR);
2212
2213	for (i = 0; i < TXG_SIZE; i++) {
2214		if (mdb_ctf_vread(&rt, "range_tree_t",
2215		    "mdb_range_tree_t", ms.ms_allocating[i], 0) == -1)
2216			return (WALK_ERR);
2217		sd->ms_allocating[i] += rt.rt_space;
2218	}
2219
2220	if (mdb_ctf_vread(&rt, "range_tree_t",
2221	    "mdb_range_tree_t", ms.ms_checkpointing, 0) == -1)
2222		return (WALK_ERR);
2223	sd->ms_checkpointing += rt.rt_space;
2224
2225	if (mdb_ctf_vread(&rt, "range_tree_t",
2226	    "mdb_range_tree_t", ms.ms_freeing, 0) == -1)
2227		return (WALK_ERR);
2228	sd->ms_freeing += rt.rt_space;
2229
2230	if (mdb_ctf_vread(&rt, "range_tree_t",
2231	    "mdb_range_tree_t", ms.ms_freed, 0) == -1)
2232		return (WALK_ERR);
2233	sd->ms_freed += rt.rt_space;
2234
2235	if (mdb_ctf_vread(&rt, "range_tree_t",
2236	    "mdb_range_tree_t", ms.ms_allocatable, 0) == -1)
2237		return (WALK_ERR);
2238	sd->ms_allocatable += rt.rt_space;
2239
2240	if (mdb_ctf_vread(&rt, "range_tree_t",
2241	    "mdb_range_tree_t", ms.ms_unflushed_frees, 0) == -1)
2242		return (WALK_ERR);
2243	sd->ms_unflushed_frees += rt.rt_space;
2244	ufrees = rt.rt_space;
2245
2246	if (mdb_ctf_vread(&rt, "range_tree_t",
2247	    "mdb_range_tree_t", ms.ms_unflushed_allocs, 0) == -1)
2248		return (WALK_ERR);
2249	sd->ms_unflushed_allocs += rt.rt_space;
2250	uallocs = rt.rt_space;
2251
2252	if (ms.ms_sm != 0 &&
2253	    mdb_ctf_vread(&sm, "space_map_t",
2254	    "mdb_space_map_t", ms.ms_sm, 0) == -1)
2255		return (WALK_ERR);
2256
2257	if (sm.sm_phys != 0) {
2258		(void) mdb_ctf_vread(&smp, "space_map_phys_t",
2259		    "mdb_space_map_phys_t", sm.sm_phys, 0);
2260	}
2261
2262	sd->ms_deferspace += ms.ms_deferspace;
2263	sd->avail += sm.sm_size - smp.smp_alloc + ufrees - uallocs;
2264
2265	return (WALK_NEXT);
2266}
2267
2268/*
2269 * ::spa_space [-b]
2270 *
2271 * Given a spa_t, print out it's on-disk space usage and in-core
2272 * estimates of future usage.  If -b is given, print space in bytes.
2273 * Otherwise print in megabytes.
2274 */
2275/* ARGSUSED */
2276static int
2277spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2278{
2279	mdb_spa_t spa;
2280	mdb_dsl_pool_t dp;
2281	mdb_dsl_dir_t dd;
2282	mdb_dmu_buf_impl_t db;
2283	mdb_dsl_dir_phys_t dsp;
2284	space_data_t sd;
2285	int shift = 20;
2286	char *suffix = "M";
2287	int bytes = B_FALSE;
2288
2289	if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) !=
2290	    argc)
2291		return (DCMD_USAGE);
2292	if (!(flags & DCMD_ADDRSPEC))
2293		return (DCMD_USAGE);
2294
2295	if (bytes) {
2296		shift = 0;
2297		suffix = "";
2298	}
2299
2300	if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_t",
2301	    addr, 0) == -1 ||
2302	    mdb_ctf_vread(&dp, ZFS_STRUCT "dsl_pool", "mdb_dsl_pool_t",
2303	    spa.spa_dsl_pool, 0) == -1 ||
2304	    mdb_ctf_vread(&dd, ZFS_STRUCT "dsl_dir", "mdb_dsl_dir_t",
2305	    dp.dp_root_dir, 0) == -1 ||
2306	    mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t",
2307	    dd.dd_dbuf, 0) == -1 ||
2308	    mdb_ctf_vread(&dsp, ZFS_STRUCT "dsl_dir_phys",
2309	    "mdb_dsl_dir_phys_t", db.db.db_data, 0) == -1) {
2310		return (DCMD_ERR);
2311	}
2312
2313	mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
2314	    dd.dd_space_towrite[0] >> shift, suffix,
2315	    dd.dd_space_towrite[1] >> shift, suffix,
2316	    dd.dd_space_towrite[2] >> shift, suffix,
2317	    dd.dd_space_towrite[3] >> shift, suffix);
2318
2319	mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
2320	    dsp.dd_used_bytes >> shift, suffix);
2321	mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
2322	    dsp.dd_compressed_bytes >> shift, suffix);
2323	mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
2324	    dsp.dd_uncompressed_bytes >> shift, suffix);
2325
2326	bzero(&sd, sizeof (sd));
2327	if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
2328		mdb_warn("can't walk metaslabs");
2329		return (DCMD_ERR);
2330	}
2331
2332	mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
2333	    sd.ms_allocating[0] >> shift, suffix,
2334	    sd.ms_allocating[1] >> shift, suffix,
2335	    sd.ms_allocating[2] >> shift, suffix,
2336	    sd.ms_allocating[3] >> shift, suffix);
2337	mdb_printf("ms_checkpointing = %llu%s\n",
2338	    sd.ms_checkpointing >> shift, suffix);
2339	mdb_printf("ms_freeing = %llu%s\n",
2340	    sd.ms_freeing >> shift, suffix);
2341	mdb_printf("ms_freed = %llu%s\n",
2342	    sd.ms_freed >> shift, suffix);
2343	mdb_printf("ms_unflushed_frees = %llu%s\n",
2344	    sd.ms_unflushed_frees >> shift, suffix);
2345	mdb_printf("ms_unflushed_allocs = %llu%s\n",
2346	    sd.ms_unflushed_allocs >> shift, suffix);
2347	mdb_printf("ms_allocatable = %llu%s\n",
2348	    sd.ms_allocatable >> shift, suffix);
2349	mdb_printf("ms_deferspace = %llu%s\n",
2350	    sd.ms_deferspace >> shift, suffix);
2351	mdb_printf("current avail = %llu%s\n",
2352	    sd.avail >> shift, suffix);
2353
2354	return (DCMD_OK);
2355}
2356
/*
 * Debugger-side view of one auxiliary vdev set embedded in the spa_t.
 * spa_print_aux() treats sav_vdevs as the target address of an array of
 * sav_count vdev pointers.
 */
typedef struct mdb_spa_aux_vdev {
	int sav_count;
	uintptr_t sav_vdevs;
} mdb_spa_aux_vdev_t;

/*
 * The members of the target's spa_t that ::spa_vdevs needs; filled in by
 * mdb_ctf_vread() from "spa_t".
 * NOTE(review): mdb_ctf_vread() presumably matches members by name, so the
 * names here should mirror the target's -- confirm before renaming.
 */
typedef struct mdb_spa_vdevs {
	uintptr_t spa_root_vdev;
	mdb_spa_aux_vdev_t spa_l2cache;
	mdb_spa_aux_vdev_t spa_spares;
} mdb_spa_vdevs_t;
2367
2368static int
2369spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
2370    const char *name)
2371{
2372	uintptr_t *aux;
2373	size_t len;
2374	int ret, i;
2375
2376	/*
2377	 * Iterate over aux vdevs and print those out as well.  This is a
2378	 * little annoying because we don't have a root vdev to pass to ::vdev.
2379	 * Instead, we print a single line and then call it for each child
2380	 * vdev.
2381	 */
2382	if (sav->sav_count != 0) {
2383		v[1].a_type = MDB_TYPE_STRING;
2384		v[1].a_un.a_str = "-d";
2385		v[2].a_type = MDB_TYPE_IMMEDIATE;
2386		v[2].a_un.a_val = 2;
2387
2388		len = sav->sav_count * sizeof (uintptr_t);
2389		aux = mdb_alloc(len, UM_SLEEP);
2390		if (mdb_vread(aux, len, sav->sav_vdevs) == -1) {
2391			mdb_free(aux, len);
2392			mdb_warn("failed to read l2cache vdevs at %p",
2393			    sav->sav_vdevs);
2394			return (DCMD_ERR);
2395		}
2396
2397		mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
2398
2399		for (i = 0; i < sav->sav_count; i++) {
2400			ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
2401			if (ret != DCMD_OK) {
2402				mdb_free(aux, len);
2403				return (ret);
2404			}
2405		}
2406
2407		mdb_free(aux, len);
2408	}
2409
2410	return (0);
2411}
2412
2413/*
2414 * ::spa_vdevs
2415 *
2416 *	-e	Include error stats
2417 *	-m	Include metaslab information
2418 *	-M	Include metaslab group information
2419 *	-h	Include histogram information (requires -m or -M)
2420 *
2421 * Print out a summarized list of vdevs for the given spa_t.
2422 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as
2423 * iterating over the cache devices.
2424 */
2425/* ARGSUSED */
2426static int
2427spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2428{
2429	mdb_arg_t v[3];
2430	int ret;
2431	char opts[100] = "-r";
2432	int spa_flags = 0;
2433
2434	if (mdb_getopts(argc, argv,
2435	    'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
2436	    'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
2437	    'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
2438	    'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
2439	    NULL) != argc)
2440		return (DCMD_USAGE);
2441
2442	if (!(flags & DCMD_ADDRSPEC))
2443		return (DCMD_USAGE);
2444
2445	mdb_spa_vdevs_t spa;
2446	if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1)
2447		return (DCMD_ERR);
2448
2449	/*
2450	 * Unitialized spa_t structures can have a NULL root vdev.
2451	 */
2452	if (spa.spa_root_vdev == 0) {
2453		mdb_printf("no associated vdevs\n");
2454		return (DCMD_OK);
2455	}
2456
2457	if (spa_flags & SPA_FLAG_ERRORS)
2458		strcat(opts, "e");
2459	if (spa_flags & SPA_FLAG_METASLABS)
2460		strcat(opts, "m");
2461	if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
2462		strcat(opts, "M");
2463	if (spa_flags & SPA_FLAG_HISTOGRAMS)
2464		strcat(opts, "h");
2465
2466	v[0].a_type = MDB_TYPE_STRING;
2467	v[0].a_un.a_str = opts;
2468
2469	ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
2470	    flags, 1, v);
2471	if (ret != DCMD_OK)
2472		return (ret);
2473
2474	if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
2475	    spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
2476		return (DCMD_ERR);
2477
2478	return (DCMD_OK);
2479}
2480
2481/*
2482 * ::zio
2483 *
2484 * Print a summary of zio_t and all its children.  This is intended to display a
2485 * zio tree, and hence we only pick the most important pieces of information for
2486 * the main summary.  More detailed information can always be found by doing a
2487 * '::print zio' on the underlying zio_t.  The columns we display are:
2488 *
2489 *	ADDRESS  TYPE  STAGE  WAITER  TIME_ELAPSED
2490 *
2491 * The 'address' column is indented by one space for each depth level as we
2492 * descend down the tree.
2493 */
2494
/*
 * ZIO_MAXINDENT caps the per-depth indentation of the address column, and
 * ZIO_MAXWIDTH is that column's total width: two characters per byte of a
 * pointer plus the maximum indent.  The ZIO_WALK_* values are stored in
 * zpa_type and select whether only the given zio is shown or whether its
 * child or parent list is followed.
 */
#define	ZIO_MAXINDENT	7
#define	ZIO_MAXWIDTH	(sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
#define	ZIO_WALK_SELF	0
#define	ZIO_WALK_CHILD	1
#define	ZIO_WALK_PARENT	2
2500
/*
 * State threaded through the recursive zio printing code.
 */
typedef struct zio_print_args {
	/* depth of the zio being printed; bumped around each list walk */
	int	zpa_current_depth;
	/* zios shallower than this are not printed */
	int	zpa_min_depth;
	/* no further descent once the current depth reaches this */
	int	zpa_max_depth;
	/* one of the ZIO_WALK_* values */
	int	zpa_type;
	/* dcmd flags; consulted for DCMD_PIPE_OUT */
	uint_t	zpa_flags;
} zio_print_args_t;
2508
/*
 * The members of the target's zio that the zio dcmd and walkers use; filled
 * in by mdb_ctf_vread() as "mdb_zio_t".  The nested io_parent_list member
 * exposes the list head's list_next pointer, which zio_walk_root_step()
 * compares against the head's own address to detect an empty parent list.
 * NOTE(review): mdb_ctf_vread() presumably matches members by name, so the
 * names here should mirror the target's -- confirm before renaming.
 */
typedef struct mdb_zio {
	enum zio_type io_type;
	enum zio_stage io_stage;
	uintptr_t io_waiter;
	uintptr_t io_spa;
	struct {
		struct {
			uintptr_t list_next;
		} list_head;
	} io_parent_list;
	int io_error;
} mdb_zio_t;
2521
/*
 * io_timestamp is kept apart from mdb_zio_t and read with
 * MDB_CTF_VREAD_QUIET, with the result of that read ignored.
 * NOTE(review): presumably this lets the main zio read succeed on targets
 * whose zio has no io_timestamp member -- confirm.
 */
typedef struct mdb_zio_timestamp {
	hrtime_t io_timestamp;
} mdb_zio_timestamp_t;
2525
/*
 * Forward declaration: zio_print_cb() hands zio_child_cb() to mdb_pwalk(),
 * and zio_child_cb() in turn calls zio_print_cb().
 */
static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
2527
2528static int
2529zio_print_cb(uintptr_t addr, zio_print_args_t *zpa)
2530{
2531	mdb_ctf_id_t type_enum, stage_enum;
2532	int indent = zpa->zpa_current_depth;
2533	const char *type, *stage;
2534	uintptr_t laddr;
2535	mdb_zio_t zio;
2536	mdb_zio_timestamp_t zio_timestamp = { 0 };
2537
2538	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1)
2539		return (WALK_ERR);
2540	(void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio",
2541	    "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET);
2542
2543	if (indent > ZIO_MAXINDENT)
2544		indent = ZIO_MAXINDENT;
2545
2546	if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
2547	    mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
2548		mdb_warn("failed to lookup zio enums");
2549		return (WALK_ERR);
2550	}
2551
2552	if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL)
2553		type += sizeof ("ZIO_TYPE_") - 1;
2554	else
2555		type = "?";
2556
2557	if (zio.io_error == 0) {
2558		stage = mdb_ctf_enum_name(stage_enum, zio.io_stage);
2559		if (stage != NULL)
2560			stage += sizeof ("ZIO_STAGE_") - 1;
2561		else
2562			stage = "?";
2563	} else {
2564		stage = "FAILED";
2565	}
2566
2567	if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
2568		if (zpa->zpa_flags & DCMD_PIPE_OUT) {
2569			mdb_printf("%?p\n", addr);
2570		} else {
2571			mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
2572			    ZIO_MAXWIDTH - indent, addr, type, stage);
2573			if (zio.io_waiter != 0)
2574				mdb_printf("%-16lx ", zio.io_waiter);
2575			else
2576				mdb_printf("%-16s ", "-");
2577#ifdef _KERNEL
2578			if (zio_timestamp.io_timestamp != 0) {
2579				mdb_printf("%llums", (mdb_gethrtime() -
2580				    zio_timestamp.io_timestamp) /
2581				    1000000);
2582			} else {
2583				mdb_printf("%-12s ", "-");
2584			}
2585#else
2586			mdb_printf("%-12s ", "-");
2587#endif
2588			mdb_printf("\n");
2589		}
2590	}
2591
2592	if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
2593		return (WALK_NEXT);
2594
2595	if (zpa->zpa_type == ZIO_WALK_PARENT)
2596		laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2597		    "io_parent_list");
2598	else
2599		laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2600		    "io_child_list");
2601
2602	zpa->zpa_current_depth++;
2603	if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
2604		mdb_warn("failed to walk zio_t children at %p\n", laddr);
2605		return (WALK_ERR);
2606	}
2607	zpa->zpa_current_depth--;
2608
2609	return (WALK_NEXT);
2610}
2611
2612/* ARGSUSED */
2613static int
2614zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
2615{
2616	zio_link_t zl;
2617	uintptr_t ziop;
2618	zio_print_args_t *zpa = arg;
2619
2620	if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
2621		mdb_warn("failed to read zio_link_t at %p", addr);
2622		return (WALK_ERR);
2623	}
2624
2625	if (zpa->zpa_type == ZIO_WALK_PARENT)
2626		ziop = (uintptr_t)zl.zl_parent;
2627	else
2628		ziop = (uintptr_t)zl.zl_child;
2629
2630	return (zio_print_cb(ziop, zpa));
2631}
2632
2633/* ARGSUSED */
2634static int
2635zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2636{
2637	zio_print_args_t zpa = { 0 };
2638
2639	if (!(flags & DCMD_ADDRSPEC))
2640		return (DCMD_USAGE);
2641
2642	if (mdb_getopts(argc, argv,
2643	    'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
2644	    'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
2645	    'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
2646	    NULL) != argc)
2647		return (DCMD_USAGE);
2648
2649	zpa.zpa_flags = flags;
2650	if (zpa.zpa_max_depth != 0) {
2651		if (zpa.zpa_type == ZIO_WALK_SELF)
2652			zpa.zpa_type = ZIO_WALK_CHILD;
2653	} else if (zpa.zpa_type != ZIO_WALK_SELF) {
2654		zpa.zpa_min_depth = 1;
2655		zpa.zpa_max_depth = 1;
2656	}
2657
2658	if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
2659		mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n",
2660		    ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER",
2661		    "TIME_ELAPSED");
2662	}
2663
2664	if (zio_print_cb(addr, &zpa) != WALK_NEXT)
2665		return (DCMD_ERR);
2666
2667	return (DCMD_OK);
2668}
2669
2670/*
2671 * [addr]::zio_state
2672 *
2673 * Print a summary of all zio_t structures on the system, or for a particular
2674 * pool.  This is equivalent to '::walk zio_root | ::zio'.
2675 */
2676/*ARGSUSED*/
2677static int
2678zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2679{
2680	/*
2681	 * MDB will remember the last address of the pipeline, so if we don't
2682	 * zero this we'll end up trying to walk zio structures for a
2683	 * non-existent spa_t.
2684	 */
2685	if (!(flags & DCMD_ADDRSPEC))
2686		addr = 0;
2687
2688	return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
2689}
2690
2691
/*
 * Header found at the start of every btree node.  The btree walker fills
 * node buffers with raw mdb_vread() calls and then accesses them through
 * this type.
 * NOTE(review): because no CTF translation is involved, the layout
 * presumably has to match the target's zfs_btree_hdr_t -- confirm.
 */
typedef struct mdb_zfs_btree_hdr {
	uintptr_t		bth_parent;
	boolean_t		bth_core;
	/*
	 * For both leaf and core nodes, represents the number of elements in
	 * the node. For core nodes, they will have bth_count + 1 children.
	 */
	uint32_t		bth_count;
} mdb_zfs_btree_hdr_t;
2701
/*
 * Core (interior) node: the header, the child pointers, then the elements.
 * The walker overlays this on a raw buffer read from the target.
 */
typedef struct mdb_zfs_btree_core {
	mdb_zfs_btree_hdr_t	btc_hdr;
	uintptr_t		btc_children[BTREE_CORE_ELEMS + 1];
	uint8_t			btc_elems[];
} mdb_zfs_btree_core_t;
2707
/*
 * Leaf node: the header followed directly by the elements.  The walker uses
 * offsetof(btl_elems) to form the target address of each leaf element.
 */
typedef struct mdb_zfs_btree_leaf {
	mdb_zfs_btree_hdr_t	btl_hdr;
	uint8_t			btl_elems[];
} mdb_zfs_btree_leaf_t;
2712
/*
 * The members of the target's zfs_btree_t that the walker needs; filled in
 * by mdb_ctf_vread() as "mdb_zfs_btree_t".  A zero bt_elem_size is rejected
 * by btree_walk_init() as an invalid or uninitialized tree, and a zero
 * bt_root yields an empty walk.
 */
typedef struct mdb_zfs_btree {
	uintptr_t		bt_root;
	size_t			bt_elem_size;
} mdb_zfs_btree_t;
2717
/*
 * Per-walk state for the btree walker.
 */
typedef struct btree_walk_data {
	/* copy of the tree's root pointer and element size */
	mdb_zfs_btree_t		bwd_btree;
	/* local buffer holding (part of) the node at wsp->walk_addr */
	mdb_zfs_btree_hdr_t	*bwd_node;
	/*
	 * Index of the current element within bwd_node; it is multiplied by
	 * bt_elem_size when forming element addresses.
	 */
	uint64_t		bwd_offset;
} btree_walk_data_t;
2723
2724static uintptr_t
2725btree_leftmost_child(uintptr_t addr, mdb_zfs_btree_hdr_t *buf)
2726{
2727	size_t size = offsetof(zfs_btree_core_t, btc_children) +
2728	    sizeof (uintptr_t);
2729	for (;;) {
2730		if (mdb_vread(buf, size, addr) == -1) {
2731			mdb_warn("failed to read at %p\n", addr);
2732			return ((uintptr_t)0ULL);
2733		}
2734		if (!buf->bth_core)
2735			return (addr);
2736		mdb_zfs_btree_core_t *node = (mdb_zfs_btree_core_t *)buf;
2737		addr = node->btc_children[0];
2738	}
2739}
2740
2741static int
2742btree_walk_step(mdb_walk_state_t *wsp)
2743{
2744	btree_walk_data_t *bwd = wsp->walk_data;
2745	size_t elem_size = bwd->bwd_btree.bt_elem_size;
2746	if (wsp->walk_addr == 0ULL)
2747		return (WALK_DONE);
2748
2749	if (!bwd->bwd_node->bth_core) {
2750		/*
2751		 * For the first element in a leaf node, read in the full
2752		 * leaf, since we only had part of it read in before.
2753		 */
2754		if (bwd->bwd_offset == 0) {
2755			if (mdb_vread(bwd->bwd_node, BTREE_LEAF_SIZE,
2756			    wsp->walk_addr) == -1) {
2757				mdb_warn("failed to read at %p\n",
2758				    wsp->walk_addr);
2759				return (WALK_ERR);
2760			}
2761		}
2762
2763		int status = wsp->walk_callback((uintptr_t)(wsp->walk_addr +
2764		    offsetof(mdb_zfs_btree_leaf_t, btl_elems) +
2765		    bwd->bwd_offset * elem_size), bwd->bwd_node,
2766		    wsp->walk_cbdata);
2767		if (status != WALK_NEXT)
2768			return (status);
2769		bwd->bwd_offset++;
2770
2771		/* Find the next element, if we're at the end of the leaf. */
2772		while (bwd->bwd_offset == bwd->bwd_node->bth_count) {
2773			uintptr_t par = bwd->bwd_node->bth_parent;
2774			uintptr_t cur = wsp->walk_addr;
2775			wsp->walk_addr = par;
2776			if (par == 0ULL)
2777				return (WALK_NEXT);
2778
2779			size_t size = sizeof (zfs_btree_core_t) +
2780			    BTREE_CORE_ELEMS * elem_size;
2781			if (mdb_vread(bwd->bwd_node, size, wsp->walk_addr) ==
2782			    -1) {
2783				mdb_warn("failed to read at %p\n",
2784				    wsp->walk_addr);
2785				return (WALK_ERR);
2786			}
2787			mdb_zfs_btree_core_t *node =
2788			    (mdb_zfs_btree_core_t *)bwd->bwd_node;
2789			int i;
2790			for (i = 0; i <= bwd->bwd_node->bth_count; i++) {
2791				if (node->btc_children[i] == cur)
2792					break;
2793			}
2794			if (i > bwd->bwd_node->bth_count) {
2795				mdb_warn("btree parent/child mismatch at "
2796				    "%#lx\n", cur);
2797				return (WALK_ERR);
2798			}
2799			bwd->bwd_offset = i;
2800		}
2801		return (WALK_NEXT);
2802	}
2803
2804	if (!bwd->bwd_node->bth_core) {
2805		mdb_warn("Invalid btree node at %#lx\n", wsp->walk_addr);
2806		return (WALK_ERR);
2807	}
2808	mdb_zfs_btree_core_t *node = (mdb_zfs_btree_core_t *)bwd->bwd_node;
2809	int status = wsp->walk_callback((uintptr_t)(wsp->walk_addr +
2810	    offsetof(mdb_zfs_btree_core_t, btc_elems) + bwd->bwd_offset *
2811	    elem_size), bwd->bwd_node, wsp->walk_cbdata);
2812	if (status != WALK_NEXT)
2813		return (status);
2814
2815	uintptr_t new_child = node->btc_children[bwd->bwd_offset + 1];
2816	wsp->walk_addr = btree_leftmost_child(new_child, bwd->bwd_node);
2817	if (wsp->walk_addr == 0ULL)
2818		return (WALK_ERR);
2819
2820	bwd->bwd_offset = 0;
2821	return (WALK_NEXT);
2822}
2823
2824static int
2825btree_walk_init(mdb_walk_state_t *wsp)
2826{
2827	btree_walk_data_t *bwd;
2828
2829	if (wsp->walk_addr == 0ULL) {
2830		mdb_warn("must supply address of zfs_btree_t\n");
2831		return (WALK_ERR);
2832	}
2833
2834	bwd = mdb_zalloc(sizeof (btree_walk_data_t), UM_SLEEP);
2835	if (mdb_ctf_vread(&bwd->bwd_btree, "zfs_btree_t", "mdb_zfs_btree_t",
2836	    wsp->walk_addr, 0) == -1) {
2837		mdb_free(bwd, sizeof (*bwd));
2838		return (WALK_ERR);
2839	}
2840
2841	if (bwd->bwd_btree.bt_elem_size == 0) {
2842		mdb_warn("invalid or uninitialized btree at %#lx\n",
2843		    wsp->walk_addr);
2844		mdb_free(bwd, sizeof (*bwd));
2845		return (WALK_ERR);
2846	}
2847
2848	size_t size = MAX(BTREE_LEAF_SIZE, sizeof (zfs_btree_core_t) +
2849	    BTREE_CORE_ELEMS * bwd->bwd_btree.bt_elem_size);
2850	bwd->bwd_node = mdb_zalloc(size, UM_SLEEP);
2851
2852	uintptr_t node = (uintptr_t)bwd->bwd_btree.bt_root;
2853	if (node == 0ULL) {
2854		wsp->walk_addr = 0ULL;
2855		wsp->walk_data = bwd;
2856		return (WALK_NEXT);
2857	}
2858	node = btree_leftmost_child(node, bwd->bwd_node);
2859	if (node == 0ULL) {
2860		mdb_free(bwd->bwd_node, size);
2861		mdb_free(bwd, sizeof (*bwd));
2862		return (WALK_ERR);
2863	}
2864	bwd->bwd_offset = 0;
2865
2866	wsp->walk_addr = node;
2867	wsp->walk_data = bwd;
2868	return (WALK_NEXT);
2869}
2870
2871static void
2872btree_walk_fini(mdb_walk_state_t *wsp)
2873{
2874	btree_walk_data_t *bwd = (btree_walk_data_t *)wsp->walk_data;
2875
2876	if (bwd == NULL)
2877		return;
2878
2879	size_t size = MAX(BTREE_LEAF_SIZE, sizeof (zfs_btree_core_t) +
2880	    BTREE_CORE_ELEMS * bwd->bwd_btree.bt_elem_size);
2881	if (bwd->bwd_node != NULL)
2882		mdb_free(bwd->bwd_node, size);
2883
2884	mdb_free(bwd, sizeof (*bwd));
2885}
2886
/*
 * The members of the target's multilist_t that the multilist walker needs;
 * filled in by mdb_ctf_vread() as "mdb_multilist_t".  ml_sublists is the
 * target address of an array of ml_num_sublists multilist_sublist_t entries.
 */
typedef struct mdb_multilist {
	uint64_t ml_num_sublists;
	uintptr_t ml_sublists;
} mdb_multilist_t;
2891
2892static int
2893multilist_walk_step(mdb_walk_state_t *wsp)
2894{
2895	return (wsp->walk_callback(wsp->walk_addr, wsp->walk_layer,
2896	    wsp->walk_cbdata));
2897}
2898
2899static int
2900multilist_walk_init(mdb_walk_state_t *wsp)
2901{
2902	mdb_multilist_t ml;
2903	ssize_t sublist_sz;
2904	int list_offset;
2905	size_t i;
2906
2907	if (wsp->walk_addr == 0) {
2908		mdb_warn("must supply address of multilist_t\n");
2909		return (WALK_ERR);
2910	}
2911
2912	if (mdb_ctf_vread(&ml, "multilist_t", "mdb_multilist_t",
2913	    wsp->walk_addr, 0) == -1) {
2914		return (WALK_ERR);
2915	}
2916
2917	if (ml.ml_num_sublists == 0 || ml.ml_sublists == 0) {
2918		mdb_warn("invalid or uninitialized multilist at %#lx\n",
2919		    wsp->walk_addr);
2920		return (WALK_ERR);
2921	}
2922
2923	/* mdb_ctf_sizeof_by_name() will print an error for us */
2924	sublist_sz = mdb_ctf_sizeof_by_name("multilist_sublist_t");
2925	if (sublist_sz == -1)
2926		return (WALK_ERR);
2927
2928	/* mdb_ctf_offsetof_by_name will print an error for us */
2929	list_offset = mdb_ctf_offsetof_by_name("multilist_sublist_t",
2930	    "mls_list");
2931	if (list_offset == -1)
2932		return (WALK_ERR);
2933
2934	for (i = 0; i < ml.ml_num_sublists; i++) {
2935		wsp->walk_addr = ml.ml_sublists + i * sublist_sz + list_offset;
2936
2937		if (mdb_layered_walk("list", wsp) == -1) {
2938			mdb_warn("can't walk multilist sublist");
2939			return (WALK_ERR);
2940		}
2941	}
2942
2943	return (WALK_NEXT);
2944}
2945
/*
 * The members of the target's txg_list_t that the txg_list walkers need;
 * filled in by mdb_ctf_vread() as "mdb_txg_list_t".  tl_offset is subtracted
 * from each node address to obtain the address of the containing object, and
 * tl_head holds one chain head per txg slot.
 */
typedef struct mdb_txg_list {
	size_t		tl_offset;
	uintptr_t	tl_head[TXG_SIZE];
} mdb_txg_list_t;
2950
/*
 * Per-walk state for the txg_list walkers.
 */
typedef struct txg_list_walk_data {
	/* copy of the list's per-slot chain heads */
	uintptr_t lw_head[TXG_SIZE];
	/* slot currently being walked */
	int	lw_txgoff;
	/* last slot this walk will visit */
	int	lw_maxoff;
	/* offset of the txg_node_t within each object */
	size_t	lw_offset;
	/* buffer of lw_offset + sizeof (txg_node_t) bytes for each object */
	void	*lw_obj;
} txg_list_walk_data_t;
2958
2959static int
2960txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
2961{
2962	txg_list_walk_data_t *lwd;
2963	mdb_txg_list_t list;
2964	int i;
2965
2966	lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
2967	if (mdb_ctf_vread(&list, "txg_list_t", "mdb_txg_list_t", wsp->walk_addr,
2968	    0) == -1) {
2969		mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
2970		return (WALK_ERR);
2971	}
2972
2973	for (i = 0; i < TXG_SIZE; i++)
2974		lwd->lw_head[i] = list.tl_head[i];
2975	lwd->lw_offset = list.tl_offset;
2976	lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
2977	    UM_SLEEP | UM_GC);
2978	lwd->lw_txgoff = txg;
2979	lwd->lw_maxoff = maxoff;
2980
2981	wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2982	wsp->walk_data = lwd;
2983
2984	return (WALK_NEXT);
2985}
2986
2987static int
2988txg_list_walk_init(mdb_walk_state_t *wsp)
2989{
2990	return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
2991}
2992
2993static int
2994txg_list0_walk_init(mdb_walk_state_t *wsp)
2995{
2996	return (txg_list_walk_init_common(wsp, 0, 0));
2997}
2998
2999static int
3000txg_list1_walk_init(mdb_walk_state_t *wsp)
3001{
3002	return (txg_list_walk_init_common(wsp, 1, 1));
3003}
3004
3005static int
3006txg_list2_walk_init(mdb_walk_state_t *wsp)
3007{
3008	return (txg_list_walk_init_common(wsp, 2, 2));
3009}
3010
3011static int
3012txg_list3_walk_init(mdb_walk_state_t *wsp)
3013{
3014	return (txg_list_walk_init_common(wsp, 3, 3));
3015}
3016
3017static int
3018txg_list_walk_step(mdb_walk_state_t *wsp)
3019{
3020	txg_list_walk_data_t *lwd = wsp->walk_data;
3021	uintptr_t addr;
3022	txg_node_t *node;
3023	int status;
3024
3025	while (wsp->walk_addr == 0 && lwd->lw_txgoff < lwd->lw_maxoff) {
3026		lwd->lw_txgoff++;
3027		wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
3028	}
3029
3030	if (wsp->walk_addr == 0)
3031		return (WALK_DONE);
3032
3033	addr = wsp->walk_addr - lwd->lw_offset;
3034
3035	if (mdb_vread(lwd->lw_obj,
3036	    lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
3037		mdb_warn("failed to read list element at %#lx", addr);
3038		return (WALK_ERR);
3039	}
3040
3041	status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
3042	node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
3043	wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
3044
3045	return (status);
3046}
3047
3048/*
3049 * ::walk spa
3050 *
3051 * Walk all named spa_t structures in the namespace.  This is nothing more than
3052 * a layered avl walk.
3053 */
3054static int
3055spa_walk_init(mdb_walk_state_t *wsp)
3056{
3057	GElf_Sym sym;
3058
3059	if (wsp->walk_addr != 0) {
3060		mdb_warn("spa walk only supports global walks\n");
3061		return (WALK_ERR);
3062	}
3063
3064	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
3065		mdb_warn("failed to find symbol 'spa_namespace_avl'");
3066		return (WALK_ERR);
3067	}
3068
3069	wsp->walk_addr = (uintptr_t)sym.st_value;
3070
3071	if (mdb_layered_walk("avl", wsp) == -1) {
3072		mdb_warn("failed to walk 'avl'\n");
3073		return (WALK_ERR);
3074	}
3075
3076	return (WALK_NEXT);
3077}
3078
3079static int
3080spa_walk_step(mdb_walk_state_t *wsp)
3081{
3082	return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata));
3083}
3084
3085/*
3086 * [addr]::walk zio
3087 *
3088 * Walk all active zio_t structures on the system.  This is simply a layered
3089 * walk on top of ::walk zio_cache, with the optional ability to limit the
3090 * structures to a particular pool.
3091 */
3092static int
3093zio_walk_init(mdb_walk_state_t *wsp)
3094{
3095	wsp->walk_data = (void *)wsp->walk_addr;
3096
3097	if (mdb_layered_walk("zio_cache", wsp) == -1) {
3098		mdb_warn("failed to walk 'zio_cache'\n");
3099		return (WALK_ERR);
3100	}
3101
3102	return (WALK_NEXT);
3103}
3104
3105static int
3106zio_walk_step(mdb_walk_state_t *wsp)
3107{
3108	mdb_zio_t zio;
3109	uintptr_t spa = (uintptr_t)wsp->walk_data;
3110
3111	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
3112	    wsp->walk_addr, 0) == -1)
3113		return (WALK_ERR);
3114
3115	if (spa != 0 && spa != zio.io_spa)
3116		return (WALK_NEXT);
3117
3118	return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
3119}
3120
3121/*
3122 * [addr]::walk zio_root
3123 *
3124 * Walk only root zio_t structures, optionally for a particular spa_t.
3125 */
3126static int
3127zio_walk_root_step(mdb_walk_state_t *wsp)
3128{
3129	mdb_zio_t zio;
3130	uintptr_t spa = (uintptr_t)wsp->walk_data;
3131
3132	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
3133	    wsp->walk_addr, 0) == -1)
3134		return (WALK_ERR);
3135
3136	if (spa != 0 && spa != zio.io_spa)
3137		return (WALK_NEXT);
3138
3139	/* If the parent list is not empty, ignore */
3140	if (zio.io_parent_list.list_head.list_next !=
3141	    wsp->walk_addr +
3142	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") +
3143	    mdb_ctf_offsetof_by_name("struct list", "list_head"))
3144		return (WALK_NEXT);
3145
3146	return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
3147}
3148
3149/*
3150 * ::zfs_blkstats
3151 *
3152 *	-v	print verbose per-level information
3153 *
3154 */
3155static int
3156zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3157{
3158	boolean_t verbose = B_FALSE;
3159	zfs_all_blkstats_t stats;
3160	dmu_object_type_t t;
3161	zfs_blkstat_t *tzb;
3162	uint64_t ditto;
3163
3164	if (mdb_getopts(argc, argv,
3165	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3166	    NULL) != argc)
3167		return (DCMD_USAGE);
3168
3169	if (!(flags & DCMD_ADDRSPEC))
3170		return (DCMD_USAGE);
3171
3172	if (GETMEMB(addr, "spa", spa_dsl_pool, addr) ||
3173	    GETMEMB(addr, "dsl_pool", dp_blkstats, addr) ||
3174	    mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
3175		mdb_warn("failed to read data at %p;", addr);
3176		mdb_printf("maybe no stats? run \"zpool scrub\" first.");
3177		return (DCMD_ERR);
3178	}
3179
3180	tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL];
3181	if (tzb->zb_gangs != 0) {
3182		mdb_printf("Ganged blocks: %llu\n",
3183		    (longlong_t)tzb->zb_gangs);
3184	}
3185
3186	ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
3187	    tzb->zb_ditto_3_of_3_samevdev;
3188	if (ditto != 0) {
3189		mdb_printf("Dittoed blocks on same vdev: %llu\n",
3190		    (longlong_t)ditto);
3191	}
3192
3193	mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
3194	    "\t  avg\t comp\t%%Total\tType\n");
3195
3196	for (t = 0; t <= DMU_OT_TOTAL; t++) {
3197		char csize[MDB_NICENUM_BUFLEN], lsize[MDB_NICENUM_BUFLEN];
3198		char psize[MDB_NICENUM_BUFLEN], asize[MDB_NICENUM_BUFLEN];
3199		char avg[MDB_NICENUM_BUFLEN];
3200		char comp[MDB_NICENUM_BUFLEN], pct[MDB_NICENUM_BUFLEN];
3201		char typename[64];
3202		int l;
3203
3204
3205		if (t == DMU_OT_DEFERRED)
3206			strcpy(typename, "deferred free");
3207		else if (t == DMU_OT_OTHER)
3208			strcpy(typename, "other");
3209		else if (t == DMU_OT_TOTAL)
3210			strcpy(typename, "Total");
3211		else if (enum_lookup("enum dmu_object_type",
3212		    t, "DMU_OT_", sizeof (typename), typename) == -1) {
3213			mdb_warn("failed to read type name");
3214			return (DCMD_ERR);
3215		}
3216
3217		if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
3218			continue;
3219
3220		for (l = -1; l < DN_MAX_LEVELS; l++) {
3221			int level = (l == -1 ? DN_MAX_LEVELS : l);
3222			zfs_blkstat_t *zb = &stats.zab_type[level][t];
3223
3224			if (zb->zb_asize == 0)
3225				continue;
3226
3227			/*
3228			 * Don't print each level unless requested.
3229			 */
3230			if (!verbose && level != DN_MAX_LEVELS)
3231				continue;
3232
3233			/*
3234			 * If all the space is level 0, don't print the
3235			 * level 0 separately.
3236			 */
3237			if (level == 0 && zb->zb_asize ==
3238			    stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
3239				continue;
3240
3241			mdb_nicenum(zb->zb_count, csize);
3242			mdb_nicenum(zb->zb_lsize, lsize);
3243			mdb_nicenum(zb->zb_psize, psize);
3244			mdb_nicenum(zb->zb_asize, asize);
3245			mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
3246			(void) mdb_snprintfrac(comp, MDB_NICENUM_BUFLEN,
3247			    zb->zb_lsize, zb->zb_psize, 2);
3248			(void) mdb_snprintfrac(pct, MDB_NICENUM_BUFLEN,
3249			    100 * zb->zb_asize, tzb->zb_asize, 2);
3250
3251			mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
3252			    "\t%5s\t%6s\t",
3253			    csize, lsize, psize, asize, avg, comp, pct);
3254
3255			if (level == DN_MAX_LEVELS)
3256				mdb_printf("%s\n", typename);
3257			else
3258				mdb_printf("  L%d %s\n",
3259				    level, typename);
3260		}
3261	}
3262
3263	return (DCMD_OK);
3264}
3265
/*
 * The members of the target's reference_t that reference_cb() uses; filled
 * in by mdb_ctf_vread() as "mdb_reference_t".  ref_holder is printed as the
 * tag (and as a string when it reads as printable text), ref_number as the
 * count, and ref_removed is handed to ::whatis for released holds.
 */
typedef struct mdb_reference {
	uintptr_t ref_holder;
	uintptr_t ref_removed;
	uint64_t ref_number;
} mdb_reference_t;
3271
3272/* ARGSUSED */
3273static int
3274reference_cb(uintptr_t addr, const void *ignored, void *arg)
3275{
3276	mdb_reference_t ref;
3277	boolean_t holder_is_str = B_FALSE;
3278	char holder_str[128];
3279	boolean_t removed = (boolean_t)arg;
3280
3281	if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr,
3282	    0) == -1)
3283		return (DCMD_ERR);
3284
3285	if (mdb_readstr(holder_str, sizeof (holder_str),
3286	    ref.ref_holder) != -1)
3287		holder_is_str = strisprint(holder_str);
3288
3289	if (removed)
3290		mdb_printf("removed ");
3291	mdb_printf("reference ");
3292	if (ref.ref_number != 1)
3293		mdb_printf("with count=%llu ", ref.ref_number);
3294	mdb_printf("with tag %lx", ref.ref_holder);
3295	if (holder_is_str)
3296		mdb_printf(" \"%s\"", holder_str);
3297	mdb_printf(", held at:\n");
3298
3299	(void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
3300
3301	if (removed) {
3302		mdb_printf("removed at:\n");
3303		(void) mdb_call_dcmd("whatis", ref.ref_removed,
3304		    DCMD_ADDRSPEC, 0, NULL);
3305	}
3306
3307	mdb_printf("\n");
3308
3309	return (WALK_NEXT);
3310}
3311
/*
 * zfs_refcount_t is read in three separate pieces so that each read can
 * fail on its own: rc_count is required, while the other two are read with
 * MDB_CTF_VREAD_QUIET and have fallbacks in zfs_refcount().
 */
typedef struct mdb_zfs_refcount {
	uint64_t rc_count;
} mdb_zfs_refcount_t;

/* If this read fails, the refcount is reported as untracked. */
typedef struct mdb_zfs_refcount_removed {
	uint64_t rc_removed_count;
} mdb_zfs_refcount_removed_t;

/* If this read fails, the refcount is assumed to be tracked. */
typedef struct mdb_zfs_refcount_tracked {
	boolean_t rc_tracked;
} mdb_zfs_refcount_tracked_t;
3323
3324/* ARGSUSED */
3325static int
3326zfs_refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3327{
3328	mdb_zfs_refcount_t rc;
3329	mdb_zfs_refcount_removed_t rcr;
3330	mdb_zfs_refcount_tracked_t rct;
3331	int off;
3332	boolean_t released = B_FALSE;
3333
3334	if (!(flags & DCMD_ADDRSPEC))
3335		return (DCMD_USAGE);
3336
3337	if (mdb_getopts(argc, argv,
3338	    'r', MDB_OPT_SETBITS, B_TRUE, &released,
3339	    NULL) != argc)
3340		return (DCMD_USAGE);
3341
3342	if (mdb_ctf_vread(&rc, "zfs_refcount_t", "mdb_zfs_refcount_t", addr,
3343	    0) == -1)
3344		return (DCMD_ERR);
3345
3346	if (mdb_ctf_vread(&rcr, "zfs_refcount_t", "mdb_zfs_refcount_removed_t",
3347	    addr, MDB_CTF_VREAD_QUIET) == -1) {
3348		mdb_printf("zfs_refcount_t at %p has %llu holds (untracked)\n",
3349		    addr, (longlong_t)rc.rc_count);
3350		return (DCMD_OK);
3351	}
3352
3353	if (mdb_ctf_vread(&rct, "zfs_refcount_t", "mdb_zfs_refcount_tracked_t",
3354	    addr, MDB_CTF_VREAD_QUIET) == -1) {
3355		/* If this is an old target, it might be tracked. */
3356		rct.rc_tracked = B_TRUE;
3357	}
3358
3359	mdb_printf("zfs_refcount_t at %p has %llu current holds, "
3360	    "%llu recently released holds\n",
3361	    addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count);
3362
3363	if (rct.rc_tracked && rc.rc_count > 0)
3364		mdb_printf("current holds:\n");
3365	off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_list");
3366	if (off == -1)
3367		return (DCMD_ERR);
3368	mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off);
3369
3370	if (released && rcr.rc_removed_count > 0) {
3371		mdb_printf("released holds:\n");
3372
3373		off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_removed");
3374		if (off == -1)
3375			return (DCMD_ERR);
3376		mdb_pwalk("list", reference_cb, (void*)B_TRUE, addr + off);
3377	}
3378
3379	return (DCMD_OK);
3380}
3381
3382/* ARGSUSED */
3383static int
3384sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3385{
3386	sa_attr_table_t *table;
3387	sa_os_t sa_os;
3388	char *name;
3389	int i;
3390
3391	if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) {
3392		mdb_warn("failed to read sa_os at %p", addr);
3393		return (DCMD_ERR);
3394	}
3395
3396	table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
3397	    UM_SLEEP | UM_GC);
3398	name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC);
3399
3400	if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
3401	    (uintptr_t)sa_os.sa_attr_table) == -1) {
3402		mdb_warn("failed to read sa_os at %p", addr);
3403		return (DCMD_ERR);
3404	}
3405
3406	mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n",
3407	    "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME");
3408	for (i = 0; i != sa_os.sa_num_attrs; i++) {
3409		mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name);
3410		mdb_printf("%5x   %8x %8x %8x          %-s\n",
3411		    (int)table[i].sa_attr, (int)table[i].sa_registered,
3412		    (int)table[i].sa_length, table[i].sa_byteswap, name);
3413	}
3414
3415	return (DCMD_OK);
3416}
3417
3418static int
3419sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count)
3420{
3421	uintptr_t idx_table;
3422
3423	if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) {
3424		mdb_printf("can't find offset table in sa_idx_tab\n");
3425		return (-1);
3426	}
3427
3428	*off_tab = mdb_alloc(attr_count * sizeof (uint32_t),
3429	    UM_SLEEP | UM_GC);
3430
3431	if (mdb_vread(*off_tab,
3432	    attr_count * sizeof (uint32_t), idx_table) == -1) {
3433		mdb_warn("failed to attribute offset table %p", idx_table);
3434		return (-1);
3435	}
3436
3437	return (DCMD_OK);
3438}
3439
3440/*ARGSUSED*/
3441static int
3442sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3443{
3444	uint32_t *offset_tab;
3445	int attr_count;
3446	uint64_t attr_id;
3447	uintptr_t attr_addr;
3448	uintptr_t bonus_tab, spill_tab;
3449	uintptr_t db_bonus, db_spill;
3450	uintptr_t os, os_sa;
3451	uintptr_t db_data;
3452
3453	if (argc != 1)
3454		return (DCMD_USAGE);
3455
3456	if (argv[0].a_type == MDB_TYPE_STRING)
3457		attr_id = mdb_strtoull(argv[0].a_un.a_str);
3458	else
3459		return (DCMD_USAGE);
3460
3461	if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) ||
3462	    GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) ||
3463	    GETMEMB(addr, "sa_handle", sa_os, os) ||
3464	    GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) ||
3465	    GETMEMB(addr, "sa_handle", sa_spill, db_spill)) {
3466		mdb_printf("Can't find necessary information in sa_handle "
3467		    "in sa_handle\n");
3468		return (DCMD_ERR);
3469	}
3470
3471	if (GETMEMB(os, "objset", os_sa, os_sa)) {
3472		mdb_printf("Can't find os_sa in objset\n");
3473		return (DCMD_ERR);
3474	}
3475
3476	if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) {
3477		mdb_printf("Can't find sa_num_attrs\n");
3478		return (DCMD_ERR);
3479	}
3480
3481	if (attr_id > attr_count) {
3482		mdb_printf("attribute id number is out of range\n");
3483		return (DCMD_ERR);
3484	}
3485
3486	if (bonus_tab) {
3487		if (sa_get_off_table(bonus_tab, &offset_tab,
3488		    attr_count) == -1) {
3489			return (DCMD_ERR);
3490		}
3491
3492		if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) {
3493			mdb_printf("can't find db_data in bonus dbuf\n");
3494			return (DCMD_ERR);
3495		}
3496	}
3497
3498	if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) &&
3499	    spill_tab == 0) {
3500		mdb_printf("Attribute does not exist\n");
3501		return (DCMD_ERR);
3502	} else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) {
3503		if (sa_get_off_table(spill_tab, &offset_tab,
3504		    attr_count) == -1) {
3505			return (DCMD_ERR);
3506		}
3507		if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) {
3508			mdb_printf("can't find db_data in spill dbuf\n");
3509			return (DCMD_ERR);
3510		}
3511		if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) {
3512			mdb_printf("Attribute does not exist\n");
3513			return (DCMD_ERR);
3514		}
3515	}
3516	attr_addr = db_data + TOC_OFF(offset_tab[attr_id]);
3517	mdb_printf("%p\n", attr_addr);
3518	return (DCMD_OK);
3519}
3520
3521/* ARGSUSED */
3522static int
3523zfs_ace_print_common(uintptr_t addr, uint_t flags,
3524    uint64_t id, uint32_t access_mask, uint16_t ace_flags,
3525    uint16_t ace_type, int verbose)
3526{
3527	if (DCMD_HDRSPEC(flags) && !verbose)
3528		mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n",
3529		    "ADDR", "FLAGS", "MASK", "TYPE", "ID");
3530
3531	if (!verbose) {
3532		mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr,
3533		    ace_flags, access_mask, ace_type, id);
3534		return (DCMD_OK);
3535	}
3536
3537	switch (ace_flags & ACE_TYPE_FLAGS) {
3538	case ACE_OWNER:
3539		mdb_printf("owner@:");
3540		break;
3541	case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
3542		mdb_printf("group@:");
3543		break;
3544	case ACE_EVERYONE:
3545		mdb_printf("everyone@:");
3546		break;
3547	case ACE_IDENTIFIER_GROUP:
3548		mdb_printf("group:%llx:", (u_longlong_t)id);
3549		break;
3550	case 0: /* User entry */
3551		mdb_printf("user:%llx:", (u_longlong_t)id);
3552		break;
3553	}
3554
3555	/* print out permission mask */
3556	if (access_mask & ACE_READ_DATA)
3557		mdb_printf("r");
3558	else
3559		mdb_printf("-");
3560	if (access_mask & ACE_WRITE_DATA)
3561		mdb_printf("w");
3562	else
3563		mdb_printf("-");
3564	if (access_mask & ACE_EXECUTE)
3565		mdb_printf("x");
3566	else
3567		mdb_printf("-");
3568	if (access_mask & ACE_APPEND_DATA)
3569		mdb_printf("p");
3570	else
3571		mdb_printf("-");
3572	if (access_mask & ACE_DELETE)
3573		mdb_printf("d");
3574	else
3575		mdb_printf("-");
3576	if (access_mask & ACE_DELETE_CHILD)
3577		mdb_printf("D");
3578	else
3579		mdb_printf("-");
3580	if (access_mask & ACE_READ_ATTRIBUTES)
3581		mdb_printf("a");
3582	else
3583		mdb_printf("-");
3584	if (access_mask & ACE_WRITE_ATTRIBUTES)
3585		mdb_printf("A");
3586	else
3587		mdb_printf("-");
3588	if (access_mask & ACE_READ_NAMED_ATTRS)
3589		mdb_printf("R");
3590	else
3591		mdb_printf("-");
3592	if (access_mask & ACE_WRITE_NAMED_ATTRS)
3593		mdb_printf("W");
3594	else
3595		mdb_printf("-");
3596	if (access_mask & ACE_READ_ACL)
3597		mdb_printf("c");
3598	else
3599		mdb_printf("-");
3600	if (access_mask & ACE_WRITE_ACL)
3601		mdb_printf("C");
3602	else
3603		mdb_printf("-");
3604	if (access_mask & ACE_WRITE_OWNER)
3605		mdb_printf("o");
3606	else
3607		mdb_printf("-");
3608	if (access_mask & ACE_SYNCHRONIZE)
3609		mdb_printf("s");
3610	else
3611		mdb_printf("-");
3612
3613	mdb_printf(":");
3614
3615	/* Print out inheritance flags */
3616	if (ace_flags & ACE_FILE_INHERIT_ACE)
3617		mdb_printf("f");
3618	else
3619		mdb_printf("-");
3620	if (ace_flags & ACE_DIRECTORY_INHERIT_ACE)
3621		mdb_printf("d");
3622	else
3623		mdb_printf("-");
3624	if (ace_flags & ACE_INHERIT_ONLY_ACE)
3625		mdb_printf("i");
3626	else
3627		mdb_printf("-");
3628	if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE)
3629		mdb_printf("n");
3630	else
3631		mdb_printf("-");
3632	if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG)
3633		mdb_printf("S");
3634	else
3635		mdb_printf("-");
3636	if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG)
3637		mdb_printf("F");
3638	else
3639		mdb_printf("-");
3640	if (ace_flags & ACE_INHERITED_ACE)
3641		mdb_printf("I");
3642	else
3643		mdb_printf("-");
3644
3645	switch (ace_type) {
3646	case ACE_ACCESS_ALLOWED_ACE_TYPE:
3647		mdb_printf(":allow\n");
3648		break;
3649	case ACE_ACCESS_DENIED_ACE_TYPE:
3650		mdb_printf(":deny\n");
3651		break;
3652	case ACE_SYSTEM_AUDIT_ACE_TYPE:
3653		mdb_printf(":audit\n");
3654		break;
3655	case ACE_SYSTEM_ALARM_ACE_TYPE:
3656		mdb_printf(":alarm\n");
3657		break;
3658	default:
3659		mdb_printf(":?\n");
3660	}
3661	return (DCMD_OK);
3662}
3663
3664/* ARGSUSED */
3665static int
3666zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3667{
3668	zfs_ace_t zace;
3669	int verbose = FALSE;
3670	uint64_t id;
3671
3672	if (!(flags & DCMD_ADDRSPEC))
3673		return (DCMD_USAGE);
3674
3675	if (mdb_getopts(argc, argv,
3676	    'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
3677		return (DCMD_USAGE);
3678
3679	if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) {
3680		mdb_warn("failed to read zfs_ace_t");
3681		return (DCMD_ERR);
3682	}
3683
3684	if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 ||
3685	    (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
3686		id = zace.z_fuid;
3687	else
3688		id = -1;
3689
3690	return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask,
3691	    zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose));
3692}
3693
3694/* ARGSUSED */
3695static int
3696zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3697{
3698	ace_t ace;
3699	uint64_t id;
3700	int verbose = FALSE;
3701
3702	if (!(flags & DCMD_ADDRSPEC))
3703		return (DCMD_USAGE);
3704
3705	if (mdb_getopts(argc, argv,
3706	    'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
3707		return (DCMD_USAGE);
3708
3709	if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) {
3710		mdb_warn("failed to read ace_t");
3711		return (DCMD_ERR);
3712	}
3713
3714	if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 ||
3715	    (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
3716		id = ace.a_who;
3717	else
3718		id = -1;
3719
3720	return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask,
3721	    ace.a_flags, ace.a_type, verbose));
3722}
3723
/*
 * State handed from zfs_acl_dump() through acl_cb() to acl_aces_cb() while an
 * ACL is being printed.
 */
typedef struct acl_dump_args {
	int a_argc;		/* dcmd argument count, forwarded per ACE */
	const mdb_arg_t *a_argv;	/* dcmd arguments, forwarded per ACE */
	uint16_t a_version;	/* ACL's z_version; 1 selects zfs_ace */
	int a_flags;		/* DCMD_LOOPFIRST, then DCMD_LOOP */
} acl_dump_args_t;
3730
3731/* ARGSUSED */
3732static int
3733acl_aces_cb(uintptr_t addr, const void *unknown, void *arg)
3734{
3735	acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
3736
3737	if (acl_args->a_version == 1) {
3738		if (mdb_call_dcmd("zfs_ace", addr,
3739		    DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
3740		    acl_args->a_argv) != DCMD_OK) {
3741			return (WALK_ERR);
3742		}
3743	} else {
3744		if (mdb_call_dcmd("zfs_ace0", addr,
3745		    DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
3746		    acl_args->a_argv) != DCMD_OK) {
3747			return (WALK_ERR);
3748		}
3749	}
3750	acl_args->a_flags = DCMD_LOOP;
3751	return (WALK_NEXT);
3752}
3753
3754/* ARGSUSED */
3755static int
3756acl_cb(uintptr_t addr, const void *unknown, void *arg)
3757{
3758	acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
3759
3760	if (acl_args->a_version == 1) {
3761		if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb,
3762		    arg, addr) != 0) {
3763			mdb_warn("can't walk ACEs");
3764			return (DCMD_ERR);
3765		}
3766	} else {
3767		if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb,
3768		    arg, addr) != 0) {
3769			mdb_warn("can't walk ACEs");
3770			return (DCMD_ERR);
3771		}
3772	}
3773	return (WALK_NEXT);
3774}
3775
3776/* ARGSUSED */
3777static int
3778zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3779{
3780	zfs_acl_t zacl;
3781	int verbose = FALSE;
3782	acl_dump_args_t acl_args;
3783
3784	if (!(flags & DCMD_ADDRSPEC))
3785		return (DCMD_USAGE);
3786
3787	if (mdb_getopts(argc, argv,
3788	    'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) != argc)
3789		return (DCMD_USAGE);
3790
3791	if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) {
3792		mdb_warn("failed to read zfs_acl_t");
3793		return (DCMD_ERR);
3794	}
3795
3796	acl_args.a_argc = argc;
3797	acl_args.a_argv = argv;
3798	acl_args.a_version = zacl.z_version;
3799	acl_args.a_flags = DCMD_LOOPFIRST;
3800
3801	if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) {
3802		mdb_warn("can't walk ACL");
3803		return (DCMD_ERR);
3804	}
3805
3806	return (DCMD_OK);
3807}
3808
3809/* ARGSUSED */
3810static int
3811zfs_acl_node_walk_init(mdb_walk_state_t *wsp)
3812{
3813	if (wsp->walk_addr == 0) {
3814		mdb_warn("must supply address of zfs_acl_node_t\n");
3815		return (WALK_ERR);
3816	}
3817
3818	wsp->walk_addr +=
3819	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl");
3820
3821	if (mdb_layered_walk("list", wsp) == -1) {
3822		mdb_warn("failed to walk 'list'\n");
3823		return (WALK_ERR);
3824	}
3825
3826	return (WALK_NEXT);
3827}
3828
3829static int
3830zfs_acl_node_walk_step(mdb_walk_state_t *wsp)
3831{
3832	zfs_acl_node_t	aclnode;
3833
3834	if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t),
3835	    wsp->walk_addr) == -1) {
3836		mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr);
3837		return (WALK_ERR);
3838	}
3839
3840	return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata));
3841}
3842
/*
 * Private state of the ACE walkers, set up by zfs_aces_walk_init_common() and
 * consumed by zfs_aces_walk_step().
 */
typedef struct ace_walk_data {
	int		ace_count;	/* ACEs left to visit */
	int		ace_version;	/* 0: ace_t layout, 1: zfs_ace_t */
} ace_walk_data_t;
3847
3848static int
3849zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version,
3850    int ace_count, uintptr_t ace_data)
3851{
3852	ace_walk_data_t *ace_walk_data;
3853
3854	if (wsp->walk_addr == 0) {
3855		mdb_warn("must supply address of zfs_acl_node_t\n");
3856		return (WALK_ERR);
3857	}
3858
3859	ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC);
3860
3861	ace_walk_data->ace_count = ace_count;
3862	ace_walk_data->ace_version = version;
3863
3864	wsp->walk_addr = ace_data;
3865	wsp->walk_data = ace_walk_data;
3866
3867	return (WALK_NEXT);
3868}
3869
3870static int
3871zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version)
3872{
3873	static int gotid;
3874	static mdb_ctf_id_t acl_id;
3875	int z_ace_count;
3876	uintptr_t z_acldata;
3877
3878	if (!gotid) {
3879		if (mdb_ctf_lookup_by_name("struct zfs_acl_node",
3880		    &acl_id) == -1) {
3881			mdb_warn("couldn't find struct zfs_acl_node");
3882			return (DCMD_ERR);
3883		}
3884		gotid = TRUE;
3885	}
3886
3887	if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) {
3888		return (DCMD_ERR);
3889	}
3890	if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) {
3891		return (DCMD_ERR);
3892	}
3893
3894	return (zfs_aces_walk_init_common(wsp, version,
3895	    z_ace_count, z_acldata));
3896}
3897
/*
 * Walk init for ACL nodes holding version 1 ACEs, which
 * zfs_aces_walk_step() interprets using the zfs_ace_t layout.
 */
/* ARGSUSED */
static int
zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp)
{
	return (zfs_acl_node_aces_walk_init_common(wsp, 1));
}
3904
/*
 * Walk init for ACL nodes holding version 0 ACEs, which
 * zfs_aces_walk_step() interprets using the ace_t layout.
 */
/* ARGSUSED */
static int
zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp)
{
	return (zfs_acl_node_aces_walk_init_common(wsp, 0));
}
3911
3912static int
3913zfs_aces_walk_step(mdb_walk_state_t *wsp)
3914{
3915	ace_walk_data_t *ace_data = wsp->walk_data;
3916	zfs_ace_t zace;
3917	ace_t *acep;
3918	int status;
3919	int entry_type;
3920	int allow_type;
3921	uintptr_t ptr;
3922
3923	if (ace_data->ace_count == 0)
3924		return (WALK_DONE);
3925
3926	if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) {
3927		mdb_warn("failed to read zfs_ace_t at %#lx",
3928		    wsp->walk_addr);
3929		return (WALK_ERR);
3930	}
3931
3932	switch (ace_data->ace_version) {
3933	case 0:
3934		acep = (ace_t *)&zace;
3935		entry_type = acep->a_flags & ACE_TYPE_FLAGS;
3936		allow_type = acep->a_type;
3937		break;
3938	case 1:
3939		entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS;
3940		allow_type = zace.z_hdr.z_type;
3941		break;
3942	default:
3943		return (WALK_ERR);
3944	}
3945
3946	ptr = (uintptr_t)wsp->walk_addr;
3947	switch (entry_type) {
3948	case ACE_OWNER:
3949	case ACE_EVERYONE:
3950	case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
3951		ptr += ace_data->ace_version == 0 ?
3952		    sizeof (ace_t) : sizeof (zfs_ace_hdr_t);
3953		break;
3954	case ACE_IDENTIFIER_GROUP:
3955	default:
3956		switch (allow_type) {
3957		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
3958		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
3959		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
3960		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
3961			ptr += ace_data->ace_version == 0 ?
3962			    sizeof (ace_t) : sizeof (zfs_object_ace_t);
3963			break;
3964		default:
3965			ptr += ace_data->ace_version == 0 ?
3966			    sizeof (ace_t) : sizeof (zfs_ace_t);
3967			break;
3968		}
3969	}
3970
3971	ace_data->ace_count--;
3972	status = wsp->walk_callback(wsp->walk_addr,
3973	    (void *)(uintptr_t)&zace, wsp->walk_cbdata);
3974
3975	wsp->walk_addr = ptr;
3976	return (status);
3977}
3978
/*
 * MDB-side view of the target's rrwlock_t, holding the members the rrwlock
 * dcmd reads with mdb_ctf_vread().
 */
typedef struct mdb_zfs_rrwlock {
	uintptr_t	rr_writer;	/* non-zero: thread holding write lock */
	boolean_t	rr_writer_wanted;
} mdb_zfs_rrwlock_t;

/*
 * Cached copy of the target's rrw_tsd_key, read by the rrwlock dcmd while
 * this is still zero.
 */
static uint_t rrw_key;
3985
3986/* ARGSUSED */
3987static int
3988rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3989{
3990	mdb_zfs_rrwlock_t rrw;
3991
3992	if (rrw_key == 0) {
3993		if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1)
3994			return (DCMD_ERR);
3995	}
3996
3997	if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr,
3998	    0) == -1)
3999		return (DCMD_ERR);
4000
4001	if (rrw.rr_writer != 0) {
4002		mdb_printf("write lock held by thread %lx\n", rrw.rr_writer);
4003		return (DCMD_OK);
4004	}
4005
4006	if (rrw.rr_writer_wanted) {
4007		mdb_printf("writer wanted\n");
4008	}
4009
4010	mdb_printf("anonymous references:\n");
4011	(void) mdb_call_dcmd("zfs_refcount", addr +
4012	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"),
4013	    DCMD_ADDRSPEC, 0, NULL);
4014
4015	mdb_printf("linked references:\n");
4016	(void) mdb_call_dcmd("zfs_refcount", addr +
4017	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"),
4018	    DCMD_ADDRSPEC, 0, NULL);
4019
4020	/*
4021	 * XXX This should find references from
4022	 * "::walk thread | ::tsd -v <rrw_key>", but there is no support
4023	 * for programmatic consumption of dcmds, so this would be
4024	 * difficult, potentially requiring reimplementing ::tsd (both
4025	 * user and kernel versions) in this MDB module.
4026	 */
4027
4028	return (DCMD_OK);
4029}
4030
/*
 * Local holder for the arc_buf_hdr_t members that
 * arc_compression_stats_cb() needs.  The callback fills each field with its
 * own GETMEMB() read rather than reading the whole target structure.
 */
typedef struct mdb_arc_buf_hdr_t {
	uint16_t b_psize;	/* physical size, in SPA_MINBLOCKSIZE units */
	uint16_t b_lsize;	/* logical size, in SPA_MINBLOCKSIZE units */
	struct {
		uint32_t	b_bufcnt;
		uintptr_t	b_state;	/* compared to ARC_* symbols */
	} b_l1hdr;
} mdb_arc_buf_hdr_t;
4039
/*
 * Option bits for ::arc_compression_stats, set by mdb_getopts() in
 * arc_compression_stats().
 */
enum arc_cflags {
	ARC_CFLAG_VERBOSE		= 1 << 0,	/* -v: linear buckets */
	ARC_CFLAG_ANON			= 1 << 1,	/* -a: anon histograms */
	ARC_CFLAG_MRU			= 1 << 2,	/* -r: mru histograms */
	ARC_CFLAG_MFU			= 1 << 3,	/* -f: mfu histograms */
	ARC_CFLAG_BUFS			= 1 << 4,	/* -b: buffer counts */
};
4047
/*
 * Working state for ::arc_compression_stats.  arc_compression_stats() looks
 * up the ARC state symbols and allocates the histograms (hist_nbuckets
 * entries each); arc_compression_stats_cb() increments them while walking the
 * ARC buffer headers.
 */
typedef struct arc_compression_stats_data {
	GElf_Sym anon_sym;	/* ARC_anon symbol */
	GElf_Sym mru_sym;	/* ARC_mru symbol */
	GElf_Sym mrug_sym;	/* ARC_mru_ghost symbol */
	GElf_Sym mfu_sym;	/* ARC_mfu symbol */
	GElf_Sym mfug_sym;	/* ARC_mfu_ghost symbol */
	GElf_Sym l2c_sym;	/* ARC_l2c_only symbol */
	uint64_t *anon_c_hist;	/* histogram of compressed sizes in anon */
	uint64_t *anon_u_hist;	/* histogram of uncompressed sizes in anon */
	uint64_t *anon_bufs;	/* histogram of buffer counts in anon state */
	uint64_t *mru_c_hist;	/* histogram of compressed sizes in mru */
	uint64_t *mru_u_hist;	/* histogram of uncompressed sizes in mru */
	uint64_t *mru_bufs;	/* histogram of buffer counts in mru */
	uint64_t *mfu_c_hist;	/* histogram of compressed sizes in mfu */
	uint64_t *mfu_u_hist;	/* histogram of uncompressed sizes in mfu */
	uint64_t *mfu_bufs;	/* histogram of buffer counts in mfu */
	uint64_t *all_c_hist;	/* histogram of compressed anon + mru + mfu */
	uint64_t *all_u_hist;	/* histogram of uncompressed anon + mru + mfu */
	uint64_t *all_bufs;	/* histogram of buffer counts in all states  */
	int arc_cflags;		/* arc compression flags, specified by user */
	int hist_nbuckets;	/* number of buckets in each histogram */

	ulong_t l1hdr_off;	/* offset of b_l1hdr in arc_buf_hdr_t */
} arc_compression_stats_data_t;
4072
/*
 * Return the 1-based position of the most significant set bit of i, or 0 if
 * i is zero (so highbit64(1) == 1 and highbit64(1ULL << 63) == 64).
 */
int
highbit64(uint64_t i)
{
	int pos = 1;
	int shift;

	if (i == 0)
		return (0);

	/*
	 * Binary search: whenever anything is set above the low 'shift'
	 * bits, discard those low bits and account for them in the result.
	 */
	for (shift = 32; shift != 0; shift >>= 1) {
		if ((i >> shift) != 0) {
			pos += shift;
			i >>= shift;
		}
	}

	return (pos);
}
4100
4101/* ARGSUSED */
4102static int
4103arc_compression_stats_cb(uintptr_t addr, const void *unknown, void *arg)
4104{
4105	arc_compression_stats_data_t *data = arg;
4106	arc_flags_t flags;
4107	mdb_arc_buf_hdr_t hdr;
4108	int cbucket, ubucket, bufcnt;
4109
4110	/*
4111	 * mdb_ctf_vread() uses the sizeof the target type (e.g.
4112	 * sizeof (arc_buf_hdr_t) in the target) to read in the entire contents
4113	 * of the target type into a buffer and then copy the values of the
4114	 * desired members from the mdb typename (e.g. mdb_arc_buf_hdr_t) from
4115	 * this buffer. Unfortunately, the way arc_buf_hdr_t is used by zfs,
4116	 * the actual size allocated by the kernel for arc_buf_hdr_t is often
4117	 * smaller than `sizeof (arc_buf_hdr_t)` (see the definitions of
4118	 * l1arc_buf_hdr_t and arc_buf_hdr_t in
4119	 * usr/src/uts/common/fs/zfs/arc.c). Attempting to read the entire
4120	 * contents of arc_buf_hdr_t from the target (as mdb_ctf_vread() does)
4121	 * can cause an error if the allocated size is indeed smaller--it's
4122	 * possible that the 'missing' trailing members of arc_buf_hdr_t
4123	 * (l1arc_buf_hdr_t and/or arc_buf_hdr_crypt_t) may fall into unmapped
4124	 * memory.
4125	 *
4126	 * We use the GETMEMB macro instead which performs an mdb_vread()
4127	 * but only reads enough of the target to retrieve the desired struct
4128	 * member instead of the entire struct.
4129	 */
4130	if (GETMEMB(addr, "arc_buf_hdr", b_flags, flags) == -1)
4131		return (WALK_ERR);
4132
4133	/*
4134	 * We only count headers that have data loaded in the kernel.
4135	 * This means an L1 header must be present as well as the data
4136	 * that corresponds to the L1 header. If there's no L1 header,
4137	 * we can skip the arc_buf_hdr_t completely. If it's present, we
4138	 * must look at the ARC state (b_l1hdr.b_state) to determine if
4139	 * the data is present.
4140	 */
4141	if ((flags & ARC_FLAG_HAS_L1HDR) == 0)
4142		return (WALK_NEXT);
4143
4144	if (GETMEMB(addr, "arc_buf_hdr", b_psize, hdr.b_psize) == -1 ||
4145	    GETMEMB(addr, "arc_buf_hdr", b_lsize, hdr.b_lsize) == -1 ||
4146	    GETMEMB(addr + data->l1hdr_off, "l1arc_buf_hdr", b_bufcnt,
4147	    hdr.b_l1hdr.b_bufcnt) == -1 ||
4148	    GETMEMB(addr + data->l1hdr_off, "l1arc_buf_hdr", b_state,
4149	    hdr.b_l1hdr.b_state) == -1)
4150		return (WALK_ERR);
4151
4152	/*
4153	 * Headers in the ghost states, or the l2c_only state don't have
4154	 * arc buffers linked off of them. Thus, their compressed size
4155	 * is meaningless, so we skip these from the stats.
4156	 */
4157	if (hdr.b_l1hdr.b_state == data->mrug_sym.st_value ||
4158	    hdr.b_l1hdr.b_state == data->mfug_sym.st_value ||
4159	    hdr.b_l1hdr.b_state == data->l2c_sym.st_value) {
4160		return (WALK_NEXT);
4161	}
4162
4163	/*
4164	 * The physical size (compressed) and logical size
4165	 * (uncompressed) are in units of SPA_MINBLOCKSIZE. By default,
4166	 * we use the log2 of this value (rounded down to the nearest
4167	 * integer) to determine the bucket to assign this header to.
4168	 * Thus, the histogram is logarithmic with respect to the size
4169	 * of the header. For example, the following is a mapping of the
4170	 * bucket numbers and the range of header sizes they correspond to:
4171	 *
4172	 *	0: 0 byte headers
4173	 *	1: 512 byte headers
4174	 *	2: [1024 - 2048) byte headers
4175	 *	3: [2048 - 4096) byte headers
4176	 *	4: [4096 - 8192) byte headers
4177	 *	5: [8192 - 16394) byte headers
4178	 *	6: [16384 - 32768) byte headers
4179	 *	7: [32768 - 65536) byte headers
4180	 *	8: [65536 - 131072) byte headers
4181	 *	9: 131072 byte headers
4182	 *
4183	 * If the ARC_CFLAG_VERBOSE flag was specified, we use the
4184	 * physical and logical sizes directly. Thus, the histogram will
4185	 * no longer be logarithmic; instead it will be linear with
4186	 * respect to the size of the header. The following is a mapping
4187	 * of the first many bucket numbers and the header size they
4188	 * correspond to:
4189	 *
4190	 *	0: 0 byte headers
4191	 *	1: 512 byte headers
4192	 *	2: 1024 byte headers
4193	 *	3: 1536 byte headers
4194	 *	4: 2048 byte headers
4195	 *	5: 2560 byte headers
4196	 *	6: 3072 byte headers
4197	 *
4198	 * And so on. Keep in mind that a range of sizes isn't used in
4199	 * the case of linear scale because the headers can only
4200	 * increment or decrement in sizes of 512 bytes. So, it's not
4201	 * possible for a header to be sized in between whats listed
4202	 * above.
4203	 *
4204	 * Also, the above mapping values were calculated assuming a
4205	 * SPA_MINBLOCKSHIFT of 512 bytes and a SPA_MAXBLOCKSIZE of 128K.
4206	 */
4207
4208	if (data->arc_cflags & ARC_CFLAG_VERBOSE) {
4209		cbucket = hdr.b_psize;
4210		ubucket = hdr.b_lsize;
4211	} else {
4212		cbucket = highbit64(hdr.b_psize);
4213		ubucket = highbit64(hdr.b_lsize);
4214	}
4215
4216	bufcnt = hdr.b_l1hdr.b_bufcnt;
4217	if (bufcnt >= data->hist_nbuckets)
4218		bufcnt = data->hist_nbuckets - 1;
4219
4220	/* Ensure we stay within the bounds of the histogram array */
4221	ASSERT3U(cbucket, <, data->hist_nbuckets);
4222	ASSERT3U(ubucket, <, data->hist_nbuckets);
4223
4224	if (hdr.b_l1hdr.b_state == data->anon_sym.st_value) {
4225		data->anon_c_hist[cbucket]++;
4226		data->anon_u_hist[ubucket]++;
4227		data->anon_bufs[bufcnt]++;
4228	} else if (hdr.b_l1hdr.b_state == data->mru_sym.st_value) {
4229		data->mru_c_hist[cbucket]++;
4230		data->mru_u_hist[ubucket]++;
4231		data->mru_bufs[bufcnt]++;
4232	} else if (hdr.b_l1hdr.b_state == data->mfu_sym.st_value) {
4233		data->mfu_c_hist[cbucket]++;
4234		data->mfu_u_hist[ubucket]++;
4235		data->mfu_bufs[bufcnt]++;
4236	}
4237
4238	data->all_c_hist[cbucket]++;
4239	data->all_u_hist[ubucket]++;
4240	data->all_bufs[bufcnt]++;
4241
4242	return (WALK_NEXT);
4243}
4244
4245/* ARGSUSED */
4246static int
4247arc_compression_stats(uintptr_t addr, uint_t flags, int argc,
4248    const mdb_arg_t *argv)
4249{
4250	arc_compression_stats_data_t data = { 0 };
4251	unsigned int max_shifted = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT;
4252	unsigned int hist_size;
4253	char range[32];
4254	int rc = DCMD_OK;
4255	int off;
4256
4257	if (mdb_getopts(argc, argv,
4258	    'v', MDB_OPT_SETBITS, ARC_CFLAG_VERBOSE, &data.arc_cflags,
4259	    'a', MDB_OPT_SETBITS, ARC_CFLAG_ANON, &data.arc_cflags,
4260	    'b', MDB_OPT_SETBITS, ARC_CFLAG_BUFS, &data.arc_cflags,
4261	    'r', MDB_OPT_SETBITS, ARC_CFLAG_MRU, &data.arc_cflags,
4262	    'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags,
4263	    NULL) != argc)
4264		return (DCMD_USAGE);
4265
4266	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_anon", &data.anon_sym) ||
4267	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru", &data.mru_sym) ||
4268	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru_ghost", &data.mrug_sym) ||
4269	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu", &data.mfu_sym) ||
4270	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu_ghost", &data.mfug_sym) ||
4271	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_l2c_only", &data.l2c_sym)) {
4272		mdb_warn("can't find arc state symbol");
4273		return (DCMD_ERR);
4274	}
4275
4276	/*
4277	 * Determine the maximum expected size for any header, and use
4278	 * this to determine the number of buckets needed for each
4279	 * histogram. If ARC_CFLAG_VERBOSE is specified, this value is
4280	 * used directly; otherwise the log2 of the maximum size is
4281	 * used. Thus, if using a log2 scale there's a maximum of 10
4282	 * possible buckets, while the linear scale (when using
4283	 * ARC_CFLAG_VERBOSE) has a maximum of 257 buckets.
4284	 */
4285	if (data.arc_cflags & ARC_CFLAG_VERBOSE)
4286		data.hist_nbuckets = max_shifted + 1;
4287	else
4288		data.hist_nbuckets = highbit64(max_shifted) + 1;
4289
4290	hist_size = sizeof (uint64_t) * data.hist_nbuckets;
4291
4292	data.anon_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
4293	data.anon_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
4294	data.anon_bufs = mdb_zalloc(hist_size, UM_SLEEP);
4295
4296	data.mru_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
4297	data.mru_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
4298	data.mru_bufs = mdb_zalloc(hist_size, UM_SLEEP);
4299
4300	data.mfu_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
4301	data.mfu_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
4302	data.mfu_bufs = mdb_zalloc(hist_size, UM_SLEEP);
4303
4304	data.all_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
4305	data.all_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
4306	data.all_bufs = mdb_zalloc(hist_size, UM_SLEEP);
4307
4308	if ((off = mdb_ctf_offsetof_by_name(ZFS_STRUCT "arc_buf_hdr",
4309	    "b_l1hdr")) == -1) {
4310		mdb_warn("could not get offset of b_l1hdr from arc_buf_hdr_t");
4311		rc = DCMD_ERR;
4312		goto out;
4313	}
4314	data.l1hdr_off = off;
4315
4316	if (mdb_walk("arc_buf_hdr_t_full", arc_compression_stats_cb,
4317	    &data) != 0) {
4318		mdb_warn("can't walk arc_buf_hdr's");
4319		rc = DCMD_ERR;
4320		goto out;
4321	}
4322
4323	if (data.arc_cflags & ARC_CFLAG_VERBOSE) {
4324		rc = mdb_snprintf(range, sizeof (range),
4325		    "[n*%llu, (n+1)*%llu)", SPA_MINBLOCKSIZE,
4326		    SPA_MINBLOCKSIZE);
4327	} else {
4328		rc = mdb_snprintf(range, sizeof (range),
4329		    "[2^(n-1)*%llu, 2^n*%llu)", SPA_MINBLOCKSIZE,
4330		    SPA_MINBLOCKSIZE);
4331	}
4332
4333	if (rc < 0) {
4334		/* snprintf failed, abort the dcmd */
4335		rc = DCMD_ERR;
4336		goto out;
4337	} else {
4338		/* snprintf succeeded above, reset return code */
4339		rc = DCMD_OK;
4340	}
4341
4342	if (data.arc_cflags & ARC_CFLAG_ANON) {
4343		if (data.arc_cflags & ARC_CFLAG_BUFS) {
4344			mdb_printf("Histogram of the number of anon buffers "
4345			    "that are associated with an arc hdr.\n");
4346			dump_histogram(data.anon_bufs, data.hist_nbuckets, 0);
4347			mdb_printf("\n");
4348		}
4349		mdb_printf("Histogram of compressed anon buffers.\n"
4350		    "Each bucket represents buffers of size: %s.\n", range);
4351		dump_histogram(data.anon_c_hist, data.hist_nbuckets, 0);
4352		mdb_printf("\n");
4353
4354		mdb_printf("Histogram of uncompressed anon buffers.\n"
4355		    "Each bucket represents buffers of size: %s.\n", range);
4356		dump_histogram(data.anon_u_hist, data.hist_nbuckets, 0);
4357		mdb_printf("\n");
4358	}
4359
4360	if (data.arc_cflags & ARC_CFLAG_MRU) {
4361		if (data.arc_cflags & ARC_CFLAG_BUFS) {
4362			mdb_printf("Histogram of the number of mru buffers "
4363			    "that are associated with an arc hdr.\n");
4364			dump_histogram(data.mru_bufs, data.hist_nbuckets, 0);
4365			mdb_printf("\n");
4366		}
4367		mdb_printf("Histogram of compressed mru buffers.\n"
4368		    "Each bucket represents buffers of size: %s.\n", range);
4369		dump_histogram(data.mru_c_hist, data.hist_nbuckets, 0);
4370		mdb_printf("\n");
4371
4372		mdb_printf("Histogram of uncompressed mru buffers.\n"
4373		    "Each bucket represents buffers of size: %s.\n", range);
4374		dump_histogram(data.mru_u_hist, data.hist_nbuckets, 0);
4375		mdb_printf("\n");
4376	}
4377
4378	if (data.arc_cflags & ARC_CFLAG_MFU) {
4379		if (data.arc_cflags & ARC_CFLAG_BUFS) {
4380			mdb_printf("Histogram of the number of mfu buffers "
4381			    "that are associated with an arc hdr.\n");
4382			dump_histogram(data.mfu_bufs, data.hist_nbuckets, 0);
4383			mdb_printf("\n");
4384		}
4385
4386		mdb_printf("Histogram of compressed mfu buffers.\n"
4387		    "Each bucket represents buffers of size: %s.\n", range);
4388		dump_histogram(data.mfu_c_hist, data.hist_nbuckets, 0);
4389		mdb_printf("\n");
4390
4391		mdb_printf("Histogram of uncompressed mfu buffers.\n"
4392		    "Each bucket represents buffers of size: %s.\n", range);
4393		dump_histogram(data.mfu_u_hist, data.hist_nbuckets, 0);
4394		mdb_printf("\n");
4395	}
4396
4397	if (data.arc_cflags & ARC_CFLAG_BUFS) {
4398		mdb_printf("Histogram of all buffers that "
4399		    "are associated with an arc hdr.\n");
4400		dump_histogram(data.all_bufs, data.hist_nbuckets, 0);
4401		mdb_printf("\n");
4402	}
4403
4404	mdb_printf("Histogram of all compressed buffers.\n"
4405	    "Each bucket represents buffers of size: %s.\n", range);
4406	dump_histogram(data.all_c_hist, data.hist_nbuckets, 0);
4407	mdb_printf("\n");
4408
4409	mdb_printf("Histogram of all uncompressed buffers.\n"
4410	    "Each bucket represents buffers of size: %s.\n", range);
4411	dump_histogram(data.all_u_hist, data.hist_nbuckets, 0);
4412
4413out:
4414	mdb_free(data.anon_c_hist, hist_size);
4415	mdb_free(data.anon_u_hist, hist_size);
4416	mdb_free(data.anon_bufs, hist_size);
4417
4418	mdb_free(data.mru_c_hist, hist_size);
4419	mdb_free(data.mru_u_hist, hist_size);
4420	mdb_free(data.mru_bufs, hist_size);
4421
4422	mdb_free(data.mfu_c_hist, hist_size);
4423	mdb_free(data.mfu_u_hist, hist_size);
4424	mdb_free(data.mfu_bufs, hist_size);
4425
4426	mdb_free(data.all_c_hist, hist_size);
4427	mdb_free(data.all_u_hist, hist_size);
4428	mdb_free(data.all_bufs, hist_size);
4429
4430	return (rc);
4431}
4432
/*
 * Debugger-side copy of the members of the target's range_seg64 that
 * range_tree_cb() reads with mdb_ctf_vread().  The bounds are printed as
 * the half-open interval [rs_start, rs_end) without further conversion.
 */
typedef struct mdb_range_seg64 {
	uint64_t rs_start, rs_end;
} mdb_range_seg64_t;
4437
/*
 * Debugger-side copy of the members of the target's range_seg32 that
 * range_tree_cb() reads with mdb_ctf_vread().  The bounds are stored as
 * 32-bit values; range_tree_cb() shifts them left by the tree's rt_shift
 * and adds rt_start before printing them.
 */
typedef struct mdb_range_seg32 {
	uint32_t rs_start, rs_end;
} mdb_range_seg32_t;
4442
4443/* ARGSUSED */
4444static int
4445range_tree_cb(uintptr_t addr, const void *unknown, void *arg)
4446{
4447	mdb_range_tree_t *rt = (mdb_range_tree_t *)arg;
4448	uint64_t start, end;
4449
4450	if (rt->rt_type == RANGE_SEG64) {
4451		mdb_range_seg64_t rs;
4452
4453		if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg64",
4454		    "mdb_range_seg64_t", addr, 0) == -1)
4455			return (DCMD_ERR);
4456		start = rs.rs_start;
4457		end = rs.rs_end;
4458	} else {
4459		ASSERT3U(rt->rt_type, ==, RANGE_SEG32);
4460		mdb_range_seg32_t rs;
4461
4462		if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg32",
4463		    "mdb_range_seg32_t", addr, 0) == -1)
4464			return (DCMD_ERR);
4465		start = ((uint64_t)rs.rs_start << rt->rt_shift) + rt->rt_start;
4466		end = ((uint64_t)rs.rs_end << rt->rt_shift) + rt->rt_start;
4467	}
4468
4469	mdb_printf("\t[%llx %llx) (length %llx)\n", start, end, end - start);
4470
4471	return (0);
4472}
4473
4474/* ARGSUSED */
4475static int
4476range_tree(uintptr_t addr, uint_t flags, int argc,
4477    const mdb_arg_t *argv)
4478{
4479	mdb_range_tree_t rt;
4480	uintptr_t btree_addr;
4481
4482	if (!(flags & DCMD_ADDRSPEC))
4483		return (DCMD_USAGE);
4484
4485	if (mdb_ctf_vread(&rt, ZFS_STRUCT "range_tree", "mdb_range_tree_t",
4486	    addr, 0) == -1)
4487		return (DCMD_ERR);
4488
4489	mdb_printf("%p: range tree of %llu entries, %llu bytes\n",
4490	    addr, rt.rt_root.bt_num_elems, rt.rt_space);
4491
4492	btree_addr = addr +
4493	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "range_tree", "rt_root");
4494
4495	if (mdb_pwalk("zfs_btree", range_tree_cb, &rt, btree_addr) != 0) {
4496		mdb_warn("can't walk range_tree segments");
4497		return (DCMD_ERR);
4498	}
4499	return (DCMD_OK);
4500}
4501
/*
 * Debugger-side copy of the members of the target's spa_log_sm that
 * logsm_stats_cb() reads with mdb_ctf_vread() and prints for ::logsm_stats.
 */
typedef struct mdb_spa_log_sm {
	uint64_t sls_sm_obj, sls_txg, sls_nblocks, sls_mscount;
} mdb_spa_log_sm_t;
4508
4509/* ARGSUSED */
4510static int
4511logsm_stats_cb(uintptr_t addr, const void *unknown, void *arg)
4512{
4513	mdb_spa_log_sm_t sls;
4514	if (mdb_ctf_vread(&sls, ZFS_STRUCT "spa_log_sm", "mdb_spa_log_sm_t",
4515	    addr, 0) == -1)
4516		return (WALK_ERR);
4517
4518	mdb_printf("%7lld %7lld %7lld %7lld\n",
4519	    sls.sls_txg, sls.sls_nblocks, sls.sls_mscount, sls.sls_sm_obj);
4520
4521	return (WALK_NEXT);
4522}
/*
 * Debugger-side copy of the members of the target's log_summary_entry that
 * logsm_summary_cb() reads with mdb_ctf_vread() and prints for
 * ::logsm_stats.
 */
typedef struct mdb_log_summary_entry {
	uint64_t lse_start, lse_blkcount, lse_mscount;
} mdb_log_summary_entry_t;
4528
4529/* ARGSUSED */
4530static int
4531logsm_summary_cb(uintptr_t addr, const void *unknown, void *arg)
4532{
4533	mdb_log_summary_entry_t lse;
4534	if (mdb_ctf_vread(&lse, ZFS_STRUCT "log_summary_entry",
4535	    "mdb_log_summary_entry_t", addr, 0) == -1)
4536		return (WALK_ERR);
4537
4538	mdb_printf("%7lld %7lld %7lld\n",
4539	    lse.lse_start, lse.lse_blkcount, lse.lse_mscount);
4540	return (WALK_NEXT);
4541}
4542
4543/* ARGSUSED */
4544static int
4545logsm_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4546{
4547	if (!(flags & DCMD_ADDRSPEC))
4548		return (DCMD_USAGE);
4549
4550	uintptr_t sls_avl_addr = addr +
4551	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "spa", "spa_sm_logs_by_txg");
4552	uintptr_t summary_addr = addr +
4553	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "spa", "spa_log_summary");
4554
4555	mdb_printf("Log Entries:\n");
4556	mdb_printf("%7s %7s %7s %7s\n", "txg", "blk", "ms", "obj");
4557	if (mdb_pwalk("avl", logsm_stats_cb, NULL, sls_avl_addr) != 0)
4558		return (DCMD_ERR);
4559
4560	mdb_printf("\nSummary Entries:\n");
4561	mdb_printf("%7s %7s %7s\n", "txg", "blk", "ms");
4562	if (mdb_pwalk("list", logsm_summary_cb, NULL, summary_addr) != 0)
4563		return (DCMD_ERR);
4564
4565	return (DCMD_OK);
4566}
4567
/*
 * MDB module linkage information:
 *
 * We declare tables describing our dcmds and walkers, and a function
 * named _mdb_init that returns a pointer to our module information.
 */
4574
4575static const mdb_dcmd_t dcmds[] = {
4576	{ "arc", "[-bkmg]", "print ARC variables", arc_print },
4577	{ "blkptr", ":", "print blkptr_t", blkptr },
4578	{ "dva", ":", "print dva_t", dva },
4579	{ "dbuf", ":", "print dmu_buf_impl_t", dbuf },
4580	{ "dbuf_stats", ":", "dbuf stats", dbuf_stats },
4581	{ "dbufs",
4582	    "\t[-O objset_t*] [-n objset_name | \"mos\"] "
4583	    "[-o object | \"mdn\"] \n"
4584	    "\t[-l level] [-b blkid | \"bonus\"]",
4585	    "find dmu_buf_impl_t's that match specified criteria", dbufs },
4586	{ "abuf_find", "dva_word[0] dva_word[1]",
4587	    "find arc_buf_hdr_t of a specified DVA",
4588	    abuf_find },
4589	{ "logsm_stats", ":", "print log space map statistics of a spa_t",
4590	    logsm_stats},
4591	{ "spa", "?[-cevmMh]\n"
4592	    "\t-c display spa config\n"
4593	    "\t-e display vdev statistics\n"
4594	    "\t-v display vdev information\n"
4595	    "\t-m display metaslab statistics\n"
4596	    "\t-M display metaslab group statistics\n"
4597	    "\t-h display histogram (requires -m or -M)\n",
4598	    "spa_t summary", spa_print },
4599	{ "spa_config", ":", "print spa_t configuration", spa_print_config },
4600	{ "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
4601	{ "spa_vdevs", ":[-emMh]\n"
4602	    "\t-e display vdev statistics\n"
4603	    "\t-m dispaly metaslab statistics\n"
4604	    "\t-M display metaslab group statistic\n"
4605	    "\t-h display histogram (requires -m or -M)\n",
4606	    "given a spa_t, print vdev summary", spa_vdevs },
4607	{ "sm_entries", "<buffer length in bytes>",
4608	    "print out space map entries from a buffer decoded",
4609	    sm_entries},
4610	{ "vdev", ":[-remMh]\n"
4611	    "\t-r display recursively\n"
4612	    "\t-e display statistics\n"
4613	    "\t-m display metaslab statistics (top level vdev only)\n"
4614	    "\t-M display metaslab group statistics (top level vdev only)\n"
4615	    "\t-h display histogram (requires -m or -M)\n",
4616	    "vdev_t summary", vdev_print },
4617	{ "zio", ":[-cpr]\n"
4618	    "\t-c display children\n"
4619	    "\t-p display parents\n"
4620	    "\t-r display recursively",
4621	    "zio_t summary", zio_print },
4622	{ "zio_state", "?", "print out all zio_t structures on system or "
4623	    "for a particular pool", zio_state },
4624	{ "zfs_blkstats", ":[-v]",
4625	    "given a spa_t, print block type stats from last scrub",
4626	    zfs_blkstats },
4627	{ "zfs_params", "", "print zfs tunable parameters", zfs_params },
4628	{ "zfs_refcount", ":[-r]\n"
4629	    "\t-r display recently removed references",
4630	    "print zfs_refcount_t holders", zfs_refcount },
4631	{ "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf },
4632	{ "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t",
4633	    zfs_acl_dump },
4634	{ "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print },
4635	{ "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print },
4636	{ "sa_attr_table", ":", "print SA attribute table from sa_os_t",
4637	    sa_attr_table},
4638	{ "sa_attr", ": attr_id",
4639	    "print SA attribute address when given sa_handle_t", sa_attr_print},
4640	{ "zfs_dbgmsg", ":[-va]",
4641	    "print zfs debug log", dbgmsg},
4642	{ "rrwlock", ":",
4643	    "print rrwlock_t, including readers", rrwlock},
4644	{ "metaslab_weight", "weight",
4645	    "print metaslab weight", metaslab_weight},
4646	{ "metaslab_trace", ":",
4647	    "print metaslab allocation trace records", metaslab_trace},
4648	{ "arc_compression_stats", ":[-vabrf]\n"
4649	    "\t-v verbose, display a linearly scaled histogram\n"
4650	    "\t-a display ARC_anon state statistics individually\n"
4651	    "\t-r display ARC_mru state statistics individually\n"
4652	    "\t-f display ARC_mfu state statistics individually\n"
4653	    "\t-b display histogram of buffer counts\n",
4654	    "print a histogram of compressed arc buffer sizes",
4655	    arc_compression_stats},
4656	{ "range_tree", ":",
4657	    "print entries in range_tree_t", range_tree},
4658	{ NULL }
4659};
4660
4661static const mdb_walker_t walkers[] = {
4662	{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
4663	    txg_list_walk_init, txg_list_walk_step, NULL },
4664	{ "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
4665	    txg_list0_walk_init, txg_list_walk_step, NULL },
4666	{ "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
4667	    txg_list1_walk_init, txg_list_walk_step, NULL },
4668	{ "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
4669	    txg_list2_walk_init, txg_list_walk_step, NULL },
4670	{ "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
4671	    txg_list3_walk_init, txg_list_walk_step, NULL },
4672	{ "zio", "walk all zio structures, optionally for a particular spa_t",
4673	    zio_walk_init, zio_walk_step, NULL },
4674	{ "zio_root",
4675	    "walk all root zio_t structures, optionally for a particular spa_t",
4676	    zio_walk_init, zio_walk_root_step, NULL },
4677	{ "spa", "walk all spa_t entries in the namespace",
4678	    spa_walk_init, spa_walk_step, NULL },
4679	{ "metaslab", "given a spa_t *, walk all metaslab_t structures",
4680	    metaslab_walk_init, metaslab_walk_step, NULL },
4681	{ "multilist", "given a multilist_t *, walk all list_t structures",
4682	    multilist_walk_init, multilist_walk_step, NULL },
4683	{ "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes",
4684	    zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL },
4685	{ "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs",
4686	    zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL },
4687	{ "zfs_acl_node_aces0",
4688	    "given a zfs_acl_node_t, walk all ACEs as ace_t",
4689	    zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL },
4690	{ "zfs_btree", "given a zfs_btree_t *, walk all entries",
4691	    btree_walk_init, btree_walk_step, btree_walk_fini },
4692	{ NULL }
4693};
4694
4695static const mdb_modinfo_t modinfo = {
4696	MDB_API_VERSION, dcmds, walkers
4697};
4698
4699const mdb_modinfo_t *
4700_mdb_init(void)
4701{
4702	return (&modinfo);
4703}
4704