1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
25 * Copyright 2019 Joyent, Inc.
26 */
27
28/* Portions Copyright 2010 Robert Milkowski */
29
/*
 * ZFS_MDB lets dmu.h know that we don't have dmu_ot, and we will define our
 * own macros to access the target's dmu_ot.  Therefore it must be defined
 * before including any ZFS headers.  DMU_OT_IS_ENCRYPTED_IMPL() is defined
 * below in terms of mdb_dmu_ot_is_encrypted_impl().  Note that we don't
 * define DMU_OT_BYTESWAP_IMPL(), therefore using it will result in a
 * compilation error.  If it is needed in the future, we can implement it
 * similarly to mdb_dmu_ot_is_encrypted_impl().
 */
38#define	ZFS_MDB
39#define	DMU_OT_IS_ENCRYPTED_IMPL(ot) mdb_dmu_ot_is_encrypted_impl(ot)
40
41#include <mdb/mdb_ctf.h>
42#include <sys/zfs_context.h>
43#include <sys/mdb_modapi.h>
44#include <sys/dbuf.h>
45#include <sys/dmu_objset.h>
46#include <sys/dsl_dir.h>
47#include <sys/dsl_pool.h>
48#include <sys/metaslab_impl.h>
49#include <sys/space_map.h>
50#include <sys/list.h>
51#include <sys/vdev_impl.h>
52#include <sys/zap_leaf.h>
53#include <sys/zap_impl.h>
54#include <ctype.h>
55#include <sys/zfs_acl.h>
56#include <sys/sa_impl.h>
57#include <sys/multilist.h>
58
/*
 * ZFS_OBJ_NAME is the name of the object whose symbols and types this module
 * looks up: "zfs" when built with _KERNEL defined, "libzpool.so.1" otherwise.
 */
#ifdef _KERNEL
#define	ZFS_OBJ_NAME	"zfs"
extern int64_t mdb_gethrtime(void);
#else
#define	ZFS_OBJ_NAME	"libzpool.so.1"
#endif

/*
 * String prefix for an object-scoped struct type name; it is concatenated
 * with a struct tag at the point of use, e.g. ZFS_STRUCT "dsl_dir".
 */
#define	ZFS_STRUCT	"struct " ZFS_OBJ_NAME "`"

#ifndef _KERNEL
/*
 * NOTE(review): presumably required by the userland ZFS headers/libraries
 * this module is built against -- confirm before removing.
 */
int aok;
#endif
71
/*
 * Option bits for spa printing.  Each flag occupies its own bit so that any
 * combination can be OR-ed together.
 */
enum spa_flags {
	SPA_FLAG_CONFIG			= 0x01,
	SPA_FLAG_VDEVS			= 0x02,
	SPA_FLAG_ERRORS			= 0x04,
	SPA_FLAG_METASLAB_GROUPS	= 0x08,
	SPA_FLAG_METASLABS		= 0x10,
	SPA_FLAG_HISTOGRAMS		= 0x20
};

/*
 * The set of flags that, when any one of them is present, causes spa_print
 * to call spa_vdevs.
 */
#define	SPA_FLAG_ALL_VDEV	\
	(SPA_FLAG_METASLABS | SPA_FLAG_METASLAB_GROUPS | SPA_FLAG_ERRORS | \
	SPA_FLAG_VDEVS)
87
88static int
89getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
90    const char *member, int len, void *buf)
91{
92	mdb_ctf_id_t id;
93	ulong_t off;
94	char name[64];
95
96	if (idp == NULL) {
97		if (mdb_ctf_lookup_by_name(type, &id) == -1) {
98			mdb_warn("couldn't find type %s", type);
99			return (DCMD_ERR);
100		}
101		idp = &id;
102	} else {
103		type = name;
104		mdb_ctf_type_name(*idp, name, sizeof (name));
105	}
106
107	if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
108		mdb_warn("couldn't find member %s of type %s\n", member, type);
109		return (DCMD_ERR);
110	}
111	if (off % 8 != 0) {
112		mdb_warn("member %s of type %s is unsupported bitfield",
113		    member, type);
114		return (DCMD_ERR);
115	}
116	off /= 8;
117
118	if (mdb_vread(buf, len, addr + off) == -1) {
119		mdb_warn("failed to read %s from %s at %p",
120		    member, type, addr + off);
121		return (DCMD_ERR);
122	}
123	/* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
124
125	return (0);
126}
127
/*
 * Convenience wrappers around getmember().  In both, `member' is the bare
 * member name (it is stringified) and `dest' is an lvalue whose size decides
 * how many bytes are read.  Each expands to getmember()'s return value.
 *
 * GETMEMB	looks the type up by name: ZFS_STRUCT followed by structname
 * GETMEMBID	uses a CTF id the caller has already resolved
 */
#define	GETMEMB(addr, structname, member, dest)	\
	getmember((addr), ZFS_STRUCT structname, NULL, #member,	\
	    sizeof (dest), &(dest))

#define	GETMEMBID(addr, ctfid, member, dest)	\
	getmember((addr), NULL, (ctfid), #member,	\
	    sizeof (dest), &(dest))
134
135static boolean_t
136strisprint(const char *cp)
137{
138	for (; *cp; cp++) {
139		if (!isprint(*cp))
140			return (B_FALSE);
141	}
142	return (B_TRUE);
143}
144
145/*
146 * <addr>::sm_entries <buffer length in bytes>
147 *
148 * Treat the buffer specified by the given address as a buffer that contains
149 * space map entries. Iterate over the specified number of entries and print
150 * them in both encoded and decoded form.
151 */
152/* ARGSUSED */
153static int
154sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
155{
156	uint64_t bufsz = 0;
157	boolean_t preview = B_FALSE;
158
159	if (!(flags & DCMD_ADDRSPEC))
160		return (DCMD_USAGE);
161
162	if (argc < 1) {
163		preview = B_TRUE;
164		bufsz = 2;
165	} else if (argc != 1) {
166		return (DCMD_USAGE);
167	} else {
168		switch (argv[0].a_type) {
169		case MDB_TYPE_STRING:
170			bufsz = mdb_strtoull(argv[0].a_un.a_str);
171			break;
172		case MDB_TYPE_IMMEDIATE:
173			bufsz = argv[0].a_un.a_val;
174			break;
175		default:
176			return (DCMD_USAGE);
177		}
178	}
179
180	char *actions[] = { "ALLOC", "FREE", "INVALID" };
181	for (uintptr_t bufend = addr + bufsz; addr < bufend;
182	    addr += sizeof (uint64_t)) {
183		uint64_t nwords;
184		uint64_t start_addr = addr;
185
186		uint64_t word = 0;
187		if (mdb_vread(&word, sizeof (word), addr) == -1) {
188			mdb_warn("failed to read space map entry %p", addr);
189			return (DCMD_ERR);
190		}
191
192		if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) {
193			(void) mdb_printf("\t    [%6llu] %s: txg %llu, "
194			    "pass %llu\n",
195			    (u_longlong_t)(addr),
196			    actions[SM_DEBUG_ACTION_DECODE(word)],
197			    (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
198			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
199			continue;
200		}
201
202		char entry_type;
203		uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID;
204
205		if (SM_PREFIX_DECODE(word) != SM2_PREFIX) {
206			entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
207			    'A' : 'F';
208			raw_offset = SM_OFFSET_DECODE(word);
209			raw_run = SM_RUN_DECODE(word);
210			nwords = 1;
211		} else {
212			ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX);
213
214			raw_run = SM2_RUN_DECODE(word);
215			vdev_id = SM2_VDEV_DECODE(word);
216
217			/* it is a two-word entry so we read another word */
218			addr += sizeof (uint64_t);
219			if (addr >= bufend) {
220				mdb_warn("buffer ends in the middle of a two "
221				    "word entry\n", addr);
222				return (DCMD_ERR);
223			}
224
225			if (mdb_vread(&word, sizeof (word), addr) == -1) {
226				mdb_warn("failed to read space map entry %p",
227				    addr);
228				return (DCMD_ERR);
229			}
230
231			entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ?
232			    'A' : 'F';
233			raw_offset = SM2_OFFSET_DECODE(word);
234			nwords = 2;
235		}
236
237		(void) mdb_printf("\t    [%6llx]    %c  range:"
238		    " %010llx-%010llx  size: %06llx vdev: %06llu words: %llu\n",
239		    (u_longlong_t)start_addr,
240		    entry_type, (u_longlong_t)raw_offset,
241		    (u_longlong_t)(raw_offset + raw_run),
242		    (u_longlong_t)raw_run,
243		    (u_longlong_t)vdev_id, (u_longlong_t)nwords);
244
245		if (preview)
246			break;
247	}
248	return (DCMD_OK);
249}
250
251static int
252mdb_dsl_dir_name(uintptr_t addr, char *buf)
253{
254	static int gotid;
255	static mdb_ctf_id_t dd_id;
256	uintptr_t dd_parent;
257	char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
258
259	if (!gotid) {
260		if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir",
261		    &dd_id) == -1) {
262			mdb_warn("couldn't find struct dsl_dir");
263			return (DCMD_ERR);
264		}
265		gotid = TRUE;
266	}
267	if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
268	    GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
269		return (DCMD_ERR);
270	}
271
272	if (dd_parent) {
273		if (mdb_dsl_dir_name(dd_parent, buf))
274			return (DCMD_ERR);
275		strcat(buf, "/");
276	}
277
278	if (dd_myname[0])
279		strcat(buf, dd_myname);
280	else
281		strcat(buf, "???");
282
283	return (0);
284}
285
286static int
287objset_name(uintptr_t addr, char *buf)
288{
289	static int gotid;
290	static mdb_ctf_id_t os_id, ds_id;
291	uintptr_t os_dsl_dataset;
292	char ds_snapname[ZFS_MAX_DATASET_NAME_LEN];
293	uintptr_t ds_dir;
294
295	buf[0] = '\0';
296
297	if (!gotid) {
298		if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset",
299		    &os_id) == -1) {
300			mdb_warn("couldn't find struct objset");
301			return (DCMD_ERR);
302		}
303		if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset",
304		    &ds_id) == -1) {
305			mdb_warn("couldn't find struct dsl_dataset");
306			return (DCMD_ERR);
307		}
308
309		gotid = TRUE;
310	}
311
312	if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset))
313		return (DCMD_ERR);
314
315	if (os_dsl_dataset == 0) {
316		strcat(buf, "mos");
317		return (0);
318	}
319
320	if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
321	    GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
322		return (DCMD_ERR);
323	}
324
325	if (ds_dir && mdb_dsl_dir_name(ds_dir, buf))
326		return (DCMD_ERR);
327
328	if (ds_snapname[0]) {
329		strcat(buf, "@");
330		strcat(buf, ds_snapname);
331	}
332	return (0);
333}
334
335static int
336enum_lookup(char *type, int val, const char *prefix, size_t size, char *out)
337{
338	const char *cp;
339	size_t len = strlen(prefix);
340	mdb_ctf_id_t enum_type;
341
342	if (mdb_ctf_lookup_by_name(type, &enum_type) != 0) {
343		mdb_warn("Could not find enum for %s", type);
344		return (-1);
345	}
346
347	if ((cp = mdb_ctf_enum_name(enum_type, val)) != NULL) {
348		if (strncmp(cp, prefix, len) == 0)
349			cp += len;
350		(void) strncpy(out, cp, size);
351	} else {
352		mdb_snprintf(out, size, "? (%d)", val);
353	}
354	return (0);
355}
356
357/* ARGSUSED */
358static int
359zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
360{
361	/*
362	 * This table can be approximately generated by running:
363	 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
364	 */
365	static const char *params[] = {
366		"arc_lotsfree_percent",
367		"arc_pages_pp_reserve",
368		"arc_reduce_dnlc_percent",
369		"arc_swapfs_reserve",
370		"arc_zio_arena_free_shift",
371		"dbuf_cache_hiwater_pct",
372		"dbuf_cache_lowater_pct",
373		"dbuf_cache_max_bytes",
374		"dbuf_cache_max_shift",
375		"ddt_zap_indirect_blockshift",
376		"ddt_zap_leaf_blockshift",
377		"ditto_same_vdev_distance_shift",
378		"dmu_find_threads",
379		"dmu_rescan_dnode_threshold",
380		"dsl_scan_delay_completion",
381		"fzap_default_block_shift",
382		"l2arc_feed_again",
383		"l2arc_feed_min_ms",
384		"l2arc_feed_secs",
385		"l2arc_headroom",
386		"l2arc_headroom_boost",
387		"l2arc_noprefetch",
388		"l2arc_norw",
389		"l2arc_write_boost",
390		"l2arc_write_max",
391		"metaslab_aliquot",
392		"metaslab_bias_enabled",
393		"metaslab_debug_load",
394		"metaslab_debug_unload",
395		"metaslab_df_alloc_threshold",
396		"metaslab_df_free_pct",
397		"metaslab_fragmentation_factor_enabled",
398		"metaslab_force_ganging",
399		"metaslab_lba_weighting_enabled",
400		"metaslab_load_pct",
401		"metaslab_min_alloc_size",
402		"metaslab_ndf_clump_shift",
403		"metaslab_preload_enabled",
404		"metaslab_preload_limit",
405		"metaslab_trace_enabled",
406		"metaslab_trace_max_entries",
407		"metaslab_unload_delay",
408		"metaslabs_per_vdev",
409		"reference_history",
410		"reference_tracking_enable",
411		"send_holes_without_birth_time",
412		"spa_asize_inflation",
413		"spa_load_verify_data",
414		"spa_load_verify_maxinflight",
415		"spa_load_verify_metadata",
416		"spa_max_replication_override",
417		"spa_min_slop",
418		"spa_mode_global",
419		"spa_slop_shift",
420		"space_map_blksz",
421		"vdev_mirror_shift",
422		"zfetch_max_distance",
423		"zfs_abd_chunk_size",
424		"zfs_abd_scatter_enabled",
425		"zfs_arc_average_blocksize",
426		"zfs_arc_evict_batch_limit",
427		"zfs_arc_grow_retry",
428		"zfs_arc_max",
429		"zfs_arc_meta_limit",
430		"zfs_arc_meta_min",
431		"zfs_arc_min",
432		"zfs_arc_p_min_shift",
433		"zfs_arc_shrink_shift",
434		"zfs_async_block_max_blocks",
435		"zfs_ccw_retry_interval",
436		"zfs_commit_timeout_pct",
437		"zfs_compressed_arc_enabled",
438		"zfs_condense_indirect_commit_entry_delay_ticks",
439		"zfs_condense_indirect_vdevs_enable",
440		"zfs_condense_max_obsolete_bytes",
441		"zfs_condense_min_mapping_bytes",
442		"zfs_condense_pct",
443		"zfs_dbgmsg_maxsize",
444		"zfs_deadman_checktime_ms",
445		"zfs_deadman_enabled",
446		"zfs_deadman_synctime_ms",
447		"zfs_dedup_prefetch",
448		"zfs_default_bs",
449		"zfs_default_ibs",
450		"zfs_delay_max_ns",
451		"zfs_delay_min_dirty_percent",
452		"zfs_delay_resolution_ns",
453		"zfs_delay_scale",
454		"zfs_dirty_data_max",
455		"zfs_dirty_data_max_max",
456		"zfs_dirty_data_max_percent",
457		"zfs_dirty_data_sync",
458		"zfs_flags",
459		"zfs_free_bpobj_enabled",
460		"zfs_free_leak_on_eio",
461		"zfs_free_min_time_ms",
462		"zfs_fsync_sync_cnt",
463		"zfs_immediate_write_sz",
464		"zfs_indirect_condense_obsolete_pct",
465		"zfs_lua_check_instrlimit_interval",
466		"zfs_lua_max_instrlimit",
467		"zfs_lua_max_memlimit",
468		"zfs_max_recordsize",
469		"zfs_mdcomp_disable",
470		"zfs_metaslab_condense_block_threshold",
471		"zfs_metaslab_fragmentation_threshold",
472		"zfs_metaslab_segment_weight_enabled",
473		"zfs_metaslab_switch_threshold",
474		"zfs_mg_fragmentation_threshold",
475		"zfs_mg_noalloc_threshold",
476		"zfs_multilist_num_sublists",
477		"zfs_no_scrub_io",
478		"zfs_no_scrub_prefetch",
479		"zfs_nocacheflush",
480		"zfs_nopwrite_enabled",
481		"zfs_object_remap_one_indirect_delay_ticks",
482		"zfs_obsolete_min_time_ms",
483		"zfs_pd_bytes_max",
484		"zfs_per_txg_dirty_frees_percent",
485		"zfs_prefetch_disable",
486		"zfs_read_chunk_size",
487		"zfs_recover",
488		"zfs_recv_queue_length",
489		"zfs_redundant_metadata_most_ditto_level",
490		"zfs_remap_blkptr_enable",
491		"zfs_remove_max_copy_bytes",
492		"zfs_remove_max_segment",
493		"zfs_resilver_delay",
494		"zfs_resilver_min_time_ms",
495		"zfs_scan_idle",
496		"zfs_scan_min_time_ms",
497		"zfs_scrub_delay",
498		"zfs_scrub_limit",
499		"zfs_send_corrupt_data",
500		"zfs_send_queue_length",
501		"zfs_send_set_freerecords_bit",
502		"zfs_sync_pass_deferred_free",
503		"zfs_sync_pass_dont_compress",
504		"zfs_sync_pass_rewrite",
505		"zfs_sync_taskq_batch_pct",
506		"zfs_top_maxinflight",
507		"zfs_txg_timeout",
508		"zfs_vdev_aggregation_limit",
509		"zfs_vdev_async_read_max_active",
510		"zfs_vdev_async_read_min_active",
511		"zfs_vdev_async_write_active_max_dirty_percent",
512		"zfs_vdev_async_write_active_min_dirty_percent",
513		"zfs_vdev_async_write_max_active",
514		"zfs_vdev_async_write_min_active",
515		"zfs_vdev_cache_bshift",
516		"zfs_vdev_cache_max",
517		"zfs_vdev_cache_size",
518		"zfs_vdev_max_active",
519		"zfs_vdev_queue_depth_pct",
520		"zfs_vdev_read_gap_limit",
521		"zfs_vdev_removal_max_active",
522		"zfs_vdev_removal_min_active",
523		"zfs_vdev_scrub_max_active",
524		"zfs_vdev_scrub_min_active",
525		"zfs_vdev_sync_read_max_active",
526		"zfs_vdev_sync_read_min_active",
527		"zfs_vdev_sync_write_max_active",
528		"zfs_vdev_sync_write_min_active",
529		"zfs_vdev_write_gap_limit",
530		"zfs_write_implies_delete_child",
531		"zfs_zil_clean_taskq_maxalloc",
532		"zfs_zil_clean_taskq_minalloc",
533		"zfs_zil_clean_taskq_nthr_pct",
534		"zil_replay_disable",
535		"zil_slog_bulk",
536		"zio_buf_debug_limit",
537		"zio_dva_throttle_enabled",
538		"zio_injection_enabled",
539		"zvol_immediate_write_sz",
540		"zvol_maxphys",
541		"zvol_unmap_enabled",
542		"zvol_unmap_sync_enabled",
543		"zfs_max_dataset_nesting",
544	};
545
546	for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
547		int sz;
548		uint64_t val64;
549		uint32_t *val32p = (uint32_t *)&val64;
550
551		sz = mdb_readvar(&val64, params[i]);
552		if (sz == 4) {
553			mdb_printf("%s = 0x%x\n", params[i], *val32p);
554		} else if (sz == 8) {
555			mdb_printf("%s = 0x%llx\n", params[i], val64);
556		} else {
557			mdb_warn("variable %s not found", params[i]);
558		}
559	}
560
561	return (DCMD_OK);
562}
563
564/* ARGSUSED */
565static int
566dva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
567{
568	dva_t dva;
569	if (mdb_vread(&dva, sizeof (dva_t), addr) == -1) {
570		mdb_warn("failed to read dva_t");
571		return (DCMD_ERR);
572	}
573	mdb_printf("<%llu:%llx:%llx>\n",
574	    (u_longlong_t)DVA_GET_VDEV(&dva),
575	    (u_longlong_t)DVA_GET_OFFSET(&dva),
576	    (u_longlong_t)DVA_GET_ASIZE(&dva));
577
578	return (DCMD_OK);
579}
580
581typedef struct mdb_dmu_object_type_info {
582	boolean_t ot_encrypt;
583} mdb_dmu_object_type_info_t;
584
585static boolean_t
586mdb_dmu_ot_is_encrypted_impl(dmu_object_type_t ot)
587{
588	mdb_dmu_object_type_info_t mdoti;
589	GElf_Sym sym;
590	size_t sz = mdb_ctf_sizeof_by_name("dmu_object_type_info_t");
591
592	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "dmu_ot", &sym)) {
593		mdb_warn("failed to find " ZFS_OBJ_NAME "`dmu_ot");
594		return (B_FALSE);
595	}
596
597	if (mdb_ctf_vread(&mdoti, "dmu_object_type_info_t",
598	    "mdb_dmu_object_type_info_t", sym.st_value + sz * ot, 0) != 0) {
599		return (B_FALSE);
600	}
601
602	return (mdoti.ot_encrypt);
603}
604
605/* ARGSUSED */
606static int
607blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
608{
609	char type[80], checksum[80], compress[80];
610	blkptr_t blk, *bp = &blk;
611	char buf[BP_SPRINTF_LEN];
612
613	if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) {
614		mdb_warn("failed to read blkptr_t");
615		return (DCMD_ERR);
616	}
617
618	if (enum_lookup("enum dmu_object_type", BP_GET_TYPE(bp), "DMU_OT_",
619	    sizeof (type), type) == -1 ||
620	    enum_lookup("enum zio_checksum", BP_GET_CHECKSUM(bp),
621	    "ZIO_CHECKSUM_", sizeof (checksum), checksum) == -1 ||
622	    enum_lookup("enum zio_compress", BP_GET_COMPRESS(bp),
623	    "ZIO_COMPRESS_", sizeof (compress), compress) == -1) {
624		mdb_warn("Could not find blkptr enumerated types");
625		return (DCMD_ERR);
626	}
627
628	SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type,
629	    checksum, compress);
630
631	mdb_printf("%s\n", buf);
632
633	return (DCMD_OK);
634}
635
/*
 * Local view of the target's dmu_buf_impl holding only the members this
 * module reads.  It is filled in by mdb_ctf_vread() in dbuf(), so the member
 * names and nesting here are significant and must not be changed casually.
 */
typedef struct mdb_dmu_buf_impl {
	struct {
		uint64_t db_object;
		uintptr_t db_data;
	} db;
	uintptr_t db_objset;
	uint64_t db_level;
	uint64_t db_blkid;
	struct {
		uint64_t rc_count;
	} db_holds;
} mdb_dmu_buf_impl_t;
648
649/* ARGSUSED */
650static int
651dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
652{
653	mdb_dmu_buf_impl_t db;
654	char objectname[32];
655	char blkidname[32];
656	char path[ZFS_MAX_DATASET_NAME_LEN];
657	int ptr_width = (int)(sizeof (void *)) * 2;
658
659	if (DCMD_HDRSPEC(flags))
660		mdb_printf("%*s %8s %3s %9s %5s %s\n",
661		    ptr_width, "addr", "object", "lvl", "blkid", "holds", "os");
662
663	if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t",
664	    addr, 0) == -1)
665		return (DCMD_ERR);
666
667	if (db.db.db_object == DMU_META_DNODE_OBJECT)
668		(void) strcpy(objectname, "mdn");
669	else
670		(void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
671		    (u_longlong_t)db.db.db_object);
672
673	if (db.db_blkid == DMU_BONUS_BLKID)
674		(void) strcpy(blkidname, "bonus");
675	else
676		(void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
677		    (u_longlong_t)db.db_blkid);
678
679	if (objset_name(db.db_objset, path)) {
680		return (DCMD_ERR);
681	}
682
683	mdb_printf("%*p %8s %3u %9s %5llu %s\n", ptr_width, addr,
684	    objectname, (int)db.db_level, blkidname,
685	    db.db_holds.rc_count, path);
686
687	return (DCMD_OK);
688}
689
690/* ARGSUSED */
691static int
692dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
693{
694#define	HISTOSZ 32
695	uintptr_t dbp;
696	dmu_buf_impl_t db;
697	dbuf_hash_table_t ht;
698	uint64_t bucket, ndbufs;
699	uint64_t histo[HISTOSZ];
700	uint64_t histo2[HISTOSZ];
701	int i, maxidx;
702
703	if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
704		mdb_warn("failed to read 'dbuf_hash_table'");
705		return (DCMD_ERR);
706	}
707
708	for (i = 0; i < HISTOSZ; i++) {
709		histo[i] = 0;
710		histo2[i] = 0;
711	}
712
713	ndbufs = 0;
714	for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
715		int len;
716
717		if (mdb_vread(&dbp, sizeof (void *),
718		    (uintptr_t)(ht.hash_table+bucket)) == -1) {
719			mdb_warn("failed to read hash bucket %u at %p",
720			    bucket, ht.hash_table+bucket);
721			return (DCMD_ERR);
722		}
723
724		len = 0;
725		while (dbp != 0) {
726			if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
727			    dbp) == -1) {
728				mdb_warn("failed to read dbuf at %p", dbp);
729				return (DCMD_ERR);
730			}
731			dbp = (uintptr_t)db.db_hash_next;
732			for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
733				histo2[i]++;
734			len++;
735			ndbufs++;
736		}
737
738		if (len >= HISTOSZ)
739			len = HISTOSZ-1;
740		histo[len]++;
741	}
742
743	mdb_printf("hash table has %llu buckets, %llu dbufs "
744	    "(avg %llu buckets/dbuf)\n",
745	    ht.hash_table_mask+1, ndbufs,
746	    (ht.hash_table_mask+1)/ndbufs);
747
748	mdb_printf("\n");
749	maxidx = 0;
750	for (i = 0; i < HISTOSZ; i++)
751		if (histo[i] > 0)
752			maxidx = i;
753	mdb_printf("hash chain length	number of buckets\n");
754	for (i = 0; i <= maxidx; i++)
755		mdb_printf("%u			%llu\n", i, histo[i]);
756
757	mdb_printf("\n");
758	maxidx = 0;
759	for (i = 0; i < HISTOSZ; i++)
760		if (histo2[i] > 0)
761			maxidx = i;
762	mdb_printf("hash chain depth	number of dbufs\n");
763	for (i = 0; i <= maxidx; i++)
764		mdb_printf("%u or more		%llu	%llu%%\n",
765		    i, histo2[i], histo2[i]*100/ndbufs);
766
767
768	return (DCMD_OK);
769}
770
771#define	CHAIN_END 0xffff
772/*
773 * ::zap_leaf [-v]
774 *
775 * Print a zap_leaf_phys_t, assumed to be 16k
776 */
777/* ARGSUSED */
778static int
779zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
780{
781	char buf[16*1024];
782	int verbose = B_FALSE;
783	int four = B_FALSE;
784	dmu_buf_t l_dbuf;
785	zap_leaf_t l;
786	zap_leaf_phys_t *zlp = (void *)buf;
787	int i;
788
789	if (mdb_getopts(argc, argv,
790	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
791	    '4', MDB_OPT_SETBITS, TRUE, &four,
792	    NULL) != argc)
793		return (DCMD_USAGE);
794
795	l_dbuf.db_data = zlp;
796	l.l_dbuf = &l_dbuf;
797	l.l_bs = 14; /* assume 16k blocks */
798	if (four)
799		l.l_bs = 12;
800
801	if (!(flags & DCMD_ADDRSPEC)) {
802		return (DCMD_USAGE);
803	}
804
805	if (mdb_vread(buf, sizeof (buf), addr) == -1) {
806		mdb_warn("failed to read zap_leaf_phys_t at %p", addr);
807		return (DCMD_ERR);
808	}
809
810	if (zlp->l_hdr.lh_block_type != ZBT_LEAF ||
811	    zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) {
812		mdb_warn("This does not appear to be a zap_leaf_phys_t");
813		return (DCMD_ERR);
814	}
815
816	mdb_printf("zap_leaf_phys_t at %p:\n", addr);
817	mdb_printf("    lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len);
818	mdb_printf("    lh_prefix = %llx\n", zlp->l_hdr.lh_prefix);
819	mdb_printf("    lh_nentries = %u\n", zlp->l_hdr.lh_nentries);
820	mdb_printf("    lh_nfree = %u\n", zlp->l_hdr.lh_nfree,
821	    zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l)));
822	mdb_printf("    lh_freelist = %u\n", zlp->l_hdr.lh_freelist);
823	mdb_printf("    lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags,
824	    zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ?
825	    "ENTRIES_CDSORTED" : "");
826
827	if (verbose) {
828		mdb_printf(" hash table:\n");
829		for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) {
830			if (zlp->l_hash[i] != CHAIN_END)
831				mdb_printf("    %u: %u\n", i, zlp->l_hash[i]);
832		}
833	}
834
835	mdb_printf(" chunks:\n");
836	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
837		/* LINTED: alignment */
838		zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i);
839		switch (zlc->l_entry.le_type) {
840		case ZAP_CHUNK_FREE:
841			if (verbose) {
842				mdb_printf("    %u: free; lf_next = %u\n",
843				    i, zlc->l_free.lf_next);
844			}
845			break;
846		case ZAP_CHUNK_ENTRY:
847			mdb_printf("    %u: entry\n", i);
848			if (verbose) {
849				mdb_printf("        le_next = %u\n",
850				    zlc->l_entry.le_next);
851			}
852			mdb_printf("        le_name_chunk = %u\n",
853			    zlc->l_entry.le_name_chunk);
854			mdb_printf("        le_name_numints = %u\n",
855			    zlc->l_entry.le_name_numints);
856			mdb_printf("        le_value_chunk = %u\n",
857			    zlc->l_entry.le_value_chunk);
858			mdb_printf("        le_value_intlen = %u\n",
859			    zlc->l_entry.le_value_intlen);
860			mdb_printf("        le_value_numints = %u\n",
861			    zlc->l_entry.le_value_numints);
862			mdb_printf("        le_cd = %u\n",
863			    zlc->l_entry.le_cd);
864			mdb_printf("        le_hash = %llx\n",
865			    zlc->l_entry.le_hash);
866			break;
867		case ZAP_CHUNK_ARRAY:
868			mdb_printf("    %u: array", i);
869			if (strisprint((char *)zlc->l_array.la_array))
870				mdb_printf(" \"%s\"", zlc->l_array.la_array);
871			mdb_printf("\n");
872			if (verbose) {
873				int j;
874				mdb_printf("        ");
875				for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) {
876					mdb_printf("%02x ",
877					    zlc->l_array.la_array[j]);
878				}
879				mdb_printf("\n");
880			}
881			if (zlc->l_array.la_next != CHAIN_END) {
882				mdb_printf("        lf_next = %u\n",
883				    zlc->l_array.la_next);
884			}
885			break;
886		default:
887			mdb_printf("    %u: undefined type %u\n",
888			    zlc->l_entry.le_type);
889		}
890	}
891
892	return (DCMD_OK);
893}
894
/*
 * Filter state handed from dbufs() to dbufs_cb().  Each numeric field is
 * either DBUFS_UNSET (match anything) or the value a dbuf must have; osname
 * is NULL (match anything) or the objset name a dbuf must belong to.
 */
typedef struct dbufs_data {
	mdb_ctf_id_t id;	/* CTF id of the dmu_buf_impl struct */
	uint64_t objset;	/* objset address to match */
	uint64_t object;	/* object number to match */
	uint64_t level;		/* level to match */
	uint64_t blkid;		/* block id to match */
	char *osname;		/* objset name to match */
} dbufs_data_t;

/* Sentinel meaning "no filter was given for this field". */
#define	DBUFS_UNSET	(0xbaddcafedeadbeefULL)
905
906/* ARGSUSED */
907static int
908dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
909{
910	dbufs_data_t *data = arg;
911	uintptr_t objset;
912	dmu_buf_t db;
913	uint8_t level;
914	uint64_t blkid;
915	char osname[ZFS_MAX_DATASET_NAME_LEN];
916
917	if (GETMEMBID(addr, &data->id, db_objset, objset) ||
918	    GETMEMBID(addr, &data->id, db, db) ||
919	    GETMEMBID(addr, &data->id, db_level, level) ||
920	    GETMEMBID(addr, &data->id, db_blkid, blkid)) {
921		return (WALK_ERR);
922	}
923
924	if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
925	    (data->osname == NULL || (objset_name(objset, osname) == 0 &&
926	    strcmp(data->osname, osname) == 0)) &&
927	    (data->object == DBUFS_UNSET || data->object == db.db_object) &&
928	    (data->level == DBUFS_UNSET || data->level == level) &&
929	    (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
930		mdb_printf("%#lr\n", addr);
931	}
932	return (WALK_NEXT);
933}
934
935/* ARGSUSED */
936static int
937dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
938{
939	dbufs_data_t data;
940	char *object = NULL;
941	char *blkid = NULL;
942
943	data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
944	data.osname = NULL;
945
946	if (mdb_getopts(argc, argv,
947	    'O', MDB_OPT_UINT64, &data.objset,
948	    'n', MDB_OPT_STR, &data.osname,
949	    'o', MDB_OPT_STR, &object,
950	    'l', MDB_OPT_UINT64, &data.level,
951	    'b', MDB_OPT_STR, &blkid,
952	    NULL) != argc) {
953		return (DCMD_USAGE);
954	}
955
956	if (object) {
957		if (strcmp(object, "mdn") == 0) {
958			data.object = DMU_META_DNODE_OBJECT;
959		} else {
960			data.object = mdb_strtoull(object);
961		}
962	}
963
964	if (blkid) {
965		if (strcmp(blkid, "bonus") == 0) {
966			data.blkid = DMU_BONUS_BLKID;
967		} else {
968			data.blkid = mdb_strtoull(blkid);
969		}
970	}
971
972	if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) {
973		mdb_warn("couldn't find struct dmu_buf_impl_t");
974		return (DCMD_ERR);
975	}
976
977	if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
978		mdb_warn("can't walk dbufs");
979		return (DCMD_ERR);
980	}
981
982	return (DCMD_OK);
983}
984
/* Search state handed from abuf_find() to abuf_find_cb(). */
typedef struct abuf_find_data {
	dva_t dva;		/* the DVA being searched for */
	mdb_ctf_id_t id;	/* CTF id of the arc_buf_hdr struct */
} abuf_find_data_t;
989
990/* ARGSUSED */
991static int
992abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
993{
994	abuf_find_data_t *data = arg;
995	dva_t dva;
996
997	if (GETMEMBID(addr, &data->id, b_dva, dva)) {
998		return (WALK_ERR);
999	}
1000
1001	if (dva.dva_word[0] == data->dva.dva_word[0] &&
1002	    dva.dva_word[1] == data->dva.dva_word[1]) {
1003		mdb_printf("%#lr\n", addr);
1004	}
1005	return (WALK_NEXT);
1006}
1007
1008/* ARGSUSED */
1009static int
1010abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1011{
1012	abuf_find_data_t data;
1013	GElf_Sym sym;
1014	int i;
1015	const char *syms[] = {
1016		"ARC_mru",
1017		"ARC_mru_ghost",
1018		"ARC_mfu",
1019		"ARC_mfu_ghost",
1020	};
1021
1022	if (argc != 2)
1023		return (DCMD_USAGE);
1024
1025	for (i = 0; i < 2; i ++) {
1026		switch (argv[i].a_type) {
1027		case MDB_TYPE_STRING:
1028			data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
1029			break;
1030		case MDB_TYPE_IMMEDIATE:
1031			data.dva.dva_word[i] = argv[i].a_un.a_val;
1032			break;
1033		default:
1034			return (DCMD_USAGE);
1035		}
1036	}
1037
1038	if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) {
1039		mdb_warn("couldn't find struct arc_buf_hdr");
1040		return (DCMD_ERR);
1041	}
1042
1043	for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
1044		if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) {
1045			mdb_warn("can't find symbol %s", syms[i]);
1046			return (DCMD_ERR);
1047		}
1048
1049		if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
1050			mdb_warn("can't walk %s", syms[i]);
1051			return (DCMD_ERR);
1052		}
1053	}
1054
1055	return (DCMD_OK);
1056}
1057
1058
/* Output options handed from dbgmsg() to dbgmsg_cb(). */
typedef struct dbgmsg_arg {
	boolean_t da_verbose;	/* -v: also print timestamp and ::whatis */
	boolean_t da_address;	/* -a: prefix each message with its address */
} dbgmsg_arg_t;
1063
1064/* ARGSUSED */
1065static int
1066dbgmsg_cb(uintptr_t addr, const void *unknown, void *arg)
1067{
1068	static mdb_ctf_id_t id;
1069	static boolean_t gotid;
1070	static ulong_t off;
1071
1072	dbgmsg_arg_t *da = arg;
1073	time_t timestamp;
1074	char buf[1024];
1075
1076	if (!gotid) {
1077		if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) ==
1078		    -1) {
1079			mdb_warn("couldn't find struct zfs_dbgmsg");
1080			return (WALK_ERR);
1081		}
1082		gotid = TRUE;
1083		if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) {
1084			mdb_warn("couldn't find zdm_msg");
1085			return (WALK_ERR);
1086		}
1087		off /= 8;
1088	}
1089
1090
1091	if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) {
1092		return (WALK_ERR);
1093	}
1094
1095	if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) {
1096		mdb_warn("failed to read zdm_msg at %p\n", addr + off);
1097		return (DCMD_ERR);
1098	}
1099
1100	if (da->da_address)
1101		mdb_printf("%p ", addr);
1102	if (da->da_verbose)
1103		mdb_printf("%Y ", timestamp);
1104
1105	mdb_printf("%s\n", buf);
1106
1107	if (da->da_verbose)
1108		(void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
1109
1110	return (WALK_NEXT);
1111}
1112
1113/* ARGSUSED */
1114static int
1115dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1116{
1117	GElf_Sym sym;
1118	dbgmsg_arg_t da = { 0 };
1119
1120	if (mdb_getopts(argc, argv,
1121	    'v', MDB_OPT_SETBITS, B_TRUE, &da.da_verbose,
1122	    'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address,
1123	    NULL) != argc)
1124		return (DCMD_USAGE);
1125
1126	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) {
1127		mdb_warn("can't find zfs_dbgmsgs");
1128		return (DCMD_ERR);
1129	}
1130
1131	if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) {
1132		mdb_warn("can't walk zfs_dbgmsgs");
1133		return (DCMD_ERR);
1134	}
1135
1136	return (DCMD_OK);
1137}
1138
1139/*ARGSUSED*/
1140static int
1141arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1142{
1143	kstat_named_t *stats;
1144	GElf_Sym sym;
1145	int nstats, i;
1146	uint_t opt_a = FALSE;
1147	uint_t opt_b = FALSE;
1148	uint_t shift = 0;
1149	const char *suffix;
1150
1151	static const char *bytestats[] = {
1152		"p", "c", "c_min", "c_max", "size", "duplicate_buffers_size",
1153		"arc_meta_used", "arc_meta_limit", "arc_meta_max",
1154		"arc_meta_min", "hdr_size", "data_size", "metadata_size",
1155		"other_size", "anon_size", "anon_evictable_data",
1156		"anon_evictable_metadata", "mru_size", "mru_evictable_data",
1157		"mru_evictable_metadata", "mru_ghost_size",
1158		"mru_ghost_evictable_data", "mru_ghost_evictable_metadata",
1159		"mfu_size", "mfu_evictable_data", "mfu_evictable_metadata",
1160		"mfu_ghost_size", "mfu_ghost_evictable_data",
1161		"mfu_ghost_evictable_metadata", "evict_l2_cached",
1162		"evict_l2_eligible", "evict_l2_ineligible", "l2_read_bytes",
1163		"l2_write_bytes", "l2_size", "l2_asize", "l2_hdr_size",
1164		"compressed_size", "uncompressed_size", "overhead_size",
1165		NULL
1166	};
1167
1168	static const char *extras[] = {
1169		"arc_no_grow", "arc_tempreserve",
1170		NULL
1171	};
1172
1173	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) {
1174		mdb_warn("failed to find 'arc_stats'");
1175		return (DCMD_ERR);
1176	}
1177
1178	stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
1179
1180	if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
1181		mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
1182		return (DCMD_ERR);
1183	}
1184
1185	nstats = sym.st_size / sizeof (kstat_named_t);
1186
1187	/* NB: -a / opt_a are ignored for backwards compatability */
1188	if (mdb_getopts(argc, argv,
1189	    'a', MDB_OPT_SETBITS, TRUE, &opt_a,
1190	    'b', MDB_OPT_SETBITS, TRUE, &opt_b,
1191	    'k', MDB_OPT_SETBITS, 10, &shift,
1192	    'm', MDB_OPT_SETBITS, 20, &shift,
1193	    'g', MDB_OPT_SETBITS, 30, &shift,
1194	    NULL) != argc)
1195		return (DCMD_USAGE);
1196
1197	if (!opt_b && !shift)
1198		shift = 20;
1199
1200	switch (shift) {
1201	case 0:
1202		suffix = "B";
1203		break;
1204	case 10:
1205		suffix = "KB";
1206		break;
1207	case 20:
1208		suffix = "MB";
1209		break;
1210	case 30:
1211		suffix = "GB";
1212		break;
1213	default:
1214		suffix = "XX";
1215	}
1216
1217	for (i = 0; i < nstats; i++) {
1218		int j;
1219		boolean_t bytes = B_FALSE;
1220
1221		for (j = 0; bytestats[j]; j++) {
1222			if (strcmp(stats[i].name, bytestats[j]) == 0) {
1223				bytes = B_TRUE;
1224				break;
1225			}
1226		}
1227
1228		if (bytes) {
1229			mdb_printf("%-25s = %9llu %s\n", stats[i].name,
1230			    stats[i].value.ui64 >> shift, suffix);
1231		} else {
1232			mdb_printf("%-25s = %9llu\n", stats[i].name,
1233			    stats[i].value.ui64);
1234		}
1235	}
1236
1237	for (i = 0; extras[i]; i++) {
1238		uint64_t buf;
1239
1240		if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) {
1241			mdb_warn("failed to find '%s'", extras[i]);
1242			return (DCMD_ERR);
1243		}
1244
1245		if (sym.st_size != sizeof (uint64_t) &&
1246		    sym.st_size != sizeof (uint32_t)) {
1247			mdb_warn("expected scalar for variable '%s'\n",
1248			    extras[i]);
1249			return (DCMD_ERR);
1250		}
1251
1252		if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
1253			mdb_warn("couldn't read '%s'", extras[i]);
1254			return (DCMD_ERR);
1255		}
1256
1257		mdb_printf("%-25s = ", extras[i]);
1258
1259		/* NB: all the 64-bit extras happen to be byte counts */
1260		if (sym.st_size == sizeof (uint64_t))
1261			mdb_printf("%9llu %s\n", buf >> shift, suffix);
1262
1263		if (sym.st_size == sizeof (uint32_t))
1264			mdb_printf("%9d\n", *((uint32_t *)&buf));
1265	}
1266	return (DCMD_OK);
1267}
1268
/*
 * The members of the target's spa_t that spa_print() needs.  Filled in by
 * mdb_ctf_vread(), which is given the type names "spa_t" and
 * "mdb_spa_print_t".
 */
typedef struct mdb_spa_print {
	/* used by spa_print() to index its table of state names */
	pool_state_t spa_state;
	/* printed in the NAME column */
	char spa_name[ZFS_MAX_DATASET_NAME_LEN];
	/* target address handed to spa_class_histogram() */
	uintptr_t spa_normal_class;
} mdb_spa_print_t;
1274
1275
/* Row of '*' characters; dump_histogram() prints a tail of it per bucket. */
const char histo_stars[] = "****************************************";
/* Number of '*' characters in histo_stars (the terminating NUL excluded). */
const int histo_width = sizeof (histo_stars) - 1;
1278
1279static void
1280dump_histogram(const uint64_t *histo, int size, int offset)
1281{
1282	int i;
1283	int minidx = size - 1;
1284	int maxidx = 0;
1285	uint64_t max = 0;
1286
1287	for (i = 0; i < size; i++) {
1288		if (histo[i] > max)
1289			max = histo[i];
1290		if (histo[i] > 0 && i > maxidx)
1291			maxidx = i;
1292		if (histo[i] > 0 && i < minidx)
1293			minidx = i;
1294	}
1295
1296	if (max < histo_width)
1297		max = histo_width;
1298
1299	for (i = minidx; i <= maxidx; i++) {
1300		mdb_printf("%3u: %6llu %s\n",
1301		    i + offset, (u_longlong_t)histo[i],
1302		    &histo_stars[(max - histo[i]) * histo_width / max]);
1303	}
1304}
1305
/*
 * The member of the target's metaslab_class_t that spa_class_histogram()
 * reads through mdb_ctf_vread().
 */
typedef struct mdb_metaslab_class {
	/* bucket counts handed to dump_histogram() */
	uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE];
} mdb_metaslab_class_t;
1309
1310/*
1311 * spa_class_histogram(uintptr_t class_addr)
1312 *
1313 * Prints free space histogram for a device class
1314 *
1315 * Returns DCMD_OK, or DCMD_ERR.
1316 */
1317static int
1318spa_class_histogram(uintptr_t class_addr)
1319{
1320	mdb_metaslab_class_t mc;
1321	if (mdb_ctf_vread(&mc, "metaslab_class_t",
1322	    "mdb_metaslab_class_t", class_addr, 0) == -1)
1323		return (DCMD_ERR);
1324
1325	mdb_inc_indent(4);
1326	dump_histogram(mc.mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1327	mdb_dec_indent(4);
1328	return (DCMD_OK);
1329}
1330
1331/*
1332 * ::spa
1333 *
1334 *	-c	Print configuration information as well
1335 *	-v	Print vdev state
1336 *	-e	Print vdev error stats
1337 *	-m	Print vdev metaslab info
1338 *	-M	print vdev metaslab group info
1339 *	-h	Print histogram info (must be combined with -m or -M)
1340 *
1341 * Print a summarized spa_t.  When given no arguments, prints out a table of all
1342 * active pools on the system.
1343 */
1344/* ARGSUSED */
1345static int
1346spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1347{
1348	const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
1349		"SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
1350	const char *state;
1351	int spa_flags = 0;
1352
1353	if (mdb_getopts(argc, argv,
1354	    'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags,
1355	    'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags,
1356	    'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1357	    'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1358	    'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1359	    'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1360	    NULL) != argc)
1361		return (DCMD_USAGE);
1362
1363	if (!(flags & DCMD_ADDRSPEC)) {
1364		if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
1365			mdb_warn("can't walk spa");
1366			return (DCMD_ERR);
1367		}
1368
1369		return (DCMD_OK);
1370	}
1371
1372	if (flags & DCMD_PIPE_OUT) {
1373		mdb_printf("%#lr\n", addr);
1374		return (DCMD_OK);
1375	}
1376
1377	if (DCMD_HDRSPEC(flags))
1378		mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
1379		    sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
1380
1381	mdb_spa_print_t spa;
1382	if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1)
1383		return (DCMD_ERR);
1384
1385	if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
1386		state = "UNKNOWN";
1387	else
1388		state = statetab[spa.spa_state];
1389
1390	mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
1391	if (spa_flags & SPA_FLAG_HISTOGRAMS)
1392		spa_class_histogram(spa.spa_normal_class);
1393
1394	if (spa_flags & SPA_FLAG_CONFIG) {
1395		mdb_printf("\n");
1396		mdb_inc_indent(4);
1397		if (mdb_call_dcmd("spa_config", addr, flags, 0,
1398		    NULL) != DCMD_OK)
1399			return (DCMD_ERR);
1400		mdb_dec_indent(4);
1401	}
1402
1403	if (spa_flags & SPA_FLAG_ALL_VDEV) {
1404		mdb_arg_t v;
1405		char opts[100] = "-";
1406		int args =
1407		    (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1;
1408
1409		if (spa_flags & SPA_FLAG_ERRORS)
1410			strcat(opts, "e");
1411		if (spa_flags & SPA_FLAG_METASLABS)
1412			strcat(opts, "m");
1413		if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
1414			strcat(opts, "M");
1415		if (spa_flags & SPA_FLAG_HISTOGRAMS)
1416			strcat(opts, "h");
1417
1418		v.a_type = MDB_TYPE_STRING;
1419		v.a_un.a_str = opts;
1420
1421		mdb_printf("\n");
1422		mdb_inc_indent(4);
1423		if (mdb_call_dcmd("spa_vdevs", addr, flags, args,
1424		    &v) != DCMD_OK)
1425			return (DCMD_ERR);
1426		mdb_dec_indent(4);
1427	}
1428
1429	return (DCMD_OK);
1430}
1431
/*
 * The member of the target's spa structure that spa_print_config() reads
 * through mdb_ctf_vread().
 */
typedef struct mdb_spa_config_spa {
	/* target address passed to ::nvlist; 0 is reported as "(none)" */
	uintptr_t spa_config;
} mdb_spa_config_spa_t;
1435
1436/*
1437 * ::spa_config
1438 *
1439 * Given a spa_t, print the configuration information stored in spa_config.
1440 * Since it's just an nvlist, format it as an indented list of name=value pairs.
1441 * We simply read the value of spa_config and pass off to ::nvlist.
1442 */
1443/* ARGSUSED */
1444static int
1445spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1446{
1447	mdb_spa_config_spa_t spa;
1448
1449	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1450		return (DCMD_USAGE);
1451
1452	if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t",
1453	    addr, 0) == -1)
1454		return (DCMD_ERR);
1455
1456	if (spa.spa_config == 0) {
1457		mdb_printf("(none)\n");
1458		return (DCMD_OK);
1459	}
1460
1461	return (mdb_call_dcmd("nvlist", spa.spa_config, flags,
1462	    0, NULL));
1463}
1464
1465
1466
/*
 * The members of the target's range_tree_t read (through mdb_ctf_vread())
 * by the metaslab statistics code.
 */
typedef struct mdb_range_tree {
	struct {
		/* multiplied by the segment size to estimate memory use */
		uint64_t avl_numnodes;
	} rt_root;
	/* summed into the per-tree space totals */
	uint64_t rt_space;
} mdb_range_tree_t;
1473
/*
 * The members of the target's metaslab_group_t read through
 * mdb_ctf_vread().
 */
typedef struct mdb_metaslab_group {
	/* printed as a percentage unless equal to ZFS_FRAG_INVALID */
	uint64_t mg_fragmentation;
	/* bucket counts handed to dump_histogram() */
	uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
	/* target address of the group's vdev_t */
	uintptr_t mg_vd;
} mdb_metaslab_group_t;
1479
/*
 * The members of the target's metaslab_t read through mdb_ctf_vread().
 * The uintptr_t members are target addresses; all but ms_sm are read as
 * range_tree_t by the code in this file.
 */
typedef struct mdb_metaslab {
	uint64_t ms_id;			/* printed in the ID / MSID column */
	uint64_t ms_start;		/* printed in the OFFSET column */
	uint64_t ms_size;		/* starting point for the FREE value */
	int64_t ms_deferspace;		/* summed by space_cb() */
	/* printed as a percentage unless equal to ZFS_FRAG_INVALID */
	uint64_t ms_fragmentation;
	uint64_t ms_weight;
	uintptr_t ms_allocating[TXG_SIZE];
	uintptr_t ms_checkpointing;
	uintptr_t ms_freeing;
	uintptr_t ms_freed;
	uintptr_t ms_allocatable;
	uintptr_t ms_unflushed_frees;
	uintptr_t ms_unflushed_allocs;
	/* target address of the space_map_t; users check it against 0 */
	uintptr_t ms_sm;
} mdb_metaslab_t;
1496
/*
 * The members of the target's space_map_phys_t read through
 * mdb_ctf_vread().
 */
typedef struct mdb_space_map_phys_t {
	/* subtracted from the size when computing free/available space */
	int64_t smp_alloc;
	/* bucket counts handed to dump_histogram() */
	uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
} mdb_space_map_phys_t;
1501
/*
 * The members of the target's space_map_t read through mdb_ctf_vread().
 */
typedef struct mdb_space_map {
	uint64_t sm_size;	/* used by space_cb() to compute 'avail' */
	/* passed to dump_histogram() as the bucket label offset */
	uint8_t sm_shift;
	/* target address of the space_map_phys_t; checked against 0 */
	uintptr_t sm_phys;
} mdb_space_map_t;
1507
/*
 * The members of the target's vdev_t read through mdb_ctf_vread().  The
 * uintptr_t members are target addresses.
 */
typedef struct mdb_vdev {
	uint64_t vdev_id;
	uint64_t vdev_state;		/* decoded into the STATE column */
	uintptr_t vdev_ops;		/* vdev_ops_t supplying the type name */
	struct {
		uint64_t vs_aux;	/* decoded into the AUX column */
		uint64_t vs_ops[VS_ZIO_TYPES];
		uint64_t vs_bytes[VS_ZIO_TYPES];
		uint64_t vs_read_errors;
		uint64_t vs_write_errors;
		uint64_t vs_checksum_errors;
	} vdev_stat;
	uintptr_t vdev_child;		/* array of vdev_children pointers */
	uint64_t vdev_children;
	uint64_t vdev_ms_count;		/* number of entries in vdev_ms */
	uintptr_t vdev_mg;		/* metaslab_group_t */
	uintptr_t vdev_ms;		/* array of metaslab_t pointers */
	uintptr_t vdev_path;		/* path string; may be 0 */
} mdb_vdev_t;
1527
/*
 * The member of the target's vdev_ops_t read through mdb_ctf_vread().
 */
typedef struct mdb_vdev_ops {
	/* type name used to describe a vdev that has no path */
	char vdev_op_type[16];
} mdb_vdev_ops_t;
1531
1532static int
1533metaslab_stats(mdb_vdev_t *vd, int spa_flags)
1534{
1535	mdb_inc_indent(4);
1536	mdb_printf("%<u>%-?s %6s %20s %10s %10s %10s%</u>\n", "ADDR", "ID",
1537	    "OFFSET", "FREE", "FRAG", "UCMU");
1538
1539	uintptr_t *vdev_ms = mdb_alloc(vd->vdev_ms_count * sizeof (vdev_ms),
1540	    UM_SLEEP | UM_GC);
1541	if (mdb_vread(vdev_ms, vd->vdev_ms_count * sizeof (uintptr_t),
1542	    vd->vdev_ms) == -1) {
1543		mdb_warn("failed to read vdev_ms at %p\n", vd->vdev_ms);
1544		return (DCMD_ERR);
1545	}
1546
1547	for (int m = 0; m < vd->vdev_ms_count; m++) {
1548		mdb_metaslab_t ms;
1549		mdb_space_map_t sm = { 0 };
1550		mdb_space_map_phys_t smp = { 0 };
1551		mdb_range_tree_t rt;
1552		uint64_t uallocs, ufrees, raw_free, raw_uchanges_mem;
1553		char free[MDB_NICENUM_BUFLEN];
1554		char uchanges_mem[MDB_NICENUM_BUFLEN];
1555
1556		if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1557		    vdev_ms[m], 0) == -1)
1558			return (DCMD_ERR);
1559
1560		if (ms.ms_sm != 0 &&
1561		    mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t",
1562		    ms.ms_sm, 0) == -1)
1563			return (DCMD_ERR);
1564
1565		if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t",
1566		    ms.ms_unflushed_frees, 0) == -1)
1567			return (DCMD_ERR);
1568		ufrees = rt.rt_space;
1569		raw_uchanges_mem = rt.rt_root.avl_numnodes *
1570		    mdb_ctf_sizeof_by_name("range_seg_t");
1571
1572		if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t",
1573		    ms.ms_unflushed_allocs, 0) == -1)
1574			return (DCMD_ERR);
1575		uallocs = rt.rt_space;
1576		raw_uchanges_mem += rt.rt_root.avl_numnodes *
1577		    mdb_ctf_sizeof_by_name("range_seg_t");
1578		mdb_nicenum(raw_uchanges_mem, uchanges_mem);
1579
1580		raw_free = ms.ms_size;
1581		if (ms.ms_sm != 0 && sm.sm_phys != 0) {
1582			(void) mdb_ctf_vread(&smp, "space_map_phys_t",
1583			    "mdb_space_map_phys_t", sm.sm_phys, 0);
1584			raw_free -= smp.smp_alloc;
1585		}
1586		raw_free += ufrees - uallocs;
1587		mdb_nicenum(raw_free, free);
1588
1589		mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id,
1590		    ms.ms_start, free);
1591		if (ms.ms_fragmentation == ZFS_FRAG_INVALID)
1592			mdb_printf("%9s ", "-");
1593		else
1594			mdb_printf("%9llu%% ", ms.ms_fragmentation);
1595		mdb_printf("%10s\n", uchanges_mem);
1596
1597		if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != 0 &&
1598		    sm.sm_phys != 0) {
1599			dump_histogram(smp.smp_histogram,
1600			    SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift);
1601		}
1602	}
1603	mdb_dec_indent(4);
1604	return (DCMD_OK);
1605}
1606
1607static int
1608metaslab_group_stats(mdb_vdev_t *vd, int spa_flags)
1609{
1610	mdb_metaslab_group_t mg;
1611	if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t",
1612	    vd->vdev_mg, 0) == -1) {
1613		mdb_warn("failed to read vdev_mg at %p\n", vd->vdev_mg);
1614		return (DCMD_ERR);
1615	}
1616
1617	mdb_inc_indent(4);
1618	mdb_printf("%<u>%-?s %7s %9s%</u>\n", "ADDR", "FRAG", "UCMU");
1619
1620	if (mg.mg_fragmentation == ZFS_FRAG_INVALID)
1621		mdb_printf("%0?p %6s\n", vd->vdev_mg, "-");
1622	else
1623		mdb_printf("%0?p %6llu%%", vd->vdev_mg, mg.mg_fragmentation);
1624
1625
1626	uintptr_t *vdev_ms = mdb_alloc(vd->vdev_ms_count * sizeof (vdev_ms),
1627	    UM_SLEEP | UM_GC);
1628	if (mdb_vread(vdev_ms, vd->vdev_ms_count * sizeof (uintptr_t),
1629	    vd->vdev_ms) == -1) {
1630		mdb_warn("failed to read vdev_ms at %p\n", vd->vdev_ms);
1631		return (DCMD_ERR);
1632	}
1633
1634	uint64_t raw_uchanges_mem = 0;
1635	char uchanges_mem[MDB_NICENUM_BUFLEN];
1636	for (int m = 0; m < vd->vdev_ms_count; m++) {
1637		mdb_metaslab_t ms;
1638		mdb_range_tree_t rt;
1639
1640		if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1641		    vdev_ms[m], 0) == -1)
1642			return (DCMD_ERR);
1643
1644		if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t",
1645		    ms.ms_unflushed_frees, 0) == -1)
1646			return (DCMD_ERR);
1647		raw_uchanges_mem +=
1648		    rt.rt_root.avl_numnodes * sizeof (range_seg_t);
1649
1650		if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t",
1651		    ms.ms_unflushed_allocs, 0) == -1)
1652			return (DCMD_ERR);
1653		raw_uchanges_mem +=
1654		    rt.rt_root.avl_numnodes * sizeof (range_seg_t);
1655	}
1656	mdb_nicenum(raw_uchanges_mem, uchanges_mem);
1657	mdb_printf("%10s\n", uchanges_mem);
1658
1659	if (spa_flags & SPA_FLAG_HISTOGRAMS)
1660		dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1661	mdb_dec_indent(4);
1662	return (DCMD_OK);
1663}
1664
1665/*
1666 * ::vdev
1667 *
1668 * Print out a summarized vdev_t, in the following form:
1669 *
1670 * ADDR             STATE	AUX            DESC
1671 * fffffffbcde23df0 HEALTHY	-              /dev/dsk/c0t0d0
1672 *
1673 * If '-r' is specified, recursively visit all children.
1674 *
1675 * With '-e', the statistics associated with the vdev are printed as well.
1676 */
1677static int
1678do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive,
1679    int spa_flags)
1680{
1681	mdb_vdev_t vd;
1682	if (mdb_ctf_vread(&vd, "vdev_t", "mdb_vdev_t",
1683	    (uintptr_t)addr, 0) == -1)
1684		return (DCMD_ERR);
1685
1686	if (flags & DCMD_PIPE_OUT) {
1687		mdb_printf("%#lr\n", addr);
1688	} else {
1689		char desc[MAXNAMELEN];
1690		if (vd.vdev_path != 0) {
1691			if (mdb_readstr(desc, sizeof (desc),
1692			    (uintptr_t)vd.vdev_path) == -1) {
1693				mdb_warn("failed to read vdev_path at %p\n",
1694				    vd.vdev_path);
1695				return (DCMD_ERR);
1696			}
1697		} else if (vd.vdev_ops != 0) {
1698			vdev_ops_t ops;
1699			if (mdb_vread(&ops, sizeof (ops),
1700			    (uintptr_t)vd.vdev_ops) == -1) {
1701				mdb_warn("failed to read vdev_ops at %p\n",
1702				    vd.vdev_ops);
1703				return (DCMD_ERR);
1704			}
1705			(void) strcpy(desc, ops.vdev_op_type);
1706		} else {
1707			(void) strcpy(desc, "<unknown>");
1708		}
1709
1710		if (depth == 0 && DCMD_HDRSPEC(flags))
1711			mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1712			    "ADDR", "STATE", "AUX",
1713			    sizeof (uintptr_t) == 4 ? 43 : 35,
1714			    "DESCRIPTION");
1715
1716		mdb_printf("%0?p ", addr);
1717
1718		const char *state, *aux;
1719		switch (vd.vdev_state) {
1720		case VDEV_STATE_CLOSED:
1721			state = "CLOSED";
1722			break;
1723		case VDEV_STATE_OFFLINE:
1724			state = "OFFLINE";
1725			break;
1726		case VDEV_STATE_CANT_OPEN:
1727			state = "CANT_OPEN";
1728			break;
1729		case VDEV_STATE_DEGRADED:
1730			state = "DEGRADED";
1731			break;
1732		case VDEV_STATE_HEALTHY:
1733			state = "HEALTHY";
1734			break;
1735		case VDEV_STATE_REMOVED:
1736			state = "REMOVED";
1737			break;
1738		case VDEV_STATE_FAULTED:
1739			state = "FAULTED";
1740			break;
1741		default:
1742			state = "UNKNOWN";
1743			break;
1744		}
1745
1746		switch (vd.vdev_stat.vs_aux) {
1747		case VDEV_AUX_NONE:
1748			aux = "-";
1749			break;
1750		case VDEV_AUX_OPEN_FAILED:
1751			aux = "OPEN_FAILED";
1752			break;
1753		case VDEV_AUX_CORRUPT_DATA:
1754			aux = "CORRUPT_DATA";
1755			break;
1756		case VDEV_AUX_NO_REPLICAS:
1757			aux = "NO_REPLICAS";
1758			break;
1759		case VDEV_AUX_BAD_GUID_SUM:
1760			aux = "BAD_GUID_SUM";
1761			break;
1762		case VDEV_AUX_TOO_SMALL:
1763			aux = "TOO_SMALL";
1764			break;
1765		case VDEV_AUX_BAD_LABEL:
1766			aux = "BAD_LABEL";
1767			break;
1768		case VDEV_AUX_VERSION_NEWER:
1769			aux = "VERS_NEWER";
1770			break;
1771		case VDEV_AUX_VERSION_OLDER:
1772			aux = "VERS_OLDER";
1773			break;
1774		case VDEV_AUX_UNSUP_FEAT:
1775			aux = "UNSUP_FEAT";
1776			break;
1777		case VDEV_AUX_SPARED:
1778			aux = "SPARED";
1779			break;
1780		case VDEV_AUX_ERR_EXCEEDED:
1781			aux = "ERR_EXCEEDED";
1782			break;
1783		case VDEV_AUX_IO_FAILURE:
1784			aux = "IO_FAILURE";
1785			break;
1786		case VDEV_AUX_BAD_LOG:
1787			aux = "BAD_LOG";
1788			break;
1789		case VDEV_AUX_EXTERNAL:
1790			aux = "EXTERNAL";
1791			break;
1792		case VDEV_AUX_SPLIT_POOL:
1793			aux = "SPLIT_POOL";
1794			break;
1795		case VDEV_AUX_CHILDREN_OFFLINE:
1796			aux = "CHILDREN_OFFLINE";
1797			break;
1798		default:
1799			aux = "UNKNOWN";
1800			break;
1801		}
1802
1803		mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1804
1805		if (spa_flags & SPA_FLAG_ERRORS) {
1806			int i;
1807
1808			mdb_inc_indent(4);
1809			mdb_printf("\n");
1810			mdb_printf("%<u>       %12s %12s %12s %12s "
1811			    "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1812			    "IOCTL");
1813			mdb_printf("OPS     ");
1814			for (i = 1; i < VS_ZIO_TYPES; i++)
1815				mdb_printf("%11#llx%s",
1816				    vd.vdev_stat.vs_ops[i],
1817				    i == VS_ZIO_TYPES - 1 ? "" : "  ");
1818			mdb_printf("\n");
1819			mdb_printf("BYTES   ");
1820			for (i = 1; i < VS_ZIO_TYPES; i++)
1821				mdb_printf("%11#llx%s",
1822				    vd.vdev_stat.vs_bytes[i],
1823				    i == VS_ZIO_TYPES - 1 ? "" : "  ");
1824
1825
1826			mdb_printf("\n");
1827			mdb_printf("EREAD    %10#llx\n",
1828			    vd.vdev_stat.vs_read_errors);
1829			mdb_printf("EWRITE   %10#llx\n",
1830			    vd.vdev_stat.vs_write_errors);
1831			mdb_printf("ECKSUM   %10#llx\n",
1832			    vd.vdev_stat.vs_checksum_errors);
1833			mdb_dec_indent(4);
1834			mdb_printf("\n");
1835		}
1836
1837		if ((spa_flags & SPA_FLAG_METASLAB_GROUPS) &&
1838		    vd.vdev_mg != 0) {
1839			metaslab_group_stats(&vd, spa_flags);
1840		}
1841		if ((spa_flags & SPA_FLAG_METASLABS) && vd.vdev_ms != 0) {
1842			metaslab_stats(&vd, spa_flags);
1843		}
1844	}
1845
1846	uint64_t children = vd.vdev_children;
1847	if (children == 0 || !recursive)
1848		return (DCMD_OK);
1849
1850	uintptr_t *child = mdb_alloc(children * sizeof (child),
1851	    UM_SLEEP | UM_GC);
1852	if (mdb_vread(child, children * sizeof (void *), vd.vdev_child) == -1) {
1853		mdb_warn("failed to read vdev children at %p", vd.vdev_child);
1854		return (DCMD_ERR);
1855	}
1856
1857	for (uint64_t c = 0; c < children; c++) {
1858		if (do_print_vdev(child[c], flags, depth + 2, recursive,
1859		    spa_flags)) {
1860			return (DCMD_ERR);
1861		}
1862	}
1863
1864	return (DCMD_OK);
1865}
1866
1867static int
1868vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1869{
1870	uint64_t depth = 0;
1871	boolean_t recursive = B_FALSE;
1872	int spa_flags = 0;
1873
1874	if (mdb_getopts(argc, argv,
1875	    'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1876	    'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1877	    'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1878	    'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1879	    'r', MDB_OPT_SETBITS, TRUE, &recursive,
1880	    'd', MDB_OPT_UINT64, &depth, NULL) != argc)
1881		return (DCMD_USAGE);
1882
1883	if (!(flags & DCMD_ADDRSPEC)) {
1884		mdb_warn("no vdev_t address given\n");
1885		return (DCMD_ERR);
1886	}
1887
1888	return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags));
1889}
1890
/*
 * The members of the target's metaslab_alloc_trace_t that metaslab_trace()
 * reads through mdb_ctf_vread().
 */
typedef struct mdb_metaslab_alloc_trace {
	uintptr_t mat_mg;	/* metaslab_group_t address; may be 0 */
	uintptr_t mat_msp;	/* metaslab_t address; may be 0 */
	uint64_t mat_size;	/* printed in the ASIZE column */
	uint64_t mat_weight;	/* formatted by metaslab_print_weight() */
	/* negative values are decoded as "enum trace_alloc_type" */
	uint64_t mat_offset;
	uint32_t mat_dva_id;	/* printed in the DVA column */
	int mat_allocator;	/* printed in the ALLOCATOR column */
} mdb_metaslab_alloc_trace_t;
1900
1901static void
1902metaslab_print_weight(uint64_t weight)
1903{
1904	char buf[100];
1905
1906	if (WEIGHT_IS_SPACEBASED(weight)) {
1907		mdb_nicenum(
1908		    weight & ~(METASLAB_ACTIVE_MASK | METASLAB_WEIGHT_TYPE),
1909		    buf);
1910	} else {
1911		char size[MDB_NICENUM_BUFLEN];
1912		mdb_nicenum(1ULL << WEIGHT_GET_INDEX(weight), size);
1913		(void) mdb_snprintf(buf, sizeof (buf), "%llu x %s",
1914		    WEIGHT_GET_COUNT(weight), size);
1915	}
1916	mdb_printf("%11s ", buf);
1917}
1918
1919/* ARGSUSED */
1920static int
1921metaslab_weight(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1922{
1923	uint64_t weight = 0;
1924	char active;
1925
1926	if (argc == 0 && (flags & DCMD_ADDRSPEC)) {
1927		if (mdb_vread(&weight, sizeof (uint64_t), addr) == -1) {
1928			mdb_warn("failed to read weight at %p\n", addr);
1929			return (DCMD_ERR);
1930		}
1931	} else if (argc == 1 && !(flags & DCMD_ADDRSPEC)) {
1932		weight = (argv[0].a_type == MDB_TYPE_IMMEDIATE) ?
1933		    argv[0].a_un.a_val : mdb_strtoull(argv[0].a_un.a_str);
1934	} else {
1935		return (DCMD_USAGE);
1936	}
1937
1938	if (DCMD_HDRSPEC(flags)) {
1939		mdb_printf("%<u>%-6s %9s %9s%</u>\n",
1940		    "ACTIVE", "ALGORITHM", "WEIGHT");
1941	}
1942
1943	if (weight & METASLAB_WEIGHT_PRIMARY)
1944		active = 'P';
1945	else if (weight & METASLAB_WEIGHT_SECONDARY)
1946		active = 'S';
1947	else
1948		active = '-';
1949	mdb_printf("%6c %8s ", active,
1950	    WEIGHT_IS_SPACEBASED(weight) ? "SPACE" : "SEGMENT");
1951	metaslab_print_weight(weight);
1952	mdb_printf("\n");
1953
1954	return (DCMD_OK);
1955}
1956
1957/* ARGSUSED */
1958static int
1959metaslab_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1960{
1961	mdb_metaslab_alloc_trace_t mat;
1962	mdb_metaslab_group_t mg = { 0 };
1963	char result_type[100];
1964
1965	if (mdb_ctf_vread(&mat, "metaslab_alloc_trace_t",
1966	    "mdb_metaslab_alloc_trace_t", addr, 0) == -1) {
1967		return (DCMD_ERR);
1968	}
1969
1970	if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
1971		mdb_printf("%<u>%6s %6s %8s %11s %11s %18s %18s%</u>\n",
1972		    "MSID", "DVA", "ASIZE", "ALLOCATOR", "WEIGHT", "RESULT",
1973		    "VDEV");
1974	}
1975
1976	if (mat.mat_msp != 0) {
1977		mdb_metaslab_t ms;
1978
1979		if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1980		    mat.mat_msp, 0) == -1) {
1981			return (DCMD_ERR);
1982		}
1983		mdb_printf("%6llu ", ms.ms_id);
1984	} else {
1985		mdb_printf("%6s ", "-");
1986	}
1987
1988	mdb_printf("%6d %8llx %11llx ", mat.mat_dva_id, mat.mat_size,
1989	    mat.mat_allocator);
1990
1991	metaslab_print_weight(mat.mat_weight);
1992
1993	if ((int64_t)mat.mat_offset < 0) {
1994		if (enum_lookup("enum trace_alloc_type", mat.mat_offset,
1995		    "TRACE_", sizeof (result_type), result_type) == -1) {
1996			mdb_warn("Could not find enum for trace_alloc_type");
1997			return (DCMD_ERR);
1998		}
1999		mdb_printf("%18s ", result_type);
2000	} else {
2001		mdb_printf("%<b>%18llx%</b> ", mat.mat_offset);
2002	}
2003
2004	if (mat.mat_mg != 0 &&
2005	    mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t",
2006	    mat.mat_mg, 0) == -1) {
2007		return (DCMD_ERR);
2008	}
2009
2010	if (mg.mg_vd != 0) {
2011		mdb_vdev_t vdev;
2012		char desc[MAXNAMELEN];
2013
2014		if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t",
2015		    mg.mg_vd, 0) == -1) {
2016			return (DCMD_ERR);
2017		}
2018
2019		if (vdev.vdev_path != 0) {
2020			char path[MAXNAMELEN];
2021
2022			if (mdb_readstr(path, sizeof (path),
2023			    vdev.vdev_path) == -1) {
2024				mdb_warn("failed to read vdev_path at %p\n",
2025				    vdev.vdev_path);
2026				return (DCMD_ERR);
2027			}
2028			char *slash;
2029			if ((slash = strrchr(path, '/')) != NULL) {
2030				strcpy(desc, slash + 1);
2031			} else {
2032				strcpy(desc, path);
2033			}
2034		} else if (vdev.vdev_ops != 0) {
2035			mdb_vdev_ops_t ops;
2036			if (mdb_ctf_vread(&ops, "vdev_ops_t", "mdb_vdev_ops_t",
2037			    vdev.vdev_ops, 0) == -1) {
2038				mdb_warn("failed to read vdev_ops at %p\n",
2039				    vdev.vdev_ops);
2040				return (DCMD_ERR);
2041			}
2042			(void) mdb_snprintf(desc, sizeof (desc),
2043			    "%s-%llu", ops.vdev_op_type, vdev.vdev_id);
2044		} else {
2045			(void) strcpy(desc, "<unknown>");
2046		}
2047		mdb_printf("%18s\n", desc);
2048	}
2049
2050	return (DCMD_OK);
2051}
2052
/*
 * State of the metaslab walker, allocated by metaslab_walk_init() and
 * advanced by metaslab_walk_step().
 */
typedef struct metaslab_walk_data {
	uint64_t mw_numvdevs;	/* number of entries in mw_vdevs */
	uintptr_t *mw_vdevs;	/* copy of the root vdev's vdev_child array */
	int mw_curvdev;		/* index into mw_vdevs of the current vdev */
	uint64_t mw_nummss;	/* number of entries in mw_mss */
	/* copy of the current vdev's vdev_ms array; NULL until loaded */
	uintptr_t *mw_mss;
	int mw_curms;		/* index into mw_mss of the next metaslab */
} metaslab_walk_data_t;
2061
2062static int
2063metaslab_walk_step(mdb_walk_state_t *wsp)
2064{
2065	metaslab_walk_data_t *mw = wsp->walk_data;
2066	metaslab_t ms;
2067	uintptr_t msp;
2068
2069	if (mw->mw_curvdev >= mw->mw_numvdevs)
2070		return (WALK_DONE);
2071
2072	if (mw->mw_mss == NULL) {
2073		uintptr_t mssp;
2074		uintptr_t vdevp;
2075
2076		ASSERT(mw->mw_curms == 0);
2077		ASSERT(mw->mw_nummss == 0);
2078
2079		vdevp = mw->mw_vdevs[mw->mw_curvdev];
2080		if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) ||
2081		    GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) {
2082			return (WALK_ERR);
2083		}
2084
2085		mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
2086		    UM_SLEEP | UM_GC);
2087		if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
2088		    mssp) == -1) {
2089			mdb_warn("failed to read vdev_ms at %p", mssp);
2090			return (WALK_ERR);
2091		}
2092	}
2093
2094	if (mw->mw_curms >= mw->mw_nummss) {
2095		mw->mw_mss = NULL;
2096		mw->mw_curms = 0;
2097		mw->mw_nummss = 0;
2098		mw->mw_curvdev++;
2099		return (WALK_NEXT);
2100	}
2101
2102	msp = mw->mw_mss[mw->mw_curms];
2103	if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
2104		mdb_warn("failed to read metaslab_t at %p", msp);
2105		return (WALK_ERR);
2106	}
2107
2108	mw->mw_curms++;
2109
2110	return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
2111}
2112
2113static int
2114metaslab_walk_init(mdb_walk_state_t *wsp)
2115{
2116	metaslab_walk_data_t *mw;
2117	uintptr_t root_vdevp;
2118	uintptr_t childp;
2119
2120	if (wsp->walk_addr == 0) {
2121		mdb_warn("must supply address of spa_t\n");
2122		return (WALK_ERR);
2123	}
2124
2125	mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
2126
2127	if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) ||
2128	    GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) ||
2129	    GETMEMB(root_vdevp, "vdev", vdev_child, childp)) {
2130		return (DCMD_ERR);
2131	}
2132
2133	mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
2134	    UM_SLEEP | UM_GC);
2135	if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
2136	    childp) == -1) {
2137		mdb_warn("failed to read root vdev children at %p", childp);
2138		return (DCMD_ERR);
2139	}
2140
2141	wsp->walk_data = mw;
2142
2143	return (WALK_NEXT);
2144}
2145
/*
 * Members of the target's spa structure, read through mdb_ctf_vread().
 * Both are target addresses.
 */
typedef struct mdb_spa {
	uintptr_t spa_dsl_pool;		/* read as a dsl_pool by spa_space() */
	uintptr_t spa_root_vdev;
} mdb_spa_t;
2150
/*
 * Member of the target's dsl_pool structure, read through mdb_ctf_vread().
 */
typedef struct mdb_dsl_pool {
	/* target address, read as a dsl_dir by spa_space() */
	uintptr_t dp_root_dir;
} mdb_dsl_pool_t;
2154
/*
 * Members of the target's dsl_dir structure, read through mdb_ctf_vread().
 */
typedef struct mdb_dsl_dir {
	/* target address, read as a dmu_buf_impl by spa_space() */
	uintptr_t dd_dbuf;
	/* per-txg values printed by spa_space() */
	int64_t dd_space_towrite[TXG_SIZE];
} mdb_dsl_dir_t;
2159
/*
 * Members of the target's dsl_dir_phys structure, read through
 * mdb_ctf_vread() and printed by spa_space().
 */
typedef struct mdb_dsl_dir_phys {
	uint64_t dd_used_bytes;
	uint64_t dd_compressed_bytes;
	uint64_t dd_uncompressed_bytes;
} mdb_dsl_dir_phys_t;
2165
/*
 * Totals accumulated by space_cb() over every metaslab it is called for.
 * Each ms_* member other than ms_deferspace is the sum of rt_space of the
 * metaslab range tree of the same name.
 */
typedef struct space_data {
	uint64_t ms_allocating[TXG_SIZE];
	uint64_t ms_checkpointing;
	uint64_t ms_freeing;
	uint64_t ms_freed;
	uint64_t ms_unflushed_frees;
	uint64_t ms_unflushed_allocs;
	uint64_t ms_allocatable;
	int64_t ms_deferspace;		/* sum of the metaslabs' ms_deferspace */
	/* sum of sm_size - smp_alloc + unflushed frees - unflushed allocs */
	uint64_t avail;
} space_data_t;
2177
2178/* ARGSUSED */
2179static int
2180space_cb(uintptr_t addr, const void *unknown, void *arg)
2181{
2182	space_data_t *sd = arg;
2183	mdb_metaslab_t ms;
2184	mdb_range_tree_t rt;
2185	mdb_space_map_t sm = { 0 };
2186	mdb_space_map_phys_t smp = { 0 };
2187	uint64_t uallocs, ufrees;
2188	int i;
2189
2190	if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
2191	    addr, 0) == -1)
2192		return (WALK_ERR);
2193
2194	for (i = 0; i < TXG_SIZE; i++) {
2195		if (mdb_ctf_vread(&rt, "range_tree_t",
2196		    "mdb_range_tree_t", ms.ms_allocating[i], 0) == -1)
2197			return (WALK_ERR);
2198		sd->ms_allocating[i] += rt.rt_space;
2199	}
2200
2201	if (mdb_ctf_vread(&rt, "range_tree_t",
2202	    "mdb_range_tree_t", ms.ms_checkpointing, 0) == -1)
2203		return (WALK_ERR);
2204	sd->ms_checkpointing += rt.rt_space;
2205
2206	if (mdb_ctf_vread(&rt, "range_tree_t",
2207	    "mdb_range_tree_t", ms.ms_freeing, 0) == -1)
2208		return (WALK_ERR);
2209	sd->ms_freeing += rt.rt_space;
2210
2211	if (mdb_ctf_vread(&rt, "range_tree_t",
2212	    "mdb_range_tree_t", ms.ms_freed, 0) == -1)
2213		return (WALK_ERR);
2214	sd->ms_freed += rt.rt_space;
2215
2216	if (mdb_ctf_vread(&rt, "range_tree_t",
2217	    "mdb_range_tree_t", ms.ms_allocatable, 0) == -1)
2218		return (WALK_ERR);
2219	sd->ms_allocatable += rt.rt_space;
2220
2221	if (mdb_ctf_vread(&rt, "range_tree_t",
2222	    "mdb_range_tree_t", ms.ms_unflushed_frees, 0) == -1)
2223		return (WALK_ERR);
2224	sd->ms_unflushed_frees += rt.rt_space;
2225	ufrees = rt.rt_space;
2226
2227	if (mdb_ctf_vread(&rt, "range_tree_t",
2228	    "mdb_range_tree_t", ms.ms_unflushed_allocs, 0) == -1)
2229		return (WALK_ERR);
2230	sd->ms_unflushed_allocs += rt.rt_space;
2231	uallocs = rt.rt_space;
2232
2233	if (ms.ms_sm != 0 &&
2234	    mdb_ctf_vread(&sm, "space_map_t",
2235	    "mdb_space_map_t", ms.ms_sm, 0) == -1)
2236		return (WALK_ERR);
2237
2238	if (sm.sm_phys != 0) {
2239		(void) mdb_ctf_vread(&smp, "space_map_phys_t",
2240		    "mdb_space_map_phys_t", sm.sm_phys, 0);
2241	}
2242
2243	sd->ms_deferspace += ms.ms_deferspace;
2244	sd->avail += sm.sm_size - smp.smp_alloc + ufrees - uallocs;
2245
2246	return (WALK_NEXT);
2247}
2248
2249/*
2250 * ::spa_space [-b]
2251 *
 * Given a spa_t, print out its on-disk space usage and in-core
2253 * estimates of future usage.  If -b is given, print space in bytes.
2254 * Otherwise print in megabytes.
2255 */
2256/* ARGSUSED */
2257static int
2258spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2259{
2260	mdb_spa_t spa;
2261	mdb_dsl_pool_t dp;
2262	mdb_dsl_dir_t dd;
2263	mdb_dmu_buf_impl_t db;
2264	mdb_dsl_dir_phys_t dsp;
2265	space_data_t sd;
2266	int shift = 20;
2267	char *suffix = "M";
2268	int bytes = B_FALSE;
2269
2270	if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) !=
2271	    argc)
2272		return (DCMD_USAGE);
2273	if (!(flags & DCMD_ADDRSPEC))
2274		return (DCMD_USAGE);
2275
2276	if (bytes) {
2277		shift = 0;
2278		suffix = "";
2279	}
2280
2281	if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_t",
2282	    addr, 0) == -1 ||
2283	    mdb_ctf_vread(&dp, ZFS_STRUCT "dsl_pool", "mdb_dsl_pool_t",
2284	    spa.spa_dsl_pool, 0) == -1 ||
2285	    mdb_ctf_vread(&dd, ZFS_STRUCT "dsl_dir", "mdb_dsl_dir_t",
2286	    dp.dp_root_dir, 0) == -1 ||
2287	    mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t",
2288	    dd.dd_dbuf, 0) == -1 ||
2289	    mdb_ctf_vread(&dsp, ZFS_STRUCT "dsl_dir_phys",
2290	    "mdb_dsl_dir_phys_t", db.db.db_data, 0) == -1) {
2291		return (DCMD_ERR);
2292	}
2293
2294	mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
2295	    dd.dd_space_towrite[0] >> shift, suffix,
2296	    dd.dd_space_towrite[1] >> shift, suffix,
2297	    dd.dd_space_towrite[2] >> shift, suffix,
2298	    dd.dd_space_towrite[3] >> shift, suffix);
2299
2300	mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
2301	    dsp.dd_used_bytes >> shift, suffix);
2302	mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
2303	    dsp.dd_compressed_bytes >> shift, suffix);
2304	mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
2305	    dsp.dd_uncompressed_bytes >> shift, suffix);
2306
2307	bzero(&sd, sizeof (sd));
2308	if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
2309		mdb_warn("can't walk metaslabs");
2310		return (DCMD_ERR);
2311	}
2312
2313	mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
2314	    sd.ms_allocating[0] >> shift, suffix,
2315	    sd.ms_allocating[1] >> shift, suffix,
2316	    sd.ms_allocating[2] >> shift, suffix,
2317	    sd.ms_allocating[3] >> shift, suffix);
2318	mdb_printf("ms_checkpointing = %llu%s\n",
2319	    sd.ms_checkpointing >> shift, suffix);
2320	mdb_printf("ms_freeing = %llu%s\n",
2321	    sd.ms_freeing >> shift, suffix);
2322	mdb_printf("ms_freed = %llu%s\n",
2323	    sd.ms_freed >> shift, suffix);
2324	mdb_printf("ms_unflushed_frees = %llu%s\n",
2325	    sd.ms_unflushed_frees >> shift, suffix);
2326	mdb_printf("ms_unflushed_allocs = %llu%s\n",
2327	    sd.ms_unflushed_allocs >> shift, suffix);
2328	mdb_printf("ms_allocatable = %llu%s\n",
2329	    sd.ms_allocatable >> shift, suffix);
2330	mdb_printf("ms_deferspace = %llu%s\n",
2331	    sd.ms_deferspace >> shift, suffix);
2332	mdb_printf("current avail = %llu%s\n",
2333	    sd.avail >> shift, suffix);
2334
2335	return (DCMD_OK);
2336}
2337
/*
 * Description of one set of auxiliary vdevs, as consumed by
 * spa_print_aux(): sav_vdevs is the target address of an array of
 * sav_count vdev pointers.
 */
typedef struct mdb_spa_aux_vdev {
	int sav_count;
	uintptr_t sav_vdevs;
} mdb_spa_aux_vdev_t;

/*
 * The members of the target's spa_t that ::spa_vdevs needs; filled in by
 * mdb_ctf_vread() against "spa_t".
 */
typedef struct mdb_spa_vdevs {
	uintptr_t spa_root_vdev;	/* 0 if the spa has no root vdev */
	mdb_spa_aux_vdev_t spa_l2cache;	/* printed under "cache" */
	mdb_spa_aux_vdev_t spa_spares;	/* printed under "spares" */
} mdb_spa_vdevs_t;
2348
2349static int
2350spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
2351    const char *name)
2352{
2353	uintptr_t *aux;
2354	size_t len;
2355	int ret, i;
2356
2357	/*
2358	 * Iterate over aux vdevs and print those out as well.  This is a
2359	 * little annoying because we don't have a root vdev to pass to ::vdev.
2360	 * Instead, we print a single line and then call it for each child
2361	 * vdev.
2362	 */
2363	if (sav->sav_count != 0) {
2364		v[1].a_type = MDB_TYPE_STRING;
2365		v[1].a_un.a_str = "-d";
2366		v[2].a_type = MDB_TYPE_IMMEDIATE;
2367		v[2].a_un.a_val = 2;
2368
2369		len = sav->sav_count * sizeof (uintptr_t);
2370		aux = mdb_alloc(len, UM_SLEEP);
2371		if (mdb_vread(aux, len, sav->sav_vdevs) == -1) {
2372			mdb_free(aux, len);
2373			mdb_warn("failed to read l2cache vdevs at %p",
2374			    sav->sav_vdevs);
2375			return (DCMD_ERR);
2376		}
2377
2378		mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
2379
2380		for (i = 0; i < sav->sav_count; i++) {
2381			ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
2382			if (ret != DCMD_OK) {
2383				mdb_free(aux, len);
2384				return (ret);
2385			}
2386		}
2387
2388		mdb_free(aux, len);
2389	}
2390
2391	return (0);
2392}
2393
2394/*
2395 * ::spa_vdevs
2396 *
2397 *	-e	Include error stats
2398 *	-m	Include metaslab information
2399 *	-M	Include metaslab group information
2400 *	-h	Include histogram information (requires -m or -M)
2401 *
2402 * Print out a summarized list of vdevs for the given spa_t.
2403 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as
2404 * iterating over the cache devices.
2405 */
2406/* ARGSUSED */
2407static int
2408spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2409{
2410	mdb_arg_t v[3];
2411	int ret;
2412	char opts[100] = "-r";
2413	int spa_flags = 0;
2414
2415	if (mdb_getopts(argc, argv,
2416	    'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
2417	    'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
2418	    'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
2419	    'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
2420	    NULL) != argc)
2421		return (DCMD_USAGE);
2422
2423	if (!(flags & DCMD_ADDRSPEC))
2424		return (DCMD_USAGE);
2425
2426	mdb_spa_vdevs_t spa;
2427	if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1)
2428		return (DCMD_ERR);
2429
2430	/*
2431	 * Unitialized spa_t structures can have a NULL root vdev.
2432	 */
2433	if (spa.spa_root_vdev == 0) {
2434		mdb_printf("no associated vdevs\n");
2435		return (DCMD_OK);
2436	}
2437
2438	if (spa_flags & SPA_FLAG_ERRORS)
2439		strcat(opts, "e");
2440	if (spa_flags & SPA_FLAG_METASLABS)
2441		strcat(opts, "m");
2442	if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
2443		strcat(opts, "M");
2444	if (spa_flags & SPA_FLAG_HISTOGRAMS)
2445		strcat(opts, "h");
2446
2447	v[0].a_type = MDB_TYPE_STRING;
2448	v[0].a_un.a_str = opts;
2449
2450	ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
2451	    flags, 1, v);
2452	if (ret != DCMD_OK)
2453		return (ret);
2454
2455	if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
2456	    spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
2457		return (DCMD_ERR);
2458
2459	return (DCMD_OK);
2460}
2461
2462/*
2463 * ::zio
2464 *
2465 * Print a summary of zio_t and all its children.  This is intended to display a
2466 * zio tree, and hence we only pick the most important pieces of information for
2467 * the main summary.  More detailed information can always be found by doing a
2468 * '::print zio' on the underlying zio_t.  The columns we display are:
2469 *
2470 *	ADDRESS  TYPE  STAGE  WAITER  TIME_ELAPSED
2471 *
2472 * The 'address' column is indented by one space for each depth level as we
2473 * descend down the tree.
2474 */
2475
2476#define	ZIO_MAXINDENT	7
2477#define	ZIO_MAXWIDTH	(sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
2478#define	ZIO_WALK_SELF	0
2479#define	ZIO_WALK_CHILD	1
2480#define	ZIO_WALK_PARENT	2
2481
/*
 * State shared by zio_print() and the recursive zio_print_cb() /
 * zio_child_cb() pair while a zio tree is being printed.
 */
typedef struct zio_print_args {
	int	zpa_current_depth;	/* depth of the zio being visited */
	int	zpa_min_depth;		/* shallowest depth that is printed */
	int	zpa_max_depth;		/* recursion stops at this depth */
	int	zpa_type;		/* ZIO_WALK_SELF, _CHILD or _PARENT */
	uint_t	zpa_flags;		/* dcmd flags (DCMD_PIPE_OUT is used) */
} zio_print_args_t;
2489
/*
 * The members of the target's zio that this module reads; filled in by
 * mdb_ctf_vread() against ZFS_STRUCT "zio".
 */
typedef struct mdb_zio {
	enum zio_type io_type;		/* printed via "enum zio_type" names */
	enum zio_stage io_stage;	/* printed via "enum zio_stage" names */
	uintptr_t io_waiter;		/* printed as WAITER, "-" when 0 */
	uintptr_t io_spa;		/* compared against the spa filter */
	struct {
		struct {
			/* compared to the head's own address for emptiness */
			uintptr_t list_next;
		} list_head;
	} io_parent_list;
	int io_error;			/* non-zero prints stage "FAILED" */
} mdb_zio_t;
2502
/*
 * Read separately from mdb_zio_t, with MDB_CTF_VREAD_QUIET and its result
 * ignored, so a failure to read io_timestamp leaves it at 0 without
 * affecting the rest of ::zio.
 */
typedef struct mdb_zio_timestamp {
	hrtime_t io_timestamp;
} mdb_zio_timestamp_t;
2506
2507static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
2508
2509static int
2510zio_print_cb(uintptr_t addr, zio_print_args_t *zpa)
2511{
2512	mdb_ctf_id_t type_enum, stage_enum;
2513	int indent = zpa->zpa_current_depth;
2514	const char *type, *stage;
2515	uintptr_t laddr;
2516	mdb_zio_t zio;
2517	mdb_zio_timestamp_t zio_timestamp = { 0 };
2518
2519	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1)
2520		return (WALK_ERR);
2521	(void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio",
2522	    "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET);
2523
2524	if (indent > ZIO_MAXINDENT)
2525		indent = ZIO_MAXINDENT;
2526
2527	if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
2528	    mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
2529		mdb_warn("failed to lookup zio enums");
2530		return (WALK_ERR);
2531	}
2532
2533	if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL)
2534		type += sizeof ("ZIO_TYPE_") - 1;
2535	else
2536		type = "?";
2537
2538	if (zio.io_error == 0) {
2539		stage = mdb_ctf_enum_name(stage_enum, zio.io_stage);
2540		if (stage != NULL)
2541			stage += sizeof ("ZIO_STAGE_") - 1;
2542		else
2543			stage = "?";
2544	} else {
2545		stage = "FAILED";
2546	}
2547
2548	if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
2549		if (zpa->zpa_flags & DCMD_PIPE_OUT) {
2550			mdb_printf("%?p\n", addr);
2551		} else {
2552			mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
2553			    ZIO_MAXWIDTH - indent, addr, type, stage);
2554			if (zio.io_waiter != 0)
2555				mdb_printf("%-16lx ", zio.io_waiter);
2556			else
2557				mdb_printf("%-16s ", "-");
2558#ifdef _KERNEL
2559			if (zio_timestamp.io_timestamp != 0) {
2560				mdb_printf("%llums", (mdb_gethrtime() -
2561				    zio_timestamp.io_timestamp) /
2562				    1000000);
2563			} else {
2564				mdb_printf("%-12s ", "-");
2565			}
2566#else
2567			mdb_printf("%-12s ", "-");
2568#endif
2569			mdb_printf("\n");
2570		}
2571	}
2572
2573	if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
2574		return (WALK_NEXT);
2575
2576	if (zpa->zpa_type == ZIO_WALK_PARENT)
2577		laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2578		    "io_parent_list");
2579	else
2580		laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2581		    "io_child_list");
2582
2583	zpa->zpa_current_depth++;
2584	if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
2585		mdb_warn("failed to walk zio_t children at %p\n", laddr);
2586		return (WALK_ERR);
2587	}
2588	zpa->zpa_current_depth--;
2589
2590	return (WALK_NEXT);
2591}
2592
2593/* ARGSUSED */
2594static int
2595zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
2596{
2597	zio_link_t zl;
2598	uintptr_t ziop;
2599	zio_print_args_t *zpa = arg;
2600
2601	if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
2602		mdb_warn("failed to read zio_link_t at %p", addr);
2603		return (WALK_ERR);
2604	}
2605
2606	if (zpa->zpa_type == ZIO_WALK_PARENT)
2607		ziop = (uintptr_t)zl.zl_parent;
2608	else
2609		ziop = (uintptr_t)zl.zl_child;
2610
2611	return (zio_print_cb(ziop, zpa));
2612}
2613
2614/* ARGSUSED */
2615static int
2616zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2617{
2618	zio_print_args_t zpa = { 0 };
2619
2620	if (!(flags & DCMD_ADDRSPEC))
2621		return (DCMD_USAGE);
2622
2623	if (mdb_getopts(argc, argv,
2624	    'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
2625	    'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
2626	    'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
2627	    NULL) != argc)
2628		return (DCMD_USAGE);
2629
2630	zpa.zpa_flags = flags;
2631	if (zpa.zpa_max_depth != 0) {
2632		if (zpa.zpa_type == ZIO_WALK_SELF)
2633			zpa.zpa_type = ZIO_WALK_CHILD;
2634	} else if (zpa.zpa_type != ZIO_WALK_SELF) {
2635		zpa.zpa_min_depth = 1;
2636		zpa.zpa_max_depth = 1;
2637	}
2638
2639	if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
2640		mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n",
2641		    ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER",
2642		    "TIME_ELAPSED");
2643	}
2644
2645	if (zio_print_cb(addr, &zpa) != WALK_NEXT)
2646		return (DCMD_ERR);
2647
2648	return (DCMD_OK);
2649}
2650
2651/*
2652 * [addr]::zio_state
2653 *
2654 * Print a summary of all zio_t structures on the system, or for a particular
2655 * pool.  This is equivalent to '::walk zio_root | ::zio'.
2656 */
2657/*ARGSUSED*/
2658static int
2659zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2660{
2661	/*
2662	 * MDB will remember the last address of the pipeline, so if we don't
2663	 * zero this we'll end up trying to walk zio structures for a
2664	 * non-existent spa_t.
2665	 */
2666	if (!(flags & DCMD_ADDRSPEC))
2667		addr = 0;
2668
2669	return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
2670}
2671
/*
 * The members of the target's multilist_t used by the multilist walker;
 * filled in by mdb_ctf_vread() against "multilist_t".
 */
typedef struct mdb_multilist {
	uint64_t ml_num_sublists;	/* number of sublists in the array */
	uintptr_t ml_sublists;		/* target address of the array */
} mdb_multilist_t;

/* Per-walk state for the multilist walker. */
typedef struct multilist_walk_data {
	uint64_t mwd_idx;		/* index of the next sublist to walk */
	mdb_multilist_t mwd_ml;		/* snapshot taken at walk init */
} multilist_walk_data_t;
2681
/*
 * "list" walker callback used by multilist_walk_step(): print the address
 * of each element and keep walking.
 */
/* ARGSUSED */
static int
multilist_print_cb(uintptr_t addr, const void *unknown, void *arg)
{
	mdb_printf("%#lr\n", addr);
	return (WALK_NEXT);
}
2689
2690static int
2691multilist_walk_step(mdb_walk_state_t *wsp)
2692{
2693	multilist_walk_data_t *mwd = wsp->walk_data;
2694
2695	if (mwd->mwd_idx >= mwd->mwd_ml.ml_num_sublists)
2696		return (WALK_DONE);
2697
2698	wsp->walk_addr = mwd->mwd_ml.ml_sublists +
2699	    mdb_ctf_sizeof_by_name("multilist_sublist_t") * mwd->mwd_idx +
2700	    mdb_ctf_offsetof_by_name("multilist_sublist_t", "mls_list");
2701
2702	mdb_pwalk("list", multilist_print_cb, (void*)NULL, wsp->walk_addr);
2703	mwd->mwd_idx++;
2704
2705	return (WALK_NEXT);
2706}
2707
2708static int
2709multilist_walk_init(mdb_walk_state_t *wsp)
2710{
2711	multilist_walk_data_t *mwd;
2712
2713	if (wsp->walk_addr == 0) {
2714		mdb_warn("must supply address of multilist_t\n");
2715		return (WALK_ERR);
2716	}
2717
2718	mwd = mdb_zalloc(sizeof (multilist_walk_data_t), UM_SLEEP | UM_GC);
2719	if (mdb_ctf_vread(&mwd->mwd_ml, "multilist_t", "mdb_multilist_t",
2720	    wsp->walk_addr, 0) == -1) {
2721		return (WALK_ERR);
2722	}
2723
2724	if (mwd->mwd_ml.ml_num_sublists == 0 ||
2725	    mwd->mwd_ml.ml_sublists == 0) {
2726		mdb_warn("invalid or uninitialized multilist at %#lx\n",
2727		    wsp->walk_addr);
2728		return (WALK_ERR);
2729	}
2730
2731	wsp->walk_data = mwd;
2732	return (WALK_NEXT);
2733}
2734
/*
 * The members of the target's txg_list_t used by the txg_list walkers;
 * filled in by mdb_ctf_vread() against "txg_list_t".
 */
typedef struct mdb_txg_list {
	/* offset of the txg_node_t within each listed object */
	size_t		tl_offset;
	/* first node address for each of the TXG_SIZE lists */
	uintptr_t	tl_head[TXG_SIZE];
} mdb_txg_list_t;

/* Per-walk state for the txg_list walkers. */
typedef struct txg_list_walk_data {
	uintptr_t lw_head[TXG_SIZE];	/* copy of tl_head */
	int	lw_txgoff;		/* list index currently being walked */
	int	lw_maxoff;		/* last list index to walk */
	size_t	lw_offset;		/* copy of tl_offset */
	/* buffer of lw_offset + sizeof (txg_node_t) bytes for one object */
	void	*lw_obj;
} txg_list_walk_data_t;
2747
2748static int
2749txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
2750{
2751	txg_list_walk_data_t *lwd;
2752	mdb_txg_list_t list;
2753	int i;
2754
2755	lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
2756	if (mdb_ctf_vread(&list, "txg_list_t", "mdb_txg_list_t", wsp->walk_addr,
2757	    0) == -1) {
2758		mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
2759		return (WALK_ERR);
2760	}
2761
2762	for (i = 0; i < TXG_SIZE; i++)
2763		lwd->lw_head[i] = list.tl_head[i];
2764	lwd->lw_offset = list.tl_offset;
2765	lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
2766	    UM_SLEEP | UM_GC);
2767	lwd->lw_txgoff = txg;
2768	lwd->lw_maxoff = maxoff;
2769
2770	wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2771	wsp->walk_data = lwd;
2772
2773	return (WALK_NEXT);
2774}
2775
2776static int
2777txg_list_walk_init(mdb_walk_state_t *wsp)
2778{
2779	return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
2780}
2781
2782static int
2783txg_list0_walk_init(mdb_walk_state_t *wsp)
2784{
2785	return (txg_list_walk_init_common(wsp, 0, 0));
2786}
2787
2788static int
2789txg_list1_walk_init(mdb_walk_state_t *wsp)
2790{
2791	return (txg_list_walk_init_common(wsp, 1, 1));
2792}
2793
2794static int
2795txg_list2_walk_init(mdb_walk_state_t *wsp)
2796{
2797	return (txg_list_walk_init_common(wsp, 2, 2));
2798}
2799
2800static int
2801txg_list3_walk_init(mdb_walk_state_t *wsp)
2802{
2803	return (txg_list_walk_init_common(wsp, 3, 3));
2804}
2805
2806static int
2807txg_list_walk_step(mdb_walk_state_t *wsp)
2808{
2809	txg_list_walk_data_t *lwd = wsp->walk_data;
2810	uintptr_t addr;
2811	txg_node_t *node;
2812	int status;
2813
2814	while (wsp->walk_addr == 0 && lwd->lw_txgoff < lwd->lw_maxoff) {
2815		lwd->lw_txgoff++;
2816		wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2817	}
2818
2819	if (wsp->walk_addr == 0)
2820		return (WALK_DONE);
2821
2822	addr = wsp->walk_addr - lwd->lw_offset;
2823
2824	if (mdb_vread(lwd->lw_obj,
2825	    lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
2826		mdb_warn("failed to read list element at %#lx", addr);
2827		return (WALK_ERR);
2828	}
2829
2830	status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
2831	node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
2832	wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
2833
2834	return (status);
2835}
2836
2837/*
2838 * ::walk spa
2839 *
2840 * Walk all named spa_t structures in the namespace.  This is nothing more than
2841 * a layered avl walk.
2842 */
2843static int
2844spa_walk_init(mdb_walk_state_t *wsp)
2845{
2846	GElf_Sym sym;
2847
2848	if (wsp->walk_addr != 0) {
2849		mdb_warn("spa walk only supports global walks\n");
2850		return (WALK_ERR);
2851	}
2852
2853	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
2854		mdb_warn("failed to find symbol 'spa_namespace_avl'");
2855		return (WALK_ERR);
2856	}
2857
2858	wsp->walk_addr = (uintptr_t)sym.st_value;
2859
2860	if (mdb_layered_walk("avl", wsp) == -1) {
2861		mdb_warn("failed to walk 'avl'\n");
2862		return (WALK_ERR);
2863	}
2864
2865	return (WALK_NEXT);
2866}
2867
2868static int
2869spa_walk_step(mdb_walk_state_t *wsp)
2870{
2871	return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata));
2872}
2873
2874/*
2875 * [addr]::walk zio
2876 *
2877 * Walk all active zio_t structures on the system.  This is simply a layered
2878 * walk on top of ::walk zio_cache, with the optional ability to limit the
2879 * structures to a particular pool.
2880 */
2881static int
2882zio_walk_init(mdb_walk_state_t *wsp)
2883{
2884	wsp->walk_data = (void *)wsp->walk_addr;
2885
2886	if (mdb_layered_walk("zio_cache", wsp) == -1) {
2887		mdb_warn("failed to walk 'zio_cache'\n");
2888		return (WALK_ERR);
2889	}
2890
2891	return (WALK_NEXT);
2892}
2893
2894static int
2895zio_walk_step(mdb_walk_state_t *wsp)
2896{
2897	mdb_zio_t zio;
2898	uintptr_t spa = (uintptr_t)wsp->walk_data;
2899
2900	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
2901	    wsp->walk_addr, 0) == -1)
2902		return (WALK_ERR);
2903
2904	if (spa != 0 && spa != zio.io_spa)
2905		return (WALK_NEXT);
2906
2907	return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
2908}
2909
2910/*
2911 * [addr]::walk zio_root
2912 *
2913 * Walk only root zio_t structures, optionally for a particular spa_t.
2914 */
2915static int
2916zio_walk_root_step(mdb_walk_state_t *wsp)
2917{
2918	mdb_zio_t zio;
2919	uintptr_t spa = (uintptr_t)wsp->walk_data;
2920
2921	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
2922	    wsp->walk_addr, 0) == -1)
2923		return (WALK_ERR);
2924
2925	if (spa != 0 && spa != zio.io_spa)
2926		return (WALK_NEXT);
2927
2928	/* If the parent list is not empty, ignore */
2929	if (zio.io_parent_list.list_head.list_next !=
2930	    wsp->walk_addr +
2931	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") +
2932	    mdb_ctf_offsetof_by_name("struct list", "list_head"))
2933		return (WALK_NEXT);
2934
2935	return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
2936}
2937
2938/*
2939 * ::zfs_blkstats
2940 *
2941 *	-v	print verbose per-level information
2942 *
2943 */
2944static int
2945zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2946{
2947	boolean_t verbose = B_FALSE;
2948	zfs_all_blkstats_t stats;
2949	dmu_object_type_t t;
2950	zfs_blkstat_t *tzb;
2951	uint64_t ditto;
2952
2953	if (mdb_getopts(argc, argv,
2954	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2955	    NULL) != argc)
2956		return (DCMD_USAGE);
2957
2958	if (!(flags & DCMD_ADDRSPEC))
2959		return (DCMD_USAGE);
2960
2961	if (GETMEMB(addr, "spa", spa_dsl_pool, addr) ||
2962	    GETMEMB(addr, "dsl_pool", dp_blkstats, addr) ||
2963	    mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
2964		mdb_warn("failed to read data at %p;", addr);
2965		mdb_printf("maybe no stats? run \"zpool scrub\" first.");
2966		return (DCMD_ERR);
2967	}
2968
2969	tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL];
2970	if (tzb->zb_gangs != 0) {
2971		mdb_printf("Ganged blocks: %llu\n",
2972		    (longlong_t)tzb->zb_gangs);
2973	}
2974
2975	ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
2976	    tzb->zb_ditto_3_of_3_samevdev;
2977	if (ditto != 0) {
2978		mdb_printf("Dittoed blocks on same vdev: %llu\n",
2979		    (longlong_t)ditto);
2980	}
2981
2982	mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2983	    "\t  avg\t comp\t%%Total\tType\n");
2984
2985	for (t = 0; t <= DMU_OT_TOTAL; t++) {
2986		char csize[MDB_NICENUM_BUFLEN], lsize[MDB_NICENUM_BUFLEN];
2987		char psize[MDB_NICENUM_BUFLEN], asize[MDB_NICENUM_BUFLEN];
2988		char avg[MDB_NICENUM_BUFLEN];
2989		char comp[MDB_NICENUM_BUFLEN], pct[MDB_NICENUM_BUFLEN];
2990		char typename[64];
2991		int l;
2992
2993
2994		if (t == DMU_OT_DEFERRED)
2995			strcpy(typename, "deferred free");
2996		else if (t == DMU_OT_OTHER)
2997			strcpy(typename, "other");
2998		else if (t == DMU_OT_TOTAL)
2999			strcpy(typename, "Total");
3000		else if (enum_lookup("enum dmu_object_type",
3001		    t, "DMU_OT_", sizeof (typename), typename) == -1) {
3002			mdb_warn("failed to read type name");
3003			return (DCMD_ERR);
3004		}
3005
3006		if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
3007			continue;
3008
3009		for (l = -1; l < DN_MAX_LEVELS; l++) {
3010			int level = (l == -1 ? DN_MAX_LEVELS : l);
3011			zfs_blkstat_t *zb = &stats.zab_type[level][t];
3012
3013			if (zb->zb_asize == 0)
3014				continue;
3015
3016			/*
3017			 * Don't print each level unless requested.
3018			 */
3019			if (!verbose && level != DN_MAX_LEVELS)
3020				continue;
3021
3022			/*
3023			 * If all the space is level 0, don't print the
3024			 * level 0 separately.
3025			 */
3026			if (level == 0 && zb->zb_asize ==
3027			    stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
3028				continue;
3029
3030			mdb_nicenum(zb->zb_count, csize);
3031			mdb_nicenum(zb->zb_lsize, lsize);
3032			mdb_nicenum(zb->zb_psize, psize);
3033			mdb_nicenum(zb->zb_asize, asize);
3034			mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
3035			(void) mdb_snprintfrac(comp, MDB_NICENUM_BUFLEN,
3036			    zb->zb_lsize, zb->zb_psize, 2);
3037			(void) mdb_snprintfrac(pct, MDB_NICENUM_BUFLEN,
3038			    100 * zb->zb_asize, tzb->zb_asize, 2);
3039
3040			mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
3041			    "\t%5s\t%6s\t",
3042			    csize, lsize, psize, asize, avg, comp, pct);
3043
3044			if (level == DN_MAX_LEVELS)
3045				mdb_printf("%s\n", typename);
3046			else
3047				mdb_printf("  L%d %s\n",
3048				    level, typename);
3049		}
3050	}
3051
3052	return (DCMD_OK);
3053}
3054
/*
 * The members of the target's reference_t printed by reference_cb();
 * filled in by mdb_ctf_vread() against "reference_t".
 */
typedef struct mdb_reference {
	uintptr_t ref_holder;	/* tag; also tried as a printable string */
	uintptr_t ref_removed;	/* passed to ::whatis for removed refs */
	uint64_t ref_number;	/* count, printed when it is not 1 */
} mdb_reference_t;
3060
3061/* ARGSUSED */
3062static int
3063reference_cb(uintptr_t addr, const void *ignored, void *arg)
3064{
3065	mdb_reference_t ref;
3066	boolean_t holder_is_str = B_FALSE;
3067	char holder_str[128];
3068	boolean_t removed = (boolean_t)arg;
3069
3070	if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr,
3071	    0) == -1)
3072		return (DCMD_ERR);
3073
3074	if (mdb_readstr(holder_str, sizeof (holder_str),
3075	    ref.ref_holder) != -1)
3076		holder_is_str = strisprint(holder_str);
3077
3078	if (removed)
3079		mdb_printf("removed ");
3080	mdb_printf("reference ");
3081	if (ref.ref_number != 1)
3082		mdb_printf("with count=%llu ", ref.ref_number);
3083	mdb_printf("with tag %lx", ref.ref_holder);
3084	if (holder_is_str)
3085		mdb_printf(" \"%s\"", holder_str);
3086	mdb_printf(", held at:\n");
3087
3088	(void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
3089
3090	if (removed) {
3091		mdb_printf("removed at:\n");
3092		(void) mdb_call_dcmd("whatis", ref.ref_removed,
3093		    DCMD_ADDRSPEC, 0, NULL);
3094	}
3095
3096	mdb_printf("\n");
3097
3098	return (WALK_NEXT);
3099}
3100
/*
 * zfs_refcount() reads the target's zfs_refcount_t through three separate
 * views so that the absence of one member does not prevent reading the
 * others.  rc_count is required.
 */
typedef struct mdb_zfs_refcount {
	uint64_t rc_count;
} mdb_zfs_refcount_t;

/* Read quietly; if this fails the refcount is reported as untracked. */
typedef struct mdb_zfs_refcount_removed {
	uint64_t rc_removed_count;
} mdb_zfs_refcount_removed_t;

/* Read quietly; if this fails rc_tracked is assumed to be B_TRUE. */
typedef struct mdb_zfs_refcount_tracked {
	boolean_t rc_tracked;
} mdb_zfs_refcount_tracked_t;
3112
3113/* ARGSUSED */
3114static int
3115zfs_refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3116{
3117	mdb_zfs_refcount_t rc;
3118	mdb_zfs_refcount_removed_t rcr;
3119	mdb_zfs_refcount_tracked_t rct;
3120	int off;
3121	boolean_t released = B_FALSE;
3122
3123	if (!(flags & DCMD_ADDRSPEC))
3124		return (DCMD_USAGE);
3125
3126	if (mdb_getopts(argc, argv,
3127	    'r', MDB_OPT_SETBITS, B_TRUE, &released,
3128	    NULL) != argc)
3129		return (DCMD_USAGE);
3130
3131	if (mdb_ctf_vread(&rc, "zfs_refcount_t", "mdb_zfs_refcount_t", addr,
3132	    0) == -1)
3133		return (DCMD_ERR);
3134
3135	if (mdb_ctf_vread(&rcr, "zfs_refcount_t", "mdb_zfs_refcount_removed_t",
3136	    addr, MDB_CTF_VREAD_QUIET) == -1) {
3137		mdb_printf("zfs_refcount_t at %p has %llu holds (untracked)\n",
3138		    addr, (longlong_t)rc.rc_count);
3139		return (DCMD_OK);
3140	}
3141
3142	if (mdb_ctf_vread(&rct, "zfs_refcount_t", "mdb_zfs_refcount_tracked_t",
3143	    addr, MDB_CTF_VREAD_QUIET) == -1) {
3144		/* If this is an old target, it might be tracked. */
3145		rct.rc_tracked = B_TRUE;
3146	}
3147
3148	mdb_printf("zfs_refcount_t at %p has %llu current holds, "
3149	    "%llu recently released holds\n",
3150	    addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count);
3151
3152	if (rct.rc_tracked && rc.rc_count > 0)
3153		mdb_printf("current holds:\n");
3154	off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_list");
3155	if (off == -1)
3156		return (DCMD_ERR);
3157	mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off);
3158
3159	if (released && rcr.rc_removed_count > 0) {
3160		mdb_printf("released holds:\n");
3161
3162		off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_removed");
3163		if (off == -1)
3164			return (DCMD_ERR);
3165		mdb_pwalk("list", reference_cb, (void*)B_TRUE, addr + off);
3166	}
3167
3168	return (DCMD_OK);
3169}
3170
3171/* ARGSUSED */
3172static int
3173sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3174{
3175	sa_attr_table_t *table;
3176	sa_os_t sa_os;
3177	char *name;
3178	int i;
3179
3180	if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) {
3181		mdb_warn("failed to read sa_os at %p", addr);
3182		return (DCMD_ERR);
3183	}
3184
3185	table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
3186	    UM_SLEEP | UM_GC);
3187	name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC);
3188
3189	if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
3190	    (uintptr_t)sa_os.sa_attr_table) == -1) {
3191		mdb_warn("failed to read sa_os at %p", addr);
3192		return (DCMD_ERR);
3193	}
3194
3195	mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n",
3196	    "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME");
3197	for (i = 0; i != sa_os.sa_num_attrs; i++) {
3198		mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name);
3199		mdb_printf("%5x   %8x %8x %8x          %-s\n",
3200		    (int)table[i].sa_attr, (int)table[i].sa_registered,
3201		    (int)table[i].sa_length, table[i].sa_byteswap, name);
3202	}
3203
3204	return (DCMD_OK);
3205}
3206
3207static int
3208sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count)
3209{
3210	uintptr_t idx_table;
3211
3212	if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) {
3213		mdb_printf("can't find offset table in sa_idx_tab\n");
3214		return (-1);
3215	}
3216
3217	*off_tab = mdb_alloc(attr_count * sizeof (uint32_t),
3218	    UM_SLEEP | UM_GC);
3219
3220	if (mdb_vread(*off_tab,
3221	    attr_count * sizeof (uint32_t), idx_table) == -1) {
3222		mdb_warn("failed to attribute offset table %p", idx_table);
3223		return (-1);
3224	}
3225
3226	return (DCMD_OK);
3227}
3228
3229/*ARGSUSED*/
3230static int
3231sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3232{
3233	uint32_t *offset_tab;
3234	int attr_count;
3235	uint64_t attr_id;
3236	uintptr_t attr_addr;
3237	uintptr_t bonus_tab, spill_tab;
3238	uintptr_t db_bonus, db_spill;
3239	uintptr_t os, os_sa;
3240	uintptr_t db_data;
3241
3242	if (argc != 1)
3243		return (DCMD_USAGE);
3244
3245	if (argv[0].a_type == MDB_TYPE_STRING)
3246		attr_id = mdb_strtoull(argv[0].a_un.a_str);
3247	else
3248		return (DCMD_USAGE);
3249
3250	if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) ||
3251	    GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) ||
3252	    GETMEMB(addr, "sa_handle", sa_os, os) ||
3253	    GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) ||
3254	    GETMEMB(addr, "sa_handle", sa_spill, db_spill)) {
3255		mdb_printf("Can't find necessary information in sa_handle "
3256		    "in sa_handle\n");
3257		return (DCMD_ERR);
3258	}
3259
3260	if (GETMEMB(os, "objset", os_sa, os_sa)) {
3261		mdb_printf("Can't find os_sa in objset\n");
3262		return (DCMD_ERR);
3263	}
3264
3265	if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) {
3266		mdb_printf("Can't find sa_num_attrs\n");
3267		return (DCMD_ERR);
3268	}
3269
3270	if (attr_id > attr_count) {
3271		mdb_printf("attribute id number is out of range\n");
3272		return (DCMD_ERR);
3273	}
3274
3275	if (bonus_tab) {
3276		if (sa_get_off_table(bonus_tab, &offset_tab,
3277		    attr_count) == -1) {
3278			return (DCMD_ERR);
3279		}
3280
3281		if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) {
3282			mdb_printf("can't find db_data in bonus dbuf\n");
3283			return (DCMD_ERR);
3284		}
3285	}
3286
3287	if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) &&
3288	    spill_tab == 0) {
3289		mdb_printf("Attribute does not exist\n");
3290		return (DCMD_ERR);
3291	} else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) {
3292		if (sa_get_off_table(spill_tab, &offset_tab,
3293		    attr_count) == -1) {
3294			return (DCMD_ERR);
3295		}
3296		if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) {
3297			mdb_printf("can't find db_data in spill dbuf\n");
3298			return (DCMD_ERR);
3299		}
3300		if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) {
3301			mdb_printf("Attribute does not exist\n");
3302			return (DCMD_ERR);
3303		}
3304	}
3305	attr_addr = db_data + TOC_OFF(offset_tab[attr_id]);
3306	mdb_printf("%p\n", attr_addr);
3307	return (DCMD_OK);
3308}
3309
3310/* ARGSUSED */
3311static int
3312zfs_ace_print_common(uintptr_t addr, uint_t flags,
3313    uint64_t id, uint32_t access_mask, uint16_t ace_flags,
3314    uint16_t ace_type, int verbose)
3315{
3316	if (DCMD_HDRSPEC(flags) && !verbose)
3317		mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n",
3318		    "ADDR", "FLAGS", "MASK", "TYPE", "ID");
3319
3320	if (!verbose) {
3321		mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr,
3322		    ace_flags, access_mask, ace_type, id);
3323		return (DCMD_OK);
3324	}
3325
3326	switch (ace_flags & ACE_TYPE_FLAGS) {
3327	case ACE_OWNER:
3328		mdb_printf("owner@:");
3329		break;
3330	case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
3331		mdb_printf("group@:");
3332		break;
3333	case ACE_EVERYONE:
3334		mdb_printf("everyone@:");
3335		break;
3336	case ACE_IDENTIFIER_GROUP:
3337		mdb_printf("group:%llx:", (u_longlong_t)id);
3338		break;
3339	case 0: /* User entry */
3340		mdb_printf("user:%llx:", (u_longlong_t)id);
3341		break;
3342	}
3343
3344	/* print out permission mask */
3345	if (access_mask & ACE_READ_DATA)
3346		mdb_printf("r");
3347	else
3348		mdb_printf("-");
3349	if (access_mask & ACE_WRITE_DATA)
3350		mdb_printf("w");
3351	else
3352		mdb_printf("-");
3353	if (access_mask & ACE_EXECUTE)
3354		mdb_printf("x");
3355	else
3356		mdb_printf("-");
3357	if (access_mask & ACE_APPEND_DATA)
3358		mdb_printf("p");
3359	else
3360		mdb_printf("-");
3361	if (access_mask & ACE_DELETE)
3362		mdb_printf("d");
3363	else
3364		mdb_printf("-");
3365	if (access_mask & ACE_DELETE_CHILD)
3366		mdb_printf("D");
3367	else
3368		mdb_printf("-");
3369	if (access_mask & ACE_READ_ATTRIBUTES)
3370		mdb_printf("a");
3371	else
3372		mdb_printf("-");
3373	if (access_mask & ACE_WRITE_ATTRIBUTES)
3374		mdb_printf("A");
3375	else
3376		mdb_printf("-");
3377	if (access_mask & ACE_READ_NAMED_ATTRS)
3378		mdb_printf("R");
3379	else
3380		mdb_printf("-");
3381	if (access_mask & ACE_WRITE_NAMED_ATTRS)
3382		mdb_printf("W");
3383	else
3384		mdb_printf("-");
3385	if (access_mask & ACE_READ_ACL)
3386		mdb_printf("c");
3387	else
3388		mdb_printf("-");
3389	if (access_mask & ACE_WRITE_ACL)
3390		mdb_printf("C");
3391	else
3392		mdb_printf("-");
3393	if (access_mask & ACE_WRITE_OWNER)
3394		mdb_printf("o");
3395	else
3396		mdb_printf("-");
3397	if (access_mask & ACE_SYNCHRONIZE)
3398		mdb_printf("s");
3399	else
3400		mdb_printf("-");
3401
3402	mdb_printf(":");
3403
3404	/* Print out inheritance flags */
3405	if (ace_flags & ACE_FILE_INHERIT_ACE)
3406		mdb_printf("f");
3407	else
3408		mdb_printf("-");
3409	if (ace_flags & ACE_DIRECTORY_INHERIT_ACE)
3410		mdb_printf("d");
3411	else
3412		mdb_printf("-");
3413	if (ace_flags & ACE_INHERIT_ONLY_ACE)
3414		mdb_printf("i");
3415	else
3416		mdb_printf("-");
3417	if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE)
3418		mdb_printf("n");
3419	else
3420		mdb_printf("-");
3421	if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG)
3422		mdb_printf("S");
3423	else
3424		mdb_printf("-");
3425	if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG)
3426		mdb_printf("F");
3427	else
3428		mdb_printf("-");
3429	if (ace_flags & ACE_INHERITED_ACE)
3430		mdb_printf("I");
3431	else
3432		mdb_printf("-");
3433
3434	switch (ace_type) {
3435	case ACE_ACCESS_ALLOWED_ACE_TYPE:
3436		mdb_printf(":allow\n");
3437		break;
3438	case ACE_ACCESS_DENIED_ACE_TYPE:
3439		mdb_printf(":deny\n");
3440		break;
3441	case ACE_SYSTEM_AUDIT_ACE_TYPE:
3442		mdb_printf(":audit\n");
3443		break;
3444	case ACE_SYSTEM_ALARM_ACE_TYPE:
3445		mdb_printf(":alarm\n");
3446		break;
3447	default:
3448		mdb_printf(":?\n");
3449	}
3450	return (DCMD_OK);
3451}
3452
3453/* ARGSUSED */
3454static int
3455zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3456{
3457	zfs_ace_t zace;
3458	int verbose = FALSE;
3459	uint64_t id;
3460
3461	if (!(flags & DCMD_ADDRSPEC))
3462		return (DCMD_USAGE);
3463
3464	if (mdb_getopts(argc, argv,
3465	    'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
3466		return (DCMD_USAGE);
3467
3468	if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) {
3469		mdb_warn("failed to read zfs_ace_t");
3470		return (DCMD_ERR);
3471	}
3472
3473	if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 ||
3474	    (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
3475		id = zace.z_fuid;
3476	else
3477		id = -1;
3478
3479	return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask,
3480	    zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose));
3481}
3482
3483/* ARGSUSED */
3484static int
3485zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3486{
3487	ace_t ace;
3488	uint64_t id;
3489	int verbose = FALSE;
3490
3491	if (!(flags & DCMD_ADDRSPEC))
3492		return (DCMD_USAGE);
3493
3494	if (mdb_getopts(argc, argv,
3495	    'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
3496		return (DCMD_USAGE);
3497
3498	if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) {
3499		mdb_warn("failed to read ace_t");
3500		return (DCMD_ERR);
3501	}
3502
3503	if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 ||
3504	    (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
3505		id = ace.a_who;
3506	else
3507		id = -1;
3508
3509	return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask,
3510	    ace.a_flags, ace.a_type, verbose));
3511}
3512
/*
 * State handed from zfs_acl_dump() to its walk callbacks acl_cb() and
 * acl_aces_cb().
 */
typedef struct acl_dump_args {
	/* argument count of the originating dcmd, forwarded unchanged */
	int a_argc;
	/* argument vector of the originating dcmd, forwarded unchanged */
	const mdb_arg_t *a_argv;
	/* z_version of the zfs_acl_t being dumped; 1 selects "zfs_ace" */
	uint16_t a_version;
	/*
	 * Extra dcmd flags for the next per-ACE dcmd call: starts as
	 * DCMD_LOOPFIRST and is set to DCMD_LOOP after each ACE.
	 */
	int a_flags;
} acl_dump_args_t;
3519
3520/* ARGSUSED */
3521static int
3522acl_aces_cb(uintptr_t addr, const void *unknown, void *arg)
3523{
3524	acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
3525
3526	if (acl_args->a_version == 1) {
3527		if (mdb_call_dcmd("zfs_ace", addr,
3528		    DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
3529		    acl_args->a_argv) != DCMD_OK) {
3530			return (WALK_ERR);
3531		}
3532	} else {
3533		if (mdb_call_dcmd("zfs_ace0", addr,
3534		    DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
3535		    acl_args->a_argv) != DCMD_OK) {
3536			return (WALK_ERR);
3537		}
3538	}
3539	acl_args->a_flags = DCMD_LOOP;
3540	return (WALK_NEXT);
3541}
3542
3543/* ARGSUSED */
3544static int
3545acl_cb(uintptr_t addr, const void *unknown, void *arg)
3546{
3547	acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
3548
3549	if (acl_args->a_version == 1) {
3550		if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb,
3551		    arg, addr) != 0) {
3552			mdb_warn("can't walk ACEs");
3553			return (DCMD_ERR);
3554		}
3555	} else {
3556		if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb,
3557		    arg, addr) != 0) {
3558			mdb_warn("can't walk ACEs");
3559			return (DCMD_ERR);
3560		}
3561	}
3562	return (WALK_NEXT);
3563}
3564
3565/* ARGSUSED */
3566static int
3567zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3568{
3569	zfs_acl_t zacl;
3570	int verbose = FALSE;
3571	acl_dump_args_t acl_args;
3572
3573	if (!(flags & DCMD_ADDRSPEC))
3574		return (DCMD_USAGE);
3575
3576	if (mdb_getopts(argc, argv,
3577	    'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) != argc)
3578		return (DCMD_USAGE);
3579
3580	if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) {
3581		mdb_warn("failed to read zfs_acl_t");
3582		return (DCMD_ERR);
3583	}
3584
3585	acl_args.a_argc = argc;
3586	acl_args.a_argv = argv;
3587	acl_args.a_version = zacl.z_version;
3588	acl_args.a_flags = DCMD_LOOPFIRST;
3589
3590	if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) {
3591		mdb_warn("can't walk ACL");
3592		return (DCMD_ERR);
3593	}
3594
3595	return (DCMD_OK);
3596}
3597
3598/* ARGSUSED */
3599static int
3600zfs_acl_node_walk_init(mdb_walk_state_t *wsp)
3601{
3602	if (wsp->walk_addr == 0) {
3603		mdb_warn("must supply address of zfs_acl_node_t\n");
3604		return (WALK_ERR);
3605	}
3606
3607	wsp->walk_addr +=
3608	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl");
3609
3610	if (mdb_layered_walk("list", wsp) == -1) {
3611		mdb_warn("failed to walk 'list'\n");
3612		return (WALK_ERR);
3613	}
3614
3615	return (WALK_NEXT);
3616}
3617
3618static int
3619zfs_acl_node_walk_step(mdb_walk_state_t *wsp)
3620{
3621	zfs_acl_node_t	aclnode;
3622
3623	if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t),
3624	    wsp->walk_addr) == -1) {
3625		mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr);
3626		return (WALK_ERR);
3627	}
3628
3629	return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata));
3630}
3631
/*
 * Per-walk state for the ACE walkers, allocated by
 * zfs_aces_walk_init_common() and consumed by zfs_aces_walk_step().
 */
typedef struct ace_walk_data {
	/* number of ACEs still to visit; the walk is done when it hits 0 */
	int		ace_count;
	/* ACE layout: 0 selects ace_t, 1 selects the zfs_ace_t family */
	int		ace_version;
} ace_walk_data_t;
3636
3637static int
3638zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version,
3639    int ace_count, uintptr_t ace_data)
3640{
3641	ace_walk_data_t *ace_walk_data;
3642
3643	if (wsp->walk_addr == 0) {
3644		mdb_warn("must supply address of zfs_acl_node_t\n");
3645		return (WALK_ERR);
3646	}
3647
3648	ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC);
3649
3650	ace_walk_data->ace_count = ace_count;
3651	ace_walk_data->ace_version = version;
3652
3653	wsp->walk_addr = ace_data;
3654	wsp->walk_data = ace_walk_data;
3655
3656	return (WALK_NEXT);
3657}
3658
3659static int
3660zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version)
3661{
3662	static int gotid;
3663	static mdb_ctf_id_t acl_id;
3664	int z_ace_count;
3665	uintptr_t z_acldata;
3666
3667	if (!gotid) {
3668		if (mdb_ctf_lookup_by_name("struct zfs_acl_node",
3669		    &acl_id) == -1) {
3670			mdb_warn("couldn't find struct zfs_acl_node");
3671			return (DCMD_ERR);
3672		}
3673		gotid = TRUE;
3674	}
3675
3676	if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) {
3677		return (DCMD_ERR);
3678	}
3679	if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) {
3680		return (DCMD_ERR);
3681	}
3682
3683	return (zfs_aces_walk_init_common(wsp, version,
3684	    z_ace_count, z_acldata));
3685}
3686
/*
 * Walker init for "zfs_acl_node_aces": start an ACE walk over a
 * zfs_acl_node using ACE layout version 1.
 */
/* ARGSUSED */
static int
zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp)
{
	return (zfs_acl_node_aces_walk_init_common(wsp, 1));
}
3693
/*
 * Walker init for "zfs_acl_node_aces0": start an ACE walk over a
 * zfs_acl_node using ACE layout version 0.
 */
/* ARGSUSED */
static int
zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp)
{
	return (zfs_acl_node_aces_walk_init_common(wsp, 0));
}
3700
3701static int
3702zfs_aces_walk_step(mdb_walk_state_t *wsp)
3703{
3704	ace_walk_data_t *ace_data = wsp->walk_data;
3705	zfs_ace_t zace;
3706	ace_t *acep;
3707	int status;
3708	int entry_type;
3709	int allow_type;
3710	uintptr_t ptr;
3711
3712	if (ace_data->ace_count == 0)
3713		return (WALK_DONE);
3714
3715	if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) {
3716		mdb_warn("failed to read zfs_ace_t at %#lx",
3717		    wsp->walk_addr);
3718		return (WALK_ERR);
3719	}
3720
3721	switch (ace_data->ace_version) {
3722	case 0:
3723		acep = (ace_t *)&zace;
3724		entry_type = acep->a_flags & ACE_TYPE_FLAGS;
3725		allow_type = acep->a_type;
3726		break;
3727	case 1:
3728		entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS;
3729		allow_type = zace.z_hdr.z_type;
3730		break;
3731	default:
3732		return (WALK_ERR);
3733	}
3734
3735	ptr = (uintptr_t)wsp->walk_addr;
3736	switch (entry_type) {
3737	case ACE_OWNER:
3738	case ACE_EVERYONE:
3739	case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
3740		ptr += ace_data->ace_version == 0 ?
3741		    sizeof (ace_t) : sizeof (zfs_ace_hdr_t);
3742		break;
3743	case ACE_IDENTIFIER_GROUP:
3744	default:
3745		switch (allow_type) {
3746		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
3747		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
3748		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
3749		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
3750			ptr += ace_data->ace_version == 0 ?
3751			    sizeof (ace_t) : sizeof (zfs_object_ace_t);
3752			break;
3753		default:
3754			ptr += ace_data->ace_version == 0 ?
3755			    sizeof (ace_t) : sizeof (zfs_ace_t);
3756			break;
3757		}
3758	}
3759
3760	ace_data->ace_count--;
3761	status = wsp->walk_callback(wsp->walk_addr,
3762	    (void *)(uintptr_t)&zace, wsp->walk_cbdata);
3763
3764	wsp->walk_addr = ptr;
3765	return (status);
3766}
3767
/*
 * Debugger-side view of the target's rrwlock_t, filled in by
 * mdb_ctf_vread() in rrwlock().  Only the members listed here are read.
 */
typedef struct mdb_zfs_rrwlock {
	/* non-zero when a thread holds the write lock (printed as thread) */
	uintptr_t	rr_writer;
	/* non-zero when a writer is waiting for the lock */
	boolean_t	rr_writer_wanted;
} mdb_zfs_rrwlock_t;

/*
 * Value of the target's "rrw_tsd_key" symbol; read on first use by
 * rrwlock() and kept for later invocations (0 means not read yet).
 */
static uint_t rrw_key;
3774
3775/* ARGSUSED */
3776static int
3777rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3778{
3779	mdb_zfs_rrwlock_t rrw;
3780
3781	if (rrw_key == 0) {
3782		if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1)
3783			return (DCMD_ERR);
3784	}
3785
3786	if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr,
3787	    0) == -1)
3788		return (DCMD_ERR);
3789
3790	if (rrw.rr_writer != 0) {
3791		mdb_printf("write lock held by thread %lx\n", rrw.rr_writer);
3792		return (DCMD_OK);
3793	}
3794
3795	if (rrw.rr_writer_wanted) {
3796		mdb_printf("writer wanted\n");
3797	}
3798
3799	mdb_printf("anonymous references:\n");
3800	(void) mdb_call_dcmd("zfs_refcount", addr +
3801	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"),
3802	    DCMD_ADDRSPEC, 0, NULL);
3803
3804	mdb_printf("linked references:\n");
3805	(void) mdb_call_dcmd("zfs_refcount", addr +
3806	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"),
3807	    DCMD_ADDRSPEC, 0, NULL);
3808
3809	/*
3810	 * XXX This should find references from
3811	 * "::walk thread | ::tsd -v <rrw_key>", but there is no support
3812	 * for programmatic consumption of dcmds, so this would be
3813	 * difficult, potentially requiring reimplementing ::tsd (both
3814	 * user and kernel versions) in this MDB module.
3815	 */
3816
3817	return (DCMD_OK);
3818}
3819
/*
 * Debugger-side view of the target's arc_buf_hdr_t, filled in by
 * mdb_ctf_vread() in arc_compression_stats_cb().  Only the members
 * listed here are read.
 */
typedef struct mdb_arc_buf_hdr_t {
	/* physical (compressed) size, in units of SPA_MINBLOCKSIZE */
	uint16_t b_psize;
	/* logical (uncompressed) size, in units of SPA_MINBLOCKSIZE */
	uint16_t b_lsize;
	struct {
		/* number of buffers attached to this header */
		uint32_t	b_bufcnt;
		/* address of the ARC state this header belongs to */
		uintptr_t	b_state;
	} b_l1hdr;
} mdb_arc_buf_hdr_t;
3828
/*
 * Option bits for ::arc_compression_stats, set from its command line
 * options by arc_compression_stats().
 */
enum arc_cflags {
	/* -v: linear instead of log2 histogram buckets */
	ARC_CFLAG_VERBOSE		= 1 << 0,
	/* -a: also print the histograms for the anon state */
	ARC_CFLAG_ANON			= 1 << 1,
	/* -r: also print the histograms for the mru state */
	ARC_CFLAG_MRU			= 1 << 2,
	/* -f: also print the histograms for the mfu state */
	ARC_CFLAG_MFU			= 1 << 3,
	/* -b: also print the buffer count histograms */
	ARC_CFLAG_BUFS			= 1 << 4,
};
3836
/*
 * Working state for ::arc_compression_stats, shared between
 * arc_compression_stats() and its walk callback
 * arc_compression_stats_cb().  Every histogram array holds
 * hist_nbuckets counters.
 */
typedef struct arc_compression_stats_data {
	GElf_Sym anon_sym;	/* ARC_anon symbol */
	GElf_Sym mru_sym;	/* ARC_mru symbol */
	GElf_Sym mrug_sym;	/* ARC_mru_ghost symbol */
	GElf_Sym mfu_sym;	/* ARC_mfu symbol */
	GElf_Sym mfug_sym;	/* ARC_mfu_ghost symbol */
	GElf_Sym l2c_sym;	/* ARC_l2c_only symbol */
	uint64_t *anon_c_hist;	/* histogram of compressed sizes in anon */
	uint64_t *anon_u_hist;	/* histogram of uncompressed sizes in anon */
	uint64_t *anon_bufs;	/* histogram of buffer counts in anon state */
	uint64_t *mru_c_hist;	/* histogram of compressed sizes in mru */
	uint64_t *mru_u_hist;	/* histogram of uncompressed sizes in mru */
	uint64_t *mru_bufs;	/* histogram of buffer counts in mru */
	uint64_t *mfu_c_hist;	/* histogram of compressed sizes in mfu */
	uint64_t *mfu_u_hist;	/* histogram of uncompressed sizes in mfu */
	uint64_t *mfu_bufs;	/* histogram of buffer counts in mfu */
	uint64_t *all_c_hist;	/* histogram of compressed anon + mru + mfu */
	uint64_t *all_u_hist;	/* histogram of uncompressed anon + mru + mfu */
	uint64_t *all_bufs;	/* histogram of buffer counts in all states */
	int arc_cflags;		/* arc compression flags, specified by user */
	int hist_nbuckets;	/* number of buckets in each histogram */
} arc_compression_stats_data_t;
3859
/*
 * Return the 1-based position of the most significant set bit of i, or 0
 * when i is zero.  For example highbit64(1) is 1 and highbit64(256) is 9.
 *
 * The search halves the candidate range on every step: whenever the
 * upper half of the remaining range is non-empty, it is shifted down and
 * its width added to the result.
 */
int
highbit64(uint64_t i)
{
	int pos = 1;
	int width;

	if (i == 0)
		return (0);

	for (width = 32; width != 0; width >>= 1) {
		if ((i >> width) != 0) {
			pos += width;
			i >>= width;
		}
	}

	return (pos);
}
3887
3888/* ARGSUSED */
3889static int
3890arc_compression_stats_cb(uintptr_t addr, const void *unknown, void *arg)
3891{
3892	arc_compression_stats_data_t *data = arg;
3893	mdb_arc_buf_hdr_t hdr;
3894	int cbucket, ubucket, bufcnt;
3895
3896	if (mdb_ctf_vread(&hdr, "arc_buf_hdr_t", "mdb_arc_buf_hdr_t",
3897	    addr, 0) == -1) {
3898		return (WALK_ERR);
3899	}
3900
3901	/*
3902	 * Headers in the ghost states, or the l2c_only state don't have
3903	 * arc buffers linked off of them. Thus, their compressed size
3904	 * is meaningless, so we skip these from the stats.
3905	 */
3906	if (hdr.b_l1hdr.b_state == data->mrug_sym.st_value ||
3907	    hdr.b_l1hdr.b_state == data->mfug_sym.st_value ||
3908	    hdr.b_l1hdr.b_state == data->l2c_sym.st_value) {
3909		return (WALK_NEXT);
3910	}
3911
3912	/*
3913	 * The physical size (compressed) and logical size
3914	 * (uncompressed) are in units of SPA_MINBLOCKSIZE. By default,
3915	 * we use the log2 of this value (rounded down to the nearest
3916	 * integer) to determine the bucket to assign this header to.
3917	 * Thus, the histogram is logarithmic with respect to the size
3918	 * of the header. For example, the following is a mapping of the
3919	 * bucket numbers and the range of header sizes they correspond to:
3920	 *
3921	 *	0: 0 byte headers
3922	 *	1: 512 byte headers
3923	 *	2: [1024 - 2048) byte headers
3924	 *	3: [2048 - 4096) byte headers
3925	 *	4: [4096 - 8192) byte headers
3926	 *	5: [8192 - 16394) byte headers
3927	 *	6: [16384 - 32768) byte headers
3928	 *	7: [32768 - 65536) byte headers
3929	 *	8: [65536 - 131072) byte headers
3930	 *	9: 131072 byte headers
3931	 *
3932	 * If the ARC_CFLAG_VERBOSE flag was specified, we use the
3933	 * physical and logical sizes directly. Thus, the histogram will
3934	 * no longer be logarithmic; instead it will be linear with
3935	 * respect to the size of the header. The following is a mapping
3936	 * of the first many bucket numbers and the header size they
3937	 * correspond to:
3938	 *
3939	 *	0: 0 byte headers
3940	 *	1: 512 byte headers
3941	 *	2: 1024 byte headers
3942	 *	3: 1536 byte headers
3943	 *	4: 2048 byte headers
3944	 *	5: 2560 byte headers
3945	 *	6: 3072 byte headers
3946	 *
3947	 * And so on. Keep in mind that a range of sizes isn't used in
3948	 * the case of linear scale because the headers can only
3949	 * increment or decrement in sizes of 512 bytes. So, it's not
3950	 * possible for a header to be sized in between whats listed
3951	 * above.
3952	 *
3953	 * Also, the above mapping values were calculated assuming a
3954	 * SPA_MINBLOCKSHIFT of 512 bytes and a SPA_MAXBLOCKSIZE of 128K.
3955	 */
3956
3957	if (data->arc_cflags & ARC_CFLAG_VERBOSE) {
3958		cbucket = hdr.b_psize;
3959		ubucket = hdr.b_lsize;
3960	} else {
3961		cbucket = highbit64(hdr.b_psize);
3962		ubucket = highbit64(hdr.b_lsize);
3963	}
3964
3965	bufcnt = hdr.b_l1hdr.b_bufcnt;
3966	if (bufcnt >= data->hist_nbuckets)
3967		bufcnt = data->hist_nbuckets - 1;
3968
3969	/* Ensure we stay within the bounds of the histogram array */
3970	ASSERT3U(cbucket, <, data->hist_nbuckets);
3971	ASSERT3U(ubucket, <, data->hist_nbuckets);
3972
3973	if (hdr.b_l1hdr.b_state == data->anon_sym.st_value) {
3974		data->anon_c_hist[cbucket]++;
3975		data->anon_u_hist[ubucket]++;
3976		data->anon_bufs[bufcnt]++;
3977	} else if (hdr.b_l1hdr.b_state == data->mru_sym.st_value) {
3978		data->mru_c_hist[cbucket]++;
3979		data->mru_u_hist[ubucket]++;
3980		data->mru_bufs[bufcnt]++;
3981	} else if (hdr.b_l1hdr.b_state == data->mfu_sym.st_value) {
3982		data->mfu_c_hist[cbucket]++;
3983		data->mfu_u_hist[ubucket]++;
3984		data->mfu_bufs[bufcnt]++;
3985	}
3986
3987	data->all_c_hist[cbucket]++;
3988	data->all_u_hist[ubucket]++;
3989	data->all_bufs[bufcnt]++;
3990
3991	return (WALK_NEXT);
3992}
3993
3994/* ARGSUSED */
3995static int
3996arc_compression_stats(uintptr_t addr, uint_t flags, int argc,
3997    const mdb_arg_t *argv)
3998{
3999	arc_compression_stats_data_t data = { 0 };
4000	unsigned int max_shifted = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT;
4001	unsigned int hist_size;
4002	char range[32];
4003	int rc = DCMD_OK;
4004
4005	if (mdb_getopts(argc, argv,
4006	    'v', MDB_OPT_SETBITS, ARC_CFLAG_VERBOSE, &data.arc_cflags,
4007	    'a', MDB_OPT_SETBITS, ARC_CFLAG_ANON, &data.arc_cflags,
4008	    'b', MDB_OPT_SETBITS, ARC_CFLAG_BUFS, &data.arc_cflags,
4009	    'r', MDB_OPT_SETBITS, ARC_CFLAG_MRU, &data.arc_cflags,
4010	    'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags,
4011	    NULL) != argc)
4012		return (DCMD_USAGE);
4013
4014	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_anon", &data.anon_sym) ||
4015	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru", &data.mru_sym) ||
4016	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru_ghost", &data.mrug_sym) ||
4017	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu", &data.mfu_sym) ||
4018	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu_ghost", &data.mfug_sym) ||
4019	    mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_l2c_only", &data.l2c_sym)) {
4020		mdb_warn("can't find arc state symbol");
4021		return (DCMD_ERR);
4022	}
4023
4024	/*
4025	 * Determine the maximum expected size for any header, and use
4026	 * this to determine the number of buckets needed for each
4027	 * histogram. If ARC_CFLAG_VERBOSE is specified, this value is
4028	 * used directly; otherwise the log2 of the maximum size is
4029	 * used. Thus, if using a log2 scale there's a maximum of 10
4030	 * possible buckets, while the linear scale (when using
4031	 * ARC_CFLAG_VERBOSE) has a maximum of 257 buckets.
4032	 */
4033	if (data.arc_cflags & ARC_CFLAG_VERBOSE)
4034		data.hist_nbuckets = max_shifted + 1;
4035	else
4036		data.hist_nbuckets = highbit64(max_shifted) + 1;
4037
4038	hist_size = sizeof (uint64_t) * data.hist_nbuckets;
4039
4040	data.anon_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
4041	data.anon_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
4042	data.anon_bufs = mdb_zalloc(hist_size, UM_SLEEP);
4043
4044	data.mru_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
4045	data.mru_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
4046	data.mru_bufs = mdb_zalloc(hist_size, UM_SLEEP);
4047
4048	data.mfu_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
4049	data.mfu_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
4050	data.mfu_bufs = mdb_zalloc(hist_size, UM_SLEEP);
4051
4052	data.all_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
4053	data.all_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
4054	data.all_bufs = mdb_zalloc(hist_size, UM_SLEEP);
4055
4056	if (mdb_walk("arc_buf_hdr_t_full", arc_compression_stats_cb,
4057	    &data) != 0) {
4058		mdb_warn("can't walk arc_buf_hdr's");
4059		rc = DCMD_ERR;
4060		goto out;
4061	}
4062
4063	if (data.arc_cflags & ARC_CFLAG_VERBOSE) {
4064		rc = mdb_snprintf(range, sizeof (range),
4065		    "[n*%llu, (n+1)*%llu)", SPA_MINBLOCKSIZE,
4066		    SPA_MINBLOCKSIZE);
4067	} else {
4068		rc = mdb_snprintf(range, sizeof (range),
4069		    "[2^(n-1)*%llu, 2^n*%llu)", SPA_MINBLOCKSIZE,
4070		    SPA_MINBLOCKSIZE);
4071	}
4072
4073	if (rc < 0) {
4074		/* snprintf failed, abort the dcmd */
4075		rc = DCMD_ERR;
4076		goto out;
4077	} else {
4078		/* snprintf succeeded above, reset return code */
4079		rc = DCMD_OK;
4080	}
4081
4082	if (data.arc_cflags & ARC_CFLAG_ANON) {
4083		if (data.arc_cflags & ARC_CFLAG_BUFS) {
4084			mdb_printf("Histogram of the number of anon buffers "
4085			    "that are associated with an arc hdr.\n");
4086			dump_histogram(data.anon_bufs, data.hist_nbuckets, 0);
4087			mdb_printf("\n");
4088		}
4089		mdb_printf("Histogram of compressed anon buffers.\n"
4090		    "Each bucket represents buffers of size: %s.\n", range);
4091		dump_histogram(data.anon_c_hist, data.hist_nbuckets, 0);
4092		mdb_printf("\n");
4093
4094		mdb_printf("Histogram of uncompressed anon buffers.\n"
4095		    "Each bucket represents buffers of size: %s.\n", range);
4096		dump_histogram(data.anon_u_hist, data.hist_nbuckets, 0);
4097		mdb_printf("\n");
4098	}
4099
4100	if (data.arc_cflags & ARC_CFLAG_MRU) {
4101		if (data.arc_cflags & ARC_CFLAG_BUFS) {
4102			mdb_printf("Histogram of the number of mru buffers "
4103			    "that are associated with an arc hdr.\n");
4104			dump_histogram(data.mru_bufs, data.hist_nbuckets, 0);
4105			mdb_printf("\n");
4106		}
4107		mdb_printf("Histogram of compressed mru buffers.\n"
4108		    "Each bucket represents buffers of size: %s.\n", range);
4109		dump_histogram(data.mru_c_hist, data.hist_nbuckets, 0);
4110		mdb_printf("\n");
4111
4112		mdb_printf("Histogram of uncompressed mru buffers.\n"
4113		    "Each bucket represents buffers of size: %s.\n", range);
4114		dump_histogram(data.mru_u_hist, data.hist_nbuckets, 0);
4115		mdb_printf("\n");
4116	}
4117
4118	if (data.arc_cflags & ARC_CFLAG_MFU) {
4119		if (data.arc_cflags & ARC_CFLAG_BUFS) {
4120			mdb_printf("Histogram of the number of mfu buffers "
4121			    "that are associated with an arc hdr.\n");
4122			dump_histogram(data.mfu_bufs, data.hist_nbuckets, 0);
4123			mdb_printf("\n");
4124		}
4125
4126		mdb_printf("Histogram of compressed mfu buffers.\n"
4127		    "Each bucket represents buffers of size: %s.\n", range);
4128		dump_histogram(data.mfu_c_hist, data.hist_nbuckets, 0);
4129		mdb_printf("\n");
4130
4131		mdb_printf("Histogram of uncompressed mfu buffers.\n"
4132		    "Each bucket represents buffers of size: %s.\n", range);
4133		dump_histogram(data.mfu_u_hist, data.hist_nbuckets, 0);
4134		mdb_printf("\n");
4135	}
4136
4137	if (data.arc_cflags & ARC_CFLAG_BUFS) {
4138		mdb_printf("Histogram of all buffers that "
4139		    "are associated with an arc hdr.\n");
4140		dump_histogram(data.all_bufs, data.hist_nbuckets, 0);
4141		mdb_printf("\n");
4142	}
4143
4144	mdb_printf("Histogram of all compressed buffers.\n"
4145	    "Each bucket represents buffers of size: %s.\n", range);
4146	dump_histogram(data.all_c_hist, data.hist_nbuckets, 0);
4147	mdb_printf("\n");
4148
4149	mdb_printf("Histogram of all uncompressed buffers.\n"
4150	    "Each bucket represents buffers of size: %s.\n", range);
4151	dump_histogram(data.all_u_hist, data.hist_nbuckets, 0);
4152
4153out:
4154	mdb_free(data.anon_c_hist, hist_size);
4155	mdb_free(data.anon_u_hist, hist_size);
4156	mdb_free(data.anon_bufs, hist_size);
4157
4158	mdb_free(data.mru_c_hist, hist_size);
4159	mdb_free(data.mru_u_hist, hist_size);
4160	mdb_free(data.mru_bufs, hist_size);
4161
4162	mdb_free(data.mfu_c_hist, hist_size);
4163	mdb_free(data.mfu_u_hist, hist_size);
4164	mdb_free(data.mfu_bufs, hist_size);
4165
4166	mdb_free(data.all_c_hist, hist_size);
4167	mdb_free(data.all_u_hist, hist_size);
4168	mdb_free(data.all_bufs, hist_size);
4169
4170	return (rc);
4171}
4172
/*
 * Debugger-side view of the target's range_seg structure, filled in by
 * mdb_ctf_vread() in range_tree_cb().  Printed as the half-open interval
 * [rs_start, rs_end).
 */
typedef struct mdb_range_seg {
	/* first offset covered by the segment */
	uint64_t rs_start;
	/* offset just past the end of the segment */
	uint64_t rs_end;
} mdb_range_seg_t;
4177
4178/* ARGSUSED */
4179static int
4180range_tree_cb(uintptr_t addr, const void *unknown, void *arg)
4181{
4182	mdb_range_seg_t rs;
4183
4184	if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg", "mdb_range_seg_t",
4185	    addr, 0) == -1)
4186		return (DCMD_ERR);
4187
4188	mdb_printf("\t[%llx %llx) (length %llx)\n",
4189	    rs.rs_start, rs.rs_end, rs.rs_end - rs.rs_start);
4190
4191	return (0);
4192}
4193
4194/* ARGSUSED */
4195static int
4196range_tree(uintptr_t addr, uint_t flags, int argc,
4197    const mdb_arg_t *argv)
4198{
4199	mdb_range_tree_t rt;
4200	uintptr_t avl_addr;
4201
4202	if (!(flags & DCMD_ADDRSPEC))
4203		return (DCMD_USAGE);
4204
4205	if (mdb_ctf_vread(&rt, ZFS_STRUCT "range_tree", "mdb_range_tree_t",
4206	    addr, 0) == -1)
4207		return (DCMD_ERR);
4208
4209	mdb_printf("%p: range tree of %llu entries, %llu bytes\n",
4210	    addr, rt.rt_root.avl_numnodes, rt.rt_space);
4211
4212	avl_addr = addr +
4213	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "range_tree", "rt_root");
4214
4215	if (mdb_pwalk("avl", range_tree_cb, NULL, avl_addr) != 0) {
4216		mdb_warn("can't walk range_tree segments");
4217		return (DCMD_ERR);
4218	}
4219	return (DCMD_OK);
4220}
4221
/*
 * Debugger-side view of the target's spa_log_sm structure, filled in by
 * mdb_ctf_vread() in logsm_stats_cb().  The comments give the column
 * each member is printed under by ::logsm_stats.
 */
typedef struct mdb_spa_log_sm {
	/* "obj" column */
	uint64_t sls_sm_obj;
	/* "txg" column */
	uint64_t sls_txg;
	/* "blk" column */
	uint64_t sls_nblocks;
	/* "ms" column */
	uint64_t sls_mscount;
} mdb_spa_log_sm_t;
4228
4229/* ARGSUSED */
4230static int
4231logsm_stats_cb(uintptr_t addr, const void *unknown, void *arg)
4232{
4233	mdb_spa_log_sm_t sls;
4234	if (mdb_ctf_vread(&sls, ZFS_STRUCT "spa_log_sm", "mdb_spa_log_sm_t",
4235	    addr, 0) == -1)
4236		return (WALK_ERR);
4237
4238	mdb_printf("%7lld %7lld %7lld %7lld\n",
4239	    sls.sls_txg, sls.sls_nblocks, sls.sls_mscount, sls.sls_sm_obj);
4240
4241	return (WALK_NEXT);
4242}
/*
 * Debugger-side view of the target's log_summary_entry structure, filled
 * in by mdb_ctf_vread() in logsm_summary_cb().  The comments give the
 * column each member is printed under by ::logsm_stats.
 */
typedef struct mdb_log_summary_entry {
	/* "txg" column */
	uint64_t lse_start;
	/* "blk" column */
	uint64_t lse_blkcount;
	/* "ms" column */
	uint64_t lse_mscount;
} mdb_log_summary_entry_t;
4248
4249/* ARGSUSED */
4250static int
4251logsm_summary_cb(uintptr_t addr, const void *unknown, void *arg)
4252{
4253	mdb_log_summary_entry_t lse;
4254	if (mdb_ctf_vread(&lse, ZFS_STRUCT "log_summary_entry",
4255	    "mdb_log_summary_entry_t", addr, 0) == -1)
4256		return (WALK_ERR);
4257
4258	mdb_printf("%7lld %7lld %7lld\n",
4259	    lse.lse_start, lse.lse_blkcount, lse.lse_mscount);
4260	return (WALK_NEXT);
4261}
4262
4263/* ARGSUSED */
4264static int
4265logsm_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4266{
4267	if (!(flags & DCMD_ADDRSPEC))
4268		return (DCMD_USAGE);
4269
4270	uintptr_t sls_avl_addr = addr +
4271	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "spa", "spa_sm_logs_by_txg");
4272	uintptr_t summary_addr = addr +
4273	    mdb_ctf_offsetof_by_name(ZFS_STRUCT "spa", "spa_log_summary");
4274
4275	mdb_printf("Log Entries:\n");
4276	mdb_printf("%7s %7s %7s %7s\n", "txg", "blk", "ms", "obj");
4277	if (mdb_pwalk("avl", logsm_stats_cb, NULL, sls_avl_addr) != 0)
4278		return (DCMD_ERR);
4279
4280	mdb_printf("\nSummary Entries:\n");
4281	mdb_printf("%7s %7s %7s\n", "txg", "blk", "ms");
4282	if (mdb_pwalk("list", logsm_summary_cb, NULL, summary_addr) != 0)
4283		return (DCMD_ERR);
4284
4285	return (DCMD_OK);
4286}
4287
4288/*
4289 * MDB module linkage information:
4290 *
4291 * We declare a list of structures describing our dcmds, and a function
4292 * named _mdb_init to return a pointer to our module information.
4293 */
4294
4295static const mdb_dcmd_t dcmds[] = {
4296	{ "arc", "[-bkmg]", "print ARC variables", arc_print },
4297	{ "blkptr", ":", "print blkptr_t", blkptr },
4298	{ "dva", ":", "print dva_t", dva },
4299	{ "dbuf", ":", "print dmu_buf_impl_t", dbuf },
4300	{ "dbuf_stats", ":", "dbuf stats", dbuf_stats },
4301	{ "dbufs",
4302	    "\t[-O objset_t*] [-n objset_name | \"mos\"] "
4303	    "[-o object | \"mdn\"] \n"
4304	    "\t[-l level] [-b blkid | \"bonus\"]",
4305	    "find dmu_buf_impl_t's that match specified criteria", dbufs },
4306	{ "abuf_find", "dva_word[0] dva_word[1]",
4307	    "find arc_buf_hdr_t of a specified DVA",
4308	    abuf_find },
4309	{ "logsm_stats", ":", "print log space map statistics of a spa_t",
4310	    logsm_stats},
4311	{ "spa", "?[-cevmMh]\n"
4312	    "\t-c display spa config\n"
4313	    "\t-e display vdev statistics\n"
4314	    "\t-v display vdev information\n"
4315	    "\t-m display metaslab statistics\n"
4316	    "\t-M display metaslab group statistics\n"
4317	    "\t-h display histogram (requires -m or -M)\n",
4318	    "spa_t summary", spa_print },
4319	{ "spa_config", ":", "print spa_t configuration", spa_print_config },
4320	{ "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
4321	{ "spa_vdevs", ":[-emMh]\n"
4322	    "\t-e display vdev statistics\n"
4323	    "\t-m dispaly metaslab statistics\n"
4324	    "\t-M display metaslab group statistic\n"
4325	    "\t-h display histogram (requires -m or -M)\n",
4326	    "given a spa_t, print vdev summary", spa_vdevs },
4327	{ "sm_entries", "<buffer length in bytes>",
4328	    "print out space map entries from a buffer decoded",
4329	    sm_entries},
4330	{ "vdev", ":[-remMh]\n"
4331	    "\t-r display recursively\n"
4332	    "\t-e display statistics\n"
4333	    "\t-m display metaslab statistics (top level vdev only)\n"
4334	    "\t-M display metaslab group statistics (top level vdev only)\n"
4335	    "\t-h display histogram (requires -m or -M)\n",
4336	    "vdev_t summary", vdev_print },
4337	{ "zio", ":[-cpr]\n"
4338	    "\t-c display children\n"
4339	    "\t-p display parents\n"
4340	    "\t-r display recursively",
4341	    "zio_t summary", zio_print },
4342	{ "zio_state", "?", "print out all zio_t structures on system or "
4343	    "for a particular pool", zio_state },
4344	{ "zfs_blkstats", ":[-v]",
4345	    "given a spa_t, print block type stats from last scrub",
4346	    zfs_blkstats },
4347	{ "zfs_params", "", "print zfs tunable parameters", zfs_params },
4348	{ "zfs_refcount", ":[-r]\n"
4349	    "\t-r display recently removed references",
4350	    "print zfs_refcount_t holders", zfs_refcount },
4351	{ "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf },
4352	{ "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t",
4353	    zfs_acl_dump },
4354	{ "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print },
4355	{ "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print },
4356	{ "sa_attr_table", ":", "print SA attribute table from sa_os_t",
4357	    sa_attr_table},
4358	{ "sa_attr", ": attr_id",
4359	    "print SA attribute address when given sa_handle_t", sa_attr_print},
4360	{ "zfs_dbgmsg", ":[-va]",
4361	    "print zfs debug log", dbgmsg},
4362	{ "rrwlock", ":",
4363	    "print rrwlock_t, including readers", rrwlock},
4364	{ "metaslab_weight", "weight",
4365	    "print metaslab weight", metaslab_weight},
4366	{ "metaslab_trace", ":",
4367	    "print metaslab allocation trace records", metaslab_trace},
4368	{ "arc_compression_stats", ":[-vabrf]\n"
4369	    "\t-v verbose, display a linearly scaled histogram\n"
4370	    "\t-a display ARC_anon state statistics individually\n"
4371	    "\t-r display ARC_mru state statistics individually\n"
4372	    "\t-f display ARC_mfu state statistics individually\n"
4373	    "\t-b display histogram of buffer counts\n",
4374	    "print a histogram of compressed arc buffer sizes",
4375	    arc_compression_stats},
4376	{ "range_tree", ":",
4377	    "print entries in range_tree_t", range_tree},
4378	{ NULL }
4379};
4380
4381static const mdb_walker_t walkers[] = {
4382	{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
4383	    txg_list_walk_init, txg_list_walk_step, NULL },
4384	{ "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
4385	    txg_list0_walk_init, txg_list_walk_step, NULL },
4386	{ "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
4387	    txg_list1_walk_init, txg_list_walk_step, NULL },
4388	{ "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
4389	    txg_list2_walk_init, txg_list_walk_step, NULL },
4390	{ "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
4391	    txg_list3_walk_init, txg_list_walk_step, NULL },
4392	{ "zio", "walk all zio structures, optionally for a particular spa_t",
4393	    zio_walk_init, zio_walk_step, NULL },
4394	{ "zio_root",
4395	    "walk all root zio_t structures, optionally for a particular spa_t",
4396	    zio_walk_init, zio_walk_root_step, NULL },
4397	{ "spa", "walk all spa_t entries in the namespace",
4398	    spa_walk_init, spa_walk_step, NULL },
4399	{ "metaslab", "given a spa_t *, walk all metaslab_t structures",
4400	    metaslab_walk_init, metaslab_walk_step, NULL },
4401	{ "multilist", "given a multilist_t *, walk all list_t structures",
4402	    multilist_walk_init, multilist_walk_step, NULL },
4403	{ "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes",
4404	    zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL },
4405	{ "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs",
4406	    zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL },
4407	{ "zfs_acl_node_aces0",
4408	    "given a zfs_acl_node_t, walk all ACEs as ace_t",
4409	    zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL },
4410	{ NULL }
4411};
4412
4413static const mdb_modinfo_t modinfo = {
4414	MDB_API_VERSION, dcmds, walkers
4415};
4416
4417const mdb_modinfo_t *
4418_mdb_init(void)
4419{
4420	return (&modinfo);
4421}
4422