1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_param.h>
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29 #include <mdb/mdb_whatis.h>
30 #include <sys/cpuvar.h>
31 #include <sys/kmem_impl.h>
32 #include <sys/vmem_impl.h>
33 #include <sys/machelf.h>
34 #include <sys/modctl.h>
35 #include <sys/kobj.h>
36 #include <sys/panic.h>
37 #include <sys/stack.h>
38 #include <sys/sysmacros.h>
39 #include <vm/page.h>
40 
41 #include "avl.h"
42 #include "combined.h"
43 #include "dist.h"
44 #include "kmem.h"
45 #include "list.h"
46 
/*
 * Emit a "kmem debug: "-prefixed message when mdb_debug_level is non-zero
 * (toggled by kmem_debug()).  The argument is a complete, parenthesized
 * mdb_printf() argument list, e.g. dprintf(("read %d rounds\n", n)).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and cannot capture a following "else".
 */
#define	dprintf(x) do { \
	if (mdb_debug_level) { \
		mdb_printf("kmem debug: ");  \
		/*CSTYLED*/\
		mdb_printf x ;\
	} \
} while (0)
52 
/*
 * Walk-type flag bits.  kmem_walk_init_common() receives a combination of
 * these in its 'type' argument (and masks off KM_HASH on entry).
 */
#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

/* Non-zero enables dprintf() output; flipped by the kmem_debug() dcmd. */
static int mdb_debug_level = 0;
60 
61 /*ARGSUSED*/
62 static int
63 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
64 {
65 	mdb_walker_t w;
66 	char descr[64];
67 
68 	(void) mdb_snprintf(descr, sizeof (descr),
69 	    "walk the %s cache", c->cache_name);
70 
71 	w.walk_name = c->cache_name;
72 	w.walk_descr = descr;
73 	w.walk_init = kmem_walk_init;
74 	w.walk_step = kmem_walk_step;
75 	w.walk_fini = kmem_walk_fini;
76 	w.walk_init_arg = (void *)addr;
77 
78 	if (mdb_add_walker(&w) == -1)
79 		mdb_warn("failed to add %s walker", c->cache_name);
80 
81 	return (WALK_NEXT);
82 }
83 
84 /*ARGSUSED*/
85 int
86 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
87 {
88 	mdb_debug_level ^= 1;
89 
90 	mdb_printf("kmem: debugging is now %s\n",
91 	    mdb_debug_level ? "on" : "off");
92 
93 	return (DCMD_OK);
94 }
95 
96 int
97 kmem_cache_walk_init(mdb_walk_state_t *wsp)
98 {
99 	GElf_Sym sym;
100 
101 	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
102 		mdb_warn("couldn't find kmem_caches");
103 		return (WALK_ERR);
104 	}
105 
106 	wsp->walk_addr = (uintptr_t)sym.st_value;
107 
108 	return (list_walk_init_named(wsp, "cache list", "cache"));
109 }
110 
111 int
112 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
113 {
114 	if (wsp->walk_addr == NULL) {
115 		mdb_warn("kmem_cpu_cache doesn't support global walks");
116 		return (WALK_ERR);
117 	}
118 
119 	if (mdb_layered_walk("cpu", wsp) == -1) {
120 		mdb_warn("couldn't walk 'cpu'");
121 		return (WALK_ERR);
122 	}
123 
124 	wsp->walk_data = (void *)wsp->walk_addr;
125 
126 	return (WALK_NEXT);
127 }
128 
129 int
130 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
131 {
132 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
133 	const cpu_t *cpu = wsp->walk_layer;
134 	kmem_cpu_cache_t cc;
135 
136 	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
137 
138 	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
139 		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
140 		return (WALK_ERR);
141 	}
142 
143 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
144 }
145 
146 static int
147 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
148 {
149 	kmem_slab_t *sp = p;
150 	uintptr_t caddr = (uintptr_t)arg;
151 	if ((uintptr_t)sp->slab_cache != caddr) {
152 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
153 		    saddr, caddr, sp->slab_cache);
154 		return (-1);
155 	}
156 
157 	return (0);
158 }
159 
160 static int
161 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
162 {
163 	kmem_slab_t *sp = p;
164 
165 	int rc = kmem_slab_check(p, saddr, arg);
166 	if (rc != 0) {
167 		return (rc);
168 	}
169 
170 	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
171 		mdb_warn("slab %p is not a partial slab\n", saddr);
172 		return (-1);
173 	}
174 
175 	return (0);
176 }
177 
178 static int
179 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
180 {
181 	kmem_slab_t *sp = p;
182 
183 	int rc = kmem_slab_check(p, saddr, arg);
184 	if (rc != 0) {
185 		return (rc);
186 	}
187 
188 	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
189 		mdb_warn("slab %p is not completely allocated\n", saddr);
190 		return (-1);
191 	}
192 
193 	return (0);
194 }
195 
/*
 * State handed to kmem_nth_slab_check(): the cache each slab must belong
 * to, plus a countdown that the checker decrements once per slab.
 */
typedef struct {
	uintptr_t kns_cache_addr;	/* address of the owning cache */
	int kns_nslabs;			/* checker returns 1 once this hits 0 */
} kmem_nth_slab_t;
200 
201 static int
202 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
203 {
204 	kmem_nth_slab_t *chkp = arg;
205 
206 	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
207 	if (rc != 0) {
208 		return (rc);
209 	}
210 
211 	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
212 }
213 
214 static int
215 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
216 {
217 	uintptr_t caddr = wsp->walk_addr;
218 
219 	wsp->walk_addr = (uintptr_t)(caddr +
220 	    offsetof(kmem_cache_t, cache_complete_slabs));
221 
222 	return (list_walk_init_checked(wsp, "slab list", "slab",
223 	    kmem_complete_slab_check, (void *)caddr));
224 }
225 
226 static int
227 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
228 {
229 	uintptr_t caddr = wsp->walk_addr;
230 
231 	wsp->walk_addr = (uintptr_t)(caddr +
232 	    offsetof(kmem_cache_t, cache_partial_slabs));
233 
234 	return (avl_walk_init_checked(wsp, "slab list", "slab",
235 	    kmem_partial_slab_check, (void *)caddr));
236 }
237 
238 int
239 kmem_slab_walk_init(mdb_walk_state_t *wsp)
240 {
241 	uintptr_t caddr = wsp->walk_addr;
242 
243 	if (caddr == NULL) {
244 		mdb_warn("kmem_slab doesn't support global walks\n");
245 		return (WALK_ERR);
246 	}
247 
248 	combined_walk_init(wsp);
249 	combined_walk_add(wsp,
250 	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
251 	combined_walk_add(wsp,
252 	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
253 
254 	return (WALK_NEXT);
255 }
256 
257 static int
258 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
259 {
260 	uintptr_t caddr = wsp->walk_addr;
261 	kmem_nth_slab_t *chk;
262 
263 	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
264 	    UM_SLEEP | UM_GC);
265 	chk->kns_cache_addr = caddr;
266 	chk->kns_nslabs = 1;
267 	wsp->walk_addr = (uintptr_t)(caddr +
268 	    offsetof(kmem_cache_t, cache_complete_slabs));
269 
270 	return (list_walk_init_checked(wsp, "slab list", "slab",
271 	    kmem_nth_slab_check, chk));
272 }
273 
274 int
275 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
276 {
277 	uintptr_t caddr = wsp->walk_addr;
278 	kmem_cache_t c;
279 
280 	if (caddr == NULL) {
281 		mdb_warn("kmem_slab_partial doesn't support global walks\n");
282 		return (WALK_ERR);
283 	}
284 
285 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
286 		mdb_warn("couldn't read kmem_cache at %p", caddr);
287 		return (WALK_ERR);
288 	}
289 
290 	combined_walk_init(wsp);
291 
292 	/*
293 	 * Some consumers (umem_walk_step(), in particular) require at
294 	 * least one callback if there are any buffers in the cache.  So
295 	 * if there are *no* partial slabs, report the first full slab, if
296 	 * any.
297 	 *
298 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
299 	 */
300 	if (c.cache_partial_slabs.avl_numnodes == 0) {
301 		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
302 		    list_walk_step, list_walk_fini);
303 	} else {
304 		combined_walk_add(wsp, kmem_partial_slab_walk_init,
305 		    avl_walk_step, avl_walk_fini);
306 	}
307 
308 	return (WALK_NEXT);
309 }
310 
311 int
312 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
313 {
314 	kmem_cache_t c;
315 	const char *filter = NULL;
316 
317 	if (mdb_getopts(ac, argv,
318 	    'n', MDB_OPT_STR, &filter,
319 	    NULL) != ac) {
320 		return (DCMD_USAGE);
321 	}
322 
323 	if (!(flags & DCMD_ADDRSPEC)) {
324 		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
325 			mdb_warn("can't walk kmem_cache");
326 			return (DCMD_ERR);
327 		}
328 		return (DCMD_OK);
329 	}
330 
331 	if (DCMD_HDRSPEC(flags))
332 		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
333 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
334 
335 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
336 		mdb_warn("couldn't read kmem_cache at %p", addr);
337 		return (DCMD_ERR);
338 	}
339 
340 	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
341 		return (DCMD_OK);
342 
343 	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
344 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
345 
346 	return (DCMD_OK);
347 }
348 
/*
 * Print the help text for the kmem_cache dcmd: a one-line summary, the
 * OPTIONS heading (printed with the indent temporarily reduced by 2), and
 * the option and output-column descriptions.
 */
void
kmem_cache_help(void)
{
	mdb_printf("%s", "Print kernel memory caches.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
}
369 
370 #define	LABEL_WIDTH	11
371 static void
372 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
373     size_t maxbuckets, size_t minbucketsize)
374 {
375 	uint64_t total;
376 	int buckets;
377 	int i;
378 	const int *distarray;
379 	int complete[2];
380 
381 	buckets = buffers_per_slab;
382 
383 	total = 0;
384 	for (i = 0; i <= buffers_per_slab; i++)
385 		total += ks_bucket[i];
386 
387 	if (maxbuckets > 1)
388 		buckets = MIN(buckets, maxbuckets);
389 
390 	if (minbucketsize > 1) {
391 		/*
392 		 * minbucketsize does not apply to the first bucket reserved
393 		 * for completely allocated slabs
394 		 */
395 		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
396 		    minbucketsize));
397 		if ((buckets < 2) && (buffers_per_slab > 1)) {
398 			buckets = 2;
399 			minbucketsize = (buffers_per_slab - 1);
400 		}
401 	}
402 
403 	/*
404 	 * The first printed bucket is reserved for completely allocated slabs.
405 	 * Passing (buckets - 1) excludes that bucket from the generated
406 	 * distribution, since we're handling it as a special case.
407 	 */
408 	complete[0] = buffers_per_slab;
409 	complete[1] = buffers_per_slab + 1;
410 	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
411 
412 	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
413 	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
414 
415 	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
416 	/*
417 	 * Print bucket ranges in descending order after the first bucket for
418 	 * completely allocated slabs, so a person can see immediately whether
419 	 * or not there is fragmentation without having to scan possibly
420 	 * multiple screens of output. Starting at (buckets - 2) excludes the
421 	 * extra terminating bucket.
422 	 */
423 	for (i = buckets - 2; i >= 0; i--) {
424 		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
425 	}
426 	mdb_printf("\n");
427 }
428 #undef LABEL_WIDTH
429 
430 /*ARGSUSED*/
431 static int
432 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
433 {
434 	*is_slab = B_TRUE;
435 	return (WALK_DONE);
436 }
437 
438 /*ARGSUSED*/
439 static int
440 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
441     boolean_t *is_slab)
442 {
443 	/*
444 	 * The "kmem_partial_slab" walker reports the first full slab if there
445 	 * are no partial slabs (for the sake of consumers that require at least
446 	 * one callback if there are any buffers in the cache).
447 	 */
448 	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
449 	return (WALK_DONE);
450 }
451 
/*
 * Per-slab record collected by kmem_slablist_stat() for every slab that
 * has at least one unused buffer; printed by kmem_slabs -v.
 */
typedef struct kmem_slab_usage {
	int ksu_refcnt;			/* count of allocated buffers on slab */
	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
} kmem_slab_usage_t;
456 
/*
 * Accumulator filled in by kmem_slablist_stat() while kmem_slabs() walks
 * the slabs of one cache.  ks_usage is grown on demand by the callback and
 * must be freed (ks_usage_len entries) by whoever owns the structure;
 * ks_bucket is allocated and freed by kmem_slabs().
 */
typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;	/* local copy of the cache walked */
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;
467 
468 /*ARGSUSED*/
469 static int
470 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
471     kmem_slab_stats_t *ks)
472 {
473 	kmem_slab_usage_t *ksu;
474 	long unused;
475 
476 	ks->ks_slabs++;
477 	ks->ks_bucket[sp->slab_refcnt]++;
478 
479 	unused = (sp->slab_chunks - sp->slab_refcnt);
480 	if (unused == 0) {
481 		return (WALK_NEXT);
482 	}
483 
484 	ks->ks_partial_slabs++;
485 	ks->ks_unused_buffers += unused;
486 
487 	if (ks->ks_partial_slabs > ks->ks_usage_len) {
488 		kmem_slab_usage_t *usage;
489 		int len = ks->ks_usage_len;
490 
491 		len = (len == 0 ? 16 : len * 2);
492 		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
493 		if (ks->ks_usage != NULL) {
494 			bcopy(ks->ks_usage, usage,
495 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
496 			mdb_free(ks->ks_usage,
497 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
498 		}
499 		ks->ks_usage = usage;
500 		ks->ks_usage_len = len;
501 	}
502 
503 	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
504 	ksu->ksu_refcnt = sp->slab_refcnt;
505 	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
506 	return (WALK_NEXT);
507 }
508 
/*
 * Print the three-line column header used by the kmem_slabs dcmd.
 * Declared with (void) so the definition is a real prototype, like the
 * other no-argument functions in this file.
 */
static void
kmem_slabs_header(void)
{
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}
520 
521 int
522 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
523 {
524 	kmem_cache_t c;
525 	kmem_slab_stats_t stats;
526 	mdb_walk_cb_t cb;
527 	int pct;
528 	int tenths_pct;
529 	size_t maxbuckets = 1;
530 	size_t minbucketsize = 0;
531 	const char *filter = NULL;
532 	const char *name = NULL;
533 	uint_t opt_v = FALSE;
534 	boolean_t buckets = B_FALSE;
535 	boolean_t skip = B_FALSE;
536 
537 	if (mdb_getopts(argc, argv,
538 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
539 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
540 	    'n', MDB_OPT_STR, &filter,
541 	    'N', MDB_OPT_STR, &name,
542 	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
543 	    NULL) != argc) {
544 		return (DCMD_USAGE);
545 	}
546 
547 	if ((maxbuckets != 1) || (minbucketsize != 0)) {
548 		buckets = B_TRUE;
549 	}
550 
551 	if (!(flags & DCMD_ADDRSPEC)) {
552 		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
553 		    argv) == -1) {
554 			mdb_warn("can't walk kmem_cache");
555 			return (DCMD_ERR);
556 		}
557 		return (DCMD_OK);
558 	}
559 
560 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
561 		mdb_warn("couldn't read kmem_cache at %p", addr);
562 		return (DCMD_ERR);
563 	}
564 
565 	if (name == NULL) {
566 		skip = ((filter != NULL) &&
567 		    (strstr(c.cache_name, filter) == NULL));
568 	} else if (filter == NULL) {
569 		skip = (strcmp(c.cache_name, name) != 0);
570 	} else {
571 		/* match either -n or -N */
572 		skip = ((strcmp(c.cache_name, name) != 0) &&
573 		    (strstr(c.cache_name, filter) == NULL));
574 	}
575 
576 	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
577 		kmem_slabs_header();
578 	} else if ((opt_v || buckets) && !skip) {
579 		if (DCMD_HDRSPEC(flags)) {
580 			kmem_slabs_header();
581 		} else {
582 			boolean_t is_slab = B_FALSE;
583 			const char *walker_name;
584 			if (opt_v) {
585 				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
586 				walker_name = "kmem_slab_partial";
587 			} else {
588 				cb = (mdb_walk_cb_t)kmem_first_slab;
589 				walker_name = "kmem_slab";
590 			}
591 			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
592 			if (is_slab) {
593 				kmem_slabs_header();
594 			}
595 		}
596 	}
597 
598 	if (skip) {
599 		return (DCMD_OK);
600 	}
601 
602 	bzero(&stats, sizeof (kmem_slab_stats_t));
603 	stats.ks_cp = &c;
604 	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
605 	/* +1 to include a zero bucket */
606 	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
607 	    sizeof (*stats.ks_bucket), UM_SLEEP);
608 	cb = (mdb_walk_cb_t)kmem_slablist_stat;
609 	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);
610 
611 	if (c.cache_buftotal == 0) {
612 		pct = 0;
613 		tenths_pct = 0;
614 	} else {
615 		uint64_t n = stats.ks_unused_buffers * 10000;
616 		pct = (int)(n / c.cache_buftotal);
617 		tenths_pct = pct - ((pct / 100) * 100);
618 		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
619 		if (tenths_pct == 10) {
620 			pct += 100;
621 			tenths_pct = 0;
622 		}
623 	}
624 
625 	pct /= 100;
626 	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
627 	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
628 	    stats.ks_unused_buffers, pct, tenths_pct);
629 
630 	if (maxbuckets == 0) {
631 		maxbuckets = stats.ks_max_buffers_per_slab;
632 	}
633 
634 	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
635 	    (stats.ks_slabs > 0)) {
636 		mdb_printf("\n");
637 		kmem_slabs_print_dist(stats.ks_bucket,
638 		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
639 	}
640 
641 	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
642 	    sizeof (*stats.ks_bucket));
643 
644 	if (!opt_v) {
645 		return (DCMD_OK);
646 	}
647 
648 	if (opt_v && (stats.ks_partial_slabs > 0)) {
649 		int i;
650 		kmem_slab_usage_t *ksu;
651 
652 		mdb_printf("  %d complete (%d), %d partial:",
653 		    (stats.ks_slabs - stats.ks_partial_slabs),
654 		    stats.ks_max_buffers_per_slab,
655 		    stats.ks_partial_slabs);
656 
657 		for (i = 0; i < stats.ks_partial_slabs; i++) {
658 			ksu = &stats.ks_usage[i];
659 			mdb_printf(" %d%s", ksu->ksu_refcnt,
660 			    (ksu->ksu_nomove ? "*" : ""));
661 		}
662 		mdb_printf("\n\n");
663 	}
664 
665 	if (stats.ks_usage_len > 0) {
666 		mdb_free(stats.ks_usage,
667 		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
668 	}
669 
670 	return (DCMD_OK);
671 }
672 
/*
 * Print the help text for the kmem_slabs dcmd: a one-line summary, the
 * OPTIONS heading (printed with the indent temporarily reduced by 2), and
 * the option and output-column descriptions.
 *
 * The -v example is written in the exact layout kmem_slabs emits,
 * "<n> complete (<buffers per slab>), <m> partial: ...".
 */
void
kmem_slabs_help(void)
{
	mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete (8), 3 partial: 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with less than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        A slab's allocated buffer count representing a partial slab (9 in\n"
"        the example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
}
740 
/*
 * qsort()-style comparator for an array of uintptr_t: orders addresses
 * ascending.  Returns -1, 0 or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t a = *(const uintptr_t *)lhs;
	const uintptr_t b = *(const uintptr_t *)rhs;

	/* (a > b) and (a < b) are each 0 or 1, and never both 1 */
	return ((a > b) - (a < b));
}
753 
754 static int
755 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
756 {
757 	const kmem_bufctl_audit_t *bcp1 = *lhs;
758 	const kmem_bufctl_audit_t *bcp2 = *rhs;
759 
760 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
761 		return (-1);
762 
763 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
764 		return (1);
765 
766 	return (0);
767 }
768 
/*
 * State for the kmem_hash walker, allocated in kmem_hash_walk_init() and
 * released in kmem_hash_walk_fini().
 */
typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;		/* local copy of the hash table */
	size_t kmhw_nelems;		/* number of buckets in kmhw_table */
	size_t kmhw_pos;		/* next bucket index to examine */
	kmem_bufctl_t kmhw_cur;		/* last bufctl read; bc_next chains */
} kmem_hash_walk_t;
775 
776 int
777 kmem_hash_walk_init(mdb_walk_state_t *wsp)
778 {
779 	kmem_hash_walk_t *kmhw;
780 	uintptr_t *hash;
781 	kmem_cache_t c;
782 	uintptr_t haddr, addr = wsp->walk_addr;
783 	size_t nelems;
784 	size_t hsize;
785 
786 	if (addr == NULL) {
787 		mdb_warn("kmem_hash doesn't support global walks\n");
788 		return (WALK_ERR);
789 	}
790 
791 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
792 		mdb_warn("couldn't read cache at addr %p", addr);
793 		return (WALK_ERR);
794 	}
795 
796 	if (!(c.cache_flags & KMF_HASH)) {
797 		mdb_warn("cache %p doesn't have a hash table\n", addr);
798 		return (WALK_DONE);		/* nothing to do */
799 	}
800 
801 	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
802 	kmhw->kmhw_cur.bc_next = NULL;
803 	kmhw->kmhw_pos = 0;
804 
805 	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
806 	hsize = nelems * sizeof (uintptr_t);
807 	haddr = (uintptr_t)c.cache_hash_table;
808 
809 	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
810 	if (mdb_vread(hash, hsize, haddr) == -1) {
811 		mdb_warn("failed to read hash table at %p", haddr);
812 		mdb_free(hash, hsize);
813 		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
814 		return (WALK_ERR);
815 	}
816 
817 	wsp->walk_data = kmhw;
818 
819 	return (WALK_NEXT);
820 }
821 
822 int
823 kmem_hash_walk_step(mdb_walk_state_t *wsp)
824 {
825 	kmem_hash_walk_t *kmhw = wsp->walk_data;
826 	uintptr_t addr = NULL;
827 
828 	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
829 		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
830 			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
831 				break;
832 		}
833 	}
834 	if (addr == NULL)
835 		return (WALK_DONE);
836 
837 	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
838 		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
839 		return (WALK_ERR);
840 	}
841 
842 	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
843 }
844 
845 void
846 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
847 {
848 	kmem_hash_walk_t *kmhw = wsp->walk_data;
849 
850 	if (kmhw == NULL)
851 		return;
852 
853 	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
854 	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
855 }
856 
857 /*
858  * Find the address of the bufctl structure for the address 'buf' in cache
859  * 'cp', which is at address caddr, and place it in *out.
860  */
861 static int
862 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
863 {
864 	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
865 	kmem_bufctl_t *bcp;
866 	kmem_bufctl_t bc;
867 
868 	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
869 		mdb_warn("unable to read hash bucket for %p in cache %p",
870 		    buf, caddr);
871 		return (-1);
872 	}
873 
874 	while (bcp != NULL) {
875 		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
876 		    (uintptr_t)bcp) == -1) {
877 			mdb_warn("unable to read bufctl at %p", bcp);
878 			return (-1);
879 		}
880 		if (bc.bc_addr == buf) {
881 			*out = (uintptr_t)bcp;
882 			return (0);
883 		}
884 		bcp = bc.bc_next;
885 	}
886 
887 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
888 	return (-1);
889 }
890 
891 int
892 kmem_get_magsize(const kmem_cache_t *cp)
893 {
894 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
895 	GElf_Sym mt_sym;
896 	kmem_magtype_t mt;
897 	int res;
898 
899 	/*
900 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
901 	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
902 	 * it is okay to return 0 for them.
903 	 */
904 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
905 	    (cp->cache_flags & KMF_NOMAGAZINE))
906 		return (res);
907 
908 	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
909 		mdb_warn("unable to read 'kmem_magtype'");
910 	} else if (addr < mt_sym.st_value ||
911 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
912 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
913 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
914 		    cp->cache_name, addr);
915 		return (0);
916 	}
917 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
918 		mdb_warn("unable to read magtype at %a", addr);
919 		return (0);
920 	}
921 	return (mt.mt_magsize);
922 }
923 
924 /*ARGSUSED*/
925 static int
926 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
927 {
928 	*est -= (sp->slab_chunks - sp->slab_refcnt);
929 
930 	return (WALK_NEXT);
931 }
932 
933 /*
934  * Returns an upper bound on the number of allocated buffers in a given
935  * cache.
936  */
937 size_t
938 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
939 {
940 	int magsize;
941 	size_t cache_est;
942 
943 	cache_est = cp->cache_buftotal;
944 
945 	(void) mdb_pwalk("kmem_slab_partial",
946 	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
947 
948 	if ((magsize = kmem_get_magsize(cp)) != 0) {
949 		size_t mag_est = cp->cache_full.ml_total * magsize;
950 
951 		if (cache_est >= mag_est) {
952 			cache_est -= mag_est;
953 		} else {
954 			mdb_warn("cache %p's magazine layer holds more buffers "
955 			    "than the slab layer.\n", addr);
956 		}
957 	}
958 	return (cache_est);
959 }
960 
/*
 * Read the magazine at target address 'kmp' into the local buffer 'mp'
 * and append its first 'rounds' entries to maglist[].
 *
 * This macro deliberately uses the caller's locals by name: mp, kmp,
 * magbsize, i, maglist, magcnt and magmax, and it jumps to the caller's
 * "fail" label if the read fails or maglist[] fills up.
 *
 * The body is wrapped in do { } while (0) so that it expands to a single
 * statement, and 'rounds' is parenthesized so that any expression may be
 * passed.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
} while (0)
975 
/*
 * Gather the buffer addresses held in the magazine layer of the cache 'cp'
 * (a local copy; 'addr' is its target address and is used only in
 * messages).  'ncpus' is the number of cache_cpu[] entries to examine and
 * 'alloc_flags' is passed to mdb_alloc() for the working buffers.
 *
 * On success returns WALK_NEXT with:
 *	*maglistp	array of buffer pointers (magmax * sizeof (void *)
 *			bytes were allocated for it)
 *	*magcntp	number of entries filled in
 *	*magmaxp	capacity of the array, in entries
 * If the cache's magazine size is 0, the outputs are NULL/0/0.
 *
 * On failure returns WALK_ERR; the working buffers are freed here unless
 * alloc_flags includes UM_GC.
 */
int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = kmem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	/* bytes in one magazine: header plus magsize round pointers */
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		kmp = mp->mag_next;

		if (kmp == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		/* translate the local ccp back to its target address */
		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	/* the scratch magazine is no longer needed; maglist goes to caller */
	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	/* with UM_GC the debugger reclaims these allocations itself */
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}
1081 
1082 static int
1083 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1084 {
1085 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1086 }
1087 
1088 static int
1089 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1090 {
1091 	kmem_bufctl_audit_t b;
1092 
1093 	/*
1094 	 * if KMF_AUDIT is not set, we know that we're looking at a
1095 	 * kmem_bufctl_t.
1096 	 */
1097 	if (!(cp->cache_flags & KMF_AUDIT) ||
1098 	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1099 		(void) memset(&b, 0, sizeof (b));
1100 		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1101 			mdb_warn("unable to read bufctl at %p", buf);
1102 			return (WALK_ERR);
1103 		}
1104 	}
1105 
1106 	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1107 }
1108 
1109 typedef struct kmem_walk {
1110 	int kmw_type;
1111 
1112 	int kmw_addr;			/* cache address */
1113 	kmem_cache_t *kmw_cp;
1114 	size_t kmw_csize;
1115 
1116 	/*
1117 	 * magazine layer
1118 	 */
1119 	void **kmw_maglist;
1120 	size_t kmw_max;
1121 	size_t kmw_count;
1122 	size_t kmw_pos;
1123 
1124 	/*
1125 	 * slab layer
1126 	 */
1127 	char *kmw_valid;	/* to keep track of freed buffers */
1128 	char *kmw_ubase;	/* buffer for slab data */
1129 } kmem_walk_t;
1130 
1131 static int
1132 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1133 {
1134 	kmem_walk_t *kmw;
1135 	int ncpus, csize;
1136 	kmem_cache_t *cp;
1137 	size_t vm_quantum;
1138 
1139 	size_t magmax, magcnt;
1140 	void **maglist = NULL;
1141 	uint_t chunksize, slabsize;
1142 	int status = WALK_ERR;
1143 	uintptr_t addr = wsp->walk_addr;
1144 	const char *layered;
1145 
1146 	type &= ~KM_HASH;
1147 
1148 	if (addr == NULL) {
1149 		mdb_warn("kmem walk doesn't support global walks\n");
1150 		return (WALK_ERR);
1151 	}
1152 
1153 	dprintf(("walking %p\n", addr));
1154 
1155 	/*
1156 	 * First we need to figure out how many CPUs are configured in the
1157 	 * system to know how much to slurp out.
1158 	 */
1159 	mdb_readvar(&ncpus, "max_ncpus");
1160 
1161 	csize = KMEM_CACHE_SIZE(ncpus);
1162 	cp = mdb_alloc(csize, UM_SLEEP);
1163 
1164 	if (mdb_vread(cp, csize, addr) == -1) {
1165 		mdb_warn("couldn't read cache at addr %p", addr);
1166 		goto out2;
1167 	}
1168 
1169 	/*
1170 	 * It's easy for someone to hand us an invalid cache address.
1171 	 * Unfortunately, it is hard for this walker to survive an
1172 	 * invalid cache cleanly.  So we make sure that:
1173 	 *
1174 	 *	1. the vmem arena for the cache is readable,
1175 	 *	2. the vmem arena's quantum is a power of 2,
1176 	 *	3. our slabsize is a multiple of the quantum, and
1177 	 *	4. our chunksize is >0 and less than our slabsize.
1178 	 */
1179 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1180 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1181 	    vm_quantum == 0 ||
1182 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1183 	    cp->cache_slabsize < vm_quantum ||
1184 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1185 	    cp->cache_chunksize == 0 ||
1186 	    cp->cache_chunksize > cp->cache_slabsize) {
1187 		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1188 		goto out2;
1189 	}
1190 
1191 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1192 
1193 	if (cp->cache_buftotal == 0) {
1194 		mdb_free(cp, csize);
1195 		return (WALK_DONE);
1196 	}
1197 
1198 	/*
1199 	 * If they ask for bufctls, but it's a small-slab cache,
1200 	 * there is nothing to report.
1201 	 */
1202 	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1203 		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1204 		    cp->cache_flags));
1205 		mdb_free(cp, csize);
1206 		return (WALK_DONE);
1207 	}
1208 
1209 	/*
1210 	 * If they want constructed buffers, but there's no constructor or
1211 	 * the cache has DEADBEEF checking enabled, there is nothing to report.
1212 	 */
1213 	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1214 	    cp->cache_constructor == NULL ||
1215 	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1216 		mdb_free(cp, csize);
1217 		return (WALK_DONE);
1218 	}
1219 
1220 	/*
1221 	 * Read in the contents of the magazine layer
1222 	 */
1223 	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1224 	    &magmax, UM_SLEEP) == WALK_ERR)
1225 		goto out2;
1226 
1227 	/*
1228 	 * We have all of the buffers from the magazines;  if we are walking
1229 	 * allocated buffers, sort them so we can bsearch them later.
1230 	 */
1231 	if (type & KM_ALLOCATED)
1232 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1233 
1234 	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1235 
1236 	kmw->kmw_type = type;
1237 	kmw->kmw_addr = addr;
1238 	kmw->kmw_cp = cp;
1239 	kmw->kmw_csize = csize;
1240 	kmw->kmw_maglist = maglist;
1241 	kmw->kmw_max = magmax;
1242 	kmw->kmw_count = magcnt;
1243 	kmw->kmw_pos = 0;
1244 
1245 	/*
1246 	 * When walking allocated buffers in a KMF_HASH cache, we walk the
1247 	 * hash table instead of the slab layer.
1248 	 */
1249 	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1250 		layered = "kmem_hash";
1251 
1252 		kmw->kmw_type |= KM_HASH;
1253 	} else {
1254 		/*
1255 		 * If we are walking freed buffers, we only need the
1256 		 * magazine layer plus the partially allocated slabs.
1257 		 * To walk allocated buffers, we need all of the slabs.
1258 		 */
1259 		if (type & KM_ALLOCATED)
1260 			layered = "kmem_slab";
1261 		else
1262 			layered = "kmem_slab_partial";
1263 
1264 		/*
1265 		 * for small-slab caches, we read in the entire slab.  For
1266 		 * freed buffers, we can just walk the freelist.  For
1267 		 * allocated buffers, we use a 'valid' array to track
1268 		 * the freed buffers.
1269 		 */
1270 		if (!(cp->cache_flags & KMF_HASH)) {
1271 			chunksize = cp->cache_chunksize;
1272 			slabsize = cp->cache_slabsize;
1273 
1274 			kmw->kmw_ubase = mdb_alloc(slabsize +
1275 			    sizeof (kmem_bufctl_t), UM_SLEEP);
1276 
1277 			if (type & KM_ALLOCATED)
1278 				kmw->kmw_valid =
1279 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1280 		}
1281 	}
1282 
1283 	status = WALK_NEXT;
1284 
1285 	if (mdb_layered_walk(layered, wsp) == -1) {
1286 		mdb_warn("unable to start layered '%s' walk", layered);
1287 		status = WALK_ERR;
1288 	}
1289 
1290 out1:
1291 	if (status == WALK_ERR) {
1292 		if (kmw->kmw_valid)
1293 			mdb_free(kmw->kmw_valid, slabsize / chunksize);
1294 
1295 		if (kmw->kmw_ubase)
1296 			mdb_free(kmw->kmw_ubase, slabsize +
1297 			    sizeof (kmem_bufctl_t));
1298 
1299 		if (kmw->kmw_maglist)
1300 			mdb_free(kmw->kmw_maglist,
1301 			    kmw->kmw_max * sizeof (uintptr_t));
1302 
1303 		mdb_free(kmw, sizeof (kmem_walk_t));
1304 		wsp->walk_data = NULL;
1305 	}
1306 
1307 out2:
1308 	if (status == WALK_ERR)
1309 		mdb_free(cp, csize);
1310 
1311 	return (status);
1312 }
1313 
1314 int
1315 kmem_walk_step(mdb_walk_state_t *wsp)
1316 {
1317 	kmem_walk_t *kmw = wsp->walk_data;
1318 	int type = kmw->kmw_type;
1319 	kmem_cache_t *cp = kmw->kmw_cp;
1320 
1321 	void **maglist = kmw->kmw_maglist;
1322 	int magcnt = kmw->kmw_count;
1323 
1324 	uintptr_t chunksize, slabsize;
1325 	uintptr_t addr;
1326 	const kmem_slab_t *sp;
1327 	const kmem_bufctl_t *bcp;
1328 	kmem_bufctl_t bc;
1329 
1330 	int chunks;
1331 	char *kbase;
1332 	void *buf;
1333 	int i, ret;
1334 
1335 	char *valid, *ubase;
1336 
1337 	/*
1338 	 * first, handle the 'kmem_hash' layered walk case
1339 	 */
1340 	if (type & KM_HASH) {
1341 		/*
1342 		 * We have a buffer which has been allocated out of the
1343 		 * global layer. We need to make sure that it's not
1344 		 * actually sitting in a magazine before we report it as
1345 		 * an allocated buffer.
1346 		 */
1347 		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1348 
1349 		if (magcnt > 0 &&
1350 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1351 		    addrcmp) != NULL)
1352 			return (WALK_NEXT);
1353 
1354 		if (type & KM_BUFCTL)
1355 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1356 
1357 		return (kmem_walk_callback(wsp, (uintptr_t)buf));
1358 	}
1359 
1360 	ret = WALK_NEXT;
1361 
1362 	addr = kmw->kmw_addr;
1363 
1364 	/*
1365 	 * If we're walking freed buffers, report everything in the
1366 	 * magazine layer before processing the first slab.
1367 	 */
1368 	if ((type & KM_FREE) && magcnt != 0) {
1369 		kmw->kmw_count = 0;		/* only do this once */
1370 		for (i = 0; i < magcnt; i++) {
1371 			buf = maglist[i];
1372 
1373 			if (type & KM_BUFCTL) {
1374 				uintptr_t out;
1375 
1376 				if (cp->cache_flags & KMF_BUFTAG) {
1377 					kmem_buftag_t *btp;
1378 					kmem_buftag_t tag;
1379 
1380 					/* LINTED - alignment */
1381 					btp = KMEM_BUFTAG(cp, buf);
1382 					if (mdb_vread(&tag, sizeof (tag),
1383 					    (uintptr_t)btp) == -1) {
1384 						mdb_warn("reading buftag for "
1385 						    "%p at %p", buf, btp);
1386 						continue;
1387 					}
1388 					out = (uintptr_t)tag.bt_bufctl;
1389 				} else {
1390 					if (kmem_hash_lookup(cp, addr, buf,
1391 					    &out) == -1)
1392 						continue;
1393 				}
1394 				ret = bufctl_walk_callback(cp, wsp, out);
1395 			} else {
1396 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1397 			}
1398 
1399 			if (ret != WALK_NEXT)
1400 				return (ret);
1401 		}
1402 	}
1403 
1404 	/*
1405 	 * If they want constructed buffers, we're finished, since the
1406 	 * magazine layer holds them all.
1407 	 */
1408 	if (type & KM_CONSTRUCTED)
1409 		return (WALK_DONE);
1410 
1411 	/*
1412 	 * Handle the buffers in the current slab
1413 	 */
1414 	chunksize = cp->cache_chunksize;
1415 	slabsize = cp->cache_slabsize;
1416 
1417 	sp = wsp->walk_layer;
1418 	chunks = sp->slab_chunks;
1419 	kbase = sp->slab_base;
1420 
1421 	dprintf(("kbase is %p\n", kbase));
1422 
1423 	if (!(cp->cache_flags & KMF_HASH)) {
1424 		valid = kmw->kmw_valid;
1425 		ubase = kmw->kmw_ubase;
1426 
1427 		if (mdb_vread(ubase, chunks * chunksize,
1428 		    (uintptr_t)kbase) == -1) {
1429 			mdb_warn("failed to read slab contents at %p", kbase);
1430 			return (WALK_ERR);
1431 		}
1432 
1433 		/*
1434 		 * Set up the valid map as fully allocated -- we'll punch
1435 		 * out the freelist.
1436 		 */
1437 		if (type & KM_ALLOCATED)
1438 			(void) memset(valid, 1, chunks);
1439 	} else {
1440 		valid = NULL;
1441 		ubase = NULL;
1442 	}
1443 
1444 	/*
1445 	 * walk the slab's freelist
1446 	 */
1447 	bcp = sp->slab_head;
1448 
1449 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1450 
1451 	/*
1452 	 * since we could be in the middle of allocating a buffer,
1453 	 * our refcnt could be one higher than it aught.  So we
1454 	 * check one further on the freelist than the count allows.
1455 	 */
1456 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1457 		uint_t ndx;
1458 
1459 		dprintf(("bcp is %p\n", bcp));
1460 
1461 		if (bcp == NULL) {
1462 			if (i == chunks)
1463 				break;
1464 			mdb_warn(
1465 			    "slab %p in cache %p freelist too short by %d\n",
1466 			    sp, addr, chunks - i);
1467 			break;
1468 		}
1469 
1470 		if (cp->cache_flags & KMF_HASH) {
1471 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1472 				mdb_warn("failed to read bufctl ptr at %p",
1473 				    bcp);
1474 				break;
1475 			}
1476 			buf = bc.bc_addr;
1477 		} else {
1478 			/*
1479 			 * Otherwise the buffer is in the slab which
1480 			 * we've read in;  we just need to determine
1481 			 * its offset in the slab to find the
1482 			 * kmem_bufctl_t.
1483 			 */
1484 			bc = *((kmem_bufctl_t *)
1485 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1486 			    (uintptr_t)ubase));
1487 
1488 			buf = KMEM_BUF(cp, bcp);
1489 		}
1490 
1491 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1492 
1493 		if (ndx > slabsize / cp->cache_bufsize) {
1494 			/*
1495 			 * This is very wrong; we have managed to find
1496 			 * a buffer in the slab which shouldn't
1497 			 * actually be here.  Emit a warning, and
1498 			 * try to continue.
1499 			 */
1500 			mdb_warn("buf %p is out of range for "
1501 			    "slab %p, cache %p\n", buf, sp, addr);
1502 		} else if (type & KM_ALLOCATED) {
1503 			/*
1504 			 * we have found a buffer on the slab's freelist;
1505 			 * clear its entry
1506 			 */
1507 			valid[ndx] = 0;
1508 		} else {
1509 			/*
1510 			 * Report this freed buffer
1511 			 */
1512 			if (type & KM_BUFCTL) {
1513 				ret = bufctl_walk_callback(cp, wsp,
1514 				    (uintptr_t)bcp);
1515 			} else {
1516 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1517 			}
1518 			if (ret != WALK_NEXT)
1519 				return (ret);
1520 		}
1521 
1522 		bcp = bc.bc_next;
1523 	}
1524 
1525 	if (bcp != NULL) {
1526 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1527 		    sp, addr, bcp));
1528 	}
1529 
1530 	/*
1531 	 * If we are walking freed buffers, the loop above handled reporting
1532 	 * them.
1533 	 */
1534 	if (type & KM_FREE)
1535 		return (WALK_NEXT);
1536 
1537 	if (type & KM_BUFCTL) {
1538 		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1539 		    "cache %p\n", addr);
1540 		return (WALK_ERR);
1541 	}
1542 
1543 	/*
1544 	 * Report allocated buffers, skipping buffers in the magazine layer.
1545 	 * We only get this far for small-slab caches.
1546 	 */
1547 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1548 		buf = (char *)kbase + i * chunksize;
1549 
1550 		if (!valid[i])
1551 			continue;		/* on slab freelist */
1552 
1553 		if (magcnt > 0 &&
1554 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1555 		    addrcmp) != NULL)
1556 			continue;		/* in magazine layer */
1557 
1558 		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1559 	}
1560 	return (ret);
1561 }
1562 
1563 void
1564 kmem_walk_fini(mdb_walk_state_t *wsp)
1565 {
1566 	kmem_walk_t *kmw = wsp->walk_data;
1567 	uintptr_t chunksize;
1568 	uintptr_t slabsize;
1569 
1570 	if (kmw == NULL)
1571 		return;
1572 
1573 	if (kmw->kmw_maglist != NULL)
1574 		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1575 
1576 	chunksize = kmw->kmw_cp->cache_chunksize;
1577 	slabsize = kmw->kmw_cp->cache_slabsize;
1578 
1579 	if (kmw->kmw_valid != NULL)
1580 		mdb_free(kmw->kmw_valid, slabsize / chunksize);
1581 	if (kmw->kmw_ubase != NULL)
1582 		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1583 
1584 	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1585 	mdb_free(kmw, sizeof (kmem_walk_t));
1586 }
1587 
1588 /*ARGSUSED*/
1589 static int
1590 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1591 {
1592 	/*
1593 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1594 	 * memory in other caches.  This can be a little confusing, so we
1595 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1596 	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1597 	 */
1598 	if (c->cache_cflags & KMC_NOTOUCH)
1599 		return (WALK_NEXT);
1600 
1601 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1602 	    wsp->walk_cbdata, addr) == -1)
1603 		return (WALK_DONE);
1604 
1605 	return (WALK_NEXT);
1606 }
1607 
/*
 * Implement a global walk for the walker called 'name': stash the walker
 * name in wsp->walk_data and apply kmem_walk_all() to every element of the
 * "kmem_cache" walk.
 *
 * NOTE: this macro expands to a block that always returns from the
 * enclosing function -- WALK_ERR if the cache walk could not be run,
 * WALK_DONE otherwise -- so nothing after it in the same branch executes.
 */
#define	KMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1614 
1615 int
1616 kmem_walk_init(mdb_walk_state_t *wsp)
1617 {
1618 	if (wsp->walk_arg != NULL)
1619 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1620 
1621 	if (wsp->walk_addr == NULL)
1622 		KMEM_WALK_ALL("kmem", wsp);
1623 	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1624 }
1625 
1626 int
1627 bufctl_walk_init(mdb_walk_state_t *wsp)
1628 {
1629 	if (wsp->walk_addr == NULL)
1630 		KMEM_WALK_ALL("bufctl", wsp);
1631 	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1632 }
1633 
1634 int
1635 freemem_walk_init(mdb_walk_state_t *wsp)
1636 {
1637 	if (wsp->walk_addr == NULL)
1638 		KMEM_WALK_ALL("freemem", wsp);
1639 	return (kmem_walk_init_common(wsp, KM_FREE));
1640 }
1641 
1642 int
1643 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1644 {
1645 	if (wsp->walk_addr == NULL)
1646 		KMEM_WALK_ALL("freemem_constructed", wsp);
1647 	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1648 }
1649 
1650 int
1651 freectl_walk_init(mdb_walk_state_t *wsp)
1652 {
1653 	if (wsp->walk_addr == NULL)
1654 		KMEM_WALK_ALL("freectl", wsp);
1655 	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1656 }
1657 
1658 int
1659 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1660 {
1661 	if (wsp->walk_addr == NULL)
1662 		KMEM_WALK_ALL("freectl_constructed", wsp);
1663 	return (kmem_walk_init_common(wsp,
1664 	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1665 }
1666 
/*
 * State for the bufctl_history walker, which follows a bufctl's chain of
 * bc_lastlog pointers.
 */
typedef struct bufctl_history_walk {
	void		*bhw_next;	/* address of next bufctl to read */
	kmem_cache_t	*bhw_cache;	/* bc_cache of the starting bufctl */
	kmem_slab_t	*bhw_slab;	/* bc_slab of the starting bufctl */
	hrtime_t	bhw_timestamp;	/* timestamp last reported; 0 = none */
} bufctl_history_walk_t;
1673 
1674 int
1675 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1676 {
1677 	bufctl_history_walk_t *bhw;
1678 	kmem_bufctl_audit_t bc;
1679 	kmem_bufctl_audit_t bcn;
1680 
1681 	if (wsp->walk_addr == NULL) {
1682 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1683 		return (WALK_ERR);
1684 	}
1685 
1686 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1687 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1688 		return (WALK_ERR);
1689 	}
1690 
1691 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1692 	bhw->bhw_timestamp = 0;
1693 	bhw->bhw_cache = bc.bc_cache;
1694 	bhw->bhw_slab = bc.bc_slab;
1695 
1696 	/*
1697 	 * sometimes the first log entry matches the base bufctl;  in that
1698 	 * case, skip the base bufctl.
1699 	 */
1700 	if (bc.bc_lastlog != NULL &&
1701 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1702 	    bc.bc_addr == bcn.bc_addr &&
1703 	    bc.bc_cache == bcn.bc_cache &&
1704 	    bc.bc_slab == bcn.bc_slab &&
1705 	    bc.bc_timestamp == bcn.bc_timestamp &&
1706 	    bc.bc_thread == bcn.bc_thread)
1707 		bhw->bhw_next = bc.bc_lastlog;
1708 	else
1709 		bhw->bhw_next = (void *)wsp->walk_addr;
1710 
1711 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1712 	wsp->walk_data = bhw;
1713 
1714 	return (WALK_NEXT);
1715 }
1716 
1717 int
1718 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1719 {
1720 	bufctl_history_walk_t *bhw = wsp->walk_data;
1721 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1722 	uintptr_t baseaddr = wsp->walk_addr;
1723 	kmem_bufctl_audit_t bc;
1724 
1725 	if (addr == NULL)
1726 		return (WALK_DONE);
1727 
1728 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1729 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1730 		return (WALK_ERR);
1731 	}
1732 
1733 	/*
1734 	 * The bufctl is only valid if the address, cache, and slab are
1735 	 * correct.  We also check that the timestamp is decreasing, to
1736 	 * prevent infinite loops.
1737 	 */
1738 	if ((uintptr_t)bc.bc_addr != baseaddr ||
1739 	    bc.bc_cache != bhw->bhw_cache ||
1740 	    bc.bc_slab != bhw->bhw_slab ||
1741 	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1742 		return (WALK_DONE);
1743 
1744 	bhw->bhw_next = bc.bc_lastlog;
1745 	bhw->bhw_timestamp = bc.bc_timestamp;
1746 
1747 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1748 }
1749 
1750 void
1751 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1752 {
1753 	bufctl_history_walk_t *bhw = wsp->walk_data;
1754 
1755 	mdb_free(bhw, sizeof (*bhw));
1756 }
1757 
/*
 * State for the kmem_log walker.
 */
typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;	/* local copy of the whole log */
	kmem_bufctl_audit_t **klw_sorted; /* pointers into klw_base, sorted */
	kmem_log_header_t klw_lh;	/* local copy of the log header */
	size_t klw_size;		/* size of klw_base, in bytes */
	size_t klw_maxndx;		/* number of entries in klw_sorted */
	size_t klw_ndx;			/* next klw_sorted entry to report */
} kmem_log_walk_t;
1766 
1767 int
1768 kmem_log_walk_init(mdb_walk_state_t *wsp)
1769 {
1770 	uintptr_t lp = wsp->walk_addr;
1771 	kmem_log_walk_t *klw;
1772 	kmem_log_header_t *lhp;
1773 	int maxndx, i, j, k;
1774 
1775 	/*
1776 	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
1777 	 * read the log whose kmem_log_header_t is stored at walk_addr.
1778 	 */
1779 	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1780 		mdb_warn("failed to read 'kmem_transaction_log'");
1781 		return (WALK_ERR);
1782 	}
1783 
1784 	if (lp == NULL) {
1785 		mdb_warn("log is disabled\n");
1786 		return (WALK_ERR);
1787 	}
1788 
1789 	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1790 	lhp = &klw->klw_lh;
1791 
1792 	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1793 		mdb_warn("failed to read log header at %p", lp);
1794 		mdb_free(klw, sizeof (kmem_log_walk_t));
1795 		return (WALK_ERR);
1796 	}
1797 
1798 	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1799 	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1800 	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1801 
1802 	if (mdb_vread(klw->klw_base, klw->klw_size,
1803 	    (uintptr_t)lhp->lh_base) == -1) {
1804 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1805 		mdb_free(klw->klw_base, klw->klw_size);
1806 		mdb_free(klw, sizeof (kmem_log_walk_t));
1807 		return (WALK_ERR);
1808 	}
1809 
1810 	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1811 	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1812 
1813 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1814 		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1815 		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1816 
1817 		for (j = 0; j < maxndx; j++)
1818 			klw->klw_sorted[k++] = &chunk[j];
1819 	}
1820 
1821 	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1822 	    (int(*)(const void *, const void *))bufctlcmp);
1823 
1824 	klw->klw_maxndx = k;
1825 	wsp->walk_data = klw;
1826 
1827 	return (WALK_NEXT);
1828 }
1829 
1830 int
1831 kmem_log_walk_step(mdb_walk_state_t *wsp)
1832 {
1833 	kmem_log_walk_t *klw = wsp->walk_data;
1834 	kmem_bufctl_audit_t *bcp;
1835 
1836 	if (klw->klw_ndx == klw->klw_maxndx)
1837 		return (WALK_DONE);
1838 
1839 	bcp = klw->klw_sorted[klw->klw_ndx++];
1840 
1841 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1842 	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1843 }
1844 
1845 void
1846 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1847 {
1848 	kmem_log_walk_t *klw = wsp->walk_data;
1849 
1850 	mdb_free(klw->klw_base, klw->klw_size);
1851 	mdb_free(klw->klw_sorted, klw->klw_maxndx *
1852 	    sizeof (kmem_bufctl_audit_t *));
1853 	mdb_free(klw, sizeof (kmem_log_walk_t));
1854 }
1855 
/*
 * One bufctl collected by the allocdby/freedby walkers.
 */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;	/* address of the bufctl */
	hrtime_t abb_ts;	/* its bc_timestamp; the sort key */
} allocdby_bufctl_t;
1860 
/*
 * State for the allocdby and freedby walkers.
 */
typedef struct allocdby_walk {
	const char *abw_walk;		/* per-cache walker to apply */
	uintptr_t abw_thread;		/* thread whose bufctls we collect */
	size_t abw_nbufs;		/* entries in use in abw_buf */
	size_t abw_size;		/* allocated entries in abw_buf */
	allocdby_bufctl_t *abw_buf;	/* collected bufctls */
	size_t abw_ndx;			/* next abw_buf entry to report */
} allocdby_walk_t;
1869 
1870 int
1871 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1872     allocdby_walk_t *abw)
1873 {
1874 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1875 		return (WALK_NEXT);
1876 
1877 	if (abw->abw_nbufs == abw->abw_size) {
1878 		allocdby_bufctl_t *buf;
1879 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1880 
1881 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1882 
1883 		bcopy(abw->abw_buf, buf, oldsize);
1884 		mdb_free(abw->abw_buf, oldsize);
1885 
1886 		abw->abw_size <<= 1;
1887 		abw->abw_buf = buf;
1888 	}
1889 
1890 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1891 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1892 	abw->abw_nbufs++;
1893 
1894 	return (WALK_NEXT);
1895 }
1896 
1897 /*ARGSUSED*/
1898 int
1899 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1900 {
1901 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1902 	    abw, addr) == -1) {
1903 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1904 		return (WALK_DONE);
1905 	}
1906 
1907 	return (WALK_NEXT);
1908 }
1909 
1910 static int
1911 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1912 {
1913 	if (lhs->abb_ts < rhs->abb_ts)
1914 		return (1);
1915 	if (lhs->abb_ts > rhs->abb_ts)
1916 		return (-1);
1917 	return (0);
1918 }
1919 
1920 static int
1921 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1922 {
1923 	allocdby_walk_t *abw;
1924 
1925 	if (wsp->walk_addr == NULL) {
1926 		mdb_warn("allocdby walk doesn't support global walks\n");
1927 		return (WALK_ERR);
1928 	}
1929 
1930 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1931 
1932 	abw->abw_thread = wsp->walk_addr;
1933 	abw->abw_walk = walk;
1934 	abw->abw_size = 128;	/* something reasonable */
1935 	abw->abw_buf =
1936 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1937 
1938 	wsp->walk_data = abw;
1939 
1940 	if (mdb_walk("kmem_cache",
1941 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1942 		mdb_warn("couldn't walk kmem_cache");
1943 		allocdby_walk_fini(wsp);
1944 		return (WALK_ERR);
1945 	}
1946 
1947 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1948 	    (int(*)(const void *, const void *))allocdby_cmp);
1949 
1950 	return (WALK_NEXT);
1951 }
1952 
1953 int
1954 allocdby_walk_init(mdb_walk_state_t *wsp)
1955 {
1956 	return (allocdby_walk_init_common(wsp, "bufctl"));
1957 }
1958 
1959 int
1960 freedby_walk_init(mdb_walk_state_t *wsp)
1961 {
1962 	return (allocdby_walk_init_common(wsp, "freectl"));
1963 }
1964 
1965 int
1966 allocdby_walk_step(mdb_walk_state_t *wsp)
1967 {
1968 	allocdby_walk_t *abw = wsp->walk_data;
1969 	kmem_bufctl_audit_t bc;
1970 	uintptr_t addr;
1971 
1972 	if (abw->abw_ndx == abw->abw_nbufs)
1973 		return (WALK_DONE);
1974 
1975 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1976 
1977 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1978 		mdb_warn("couldn't read bufctl at %p", addr);
1979 		return (WALK_DONE);
1980 	}
1981 
1982 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1983 }
1984 
1985 void
1986 allocdby_walk_fini(mdb_walk_state_t *wsp)
1987 {
1988 	allocdby_walk_t *abw = wsp->walk_data;
1989 
1990 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1991 	mdb_free(abw, sizeof (allocdby_walk_t));
1992 }
1993 
1994 /*ARGSUSED*/
1995 int
1996 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
1997 {
1998 	char c[MDB_SYM_NAMLEN];
1999 	GElf_Sym sym;
2000 	int i;
2001 
2002 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2003 	for (i = 0; i < bcp->bc_depth; i++) {
2004 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
2005 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2006 			continue;
2007 		if (strncmp(c, "kmem_", 5) == 0)
2008 			continue;
2009 		mdb_printf("%s+0x%lx",
2010 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2011 		break;
2012 	}
2013 	mdb_printf("\n");
2014 
2015 	return (WALK_NEXT);
2016 }
2017 
2018 static int
2019 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2020 {
2021 	if (!(flags & DCMD_ADDRSPEC))
2022 		return (DCMD_USAGE);
2023 
2024 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2025 
2026 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2027 		mdb_warn("can't walk '%s' for %p", w, addr);
2028 		return (DCMD_ERR);
2029 	}
2030 
2031 	return (DCMD_OK);
2032 }
2033 
2034 /*ARGSUSED*/
2035 int
2036 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2037 {
2038 	return (allocdby_common(addr, flags, "allocdby"));
2039 }
2040 
2041 /*ARGSUSED*/
2042 int
2043 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2044 {
2045 	return (allocdby_common(addr, flags, "freedby"));
2046 }
2047 
2048 /*
2049  * Return a string describing the address in relation to the given thread's
2050  * stack.
2051  *
2052  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2053  *
2054  * - If the address is above the stack pointer, return an empty string
2055  *   signifying that the address is active.
2056  *
2057  * - If the address is below the stack pointer, and the thread is not on proc,
2058  *   return " (below sp)".
2059  *
2060  * - If the address is below the stack pointer, and the thread is on proc,
2061  *   return " (possibly below sp)".  Depending on context, we may or may not
2062  *   have an accurate t_sp.
2063  */
2064 static const char *
2065 stack_active(const kthread_t *t, uintptr_t addr)
2066 {
2067 	uintptr_t panicstk;
2068 	GElf_Sym sym;
2069 
2070 	if (t->t_state == TS_FREE)
2071 		return (" (inactive interrupt thread)");
2072 
2073 	/*
2074 	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2075 	 * no longer relates to the thread's real stack.
2076 	 */
2077 	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2078 		panicstk = (uintptr_t)sym.st_value;
2079 
2080 		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2081 			return ("");
2082 	}
2083 
2084 	if (addr >= t->t_sp + STACK_BIAS)
2085 		return ("");
2086 
2087 	if (t->t_state == TS_ONPROC)
2088 		return (" (possibly below sp)");
2089 
2090 	return (" (below sp)");
2091 }
2092 
2093 /*
2094  * Additional state for the kmem and vmem ::whatis handlers
2095  */
2096 typedef struct whatis_info {
2097 	mdb_whatis_t *wi_w;
2098 	const kmem_cache_t *wi_cache;
2099 	const vmem_t *wi_vmem;
2100 	vmem_t *wi_msb_arena;
2101 	size_t wi_slab_size;
2102 	uint_t wi_slab_found;
2103 	uint_t wi_kmem_lite_count;
2104 	uint_t wi_freemem;
2105 } whatis_info_t;
2106 
2107 /* call one of our dcmd functions with "-v" and the provided address */
2108 static void
2109 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2110 {
2111 	mdb_arg_t a;
2112 	a.a_type = MDB_TYPE_STRING;
2113 	a.a_un.a_str = "-v";
2114 
2115 	mdb_printf(":\n");
2116 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2117 }
2118 
2119 static void
2120 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2121 {
2122 #define	KMEM_LITE_MAX	16
2123 	pc_t callers[KMEM_LITE_MAX];
2124 	pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2125 
2126 	kmem_buftag_t bt;
2127 	intptr_t stat;
2128 	const char *plural = "";
2129 	int i;
2130 
2131 	/* validate our arguments and read in the buftag */
2132 	if (count == 0 || count > KMEM_LITE_MAX ||
2133 	    mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2134 		return;
2135 
2136 	/* validate the buffer state and read in the callers */
2137 	stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2138 
2139 	if (stat != KMEM_BUFTAG_ALLOC || stat != KMEM_BUFTAG_FREE ||
2140 	    mdb_vread(callers, count * sizeof (pc_t),
2141 	    btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2142 		return;
2143 
2144 	/* If there aren't any filled in callers, bail */
2145 	if (callers[0] == uninit)
2146 		return;
2147 
2148 	plural = (callers[1] == uninit) ? "" : "s";
2149 
2150 	/* Everything's done and checked; print them out */
2151 	mdb_printf(":\n");
2152 
2153 	mdb_inc_indent(8);
2154 	mdb_printf("recent caller%s: %a", plural, callers[0]);
2155 	for (i = 1; i < count; i++) {
2156 		if (callers[i] == uninit)
2157 			break;
2158 		mdb_printf(", %a", callers[i]);
2159 	}
2160 	mdb_dec_indent(8);
2161 }
2162 
2163 static void
2164 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2165     uintptr_t baddr)
2166 {
2167 	mdb_whatis_t *w = wi->wi_w;
2168 
2169 	const kmem_cache_t *cp = wi->wi_cache;
2170 	/* LINTED pointer cast may result in improper alignment */
2171 	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2172 	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2173 	int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2174 
2175 	mdb_whatis_report_object(w, maddr, addr, "");
2176 
2177 	if (baddr != 0 && !call_printer)
2178 		mdb_printf("bufctl %p ", baddr);
2179 
2180 	mdb_printf("%s from %s",
2181 	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2182 
2183 	if (baddr != 0 && call_printer) {
2184 		whatis_call_printer(bufctl, baddr);
2185 		return;
2186 	}
2187 
2188 	/* for KMF_LITE caches, try to print out the previous callers */
2189 	if (!quiet && (cp->cache_flags & KMF_LITE))
2190 		whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2191 
2192 	mdb_printf("\n");
2193 }
2194 
2195 /*ARGSUSED*/
2196 static int
2197 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2198 {
2199 	mdb_whatis_t *w = wi->wi_w;
2200 
2201 	uintptr_t cur;
2202 	size_t size = wi->wi_cache->cache_bufsize;
2203 
2204 	while (mdb_whatis_match(w, addr, size, &cur))
2205 		whatis_print_kmem(wi, cur, addr, NULL);
2206 
2207 	return (WHATIS_WALKRET(w));
2208 }
2209 
2210 /*ARGSUSED*/
2211 static int
2212 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2213 {
2214 	mdb_whatis_t *w = wi->wi_w;
2215 
2216 	uintptr_t cur;
2217 	uintptr_t addr = (uintptr_t)bcp->bc_addr;
2218 	size_t size = wi->wi_cache->cache_bufsize;
2219 
2220 	while (mdb_whatis_match(w, addr, size, &cur))
2221 		whatis_print_kmem(wi, cur, addr, baddr);
2222 
2223 	return (WHATIS_WALKRET(w));
2224 }
2225 
2226 static int
2227 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2228 {
2229 	mdb_whatis_t *w = wi->wi_w;
2230 
2231 	size_t size = vs->vs_end - vs->vs_start;
2232 	uintptr_t cur;
2233 
2234 	/* We're not interested in anything but alloc and free segments */
2235 	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2236 		return (WALK_NEXT);
2237 
2238 	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2239 		mdb_whatis_report_object(w, cur, vs->vs_start, "");
2240 
2241 		/*
2242 		 * If we're not printing it seperately, provide the vmem_seg
2243 		 * pointer if it has a stack trace.
2244 		 */
2245 		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2246 		    (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2247 		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2248 			mdb_printf("vmem_seg %p ", addr);
2249 		}
2250 
2251 		mdb_printf("%s from the %s vmem arena",
2252 		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2253 		    wi->wi_vmem->vm_name);
2254 
2255 		if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2256 			whatis_call_printer(vmem_seg, addr);
2257 		else
2258 			mdb_printf("\n");
2259 	}
2260 
2261 	return (WHATIS_WALKRET(w));
2262 }
2263 
2264 static int
2265 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2266 {
2267 	mdb_whatis_t *w = wi->wi_w;
2268 	const char *nm = vmem->vm_name;
2269 
2270 	int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2271 	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2272 
2273 	if (identifier != idspace)
2274 		return (WALK_NEXT);
2275 
2276 	wi->wi_vmem = vmem;
2277 
2278 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2279 		mdb_printf("Searching vmem arena %s...\n", nm);
2280 
2281 	if (mdb_pwalk("vmem_seg",
2282 	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2283 		mdb_warn("can't walk vmem_seg for %p", addr);
2284 		return (WALK_NEXT);
2285 	}
2286 
2287 	return (WHATIS_WALKRET(w));
2288 }
2289 
2290 /*ARGSUSED*/
2291 static int
2292 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2293 {
2294 	mdb_whatis_t *w = wi->wi_w;
2295 
2296 	/* It must overlap with the slab data, or it's not interesting */
2297 	if (mdb_whatis_overlaps(w,
2298 	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2299 		wi->wi_slab_found++;
2300 		return (WALK_DONE);
2301 	}
2302 	return (WALK_NEXT);
2303 }
2304 
2305 static int
2306 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2307 {
2308 	mdb_whatis_t *w = wi->wi_w;
2309 
2310 	char *walk, *freewalk;
2311 	mdb_walk_cb_t func;
2312 	int do_bufctl;
2313 
2314 	int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2315 	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2316 
2317 	if (identifier != idspace)
2318 		return (WALK_NEXT);
2319 
2320 	/* Override the '-b' flag as necessary */
2321 	if (!(c->cache_flags & KMF_HASH))
2322 		do_bufctl = FALSE;	/* no bufctls to walk */
2323 	else if (c->cache_flags & KMF_AUDIT)
2324 		do_bufctl = TRUE;	/* we always want debugging info */
2325 	else
2326 		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2327 
2328 	if (do_bufctl) {
2329 		walk = "bufctl";
2330 		freewalk = "freectl";
2331 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2332 	} else {
2333 		walk = "kmem";
2334 		freewalk = "freemem";
2335 		func = (mdb_walk_cb_t)whatis_walk_kmem;
2336 	}
2337 
2338 	wi->wi_cache = c;
2339 
2340 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2341 		mdb_printf("Searching %s...\n", c->cache_name);
2342 
2343 	/*
2344 	 * If more then two buffers live on each slab, figure out if we're
2345 	 * interested in anything in any slab before doing the more expensive
2346 	 * kmem/freemem (bufctl/freectl) walkers.
2347 	 */
2348 	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2349 	if (!(c->cache_flags & KMF_HASH))
2350 		wi->wi_slab_size -= sizeof (kmem_slab_t);
2351 
2352 	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2353 		wi->wi_slab_found = 0;
2354 		if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2355 		    addr) == -1) {
2356 			mdb_warn("can't find kmem_slab walker");
2357 			return (WALK_DONE);
2358 		}
2359 		if (wi->wi_slab_found == 0)
2360 			return (WALK_NEXT);
2361 	}
2362 
2363 	wi->wi_freemem = FALSE;
2364 	if (mdb_pwalk(walk, func, wi, addr) == -1) {
2365 		mdb_warn("can't find %s walker", walk);
2366 		return (WALK_DONE);
2367 	}
2368 
2369 	if (mdb_whatis_done(w))
2370 		return (WALK_DONE);
2371 
2372 	/*
2373 	 * We have searched for allocated memory; now search for freed memory.
2374 	 */
2375 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2376 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2377 
2378 	wi->wi_freemem = TRUE;
2379 	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2380 		mdb_warn("can't find %s walker", freewalk);
2381 		return (WALK_DONE);
2382 	}
2383 
2384 	return (WHATIS_WALKRET(w));
2385 }
2386 
2387 static int
2388 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2389 {
2390 	if (c->cache_arena == wi->wi_msb_arena ||
2391 	    (c->cache_cflags & KMC_NOTOUCH))
2392 		return (WALK_NEXT);
2393 
2394 	return (whatis_walk_cache(addr, c, wi));
2395 }
2396 
2397 static int
2398 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2399 {
2400 	if (c->cache_arena != wi->wi_msb_arena)
2401 		return (WALK_NEXT);
2402 
2403 	return (whatis_walk_cache(addr, c, wi));
2404 }
2405 
2406 static int
2407 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2408 {
2409 	if (c->cache_arena == wi->wi_msb_arena ||
2410 	    !(c->cache_cflags & KMC_NOTOUCH))
2411 		return (WALK_NEXT);
2412 
2413 	return (whatis_walk_cache(addr, c, wi));
2414 }
2415 
2416 static int
2417 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2418 {
2419 	uintptr_t cur;
2420 	uintptr_t saddr;
2421 	size_t size;
2422 
2423 	/*
2424 	 * Often, one calls ::whatis on an address from a thread structure.
2425 	 * We use this opportunity to short circuit this case...
2426 	 */
2427 	while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2428 		mdb_whatis_report_object(w, cur, addr,
2429 		    "allocated as a thread structure\n");
2430 
2431 	/*
2432 	 * Now check the stack
2433 	 */
2434 	if (t->t_stkbase == NULL)
2435 		return (WALK_NEXT);
2436 
2437 	/*
2438 	 * This assumes that t_stk is the end of the stack, but it's really
2439 	 * only the initial stack pointer for the thread.  Arguments to the
2440 	 * initial procedure, SA(MINFRAME), etc. are all after t_stk.  So
2441 	 * that 't->t_stk::whatis' reports "part of t's stack", we include
2442 	 * t_stk in the range (the "+ 1", below), but the kernel should
2443 	 * really include the full stack bounds where we can find it.
2444 	 */
2445 	saddr = (uintptr_t)t->t_stkbase;
2446 	size = (uintptr_t)t->t_stk - saddr + 1;
2447 	while (mdb_whatis_match(w, saddr, size, &cur))
2448 		mdb_whatis_report_object(w, cur, cur,
2449 		    "in thread %p's stack%s\n", addr, stack_active(t, cur));
2450 
2451 	return (WHATIS_WALKRET(w));
2452 }
2453 
2454 static void
2455 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2456     uintptr_t base, size_t size, const char *where)
2457 {
2458 	uintptr_t cur;
2459 
2460 	/*
2461 	 * Since we're searching for addresses inside a module, we report
2462 	 * them as symbols.
2463 	 */
2464 	while (mdb_whatis_match(w, base, size, &cur))
2465 		mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2466 }
2467 
2468 static int
2469 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2470 {
2471 	char name[MODMAXNAMELEN];
2472 	struct module mod;
2473 	Shdr shdr;
2474 
2475 	if (m->mod_mp == NULL)
2476 		return (WALK_NEXT);
2477 
2478 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2479 		mdb_warn("couldn't read modctl %p's module", addr);
2480 		return (WALK_NEXT);
2481 	}
2482 
2483 	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2484 		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2485 
2486 	whatis_modctl_match(w, name,
2487 	    (uintptr_t)mod.text, mod.text_size, "text segment");
2488 	whatis_modctl_match(w, name,
2489 	    (uintptr_t)mod.data, mod.data_size, "data segment");
2490 	whatis_modctl_match(w, name,
2491 	    (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2492 
2493 	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2494 		mdb_warn("couldn't read symbol header for %p's module", addr);
2495 		return (WALK_NEXT);
2496 	}
2497 
2498 	whatis_modctl_match(w, name,
2499 	    (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2500 	whatis_modctl_match(w, name,
2501 	    (uintptr_t)mod.symspace, mod.symsize, "symtab");
2502 
2503 	return (WHATIS_WALKRET(w));
2504 }
2505 
2506 /*ARGSUSED*/
2507 static int
2508 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2509 {
2510 	uintptr_t cur;
2511 
2512 	uintptr_t base = (uintptr_t)seg->pages;
2513 	size_t size = (uintptr_t)seg->epages - base;
2514 
2515 	while (mdb_whatis_match(w, base, size, &cur)) {
2516 		/* round our found pointer down to the page_t base. */
2517 		size_t offset = (cur - base) % sizeof (page_t);
2518 
2519 		mdb_whatis_report_object(w, cur, cur - offset,
2520 		    "allocated as a page structure\n");
2521 	}
2522 
2523 	return (WHATIS_WALKRET(w));
2524 }
2525 
2526 /*ARGSUSED*/
2527 static int
2528 whatis_run_modules(mdb_whatis_t *w, void *arg)
2529 {
2530 	if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2531 		mdb_warn("couldn't find modctl walker");
2532 		return (1);
2533 	}
2534 	return (0);
2535 }
2536 
2537 /*ARGSUSED*/
2538 static int
2539 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2540 {
2541 	/*
2542 	 * Now search all thread stacks.  Yes, this is a little weak; we
2543 	 * can save a lot of work by first checking to see if the
2544 	 * address is in segkp vs. segkmem.  But hey, computers are
2545 	 * fast.
2546 	 */
2547 	if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2548 		mdb_warn("couldn't find thread walker");
2549 		return (1);
2550 	}
2551 	return (0);
2552 }
2553 
2554 /*ARGSUSED*/
2555 static int
2556 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2557 {
2558 	if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2559 		mdb_warn("couldn't find memseg walker");
2560 		return (1);
2561 	}
2562 	return (0);
2563 }
2564 
2565 /*ARGSUSED*/
2566 static int
2567 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2568 {
2569 	whatis_info_t wi;
2570 
2571 	bzero(&wi, sizeof (wi));
2572 	wi.wi_w = w;
2573 
2574 	if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2575 		mdb_warn("unable to readvar \"kmem_msb_arena\"");
2576 
2577 	if (mdb_readvar(&wi.wi_kmem_lite_count,
2578 	    "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2579 		wi.wi_kmem_lite_count = 0;
2580 
2581 	/*
2582 	 * We process kmem caches in the following order:
2583 	 *
2584 	 *	non-KMC_NOTOUCH, non-metadata	(typically the most interesting)
2585 	 *	metadata			(can be huge with KMF_AUDIT)
2586 	 *	KMC_NOTOUCH, non-metadata	(see kmem_walk_all())
2587 	 */
2588 	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2589 	    &wi) == -1 ||
2590 	    mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2591 	    &wi) == -1 ||
2592 	    mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2593 	    &wi) == -1) {
2594 		mdb_warn("couldn't find kmem_cache walker");
2595 		return (1);
2596 	}
2597 	return (0);
2598 }
2599 
2600 /*ARGSUSED*/
2601 static int
2602 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2603 {
2604 	whatis_info_t wi;
2605 
2606 	bzero(&wi, sizeof (wi));
2607 	wi.wi_w = w;
2608 
2609 	if (mdb_walk("vmem_postfix",
2610 	    (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2611 		mdb_warn("couldn't find vmem_postfix walker");
2612 		return (1);
2613 	}
2614 	return (0);
2615 }
2616 
/*
 * Address range [kmc_low, kmc_high) of the transaction log attributed to
 * one CPU; kmem_log_walk() uses it to label log entries with a CPU id.
 */
typedef struct kmem_log_cpu {
	uintptr_t	kmc_low;	/* inclusive lower bound */
	uintptr_t	kmc_high;	/* exclusive upper bound */
} kmem_log_cpu_t;
2621 
2622 typedef struct kmem_log_data {
2623 	uintptr_t kmd_addr;
2624 	kmem_log_cpu_t *kmd_cpu;
2625 } kmem_log_data_t;
2626 
2627 int
2628 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2629     kmem_log_data_t *kmd)
2630 {
2631 	int i;
2632 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2633 	size_t bufsize;
2634 
2635 	for (i = 0; i < NCPU; i++) {
2636 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2637 			break;
2638 	}
2639 
2640 	if (kmd->kmd_addr) {
2641 		if (b->bc_cache == NULL)
2642 			return (WALK_NEXT);
2643 
2644 		if (mdb_vread(&bufsize, sizeof (bufsize),
2645 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2646 			mdb_warn(
2647 			    "failed to read cache_bufsize for cache at %p",
2648 			    b->bc_cache);
2649 			return (WALK_ERR);
2650 		}
2651 
2652 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2653 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2654 			return (WALK_NEXT);
2655 	}
2656 
2657 	if (i == NCPU)
2658 		mdb_printf("   ");
2659 	else
2660 		mdb_printf("%3d", i);
2661 
2662 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2663 	    b->bc_timestamp, b->bc_thread);
2664 
2665 	return (WALK_NEXT);
2666 }
2667 
2668 /*ARGSUSED*/
2669 int
2670 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2671 {
2672 	kmem_log_header_t lh;
2673 	kmem_cpu_log_header_t clh;
2674 	uintptr_t lhp, clhp;
2675 	int ncpus;
2676 	uintptr_t *cpu;
2677 	GElf_Sym sym;
2678 	kmem_log_cpu_t *kmc;
2679 	int i;
2680 	kmem_log_data_t kmd;
2681 	uint_t opt_b = FALSE;
2682 
2683 	if (mdb_getopts(argc, argv,
2684 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2685 		return (DCMD_USAGE);
2686 
2687 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2688 		mdb_warn("failed to read 'kmem_transaction_log'");
2689 		return (DCMD_ERR);
2690 	}
2691 
2692 	if (lhp == NULL) {
2693 		mdb_warn("no kmem transaction log\n");
2694 		return (DCMD_ERR);
2695 	}
2696 
2697 	mdb_readvar(&ncpus, "ncpus");
2698 
2699 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2700 		mdb_warn("failed to read log header at %p", lhp);
2701 		return (DCMD_ERR);
2702 	}
2703 
2704 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2705 
2706 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2707 
2708 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2709 		mdb_warn("couldn't find 'cpu' array");
2710 		return (DCMD_ERR);
2711 	}
2712 
2713 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2714 		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2715 		    NCPU * sizeof (uintptr_t), sym.st_size);
2716 		return (DCMD_ERR);
2717 	}
2718 
2719 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2720 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2721 		return (DCMD_ERR);
2722 	}
2723 
2724 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2725 	kmd.kmd_addr = NULL;
2726 	kmd.kmd_cpu = kmc;
2727 
2728 	for (i = 0; i < NCPU; i++) {
2729 
2730 		if (cpu[i] == NULL)
2731 			continue;
2732 
2733 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2734 			mdb_warn("cannot read cpu %d's log header at %p",
2735 			    i, clhp);
2736 			return (DCMD_ERR);
2737 		}
2738 
2739 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2740 		    (uintptr_t)lh.lh_base;
2741 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2742 
2743 		clhp += sizeof (kmem_cpu_log_header_t);
2744 	}
2745 
2746 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2747 	    "TIMESTAMP", "THREAD");
2748 
2749 	/*
2750 	 * If we have been passed an address, print out only log entries
2751 	 * corresponding to that address.  If opt_b is specified, then interpret
2752 	 * the address as a bufctl.
2753 	 */
2754 	if (flags & DCMD_ADDRSPEC) {
2755 		kmem_bufctl_audit_t b;
2756 
2757 		if (opt_b) {
2758 			kmd.kmd_addr = addr;
2759 		} else {
2760 			if (mdb_vread(&b,
2761 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2762 				mdb_warn("failed to read bufctl at %p", addr);
2763 				return (DCMD_ERR);
2764 			}
2765 
2766 			(void) kmem_log_walk(addr, &b, &kmd);
2767 
2768 			return (DCMD_OK);
2769 		}
2770 	}
2771 
2772 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2773 		mdb_warn("can't find kmem log walker");
2774 		return (DCMD_ERR);
2775 	}
2776 
2777 	return (DCMD_OK);
2778 }
2779 
2780 typedef struct bufctl_history_cb {
2781 	int		bhc_flags;
2782 	int		bhc_argc;
2783 	const mdb_arg_t	*bhc_argv;
2784 	int		bhc_ret;
2785 } bufctl_history_cb_t;
2786 
2787 /*ARGSUSED*/
2788 static int
2789 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2790 {
2791 	bufctl_history_cb_t *bhc = arg;
2792 
2793 	bhc->bhc_ret =
2794 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2795 
2796 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2797 
2798 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2799 }
2800 
/*
 * Print the help text for the ::bufctl dcmd: a one-line summary followed
 * by the list of options.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s", summary);

	/* the OPTIONS heading is printed two columns to the left */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2823 
2824 int
2825 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2826 {
2827 	kmem_bufctl_audit_t bc;
2828 	uint_t verbose = FALSE;
2829 	uint_t history = FALSE;
2830 	uint_t in_history = FALSE;
2831 	uintptr_t caller = NULL, thread = NULL;
2832 	uintptr_t laddr, haddr, baddr = NULL;
2833 	hrtime_t earliest = 0, latest = 0;
2834 	int i, depth;
2835 	char c[MDB_SYM_NAMLEN];
2836 	GElf_Sym sym;
2837 
2838 	if (mdb_getopts(argc, argv,
2839 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2840 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2841 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2842 	    'c', MDB_OPT_UINTPTR, &caller,
2843 	    't', MDB_OPT_UINTPTR, &thread,
2844 	    'e', MDB_OPT_UINT64, &earliest,
2845 	    'l', MDB_OPT_UINT64, &latest,
2846 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2847 		return (DCMD_USAGE);
2848 
2849 	if (!(flags & DCMD_ADDRSPEC))
2850 		return (DCMD_USAGE);
2851 
2852 	if (in_history && !history)
2853 		return (DCMD_USAGE);
2854 
2855 	if (history && !in_history) {
2856 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2857 		    UM_SLEEP | UM_GC);
2858 		bufctl_history_cb_t bhc;
2859 
2860 		nargv[0].a_type = MDB_TYPE_STRING;
2861 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2862 
2863 		for (i = 0; i < argc; i++)
2864 			nargv[i + 1] = argv[i];
2865 
2866 		/*
2867 		 * When in history mode, we treat each element as if it
2868 		 * were in a seperate loop, so that the headers group
2869 		 * bufctls with similar histories.
2870 		 */
2871 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2872 		bhc.bhc_argc = argc + 1;
2873 		bhc.bhc_argv = nargv;
2874 		bhc.bhc_ret = DCMD_OK;
2875 
2876 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2877 		    addr) == -1) {
2878 			mdb_warn("unable to walk bufctl_history");
2879 			return (DCMD_ERR);
2880 		}
2881 
2882 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2883 			mdb_printf("\n");
2884 
2885 		return (bhc.bhc_ret);
2886 	}
2887 
2888 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2889 		if (verbose) {
2890 			mdb_printf("%16s %16s %16s %16s\n"
2891 			    "%<u>%16s %16s %16s %16s%</u>\n",
2892 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2893 			    "", "CACHE", "LASTLOG", "CONTENTS");
2894 		} else {
2895 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2896 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2897 		}
2898 	}
2899 
2900 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2901 		mdb_warn("couldn't read bufctl at %p", addr);
2902 		return (DCMD_ERR);
2903 	}
2904 
2905 	/*
2906 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2907 	 * the address does not really refer to a bufctl.
2908 	 */
2909 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2910 
2911 	if (caller != NULL) {
2912 		laddr = caller;
2913 		haddr = caller + sizeof (caller);
2914 
2915 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2916 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2917 			/*
2918 			 * We were provided an exact symbol value; any
2919 			 * address in the function is valid.
2920 			 */
2921 			laddr = (uintptr_t)sym.st_value;
2922 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2923 		}
2924 
2925 		for (i = 0; i < depth; i++)
2926 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2927 				break;
2928 
2929 		if (i == depth)
2930 			return (DCMD_OK);
2931 	}
2932 
2933 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2934 		return (DCMD_OK);
2935 
2936 	if (earliest != 0 && bc.bc_timestamp < earliest)
2937 		return (DCMD_OK);
2938 
2939 	if (latest != 0 && bc.bc_timestamp > latest)
2940 		return (DCMD_OK);
2941 
2942 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2943 		return (DCMD_OK);
2944 
2945 	if (flags & DCMD_PIPE_OUT) {
2946 		mdb_printf("%#lr\n", addr);
2947 		return (DCMD_OK);
2948 	}
2949 
2950 	if (verbose) {
2951 		mdb_printf(
2952 		    "%<b>%16p%</b> %16p %16llx %16p\n"
2953 		    "%16s %16p %16p %16p\n",
2954 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2955 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2956 
2957 		mdb_inc_indent(17);
2958 		for (i = 0; i < depth; i++)
2959 			mdb_printf("%a\n", bc.bc_stack[i]);
2960 		mdb_dec_indent(17);
2961 		mdb_printf("\n");
2962 	} else {
2963 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2964 		    bc.bc_timestamp, bc.bc_thread);
2965 
2966 		for (i = 0; i < depth; i++) {
2967 			if (mdb_lookup_by_addr(bc.bc_stack[i],
2968 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2969 				continue;
2970 			if (strncmp(c, "kmem_", 5) == 0)
2971 				continue;
2972 			mdb_printf(" %a\n", bc.bc_stack[i]);
2973 			break;
2974 		}
2975 
2976 		if (i >= depth)
2977 			mdb_printf("\n");
2978 	}
2979 
2980 	return (DCMD_OK);
2981 }
2982 
2983 typedef struct kmem_verify {
2984 	uint64_t *kmv_buf;		/* buffer to read cache contents into */
2985 	size_t kmv_size;		/* number of bytes in kmv_buf */
2986 	int kmv_corruption;		/* > 0 if corruption found. */
2987 	int kmv_besilent;		/* report actual corruption sites */
2988 	struct kmem_cache kmv_cache;	/* the cache we're operating on */
2989 } kmem_verify_t;
2990 
/*
 * verify_pattern()
 *	Check that the size bytes starting at buf_arg consist of repetitions
 *	of the 64-bit pattern pat.  Returns the byte offset of the first
 *	64-bit word that differs, or -1 if every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	size_t off;

	for (off = 0; off < size; off += sizeof (uint64_t)) {
		/*LINTED*/
		const uint64_t *wordp =
		    (const uint64_t *)((const char *)buf_arg + off);

		if (*wordp != pat)
			return ((int64_t)off);
	}

	return (-1);
}
3007 
3008 /*
3009  * verify_buftag()
3010  *	verify that btp->bt_bxstat == (bcp ^ pat)
3011  */
3012 static int
3013 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3014 {
3015 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3016 }
3017 
3018 /*
3019  * verify_free()
3020  * 	verify the integrity of a free block of memory by checking
3021  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
3022  */
3023 /*ARGSUSED1*/
3024 static int
3025 verify_free(uintptr_t addr, const void *data, void *private)
3026 {
3027 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3028 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3029 	int64_t corrupt;		/* corruption offset */
3030 	kmem_buftag_t *buftagp;		/* ptr to buftag */
3031 	kmem_cache_t *cp = &kmv->kmv_cache;
3032 	int besilent = kmv->kmv_besilent;
3033 
3034 	/*LINTED*/
3035 	buftagp = KMEM_BUFTAG(cp, buf);
3036 
3037 	/*
3038 	 * Read the buffer to check.
3039 	 */
3040 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3041 		if (!besilent)
3042 			mdb_warn("couldn't read %p", addr);
3043 		return (WALK_NEXT);
3044 	}
3045 
3046 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
3047 	    KMEM_FREE_PATTERN)) >= 0) {
3048 		if (!besilent)
3049 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3050 			    addr, (uintptr_t)addr + corrupt);
3051 		goto corrupt;
3052 	}
3053 	/*
3054 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3055 	 * the first bytes of the buffer, hence we cannot check for red
3056 	 * zone corruption.
3057 	 */
3058 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3059 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3060 		if (!besilent)
3061 			mdb_printf("buffer %p (free) seems to "
3062 			    "have a corrupt redzone pattern\n", addr);
3063 		goto corrupt;
3064 	}
3065 
3066 	/*
3067 	 * confirm bufctl pointer integrity.
3068 	 */
3069 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3070 		if (!besilent)
3071 			mdb_printf("buffer %p (free) has a corrupt "
3072 			    "buftag\n", addr);
3073 		goto corrupt;
3074 	}
3075 
3076 	return (WALK_NEXT);
3077 corrupt:
3078 	kmv->kmv_corruption++;
3079 	return (WALK_NEXT);
3080 }
3081 
3082 /*
3083  * verify_alloc()
3084  * 	Verify that the buftag of an allocated buffer makes sense with respect
3085  * 	to the buffer.
3086  */
3087 /*ARGSUSED1*/
3088 static int
3089 verify_alloc(uintptr_t addr, const void *data, void *private)
3090 {
3091 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3092 	kmem_cache_t *cp = &kmv->kmv_cache;
3093 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3094 	/*LINTED*/
3095 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3096 	uint32_t *ip = (uint32_t *)buftagp;
3097 	uint8_t *bp = (uint8_t *)buf;
3098 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
3099 	int besilent = kmv->kmv_besilent;
3100 
3101 	/*
3102 	 * Read the buffer to check.
3103 	 */
3104 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3105 		if (!besilent)
3106 			mdb_warn("couldn't read %p", addr);
3107 		return (WALK_NEXT);
3108 	}
3109 
3110 	/*
3111 	 * There are two cases to handle:
3112 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3113 	 *    0xfeedfacefeedface at the end of it
3114 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
3115 	 *    0xbb just past the end of the region in use.  At the buftag,
3116 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
3117 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3118 	 *    endianness), followed by 32 bits containing the offset of the
3119 	 *    0xbb byte in the buffer.
3120 	 *
3121 	 * Finally, the two 32-bit words that comprise the second half of the
3122 	 * buftag should xor to KMEM_BUFTAG_ALLOC
3123 	 */
3124 
3125 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3126 		looks_ok = 1;
3127 	else if (!KMEM_SIZE_VALID(ip[1]))
3128 		size_ok = 0;
3129 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3130 		looks_ok = 1;
3131 	else
3132 		size_ok = 0;
3133 
3134 	if (!size_ok) {
3135 		if (!besilent)
3136 			mdb_printf("buffer %p (allocated) has a corrupt "
3137 			    "redzone size encoding\n", addr);
3138 		goto corrupt;
3139 	}
3140 
3141 	if (!looks_ok) {
3142 		if (!besilent)
3143 			mdb_printf("buffer %p (allocated) has a corrupt "
3144 			    "redzone signature\n", addr);
3145 		goto corrupt;
3146 	}
3147 
3148 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3149 		if (!besilent)
3150 			mdb_printf("buffer %p (allocated) has a "
3151 			    "corrupt buftag\n", addr);
3152 		goto corrupt;
3153 	}
3154 
3155 	return (WALK_NEXT);
3156 corrupt:
3157 	kmv->kmv_corruption++;
3158 	return (WALK_NEXT);
3159 }
3160 
3161 /*ARGSUSED2*/
3162 int
3163 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3164 {
3165 	if (flags & DCMD_ADDRSPEC) {
3166 		int check_alloc = 0, check_free = 0;
3167 		kmem_verify_t kmv;
3168 
3169 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3170 		    addr) == -1) {
3171 			mdb_warn("couldn't read kmem_cache %p", addr);
3172 			return (DCMD_ERR);
3173 		}
3174 
3175 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3176 		    sizeof (kmem_buftag_t);
3177 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3178 		kmv.kmv_corruption = 0;
3179 
3180 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3181 			check_alloc = 1;
3182 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3183 				check_free = 1;
3184 		} else {
3185 			if (!(flags & DCMD_LOOP)) {
3186 				mdb_warn("cache %p (%s) does not have "
3187 				    "redzone checking enabled\n", addr,
3188 				    kmv.kmv_cache.cache_name);
3189 			}
3190 			return (DCMD_ERR);
3191 		}
3192 
3193 		if (flags & DCMD_LOOP) {
3194 			/*
3195 			 * table mode, don't print out every corrupt buffer
3196 			 */
3197 			kmv.kmv_besilent = 1;
3198 		} else {
3199 			mdb_printf("Summary for cache '%s'\n",
3200 			    kmv.kmv_cache.cache_name);
3201 			mdb_inc_indent(2);
3202 			kmv.kmv_besilent = 0;
3203 		}
3204 
3205 		if (check_alloc)
3206 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3207 		if (check_free)
3208 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3209 
3210 		if (flags & DCMD_LOOP) {
3211 			if (kmv.kmv_corruption == 0) {
3212 				mdb_printf("%-*s %?p clean\n",
3213 				    KMEM_CACHE_NAMELEN,
3214 				    kmv.kmv_cache.cache_name, addr);
3215 			} else {
3216 				char *s = "";	/* optional s in "buffer[s]" */
3217 				if (kmv.kmv_corruption > 1)
3218 					s = "s";
3219 
3220 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3221 				    KMEM_CACHE_NAMELEN,
3222 				    kmv.kmv_cache.cache_name, addr,
3223 				    kmv.kmv_corruption, s);
3224 			}
3225 		} else {
3226 			/*
3227 			 * This is the more verbose mode, when the user has
3228 			 * type addr::kmem_verify.  If the cache was clean,
3229 			 * nothing will have yet been printed. So say something.
3230 			 */
3231 			if (kmv.kmv_corruption == 0)
3232 				mdb_printf("clean\n");
3233 
3234 			mdb_dec_indent(2);
3235 		}
3236 	} else {
3237 		/*
3238 		 * If the user didn't specify a cache to verify, we'll walk all
3239 		 * kmem_cache's, specifying ourself as a callback for each...
3240 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3241 		 */
3242 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3243 		    "Cache Name", "Addr", "Cache Integrity");
3244 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3245 	}
3246 
3247 	return (DCMD_OK);
3248 }
3249 
/*
 * Debugger-side copy of one vmem arena.  vmem_walk_init() links these nodes
 * both into a flat list (vn_next) and into a tree that mirrors the arenas'
 * vm_source relationships (vn_parent / vn_sibling / vn_children).
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next node in list of all arenas */
	struct vmem_node *vn_parent;	/* vm_source's node; NULL at root */
	struct vmem_node *vn_sibling;	/* next node with the same parent */
	struct vmem_node *vn_children;	/* head of this node's child list */
	uintptr_t vn_addr;		/* target address of the vmem_t */
	int vn_marked;			/* set once the postfix walk visits */
	vmem_t vn_vmem;			/* local copy of the target vmem_t */
} vmem_node_t;
3259 
/*
 * Per-walk state for the vmem arena walkers; stored in wsp->walk_data.
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* head of the list of root arenas */
	vmem_node_t *vw_current;	/* node the next step starts from */
} vmem_walk_t;
3264 
3265 int
3266 vmem_walk_init(mdb_walk_state_t *wsp)
3267 {
3268 	uintptr_t vaddr, paddr;
3269 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3270 	vmem_walk_t *vw;
3271 
3272 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3273 		mdb_warn("couldn't read 'vmem_list'");
3274 		return (WALK_ERR);
3275 	}
3276 
3277 	while (vaddr != NULL) {
3278 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3279 		vp->vn_addr = vaddr;
3280 		vp->vn_next = head;
3281 		head = vp;
3282 
3283 		if (vaddr == wsp->walk_addr)
3284 			current = vp;
3285 
3286 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3287 			mdb_warn("couldn't read vmem_t at %p", vaddr);
3288 			goto err;
3289 		}
3290 
3291 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3292 	}
3293 
3294 	for (vp = head; vp != NULL; vp = vp->vn_next) {
3295 
3296 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3297 			vp->vn_sibling = root;
3298 			root = vp;
3299 			continue;
3300 		}
3301 
3302 		for (parent = head; parent != NULL; parent = parent->vn_next) {
3303 			if (parent->vn_addr != paddr)
3304 				continue;
3305 			vp->vn_sibling = parent->vn_children;
3306 			parent->vn_children = vp;
3307 			vp->vn_parent = parent;
3308 			break;
3309 		}
3310 
3311 		if (parent == NULL) {
3312 			mdb_warn("couldn't find %p's parent (%p)\n",
3313 			    vp->vn_addr, paddr);
3314 			goto err;
3315 		}
3316 	}
3317 
3318 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3319 	vw->vw_root = root;
3320 
3321 	if (current != NULL)
3322 		vw->vw_current = current;
3323 	else
3324 		vw->vw_current = root;
3325 
3326 	wsp->walk_data = vw;
3327 	return (WALK_NEXT);
3328 err:
3329 	for (vp = head; head != NULL; vp = head) {
3330 		head = vp->vn_next;
3331 		mdb_free(vp, sizeof (vmem_node_t));
3332 	}
3333 
3334 	return (WALK_ERR);
3335 }
3336 
3337 int
3338 vmem_walk_step(mdb_walk_state_t *wsp)
3339 {
3340 	vmem_walk_t *vw = wsp->walk_data;
3341 	vmem_node_t *vp;
3342 	int rval;
3343 
3344 	if ((vp = vw->vw_current) == NULL)
3345 		return (WALK_DONE);
3346 
3347 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3348 
3349 	if (vp->vn_children != NULL) {
3350 		vw->vw_current = vp->vn_children;
3351 		return (rval);
3352 	}
3353 
3354 	do {
3355 		vw->vw_current = vp->vn_sibling;
3356 		vp = vp->vn_parent;
3357 	} while (vw->vw_current == NULL && vp != NULL);
3358 
3359 	return (rval);
3360 }
3361 
3362 /*
3363  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3364  * children are visited before their parent.  We perform the postfix walk
3365  * iteratively (rather than recursively) to allow mdb to regain control
3366  * after each callback.
3367  */
3368 int
3369 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3370 {
3371 	vmem_walk_t *vw = wsp->walk_data;
3372 	vmem_node_t *vp = vw->vw_current;
3373 	int rval;
3374 
3375 	/*
3376 	 * If this node is marked, then we know that we have already visited
3377 	 * all of its children.  If the node has any siblings, they need to
3378 	 * be visited next; otherwise, we need to visit the parent.  Note
3379 	 * that vp->vn_marked will only be zero on the first invocation of
3380 	 * the step function.
3381 	 */
3382 	if (vp->vn_marked) {
3383 		if (vp->vn_sibling != NULL)
3384 			vp = vp->vn_sibling;
3385 		else if (vp->vn_parent != NULL)
3386 			vp = vp->vn_parent;
3387 		else {
3388 			/*
3389 			 * We have neither a parent, nor a sibling, and we
3390 			 * have already been visited; we're done.
3391 			 */
3392 			return (WALK_DONE);
3393 		}
3394 	}
3395 
3396 	/*
3397 	 * Before we visit this node, visit its children.
3398 	 */
3399 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3400 		vp = vp->vn_children;
3401 
3402 	vp->vn_marked = 1;
3403 	vw->vw_current = vp;
3404 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3405 
3406 	return (rval);
3407 }
3408 
3409 void
3410 vmem_walk_fini(mdb_walk_state_t *wsp)
3411 {
3412 	vmem_walk_t *vw = wsp->walk_data;
3413 	vmem_node_t *root = vw->vw_root;
3414 	int done;
3415 
3416 	if (root == NULL)
3417 		return;
3418 
3419 	if ((vw->vw_root = root->vn_children) != NULL)
3420 		vmem_walk_fini(wsp);
3421 
3422 	vw->vw_root = root->vn_sibling;
3423 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3424 	mdb_free(root, sizeof (vmem_node_t));
3425 
3426 	if (done) {
3427 		mdb_free(vw, sizeof (vmem_walk_t));
3428 	} else {
3429 		vmem_walk_fini(wsp);
3430 	}
3431 }
3432 
/*
 * Per-walk state for the vmem segment walkers; stored in wsp->walk_data.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* type to report; VMEM_NONE for all */
	uintptr_t vsw_start;	/* target address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* target address of next segment to read */
} vmem_seg_walk_t;
3438 
3439 /*ARGSUSED*/
3440 int
3441 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3442 {
3443 	vmem_seg_walk_t *vsw;
3444 
3445 	if (wsp->walk_addr == NULL) {
3446 		mdb_warn("vmem_%s does not support global walks\n", name);
3447 		return (WALK_ERR);
3448 	}
3449 
3450 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3451 
3452 	vsw->vsw_type = type;
3453 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3454 	vsw->vsw_current = vsw->vsw_start;
3455 
3456 	return (WALK_NEXT);
3457 }
3458 
3459 /*
3460  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3461  */
3462 #define	VMEM_NONE	0
3463 
3464 int
3465 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3466 {
3467 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3468 }
3469 
3470 int
3471 vmem_free_walk_init(mdb_walk_state_t *wsp)
3472 {
3473 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3474 }
3475 
3476 int
3477 vmem_span_walk_init(mdb_walk_state_t *wsp)
3478 {
3479 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3480 }
3481 
3482 int
3483 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3484 {
3485 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3486 }
3487 
3488 int
3489 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3490 {
3491 	vmem_seg_t seg;
3492 	vmem_seg_walk_t *vsw = wsp->walk_data;
3493 	uintptr_t addr = vsw->vsw_current;
3494 	static size_t seg_size = 0;
3495 	int rval;
3496 
3497 	if (!seg_size) {
3498 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3499 			mdb_warn("failed to read 'vmem_seg_size'");
3500 			seg_size = sizeof (vmem_seg_t);
3501 		}
3502 	}
3503 
3504 	if (seg_size < sizeof (seg))
3505 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3506 
3507 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3508 		mdb_warn("couldn't read vmem_seg at %p", addr);
3509 		return (WALK_ERR);
3510 	}
3511 
3512 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3513 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3514 		rval = WALK_NEXT;
3515 	} else {
3516 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3517 	}
3518 
3519 	if (vsw->vsw_current == vsw->vsw_start)
3520 		return (WALK_DONE);
3521 
3522 	return (rval);
3523 }
3524 
3525 void
3526 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3527 {
3528 	vmem_seg_walk_t *vsw = wsp->walk_data;
3529 
3530 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3531 }
3532 
3533 #define	VMEM_NAMEWIDTH	22
3534 
3535 int
3536 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3537 {
3538 	vmem_t v, parent;
3539 	vmem_kstat_t *vkp = &v.vm_kstat;
3540 	uintptr_t paddr;
3541 	int ident = 0;
3542 	char c[VMEM_NAMEWIDTH];
3543 
3544 	if (!(flags & DCMD_ADDRSPEC)) {
3545 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3546 			mdb_warn("can't walk vmem");
3547 			return (DCMD_ERR);
3548 		}
3549 		return (DCMD_OK);
3550 	}
3551 
3552 	if (DCMD_HDRSPEC(flags))
3553 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3554 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3555 		    "TOTAL", "SUCCEED", "FAIL");
3556 
3557 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3558 		mdb_warn("couldn't read vmem at %p", addr);
3559 		return (DCMD_ERR);
3560 	}
3561 
3562 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3563 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3564 			mdb_warn("couldn't trace %p's ancestry", addr);
3565 			ident = 0;
3566 			break;
3567 		}
3568 		paddr = (uintptr_t)parent.vm_source;
3569 	}
3570 
3571 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3572 
3573 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3574 	    addr, VMEM_NAMEWIDTH, c,
3575 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3576 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3577 
3578 	return (DCMD_OK);
3579 }
3580 
/*
 * Help text for ::vmem_seg.  The option list is kept in step with the
 * mdb_getopts() call in vmem_seg().
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s",
"Display the contents of vmem_seg_ts, with optional filtering.\n\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	/* Outdent just for the OPTIONS heading, then restore the indent. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3612 
3613 /*ARGSUSED*/
3614 int
3615 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3616 {
3617 	vmem_seg_t vs;
3618 	pc_t *stk = vs.vs_stack;
3619 	uintptr_t sz;
3620 	uint8_t t;
3621 	const char *type = NULL;
3622 	GElf_Sym sym;
3623 	char c[MDB_SYM_NAMLEN];
3624 	int no_debug;
3625 	int i;
3626 	int depth;
3627 	uintptr_t laddr, haddr;
3628 
3629 	uintptr_t caller = NULL, thread = NULL;
3630 	uintptr_t minsize = 0, maxsize = 0;
3631 
3632 	hrtime_t earliest = 0, latest = 0;
3633 
3634 	uint_t size = 0;
3635 	uint_t verbose = 0;
3636 
3637 	if (!(flags & DCMD_ADDRSPEC))
3638 		return (DCMD_USAGE);
3639 
3640 	if (mdb_getopts(argc, argv,
3641 	    'c', MDB_OPT_UINTPTR, &caller,
3642 	    'e', MDB_OPT_UINT64, &earliest,
3643 	    'l', MDB_OPT_UINT64, &latest,
3644 	    's', MDB_OPT_SETBITS, TRUE, &size,
3645 	    'm', MDB_OPT_UINTPTR, &minsize,
3646 	    'M', MDB_OPT_UINTPTR, &maxsize,
3647 	    't', MDB_OPT_UINTPTR, &thread,
3648 	    'T', MDB_OPT_STR, &type,
3649 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3650 	    NULL) != argc)
3651 		return (DCMD_USAGE);
3652 
3653 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3654 		if (verbose) {
3655 			mdb_printf("%16s %4s %16s %16s %16s\n"
3656 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3657 			    "ADDR", "TYPE", "START", "END", "SIZE",
3658 			    "", "", "THREAD", "TIMESTAMP", "");
3659 		} else {
3660 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3661 			    "START", size? "SIZE" : "END", "WHO");
3662 		}
3663 	}
3664 
3665 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3666 		mdb_warn("couldn't read vmem_seg at %p", addr);
3667 		return (DCMD_ERR);
3668 	}
3669 
3670 	if (type != NULL) {
3671 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3672 			t = VMEM_ALLOC;
3673 		else if (strcmp(type, "FREE") == 0)
3674 			t = VMEM_FREE;
3675 		else if (strcmp(type, "SPAN") == 0)
3676 			t = VMEM_SPAN;
3677 		else if (strcmp(type, "ROTR") == 0 ||
3678 		    strcmp(type, "ROTOR") == 0)
3679 			t = VMEM_ROTOR;
3680 		else if (strcmp(type, "WLKR") == 0 ||
3681 		    strcmp(type, "WALKER") == 0)
3682 			t = VMEM_WALKER;
3683 		else {
3684 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3685 			    type);
3686 			return (DCMD_ERR);
3687 		}
3688 
3689 		if (vs.vs_type != t)
3690 			return (DCMD_OK);
3691 	}
3692 
3693 	sz = vs.vs_end - vs.vs_start;
3694 
3695 	if (minsize != 0 && sz < minsize)
3696 		return (DCMD_OK);
3697 
3698 	if (maxsize != 0 && sz > maxsize)
3699 		return (DCMD_OK);
3700 
3701 	t = vs.vs_type;
3702 	depth = vs.vs_depth;
3703 
3704 	/*
3705 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3706 	 */
3707 	no_debug = (t != VMEM_ALLOC) ||
3708 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3709 
3710 	if (no_debug) {
3711 		if (caller != NULL || thread != NULL || earliest != 0 ||
3712 		    latest != 0)
3713 			return (DCMD_OK);		/* not enough info */
3714 	} else {
3715 		if (caller != NULL) {
3716 			laddr = caller;
3717 			haddr = caller + sizeof (caller);
3718 
3719 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3720 			    sizeof (c), &sym) != -1 &&
3721 			    caller == (uintptr_t)sym.st_value) {
3722 				/*
3723 				 * We were provided an exact symbol value; any
3724 				 * address in the function is valid.
3725 				 */
3726 				laddr = (uintptr_t)sym.st_value;
3727 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3728 			}
3729 
3730 			for (i = 0; i < depth; i++)
3731 				if (vs.vs_stack[i] >= laddr &&
3732 				    vs.vs_stack[i] < haddr)
3733 					break;
3734 
3735 			if (i == depth)
3736 				return (DCMD_OK);
3737 		}
3738 
3739 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3740 			return (DCMD_OK);
3741 
3742 		if (earliest != 0 && vs.vs_timestamp < earliest)
3743 			return (DCMD_OK);
3744 
3745 		if (latest != 0 && vs.vs_timestamp > latest)
3746 			return (DCMD_OK);
3747 	}
3748 
3749 	type = (t == VMEM_ALLOC ? "ALLC" :
3750 	    t == VMEM_FREE ? "FREE" :
3751 	    t == VMEM_SPAN ? "SPAN" :
3752 	    t == VMEM_ROTOR ? "ROTR" :
3753 	    t == VMEM_WALKER ? "WLKR" :
3754 	    "????");
3755 
3756 	if (flags & DCMD_PIPE_OUT) {
3757 		mdb_printf("%#lr\n", addr);
3758 		return (DCMD_OK);
3759 	}
3760 
3761 	if (verbose) {
3762 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3763 		    addr, type, vs.vs_start, vs.vs_end, sz);
3764 
3765 		if (no_debug)
3766 			return (DCMD_OK);
3767 
3768 		mdb_printf("%16s %4s %16p %16llx\n",
3769 		    "", "", vs.vs_thread, vs.vs_timestamp);
3770 
3771 		mdb_inc_indent(17);
3772 		for (i = 0; i < depth; i++) {
3773 			mdb_printf("%a\n", stk[i]);
3774 		}
3775 		mdb_dec_indent(17);
3776 		mdb_printf("\n");
3777 	} else {
3778 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3779 		    vs.vs_start, size? sz : vs.vs_end);
3780 
3781 		if (no_debug) {
3782 			mdb_printf("\n");
3783 			return (DCMD_OK);
3784 		}
3785 
3786 		for (i = 0; i < depth; i++) {
3787 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3788 			    c, sizeof (c), &sym) == -1)
3789 				continue;
3790 			if (strncmp(c, "vmem_", 5) == 0)
3791 				continue;
3792 			break;
3793 		}
3794 		mdb_printf(" %a\n", stk[i]);
3795 	}
3796 	return (DCMD_OK);
3797 }
3798 
/*
 * Callback state passed from ::kmalog to showbc().
 */
typedef struct kmalog_data {
	uintptr_t	kma_addr;	/* only show buffers holding this */
					/* address; 0 shows every entry */
	hrtime_t	kma_newest;	/* timestamp of first entry seen; */
					/* printed deltas are relative to it */
} kmalog_data_t;
3803 
3804 /*ARGSUSED*/
3805 static int
3806 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3807 {
3808 	char name[KMEM_CACHE_NAMELEN + 1];
3809 	hrtime_t delta;
3810 	int i, depth;
3811 	size_t bufsize;
3812 
3813 	if (bcp->bc_timestamp == 0)
3814 		return (WALK_DONE);
3815 
3816 	if (kma->kma_newest == 0)
3817 		kma->kma_newest = bcp->bc_timestamp;
3818 
3819 	if (kma->kma_addr) {
3820 		if (mdb_vread(&bufsize, sizeof (bufsize),
3821 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3822 			mdb_warn(
3823 			    "failed to read cache_bufsize for cache at %p",
3824 			    bcp->bc_cache);
3825 			return (WALK_ERR);
3826 		}
3827 
3828 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3829 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3830 			return (WALK_NEXT);
3831 	}
3832 
3833 	delta = kma->kma_newest - bcp->bc_timestamp;
3834 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3835 
3836 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3837 	    &bcp->bc_cache->cache_name) <= 0)
3838 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3839 
3840 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3841 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3842 
3843 	for (i = 0; i < depth; i++)
3844 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3845 
3846 	return (WALK_NEXT);
3847 }
3848 
3849 int
3850 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3851 {
3852 	const char *logname = "kmem_transaction_log";
3853 	kmalog_data_t kma;
3854 
3855 	if (argc > 1)
3856 		return (DCMD_USAGE);
3857 
3858 	kma.kma_newest = 0;
3859 	if (flags & DCMD_ADDRSPEC)
3860 		kma.kma_addr = addr;
3861 	else
3862 		kma.kma_addr = NULL;
3863 
3864 	if (argc > 0) {
3865 		if (argv->a_type != MDB_TYPE_STRING)
3866 			return (DCMD_USAGE);
3867 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3868 			logname = "kmem_failure_log";
3869 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3870 			logname = "kmem_slab_log";
3871 		else
3872 			return (DCMD_USAGE);
3873 	}
3874 
3875 	if (mdb_readvar(&addr, logname) == -1) {
3876 		mdb_warn("failed to read %s log header pointer");
3877 		return (DCMD_ERR);
3878 	}
3879 
3880 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3881 		mdb_warn("failed to walk kmem log");
3882 		return (DCMD_ERR);
3883 	}
3884 
3885 	return (DCMD_OK);
3886 }
3887 
3888 /*
3889  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3890  * The first piece is a structure which we use to accumulate kmem_cache_t
3891  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3892  * walker; we either add all caches, or ones named explicitly as arguments.
3893  */
3894 
3895 typedef struct kmclist {
3896 	const char *kmc_name;			/* Name to match (or NULL) */
3897 	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
3898 	int kmc_nelems;				/* Num entries in kmc_caches */
3899 	int kmc_size;				/* Size of kmc_caches array */
3900 } kmclist_t;
3901 
3902 static int
3903 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3904 {
3905 	void *p;
3906 	int s;
3907 
3908 	if (kmc->kmc_name == NULL ||
3909 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3910 		/*
3911 		 * If we have a match, grow our array (if necessary), and then
3912 		 * add the virtual address of the matching cache to our list.
3913 		 */
3914 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3915 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3916 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3917 
3918 			bcopy(kmc->kmc_caches, p,
3919 			    sizeof (uintptr_t) * kmc->kmc_size);
3920 
3921 			kmc->kmc_caches = p;
3922 			kmc->kmc_size = s;
3923 		}
3924 
3925 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3926 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3927 	}
3928 
3929 	return (WALK_NEXT);
3930 }
3931 
3932 /*
3933  * The second piece of ::kmausers is a hash table of allocations.  Each
3934  * allocation owner is identified by its stack trace and data_size.  We then
3935  * track the total bytes of all such allocations, and the number of allocations
3936  * to report at the end.  Once we have a list of caches, we walk through the
3937  * allocated bufctls of each, and update our hash table accordingly.
3938  */
3939 
3940 typedef struct kmowner {
3941 	struct kmowner *kmo_head;		/* First hash elt in bucket */
3942 	struct kmowner *kmo_next;		/* Next hash elt in chain */
3943 	size_t kmo_signature;			/* Hash table signature */
3944 	uint_t kmo_num;				/* Number of allocations */
3945 	size_t kmo_data_size;			/* Size of each allocation */
3946 	size_t kmo_total_size;			/* Total bytes of allocation */
3947 	int kmo_depth;				/* Depth of stack trace */
3948 	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
3949 } kmowner_t;
3950 
/*
 * State for one ::kmausers run, handed to the bufctl walk callbacks.
 * kmu_cache points at the caller's local copy of the cache currently being
 * walked; kmu_size is the capacity of kmu_hash and is only ever 0 or a
 * power of two (1024 doubled by kmu_add()).
 */
typedef struct kmusers {
	uintptr_t kmu_addr;			/* address of interest */
	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
	kmowner_t *kmu_hash;			/* Hash table of owners */
	int kmu_nelems;				/* Number of entries in use */
	int kmu_size;				/* Total number of entries */
} kmusers_t;
3958 
/*
 * Account one allocation to its owner in the ::kmausers hash table.
 *
 *	kmu		table to update
 *	bcp		audit bufctl supplying the stack trace (at most
 *			KMEM_STACK_DEPTH frames are used)
 *	size		bytes to add to the owner's running total
 *	data_size	per-allocation size; part of the owner's identity
 *
 * An owner is the combination of data_size, stack depth and every stack
 * frame.  If a matching owner exists its count and total are bumped;
 * otherwise the next free array element becomes a new owner.
 */
static void
kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
    size_t size, size_t data_size)
{
	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
	size_t bucket, signature = data_size;
	kmowner_t *kmo, *kmoend;

	/*
	 * If the hash table is full, double its size and rehash everything.
	 */
	if (kmu->kmu_nelems >= kmu->kmu_size) {
		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;

		/* UM_GC allocation: the outgrown table is not freed here */
		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
		kmu->kmu_hash = kmo;
		kmu->kmu_size = s;

		/*
		 * The copied kmo_head/kmo_next pointers refer to the old
		 * array, and the new tail is uninitialized: clear every
		 * bucket head across the whole new table first...
		 */
		kmoend = kmu->kmu_hash + kmu->kmu_size;
		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
			kmo->kmo_head = NULL;

		/*
		 * ...then push each existing owner onto the chain for its
		 * bucket under the new table size.
		 */
		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
			kmu->kmu_hash[bucket].kmo_head = kmo;
		}
	}

	/*
	 * Finish computing the hash signature from the stack trace, and then
	 * see if the owner is in the hash table.  If so, update our stats.
	 */
	for (i = 0; i < depth; i++)
		signature += bcp->bc_stack[i];

	/* kmu_size is a power of two, so the mask selects a bucket */
	bucket = signature & (kmu->kmu_size - 1);

	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
		if (kmo->kmo_signature == signature) {
			/*
			 * Signatures can collide, so confirm the match: OR
			 * together every field difference; the result is
			 * zero only if data size, depth and all frames agree.
			 */
			size_t difference = 0;

			difference |= kmo->kmo_data_size - data_size;
			difference |= kmo->kmo_depth - depth;

			for (i = 0; i < depth; i++) {
				difference |= kmo->kmo_stack[i] -
				    bcp->bc_stack[i];
			}

			if (difference == 0) {
				kmo->kmo_total_size += size;
				kmo->kmo_num++;
				return;
			}
		}
	}

	/*
	 * If the owner is not yet hashed, grab the next element and fill it
	 * in based on the allocation information.
	 */
	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
	kmu->kmu_hash[bucket].kmo_head = kmo;

	kmo->kmo_signature = signature;
	kmo->kmo_num = 1;
	kmo->kmo_data_size = data_size;
	kmo->kmo_total_size = size;
	kmo->kmo_depth = depth;

	for (i = 0; i < depth; i++)
		kmo->kmo_stack[i] = bcp->bc_stack[i];
}
4036 
4037 /*
4038  * When ::kmausers is invoked without the -f flag, we simply update our hash
4039  * table with the information from each allocated bufctl.
4040  */
4041 /*ARGSUSED*/
4042 static int
4043 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4044 {
4045 	const kmem_cache_t *cp = kmu->kmu_cache;
4046 
4047 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4048 	return (WALK_NEXT);
4049 }
4050 
4051 /*
4052  * When ::kmausers is invoked with the -f flag, we print out the information
4053  * for each bufctl as well as updating the hash table.
4054  */
4055 static int
4056 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4057 {
4058 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4059 	const kmem_cache_t *cp = kmu->kmu_cache;
4060 	kmem_bufctl_t bufctl;
4061 
4062 	if (kmu->kmu_addr) {
4063 		if (mdb_vread(&bufctl, sizeof (bufctl),  addr) == -1)
4064 			mdb_warn("couldn't read bufctl at %p", addr);
4065 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4066 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4067 		    cp->cache_bufsize)
4068 			return (WALK_NEXT);
4069 	}
4070 
4071 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4072 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4073 
4074 	for (i = 0; i < depth; i++)
4075 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
4076 
4077 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4078 	return (WALK_NEXT);
4079 }
4080 
4081 /*
4082  * We sort our results by allocation size before printing them.
4083  */
4084 static int
4085 kmownercmp(const void *lp, const void *rp)
4086 {
4087 	const kmowner_t *lhs = lp;
4088 	const kmowner_t *rhs = rp;
4089 
4090 	return (rhs->kmo_total_size - lhs->kmo_total_size);
4091 }
4092 
4093 /*
4094  * The main engine of ::kmausers is relatively straightforward: First we
4095  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4096  * iterate over the allocated bufctls of each cache in the list.  Finally,
4097  * we sort and print our results.
4098  */
4099 /*ARGSUSED*/
4100 int
4101 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4102 {
4103 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
4104 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
4105 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
4106 	int do_all_caches = 1;		/* Do all caches (no arguments) */
4107 	int opt_e = FALSE;		/* Include "small" users */
4108 	int opt_f = FALSE;		/* Print stack traces */
4109 
4110 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4111 	kmowner_t *kmo, *kmoend;
4112 	int i, oelems;
4113 
4114 	kmclist_t kmc;
4115 	kmusers_t kmu;
4116 
4117 	bzero(&kmc, sizeof (kmc));
4118 	bzero(&kmu, sizeof (kmu));
4119 
4120 	while ((i = mdb_getopts(argc, argv,
4121 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4122 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4123 
4124 		argv += i;	/* skip past options we just processed */
4125 		argc -= i;	/* adjust argc */
4126 
4127 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4128 			return (DCMD_USAGE);
4129 
4130 		oelems = kmc.kmc_nelems;
4131 		kmc.kmc_name = argv->a_un.a_str;
4132 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4133 
4134 		if (kmc.kmc_nelems == oelems) {
4135 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4136 			return (DCMD_ERR);
4137 		}
4138 
4139 		do_all_caches = 0;
4140 		argv++;
4141 		argc--;
4142 	}
4143 
4144 	if (flags & DCMD_ADDRSPEC) {
4145 		opt_f = TRUE;
4146 		kmu.kmu_addr = addr;
4147 	} else {
4148 		kmu.kmu_addr = NULL;
4149 	}
4150 
4151 	if (opt_e)
4152 		mem_threshold = cnt_threshold = 0;
4153 
4154 	if (opt_f)
4155 		callback = (mdb_walk_cb_t)kmause2;
4156 
4157 	if (do_all_caches) {
4158 		kmc.kmc_name = NULL; /* match all cache names */
4159 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4160 	}
4161 
4162 	for (i = 0; i < kmc.kmc_nelems; i++) {
4163 		uintptr_t cp = kmc.kmc_caches[i];
4164 		kmem_cache_t c;
4165 
4166 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
4167 			mdb_warn("failed to read cache at %p", cp);
4168 			continue;
4169 		}
4170 
4171 		if (!(c.cache_flags & KMF_AUDIT)) {
4172 			if (!do_all_caches) {
4173 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
4174 				    c.cache_name);
4175 			}
4176 			continue;
4177 		}
4178 
4179 		kmu.kmu_cache = &c;
4180 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
4181 		audited_caches++;
4182 	}
4183 
4184 	if (audited_caches == 0 && do_all_caches) {
4185 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4186 		return (DCMD_ERR);
4187 	}
4188 
4189 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4190 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4191 
4192 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4193 		if (kmo->kmo_total_size < mem_threshold &&
4194 		    kmo->kmo_num < cnt_threshold)
4195 			continue;
4196 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4197 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4198 		for (i = 0; i < kmo->kmo_depth; i++)
4199 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4200 	}
4201 
4202 	return (DCMD_OK);
4203 }
4204 
/*
 * Help text for ::kmausers.
 */
void
kmausers_help(void)
{
	static const char text[] =
	    "Displays the largest users of the kmem allocator, sorted by \n"
	    "trace.  If one or more caches is specified, only those caches\n"
	    "will be searched.  By default, all caches are searched.  If an\n"
	    "address is specified, then only those allocations which include\n"
	    "the given address are displayed.  Specifying an address implies\n"
	    "-f.\n"
	    "\n"
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n";

	mdb_printf("%s", text);
}
4220 
/*
 * Return the target's 'kmem_ready' value, or -1 if it cannot be read (in
 * which case errno has been set by mdb_readvar()).
 */
static int
kmem_ready_check(void)
{
	int state;

	return ((mdb_readvar(&state, "kmem_ready") < 0) ? -1 : state);
}
4231 
4232 void
4233 kmem_statechange(void)
4234 {
4235 	static int been_ready = 0;
4236 
4237 	if (been_ready)
4238 		return;
4239 
4240 	if (kmem_ready_check() <= 0)
4241 		return;
4242 
4243 	been_ready = 1;
4244 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4245 }
4246 
4247 void
4248 kmem_init(void)
4249 {
4250 	mdb_walker_t w = {
4251 		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4252 		list_walk_step, list_walk_fini
4253 	};
4254 
4255 	/*
4256 	 * If kmem is ready, we'll need to invoke the kmem_cache walker
4257 	 * immediately.  Walkers in the linkage structure won't be ready until
4258 	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
4259 	 * is ready, we'll use the walker to initialize the caches.  If kmem
4260 	 * isn't ready, we'll register a callback that will allow us to defer
4261 	 * cache walking until it is.
4262 	 */
4263 	if (mdb_add_walker(&w) != 0) {
4264 		mdb_warn("failed to add kmem_cache walker");
4265 		return;
4266 	}
4267 
4268 	kmem_statechange();
4269 
4270 	/* register our ::whatis handlers */
4271 	mdb_whatis_register("modules", whatis_run_modules, NULL,
4272 	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4273 	mdb_whatis_register("threads", whatis_run_threads, NULL,
4274 	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4275 	mdb_whatis_register("pages", whatis_run_pages, NULL,
4276 	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4277 	mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4278 	    WHATIS_PRIO_ALLOCATOR, 0);
4279 	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4280 	    WHATIS_PRIO_ALLOCATOR, 0);
4281 }
4282 
/*
 * Search parameters handed from ::whatthread to its thread walk callback.
 */
typedef struct whatthread {
	uintptr_t	wt_target;	/* pointer value to search for */
	int		wt_verbose;	/* report every matching stack slot */
} whatthread_t;
4287 
4288 static int
4289 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4290 {
4291 	uintptr_t current, data;
4292 
4293 	if (t->t_stkbase == NULL)
4294 		return (WALK_NEXT);
4295 
4296 	/*
4297 	 * Warn about swapped out threads, but drive on anyway
4298 	 */
4299 	if (!(t->t_schedflag & TS_LOAD)) {
4300 		mdb_warn("thread %p's stack swapped out\n", addr);
4301 		return (WALK_NEXT);
4302 	}
4303 
4304 	/*
4305 	 * Search the thread's stack for the given pointer.  Note that it would
4306 	 * be more efficient to follow ::kgrep's lead and read in page-sized
4307 	 * chunks, but this routine is already fast and simple.
4308 	 */
4309 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4310 	    current += sizeof (uintptr_t)) {
4311 		if (mdb_vread(&data, sizeof (data), current) == -1) {
4312 			mdb_warn("couldn't read thread %p's stack at %p",
4313 			    addr, current);
4314 			return (WALK_ERR);
4315 		}
4316 
4317 		if (data == w->wt_target) {
4318 			if (w->wt_verbose) {
4319 				mdb_printf("%p in thread %p's stack%s\n",
4320 				    current, addr, stack_active(t, current));
4321 			} else {
4322 				mdb_printf("%#lr\n", addr);
4323 				return (WALK_NEXT);
4324 			}
4325 		}
4326 	}
4327 
4328 	return (WALK_NEXT);
4329 }
4330 
4331 int
4332 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4333 {
4334 	whatthread_t w;
4335 
4336 	if (!(flags & DCMD_ADDRSPEC))
4337 		return (DCMD_USAGE);
4338 
4339 	w.wt_verbose = FALSE;
4340 	w.wt_target = addr;
4341 
4342 	if (mdb_getopts(argc, argv,
4343 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4344 		return (DCMD_USAGE);
4345 
4346 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4347 	    == -1) {
4348 		mdb_warn("couldn't walk threads");
4349 		return (DCMD_ERR);
4350 	}
4351 
4352 	return (DCMD_OK);
4353 }
4354