1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <mdb/mdb_param.h>
29 #include <mdb/mdb_modapi.h>
30 #include <mdb/mdb_ctf.h>
31 #include <sys/cpuvar.h>
32 #include <sys/kmem_impl.h>
33 #include <sys/vmem_impl.h>
34 #include <sys/machelf.h>
35 #include <sys/modctl.h>
36 #include <sys/kobj.h>
37 #include <sys/panic.h>
38 #include <sys/stack.h>
39 #include <sys/sysmacros.h>
40 #include <vm/page.h>
41 
42 #include "dist.h"
43 #include "kmem.h"
44 #include "leaky.h"
45 
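/*
 * dprintf() wraps its argument list in an extra set of parentheses so the
 * whole list can be forwarded to mdb_printf(), e.g.
 * dprintf(("cache %p\n", addr)).  Output is produced only while
 * mdb_debug_level is nonzero, which is toggled at runtime by kmem_debug().
 */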
46 #define	dprintf(x) if (mdb_debug_level) { \
47 	mdb_printf("kmem debug: ");  \
48 	/*CSTYLED*/\
49 	mdb_printf x ;\
50 }
51 
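/*
 * Walk type flags handed to kmem_walk_init_common(); the kmem, bufctl,
 * freemem, and freectl walker variants below each pass a different
 * combination to select which buffers are visited and whether buffer or
 * bufctl addresses are reported to the callback.
 */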
52 #define	KM_ALLOCATED		0x01
53 #define	KM_FREE			0x02
54 #define	KM_BUFCTL		0x04
55 #define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
56 #define	KM_HASH			0x10
57 
58 static int mdb_debug_level = 0;
59 
60 /*ARGSUSED*/
61 static int
62 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
63 {
64 	mdb_walker_t w;
65 	char descr[64];
66 
67 	(void) mdb_snprintf(descr, sizeof (descr),
68 	    "walk the %s cache", c->cache_name);
69 
70 	w.walk_name = c->cache_name;
71 	w.walk_descr = descr;
72 	w.walk_init = kmem_walk_init;
73 	w.walk_step = kmem_walk_step;
74 	w.walk_fini = kmem_walk_fini;
75 	w.walk_init_arg = (void *)addr;
76 
77 	if (mdb_add_walker(&w) == -1)
78 		mdb_warn("failed to add %s walker", c->cache_name);
79 
80 	return (WALK_NEXT);
81 }
82 
83 /*ARGSUSED*/
84 int
85 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
86 {
87 	mdb_debug_level ^= 1;
88 
89 	mdb_printf("kmem: debugging is now %s\n",
90 	    mdb_debug_level ? "on" : "off");
91 
92 	return (DCMD_OK);
93 }
94 
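/*
 * The global cache list is circular and anchored at kmem_null_cache;
 * kcw_first records the anchor's address so that kmem_cache_walk_step()
 * can stop once the list wraps back around to it.
 */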
95 typedef struct {
96 	uintptr_t kcw_first;
97 	uintptr_t kcw_current;
98 } kmem_cache_walk_t;
99 
100 int
101 kmem_cache_walk_init(mdb_walk_state_t *wsp)
102 {
103 	kmem_cache_walk_t *kcw;
104 	kmem_cache_t c;
105 	uintptr_t cp;
106 	GElf_Sym sym;
107 
108 	if (mdb_lookup_by_name("kmem_null_cache", &sym) == -1) {
109 		mdb_warn("couldn't find kmem_null_cache");
110 		return (WALK_ERR);
111 	}
112 
113 	cp = (uintptr_t)sym.st_value;
114 
115 	if (mdb_vread(&c, sizeof (kmem_cache_t), cp) == -1) {
116 		mdb_warn("couldn't read cache at %p", cp);
117 		return (WALK_ERR);
118 	}
119 
120 	kcw = mdb_alloc(sizeof (kmem_cache_walk_t), UM_SLEEP);
121 
122 	kcw->kcw_first = cp;
123 	kcw->kcw_current = (uintptr_t)c.cache_next;
124 	wsp->walk_data = kcw;
125 
126 	return (WALK_NEXT);
127 }
128 
129 int
130 kmem_cache_walk_step(mdb_walk_state_t *wsp)
131 {
132 	kmem_cache_walk_t *kcw = wsp->walk_data;
133 	kmem_cache_t c;
134 	int status;
135 
136 	if (mdb_vread(&c, sizeof (kmem_cache_t), kcw->kcw_current) == -1) {
137 		mdb_warn("couldn't read cache at %p", kcw->kcw_current);
138 		return (WALK_DONE);
139 	}
140 
141 	status = wsp->walk_callback(kcw->kcw_current, &c, wsp->walk_cbdata);
142 
143 	if ((kcw->kcw_current = (uintptr_t)c.cache_next) == kcw->kcw_first)
144 		return (WALK_DONE);
145 
146 	return (status);
147 }
148 
149 void
150 kmem_cache_walk_fini(mdb_walk_state_t *wsp)
151 {
152 	kmem_cache_walk_t *kcw = wsp->walk_data;
153 	mdb_free(kcw, sizeof (kmem_cache_walk_t));
154 }
155 
156 int
157 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
158 {
159 	if (wsp->walk_addr == NULL) {
160 		mdb_warn("kmem_cpu_cache doesn't support global walks");
161 		return (WALK_ERR);
162 	}
163 
164 	if (mdb_layered_walk("cpu", wsp) == -1) {
165 		mdb_warn("couldn't walk 'cpu'");
166 		return (WALK_ERR);
167 	}
168 
169 	wsp->walk_data = (void *)wsp->walk_addr;
170 
171 	return (WALK_NEXT);
172 }
173 
174 int
175 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
176 {
177 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
178 	const cpu_t *cpu = wsp->walk_layer;
179 	kmem_cpu_cache_t cc;
180 
181 	caddr += cpu->cpu_cache_offset;
182 
183 	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
184 		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
185 		return (WALK_ERR);
186 	}
187 
188 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
189 }
190 
191 int
192 kmem_slab_walk_init(mdb_walk_state_t *wsp)
193 {
194 	uintptr_t caddr = wsp->walk_addr;
195 	kmem_cache_t c;
196 
197 	if (caddr == NULL) {
198 		mdb_warn("kmem_slab doesn't support global walks\n");
199 		return (WALK_ERR);
200 	}
201 
202 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
203 		mdb_warn("couldn't read kmem_cache at %p", caddr);
204 		return (WALK_ERR);
205 	}
206 
207 	wsp->walk_data =
208 	    (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab));
209 	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
210 
211 	return (WALK_NEXT);
212 }
213 
214 int
215 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
216 {
217 	uintptr_t caddr = wsp->walk_addr;
218 	kmem_cache_t c;
219 
220 	if (caddr == NULL) {
221 		mdb_warn("kmem_slab_partial doesn't support global walks\n");
222 		return (WALK_ERR);
223 	}
224 
225 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
226 		mdb_warn("couldn't read kmem_cache at %p", caddr);
227 		return (WALK_ERR);
228 	}
229 
230 	wsp->walk_data =
231 	    (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab));
232 	wsp->walk_addr = (uintptr_t)c.cache_freelist;
233 
234 	/*
235 	 * Some consumers (kmem_walk_step(), in particular) require at
236 	 * least one callback if there are any buffers in the cache.  So
237 	 * if there are *no* partial slabs, report the last full slab, if
238 	 * any.
239 	 *
240 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
241 	 */
242 	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
243 		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
244 
245 	return (WALK_NEXT);
246 }
247 
248 int
249 kmem_slab_walk_step(mdb_walk_state_t *wsp)
250 {
251 	kmem_slab_t s;
252 	uintptr_t addr = wsp->walk_addr;
253 	uintptr_t saddr = (uintptr_t)wsp->walk_data;
254 	uintptr_t caddr = saddr - offsetof(kmem_cache_t, cache_nullslab);
255 
256 	if (addr == saddr)
257 		return (WALK_DONE);
258 
259 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
260 		mdb_warn("failed to read slab at %p", wsp->walk_addr);
261 		return (WALK_ERR);
262 	}
263 
264 	if ((uintptr_t)s.slab_cache != caddr) {
265 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
266 		    addr, caddr, s.slab_cache);
267 		return (WALK_ERR);
268 	}
269 
270 	wsp->walk_addr = (uintptr_t)s.slab_next;
271 
272 	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
273 }
274 
275 int
276 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
277 {
278 	kmem_cache_t c;
279 
280 	if (!(flags & DCMD_ADDRSPEC)) {
281 		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
282 			mdb_warn("can't walk kmem_cache");
283 			return (DCMD_ERR);
284 		}
285 		return (DCMD_OK);
286 	}
287 
288 	if (DCMD_HDRSPEC(flags))
289 		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
290 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
291 
292 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
293 		mdb_warn("couldn't read kmem_cache at %p", addr);
294 		return (DCMD_ERR);
295 	}
296 
297 	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
298 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
299 
300 	return (DCMD_OK);
301 }
302 
303 typedef struct kmem_slab_usage {
304 	int ksu_refcnt;			/* count of allocated buffers on slab */
305 } kmem_slab_usage_t;
306 
307 typedef struct kmem_slab_stats {
308 	int ks_slabs;			/* slabs in cache */
309 	int ks_partial_slabs;		/* partially allocated slabs in cache */
310 	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
311 	int ks_buffers_per_slab;	/* buffers per slab */
312 	int ks_usage_len;		/* ks_usage array length */
313 	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
314 	uint_t *ks_bucket;		/* slab usage distribution */
315 } kmem_slab_stats_t;
316 
317 #define	LABEL_WIDTH	11
318 static void
319 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
320     size_t maxbuckets, size_t minbucketsize)
321 {
322 	uint64_t total;
323 	int buckets;
324 	int i;
325 	const int *distarray;
326 	int complete[2];
327 
328 	buckets = buffers_per_slab;
329 
330 	total = 0;
331 	for (i = 0; i <= buffers_per_slab; i++)
332 		total += ks_bucket[i];
333 
334 	if (maxbuckets > 1)
335 		buckets = MIN(buckets, maxbuckets);
336 
337 	if (minbucketsize > 1) {
338 		/*
339 		 * minbucketsize does not apply to the first bucket reserved
340 		 * for completely allocated slabs
341 		 */
342 		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
343 		    minbucketsize));
344 		if ((buckets < 2) && (buffers_per_slab > 1)) {
345 			buckets = 2;
346 			minbucketsize = (buffers_per_slab - 1);
347 		}
348 	}
349 
350 	/*
351 	 * The first printed bucket is reserved for completely allocated slabs.
352 	 * Passing (buckets - 1) excludes that bucket from the generated
353 	 * distribution, since we're handling it as a special case.
354 	 */
355 	complete[0] = buffers_per_slab;
356 	complete[1] = buffers_per_slab + 1;
357 	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
358 
359 	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
360 	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
361 
362 	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
363 	/*
364 	 * Print bucket ranges in descending order after the first bucket for
365 	 * completely allocated slabs, so a person can see immediately whether
366 	 * or not there is fragmentation without having to scan possibly
367 	 * multiple screens of output. Starting at (buckets - 2) excludes the
368 	 * extra terminating bucket.
369 	 */
370 	for (i = buckets - 2; i >= 0; i--) {
371 		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
372 	}
373 	mdb_printf("\n");
374 }
375 #undef LABEL_WIDTH
376 
377 /*ARGSUSED*/
378 static int
379 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
380 {
381 	*is_slab = B_TRUE;
382 	return (WALK_DONE);
383 }
384 
385 /*ARGSUSED*/
386 static int
387 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
388     boolean_t *is_slab)
389 {
390 	/*
391 	 * The "kmem_partial_slab" walker reports the last full slab if there
392 	 * are no partial slabs (for the sake of consumers that require at least
393 	 * one callback if there are any buffers in the cache).
394 	 */
395 	*is_slab = ((sp->slab_refcnt > 0) &&
396 	    (sp->slab_refcnt < sp->slab_chunks));
397 	return (WALK_DONE);
398 }
399 
400 /*ARGSUSED*/
401 static int
402 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
403     kmem_slab_stats_t *ks)
404 {
405 	kmem_slab_usage_t *ksu;
406 	long unused;
407 
408 	ks->ks_slabs++;
409 	if (ks->ks_buffers_per_slab == 0) {
410 		ks->ks_buffers_per_slab = sp->slab_chunks;
411 		/* +1 to include a zero bucket */
412 		ks->ks_bucket = mdb_zalloc((ks->ks_buffers_per_slab + 1) *
413 		    sizeof (*ks->ks_bucket), UM_SLEEP | UM_GC);
414 	}
415 	ks->ks_bucket[sp->slab_refcnt]++;
416 
417 	unused = (sp->slab_chunks - sp->slab_refcnt);
418 	if (unused == 0) {
419 		return (WALK_NEXT);
420 	}
421 
422 	ks->ks_partial_slabs++;
423 	ks->ks_unused_buffers += unused;
424 
425 	if (ks->ks_partial_slabs > ks->ks_usage_len) {
426 		kmem_slab_usage_t *usage;
427 		int len = ks->ks_usage_len;
428 
429 		len = (len == 0 ? 16 : len * 2);
430 		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
431 		if (ks->ks_usage != NULL) {
432 			bcopy(ks->ks_usage, usage,
433 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
434 			mdb_free(ks->ks_usage,
435 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
436 		}
437 		ks->ks_usage = usage;
438 		ks->ks_usage_len = len;
439 	}
440 
441 	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
442 	ksu->ksu_refcnt = sp->slab_refcnt;
443 	return (WALK_NEXT);
444 }
445 
446 static void
447 kmem_slabs_header()
448 {
449 	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
450 	    "", "", "Partial", "", "Unused", "");
451 	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
452 	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
453 	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
454 	    "-------------------------", "--------", "--------", "---------",
455 	    "---------", "------");
456 }
457 
458 int
459 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
460 {
461 	kmem_cache_t c;
462 	kmem_slab_stats_t stats;
463 	mdb_walk_cb_t cb;
464 	int pct;
465 	int tenths_pct;
466 	size_t maxbuckets = 1;
467 	size_t minbucketsize = 0;
468 	const char *filter = NULL;
469 	uint_t opt_v = FALSE;
470 	boolean_t verbose = B_FALSE;
471 	boolean_t skip = B_FALSE;
472 
473 	if (mdb_getopts(argc, argv,
474 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
475 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
476 	    'n', MDB_OPT_STR, &filter,
477 	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
478 	    NULL) != argc) {
479 		return (DCMD_USAGE);
480 	}
481 
482 	if (opt_v || (maxbuckets != 1) || (minbucketsize != 0)) {
483 		verbose = B_TRUE;
484 	}
485 
486 	if (!(flags & DCMD_ADDRSPEC)) {
487 		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
488 		    argv) == -1) {
489 			mdb_warn("can't walk kmem_cache");
490 			return (DCMD_ERR);
491 		}
492 		return (DCMD_OK);
493 	}
494 
495 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
496 		mdb_warn("couldn't read kmem_cache at %p", addr);
497 		return (DCMD_ERR);
498 	}
499 
500 	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL)) {
501 		skip = B_TRUE;
502 	}
503 
504 	if (!verbose && DCMD_HDRSPEC(flags)) {
505 		kmem_slabs_header();
506 	} else if (verbose && !skip) {
507 		if (DCMD_HDRSPEC(flags)) {
508 			kmem_slabs_header();
509 		} else {
510 			boolean_t is_slab = B_FALSE;
511 			const char *walker_name;
512 			if (opt_v) {
513 				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
514 				walker_name = "kmem_slab_partial";
515 			} else {
516 				cb = (mdb_walk_cb_t)kmem_first_slab;
517 				walker_name = "kmem_slab";
518 			}
519 			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
520 			if (is_slab) {
521 				kmem_slabs_header();
522 			}
523 		}
524 	}
525 
526 	if (skip) {
527 		return (DCMD_OK);
528 	}
529 
530 	bzero(&stats, sizeof (kmem_slab_stats_t));
531 	cb = (mdb_walk_cb_t)kmem_slablist_stat;
532 	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);
533 
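	/*
	 * Compute the waste percentage in hundredths of a percent, then
	 * split it into a whole-percent part and a rounded tenths digit for
	 * display (e.g. 1234 hundredths of a percent prints as "12.3%").
	 */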
534 	if (c.cache_buftotal == 0) {
535 		pct = 0;
536 		tenths_pct = 0;
537 	} else {
538 		uint64_t n = stats.ks_unused_buffers * 10000;
539 		pct = (int)(n / c.cache_buftotal);
540 		tenths_pct = pct - ((pct / 100) * 100);
541 		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
542 		if (tenths_pct == 10) {
543 			pct += 100;
544 			tenths_pct = 0;
545 		}
546 	}
547 
548 	pct /= 100;
549 	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
550 	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
551 	    stats.ks_unused_buffers, pct, tenths_pct);
552 
553 	if (!verbose) {
554 		return (DCMD_OK);
555 	}
556 
557 	if (maxbuckets == 0) {
558 		maxbuckets = stats.ks_buffers_per_slab;
559 	}
560 
561 	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
562 	    (stats.ks_slabs > 0)) {
563 		mdb_printf("\n");
564 		kmem_slabs_print_dist(stats.ks_bucket,
565 		    stats.ks_buffers_per_slab, maxbuckets, minbucketsize);
566 	}
567 
568 	if (opt_v && (stats.ks_partial_slabs > 0)) {
569 		int i;
570 		kmem_slab_usage_t *ksu;
571 
572 		mdb_printf("  %d complete, %d partial",
573 		    (stats.ks_slabs - stats.ks_partial_slabs),
574 		    stats.ks_partial_slabs);
575 		if (stats.ks_partial_slabs > 0) {
576 			mdb_printf(" (%d):", stats.ks_buffers_per_slab);
577 		}
578 		for (i = 0; i < stats.ks_partial_slabs; i++) {
579 			ksu = &stats.ks_usage[i];
580 			mdb_printf(" %d", ksu->ksu_refcnt);
581 		}
582 		mdb_printf("\n\n");
583 	}
584 
585 	if (stats.ks_usage_len > 0) {
586 		mdb_free(stats.ks_usage,
587 		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
588 	}
589 
590 	return (DCMD_OK);
591 }
592 
593 void
594 kmem_slabs_help(void)
595 {
596 	mdb_printf("%s\n",
597 "Display slab usage per kmem cache.\n");
598 	mdb_dec_indent(2);
599 	mdb_printf("%<b>OPTIONS%</b>\n");
600 	mdb_inc_indent(2);
601 	mdb_printf("%s",
602 "  -n name\n"
603 "        name of kmem cache (or matching partial name)\n"
604 "  -b maxbins\n"
605 "        Print a distribution of allocated buffers per slab using at\n"
606 "        most maxbins bins. The first bin is reserved for completely\n"
607 "        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
608 "        effect as specifying the maximum allocated buffers per slab\n"
609 "        or setting minbinsize to 1 (-B 1).\n"
610 "  -B minbinsize\n"
611 "        Print a distribution of allocated buffers per slab, making\n"
612 "        all bins (except the first, reserved for completely allocated\n"
613 "        slabs) at least minbinsize buffers apart.\n"
614 "  -v    verbose output: List the allocated buffer count of each partial\n"
615 "        slab on the free list in order from front to back to show how\n"
616 "        closely the slabs are ordered by usage. For example\n"
617 "\n"
618 "          10 complete, 3 partial (8): 7 3 1\n"
619 "\n"
620 "        means there are thirteen slabs with eight buffers each, including\n"
621 "        three partially allocated slabs with fewer than all eight buffers\n"
622 "        allocated.\n"
623 "\n"
624 "        Buffer allocations are always from the front of the partial slab\n"
625 "        list. When a buffer is freed from a completely used slab, that\n"
626 "        slab is added to the front of the partial slab list. Assuming\n"
627 "        that all buffers are equally likely to be freed soon, the\n"
628 "        desired order of partial slabs is most-used at the front of the\n"
629 "        list and least-used at the back (as in the example above).\n"
630 "        However, if a slab contains an allocated buffer that will not\n"
631 "        soon be freed, it would be better for that slab to be at the\n"
632 "        front where it can get used up. Taking a slab off the partial\n"
633 "        slab list (either with all buffers freed or all buffers\n"
634 "        allocated) reduces cache fragmentation.\n"
635 "\n"
636 "Column\t\tDescription\n"
637 "\n"
638 "Cache Name\t\tname of kmem cache\n"
639 "Slabs\t\t\ttotal slab count\n"
640 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
641 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
642 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
643 "Waste\t\t\t(Unused Buffers / Buffers); does not include space\n"
644 "\t\t\t  for accounting structures (debug mode), slab\n"
645 "\t\t\t  coloring (incremental small offsets to stagger\n"
646 "\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
647 }
648 
649 static int
650 addrcmp(const void *lhs, const void *rhs)
651 {
652 	uintptr_t p1 = *((uintptr_t *)lhs);
653 	uintptr_t p2 = *((uintptr_t *)rhs);
654 
655 	if (p1 < p2)
656 		return (-1);
657 	if (p1 > p2)
658 		return (1);
659 	return (0);
660 }
661 
662 static int
663 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
664 {
665 	const kmem_bufctl_audit_t *bcp1 = *lhs;
666 	const kmem_bufctl_audit_t *bcp2 = *rhs;
667 
668 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
669 		return (-1);
670 
671 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
672 		return (1);
673 
674 	return (0);
675 }
676 
677 typedef struct kmem_hash_walk {
678 	uintptr_t *kmhw_table;
679 	size_t kmhw_nelems;
680 	size_t kmhw_pos;
681 	kmem_bufctl_t kmhw_cur;
682 } kmem_hash_walk_t;
683 
684 int
685 kmem_hash_walk_init(mdb_walk_state_t *wsp)
686 {
687 	kmem_hash_walk_t *kmhw;
688 	uintptr_t *hash;
689 	kmem_cache_t c;
690 	uintptr_t haddr, addr = wsp->walk_addr;
691 	size_t nelems;
692 	size_t hsize;
693 
694 	if (addr == NULL) {
695 		mdb_warn("kmem_hash doesn't support global walks\n");
696 		return (WALK_ERR);
697 	}
698 
699 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
700 		mdb_warn("couldn't read cache at addr %p", addr);
701 		return (WALK_ERR);
702 	}
703 
704 	if (!(c.cache_flags & KMF_HASH)) {
705 		mdb_warn("cache %p doesn't have a hash table\n", addr);
706 		return (WALK_DONE);		/* nothing to do */
707 	}
708 
709 	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
710 	kmhw->kmhw_cur.bc_next = NULL;
711 	kmhw->kmhw_pos = 0;
712 
713 	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
714 	hsize = nelems * sizeof (uintptr_t);
715 	haddr = (uintptr_t)c.cache_hash_table;
716 
717 	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
718 	if (mdb_vread(hash, hsize, haddr) == -1) {
719 		mdb_warn("failed to read hash table at %p", haddr);
720 		mdb_free(hash, hsize);
721 		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
722 		return (WALK_ERR);
723 	}
724 
725 	wsp->walk_data = kmhw;
726 
727 	return (WALK_NEXT);
728 }
729 
730 int
731 kmem_hash_walk_step(mdb_walk_state_t *wsp)
732 {
733 	kmem_hash_walk_t *kmhw = wsp->walk_data;
734 	uintptr_t addr = NULL;
735 
736 	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
737 		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
738 			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
739 				break;
740 		}
741 	}
742 	if (addr == NULL)
743 		return (WALK_DONE);
744 
745 	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
746 		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
747 		return (WALK_ERR);
748 	}
749 
750 	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
751 }
752 
753 void
754 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
755 {
756 	kmem_hash_walk_t *kmhw = wsp->walk_data;
757 
758 	if (kmhw == NULL)
759 		return;
760 
761 	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
762 	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
763 }
764 
765 /*
766  * Find the address of the bufctl structure for the address 'buf' in cache
767  * 'cp', which is at address caddr, and place it in *out.
768  */
769 static int
770 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
771 {
772 	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
773 	kmem_bufctl_t *bcp;
774 	kmem_bufctl_t bc;
775 
776 	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
777 		mdb_warn("unable to read hash bucket for %p in cache %p",
778 		    buf, caddr);
779 		return (-1);
780 	}
781 
782 	while (bcp != NULL) {
783 		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
784 		    (uintptr_t)bcp) == -1) {
785 			mdb_warn("unable to read bufctl at %p", bcp);
786 			return (-1);
787 		}
788 		if (bc.bc_addr == buf) {
789 			*out = (uintptr_t)bcp;
790 			return (0);
791 		}
792 		bcp = bc.bc_next;
793 	}
794 
795 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
796 	return (-1);
797 }
798 
799 int
800 kmem_get_magsize(const kmem_cache_t *cp)
801 {
802 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
803 	GElf_Sym mt_sym;
804 	kmem_magtype_t mt;
805 	int res;
806 
807 	/*
808 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
809 	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
810 	 * it is okay to return 0 for them.
811 	 */
812 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
813 	    (cp->cache_flags & KMF_NOMAGAZINE))
814 		return (res);
815 
816 	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
817 		mdb_warn("unable to read 'kmem_magtype'");
818 	} else if (addr < mt_sym.st_value ||
819 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
820 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
821 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
822 		    cp->cache_name, addr);
823 		return (0);
824 	}
825 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
826 		mdb_warn("unable to read magtype at %a", addr);
827 		return (0);
828 	}
829 	return (mt.mt_magsize);
830 }
831 
832 /*ARGSUSED*/
833 static int
834 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
835 {
836 	*est -= (sp->slab_chunks - sp->slab_refcnt);
837 
838 	return (WALK_NEXT);
839 }
840 
841 /*
842  * Returns an upper bound on the number of allocated buffers in a given
843  * cache.
844  */
845 size_t
846 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
847 {
848 	int magsize;
849 	size_t cache_est;
850 
851 	cache_est = cp->cache_buftotal;
852 
853 	(void) mdb_pwalk("kmem_slab_partial",
854 	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
855 
856 	if ((magsize = kmem_get_magsize(cp)) != 0) {
857 		size_t mag_est = cp->cache_full.ml_total * magsize;
858 
859 		if (cache_est >= mag_est) {
860 			cache_est -= mag_est;
861 		} else {
862 			mdb_warn("cache %p's magazine layer holds more buffers "
863 			    "than the slab layer.\n", addr);
864 		}
865 	}
866 	return (cache_est);
867 }
868 
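/*
 * READMAG_ROUNDS() is only expanded inside kmem_read_magazines(); it relies
 * on the local variables kmp, mp, magbsize, maglist, magcnt, magmax, and i,
 * as well as the 'fail' label, being in scope at the expansion site.
 */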
869 #define	READMAG_ROUNDS(rounds) { \
870 	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
871 		mdb_warn("couldn't read magazine at %p", kmp); \
872 		goto fail; \
873 	} \
874 	for (i = 0; i < rounds; i++) { \
875 		maglist[magcnt++] = mp->mag_round[i]; \
876 		if (magcnt == magmax) { \
877 			mdb_warn("%d magazines exceeds fudge factor\n", \
878 			    magcnt); \
879 			goto fail; \
880 		} \
881 	} \
882 }
883 
884 int
885 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
886     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
887 {
888 	kmem_magazine_t *kmp, *mp;
889 	void **maglist = NULL;
890 	int i, cpu;
891 	size_t magsize, magmax, magbsize;
892 	size_t magcnt = 0;
893 
894 	/*
895 	 * Read the magtype out of the cache, after verifying the pointer's
896 	 * correctness.
897 	 */
898 	magsize = kmem_get_magsize(cp);
899 	if (magsize == 0) {
900 		*maglistp = NULL;
901 		*magcntp = 0;
902 		*magmaxp = 0;
903 		return (WALK_NEXT);
904 	}
905 
906 	/*
907 	 * There are several places where we need to go buffer hunting:
908 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
909 	 * and the full magazine list in the depot.
910 	 *
911 	 * For an upper bound on the number of buffers in the magazine
912 	 * layer, we have the number of magazines on the cache_full
913 	 * list plus at most two magazines per CPU (the loaded and the
914 	 * spare).  Toss in 100 magazines as a fudge factor in case this
915 	 * is live (the number "100" comes from the same fudge factor in
916 	 * crash(1M)).
917 	 */
918 	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
919 	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
920 
921 	if (magbsize >= PAGESIZE / 2) {
922 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
923 		    addr, magbsize);
924 		return (WALK_ERR);
925 	}
926 
927 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
928 	mp = mdb_alloc(magbsize, alloc_flags);
929 	if (mp == NULL || maglist == NULL)
930 		goto fail;
931 
932 	/*
933 	 * First up: the magazines in the depot (i.e. on the cache_full list).
934 	 */
935 	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
936 		READMAG_ROUNDS(magsize);
937 		kmp = mp->mag_next;
938 
939 		if (kmp == cp->cache_full.ml_list)
940 			break; /* cache_full list loop detected */
941 	}
942 
943 	dprintf(("cache_full list done\n"));
944 
945 	/*
946 	 * Now whip through the CPUs, snagging the loaded magazines
947 	 * and full spares.
948 	 */
949 	for (cpu = 0; cpu < ncpus; cpu++) {
950 		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
951 
952 		dprintf(("reading cpu cache %p\n",
953 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
954 
955 		if (ccp->cc_rounds > 0 &&
956 		    (kmp = ccp->cc_loaded) != NULL) {
957 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
958 			READMAG_ROUNDS(ccp->cc_rounds);
959 		}
960 
961 		if (ccp->cc_prounds > 0 &&
962 		    (kmp = ccp->cc_ploaded) != NULL) {
963 			dprintf(("reading %d previously loaded rounds\n",
964 			    ccp->cc_prounds));
965 			READMAG_ROUNDS(ccp->cc_prounds);
966 		}
967 	}
968 
969 	dprintf(("magazine layer: %d buffers\n", magcnt));
970 
971 	if (!(alloc_flags & UM_GC))
972 		mdb_free(mp, magbsize);
973 
974 	*maglistp = maglist;
975 	*magcntp = magcnt;
976 	*magmaxp = magmax;
977 
978 	return (WALK_NEXT);
979 
980 fail:
981 	if (!(alloc_flags & UM_GC)) {
982 		if (mp)
983 			mdb_free(mp, magbsize);
984 		if (maglist)
985 			mdb_free(maglist, magmax * sizeof (void *));
986 	}
987 	return (WALK_ERR);
988 }
989 
990 static int
991 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
992 {
993 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
994 }
995 
996 static int
997 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
998 {
999 	kmem_bufctl_audit_t b;
1000 
1001 	/*
1002 	 * if KMF_AUDIT is not set, we know that we're looking at a
1003 	 * kmem_bufctl_t.
1004 	 */
1005 	if (!(cp->cache_flags & KMF_AUDIT) ||
1006 	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1007 		(void) memset(&b, 0, sizeof (b));
1008 		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1009 			mdb_warn("unable to read bufctl at %p", buf);
1010 			return (WALK_ERR);
1011 		}
1012 	}
1013 
1014 	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1015 }
1016 
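/*
 * State for the generic kmem/bufctl/freemem/freectl walks: the magazine
 * layer is snapshotted up front into kmw_maglist by kmem_walk_init_common(),
 * and kmem_walk_step() then runs as a layered walk over either the hash
 * table (allocated buffers in KMF_HASH caches) or the slab list.
 */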
1017 typedef struct kmem_walk {
1018 	int kmw_type;
1019 
1020 	uintptr_t kmw_addr;		/* cache address */
1021 	kmem_cache_t *kmw_cp;
1022 	size_t kmw_csize;
1023 
1024 	/*
1025 	 * magazine layer
1026 	 */
1027 	void **kmw_maglist;
1028 	size_t kmw_max;
1029 	size_t kmw_count;
1030 	size_t kmw_pos;
1031 
1032 	/*
1033 	 * slab layer
1034 	 */
1035 	char *kmw_valid;	/* to keep track of freed buffers */
1036 	char *kmw_ubase;	/* buffer for slab data */
1037 } kmem_walk_t;
1038 
1039 static int
1040 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1041 {
1042 	kmem_walk_t *kmw;
1043 	int ncpus, csize;
1044 	kmem_cache_t *cp;
1045 	size_t vm_quantum;
1046 
1047 	size_t magmax, magcnt;
1048 	void **maglist = NULL;
1049 	uint_t chunksize, slabsize;
1050 	int status = WALK_ERR;
1051 	uintptr_t addr = wsp->walk_addr;
1052 	const char *layered;
1053 
1054 	type &= ~KM_HASH;
1055 
1056 	if (addr == NULL) {
1057 		mdb_warn("kmem walk doesn't support global walks\n");
1058 		return (WALK_ERR);
1059 	}
1060 
1061 	dprintf(("walking %p\n", addr));
1062 
1063 	/*
1064 	 * First we need to figure out how many CPUs are configured in the
1065 	 * system to know how much to slurp out.
1066 	 */
1067 	mdb_readvar(&ncpus, "max_ncpus");
1068 
1069 	csize = KMEM_CACHE_SIZE(ncpus);
1070 	cp = mdb_alloc(csize, UM_SLEEP);
1071 
1072 	if (mdb_vread(cp, csize, addr) == -1) {
1073 		mdb_warn("couldn't read cache at addr %p", addr);
1074 		goto out2;
1075 	}
1076 
1077 	/*
1078 	 * It's easy for someone to hand us an invalid cache address.
1079 	 * Unfortunately, it is hard for this walker to survive an
1080 	 * invalid cache cleanly.  So we make sure that:
1081 	 *
1082 	 *	1. the vmem arena for the cache is readable,
1083 	 *	2. the vmem arena's quantum is a power of 2,
1084 	 *	3. our slabsize is a multiple of the quantum, and
1085 	 *	4. our chunksize is >0 and less than our slabsize.
1086 	 */
1087 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1088 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1089 	    vm_quantum == 0 ||
1090 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1091 	    cp->cache_slabsize < vm_quantum ||
1092 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1093 	    cp->cache_chunksize == 0 ||
1094 	    cp->cache_chunksize > cp->cache_slabsize) {
1095 		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1096 		goto out2;
1097 	}
1098 
1099 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1100 
1101 	if (cp->cache_buftotal == 0) {
1102 		mdb_free(cp, csize);
1103 		return (WALK_DONE);
1104 	}
1105 
1106 	/*
1107 	 * If they ask for bufctls, but it's a small-slab cache,
1108 	 * there is nothing to report.
1109 	 */
1110 	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1111 		dprintf(("bufctl requested, not KMF_HASH (flags: %x)\n",
1112 		    cp->cache_flags));
1113 		mdb_free(cp, csize);
1114 		return (WALK_DONE);
1115 	}
1116 
1117 	/*
1118 	 * If they want constructed buffers, but there's no constructor or
1119 	 * the cache has DEADBEEF checking enabled, there is nothing to report.
1120 	 */
1121 	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1122 	    cp->cache_constructor == NULL ||
1123 	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1124 		mdb_free(cp, csize);
1125 		return (WALK_DONE);
1126 	}
1127 
1128 	/*
1129 	 * Read in the contents of the magazine layer
1130 	 */
1131 	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1132 	    &magmax, UM_SLEEP) == WALK_ERR)
1133 		goto out2;
1134 
1135 	/*
1136 	 * We have all of the buffers from the magazines;  if we are walking
1137 	 * allocated buffers, sort them so we can bsearch them later.
1138 	 */
1139 	if (type & KM_ALLOCATED)
1140 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1141 
1142 	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1143 
1144 	kmw->kmw_type = type;
1145 	kmw->kmw_addr = addr;
1146 	kmw->kmw_cp = cp;
1147 	kmw->kmw_csize = csize;
1148 	kmw->kmw_maglist = maglist;
1149 	kmw->kmw_max = magmax;
1150 	kmw->kmw_count = magcnt;
1151 	kmw->kmw_pos = 0;
1152 
1153 	/*
1154 	 * When walking allocated buffers in a KMF_HASH cache, we walk the
1155 	 * hash table instead of the slab layer.
1156 	 */
1157 	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1158 		layered = "kmem_hash";
1159 
1160 		kmw->kmw_type |= KM_HASH;
1161 	} else {
1162 		/*
1163 		 * If we are walking freed buffers, we only need the
1164 		 * magazine layer plus the partially allocated slabs.
1165 		 * To walk allocated buffers, we need all of the slabs.
1166 		 */
1167 		if (type & KM_ALLOCATED)
1168 			layered = "kmem_slab";
1169 		else
1170 			layered = "kmem_slab_partial";
1171 
1172 		/*
1173 		 * for small-slab caches, we read in the entire slab.  For
1174 		 * freed buffers, we can just walk the freelist.  For
1175 		 * allocated buffers, we use a 'valid' array to track
1176 		 * the freed buffers.
1177 		 */
1178 		if (!(cp->cache_flags & KMF_HASH)) {
1179 			chunksize = cp->cache_chunksize;
1180 			slabsize = cp->cache_slabsize;
1181 
1182 			kmw->kmw_ubase = mdb_alloc(slabsize +
1183 			    sizeof (kmem_bufctl_t), UM_SLEEP);
1184 
1185 			if (type & KM_ALLOCATED)
1186 				kmw->kmw_valid =
1187 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1188 		}
1189 	}
1190 
1191 	status = WALK_NEXT;
1192 
1193 	if (mdb_layered_walk(layered, wsp) == -1) {
1194 		mdb_warn("unable to start layered '%s' walk", layered);
1195 		status = WALK_ERR;
1196 	}
1197 
1198 out1:
1199 	if (status == WALK_ERR) {
1200 		if (kmw->kmw_valid)
1201 			mdb_free(kmw->kmw_valid, slabsize / chunksize);
1202 
1203 		if (kmw->kmw_ubase)
1204 			mdb_free(kmw->kmw_ubase, slabsize +
1205 			    sizeof (kmem_bufctl_t));
1206 
1207 		if (kmw->kmw_maglist)
1208 			mdb_free(kmw->kmw_maglist,
1209 			    kmw->kmw_max * sizeof (void *));
1210 
1211 		mdb_free(kmw, sizeof (kmem_walk_t));
1212 		wsp->walk_data = NULL;
1213 	}
1214 
1215 out2:
1216 	if (status == WALK_ERR)
1217 		mdb_free(cp, csize);
1218 
1219 	return (status);
1220 }
1221 
1222 int
1223 kmem_walk_step(mdb_walk_state_t *wsp)
1224 {
1225 	kmem_walk_t *kmw = wsp->walk_data;
1226 	int type = kmw->kmw_type;
1227 	kmem_cache_t *cp = kmw->kmw_cp;
1228 
1229 	void **maglist = kmw->kmw_maglist;
1230 	int magcnt = kmw->kmw_count;
1231 
1232 	uintptr_t chunksize, slabsize;
1233 	uintptr_t addr;
1234 	const kmem_slab_t *sp;
1235 	const kmem_bufctl_t *bcp;
1236 	kmem_bufctl_t bc;
1237 
1238 	int chunks;
1239 	char *kbase;
1240 	void *buf;
1241 	int i, ret;
1242 
1243 	char *valid, *ubase;
1244 
1245 	/*
1246 	 * first, handle the 'kmem_hash' layered walk case
1247 	 */
1248 	if (type & KM_HASH) {
1249 		/*
1250 		 * We have a buffer which has been allocated out of the
1251 		 * global layer. We need to make sure that it's not
1252 		 * actually sitting in a magazine before we report it as
1253 		 * an allocated buffer.
1254 		 */
1255 		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1256 
1257 		if (magcnt > 0 &&
1258 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1259 		    addrcmp) != NULL)
1260 			return (WALK_NEXT);
1261 
1262 		if (type & KM_BUFCTL)
1263 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1264 
1265 		return (kmem_walk_callback(wsp, (uintptr_t)buf));
1266 	}
1267 
1268 	ret = WALK_NEXT;
1269 
1270 	addr = kmw->kmw_addr;
1271 
1272 	/*
1273 	 * If we're walking freed buffers, report everything in the
1274 	 * magazine layer before processing the first slab.
1275 	 */
1276 	if ((type & KM_FREE) && magcnt != 0) {
1277 		kmw->kmw_count = 0;		/* only do this once */
1278 		for (i = 0; i < magcnt; i++) {
1279 			buf = maglist[i];
1280 
1281 			if (type & KM_BUFCTL) {
1282 				uintptr_t out;
1283 
1284 				if (cp->cache_flags & KMF_BUFTAG) {
1285 					kmem_buftag_t *btp;
1286 					kmem_buftag_t tag;
1287 
1288 					/* LINTED - alignment */
1289 					btp = KMEM_BUFTAG(cp, buf);
1290 					if (mdb_vread(&tag, sizeof (tag),
1291 					    (uintptr_t)btp) == -1) {
1292 						mdb_warn("reading buftag for "
1293 						    "%p at %p", buf, btp);
1294 						continue;
1295 					}
1296 					out = (uintptr_t)tag.bt_bufctl;
1297 				} else {
1298 					if (kmem_hash_lookup(cp, addr, buf,
1299 					    &out) == -1)
1300 						continue;
1301 				}
1302 				ret = bufctl_walk_callback(cp, wsp, out);
1303 			} else {
1304 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1305 			}
1306 
1307 			if (ret != WALK_NEXT)
1308 				return (ret);
1309 		}
1310 	}
1311 
1312 	/*
1313 	 * If they want constructed buffers, we're finished, since the
1314 	 * magazine layer holds them all.
1315 	 */
1316 	if (type & KM_CONSTRUCTED)
1317 		return (WALK_DONE);
1318 
1319 	/*
1320 	 * Handle the buffers in the current slab
1321 	 */
1322 	chunksize = cp->cache_chunksize;
1323 	slabsize = cp->cache_slabsize;
1324 
1325 	sp = wsp->walk_layer;
1326 	chunks = sp->slab_chunks;
1327 	kbase = sp->slab_base;
1328 
1329 	dprintf(("kbase is %p\n", kbase));
1330 
1331 	if (!(cp->cache_flags & KMF_HASH)) {
1332 		valid = kmw->kmw_valid;
1333 		ubase = kmw->kmw_ubase;
1334 
1335 		if (mdb_vread(ubase, chunks * chunksize,
1336 		    (uintptr_t)kbase) == -1) {
1337 			mdb_warn("failed to read slab contents at %p", kbase);
1338 			return (WALK_ERR);
1339 		}
1340 
1341 		/*
1342 		 * Set up the valid map as fully allocated -- we'll punch
1343 		 * out the freelist.
1344 		 */
1345 		if (type & KM_ALLOCATED)
1346 			(void) memset(valid, 1, chunks);
1347 	} else {
1348 		valid = NULL;
1349 		ubase = NULL;
1350 	}
1351 
1352 	/*
1353 	 * walk the slab's freelist
1354 	 */
1355 	bcp = sp->slab_head;
1356 
1357 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1358 
1359 	/*
1360 	 * Since we could be in the middle of allocating a buffer, the slab's
1361 	 * refcnt could be one higher than it ought to be.  So we check one
1362 	 * entry further down the freelist than the count allows.
1363 	 */
1364 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1365 		uint_t ndx;
1366 
1367 		dprintf(("bcp is %p\n", bcp));
1368 
1369 		if (bcp == NULL) {
1370 			if (i == chunks)
1371 				break;
1372 			mdb_warn(
1373 			    "slab %p in cache %p freelist too short by %d\n",
1374 			    sp, addr, chunks - i);
1375 			break;
1376 		}
1377 
1378 		if (cp->cache_flags & KMF_HASH) {
1379 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1380 				mdb_warn("failed to read bufctl ptr at %p",
1381 				    bcp);
1382 				break;
1383 			}
1384 			buf = bc.bc_addr;
1385 		} else {
1386 			/*
1387 			 * Otherwise the buffer is in the slab which
1388 			 * we've read in;  we just need to determine
1389 			 * its offset in the slab to find the
1390 			 * kmem_bufctl_t.
1391 			 */
1392 			bc = *((kmem_bufctl_t *)
1393 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1394 			    (uintptr_t)ubase));
1395 
1396 			buf = KMEM_BUF(cp, bcp);
1397 		}
1398 
1399 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1400 
1401 		if (ndx > slabsize / cp->cache_bufsize) {
1402 			/*
1403 			 * This is very wrong; we have managed to find
1404 			 * a buffer in the slab which shouldn't
1405 			 * actually be here.  Emit a warning, and
1406 			 * try to continue.
1407 			 */
1408 			mdb_warn("buf %p is out of range for "
1409 			    "slab %p, cache %p\n", buf, sp, addr);
1410 		} else if (type & KM_ALLOCATED) {
1411 			/*
1412 			 * we have found a buffer on the slab's freelist;
1413 			 * clear its entry
1414 			 */
1415 			valid[ndx] = 0;
1416 		} else {
1417 			/*
1418 			 * Report this freed buffer
1419 			 */
1420 			if (type & KM_BUFCTL) {
1421 				ret = bufctl_walk_callback(cp, wsp,
1422 				    (uintptr_t)bcp);
1423 			} else {
1424 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1425 			}
1426 			if (ret != WALK_NEXT)
1427 				return (ret);
1428 		}
1429 
1430 		bcp = bc.bc_next;
1431 	}
1432 
1433 	if (bcp != NULL) {
1434 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1435 		    sp, addr, bcp));
1436 	}
1437 
1438 	/*
1439 	 * If we are walking freed buffers, the loop above handled reporting
1440 	 * them.
1441 	 */
1442 	if (type & KM_FREE)
1443 		return (WALK_NEXT);
1444 
1445 	if (type & KM_BUFCTL) {
1446 		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1447 		    "cache %p\n", addr);
1448 		return (WALK_ERR);
1449 	}
1450 
1451 	/*
1452 	 * Report allocated buffers, skipping buffers in the magazine layer.
1453 	 * We only get this far for small-slab caches.
1454 	 */
1455 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1456 		buf = (char *)kbase + i * chunksize;
1457 
1458 		if (!valid[i])
1459 			continue;		/* on slab freelist */
1460 
1461 		if (magcnt > 0 &&
1462 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1463 		    addrcmp) != NULL)
1464 			continue;		/* in magazine layer */
1465 
1466 		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1467 	}
1468 	return (ret);
1469 }
1470 
1471 void
1472 kmem_walk_fini(mdb_walk_state_t *wsp)
1473 {
1474 	kmem_walk_t *kmw = wsp->walk_data;
1475 	uintptr_t chunksize;
1476 	uintptr_t slabsize;
1477 
1478 	if (kmw == NULL)
1479 		return;
1480 
1481 	if (kmw->kmw_maglist != NULL)
1482 		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1483 
1484 	chunksize = kmw->kmw_cp->cache_chunksize;
1485 	slabsize = kmw->kmw_cp->cache_slabsize;
1486 
1487 	if (kmw->kmw_valid != NULL)
1488 		mdb_free(kmw->kmw_valid, slabsize / chunksize);
1489 	if (kmw->kmw_ubase != NULL)
1490 		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1491 
1492 	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1493 	mdb_free(kmw, sizeof (kmem_walk_t));
1494 }
1495 
1496 /*ARGSUSED*/
1497 static int
1498 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1499 {
1500 	/*
1501 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1502 	 * memory in other caches.  This can be a little confusing, so we
1503 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1504 	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1505 	 */
1506 	if (c->cache_cflags & KMC_NOTOUCH)
1507 		return (WALK_NEXT);
1508 
1509 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1510 	    wsp->walk_cbdata, addr) == -1)
1511 		return (WALK_DONE);
1512 
1513 	return (WALK_NEXT);
1514 }
1515 
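/*
 * KMEM_WALK_ALL() turns a global walk request into a per-cache walk of the
 * named walker over every kmem cache, and returns from the enclosing init
 * function (note the embedded return statements in the macro body).
 */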
1516 #define	KMEM_WALK_ALL(name, wsp) { \
1517 	wsp->walk_data = (name); \
1518 	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1519 		return (WALK_ERR); \
1520 	return (WALK_DONE); \
1521 }
1522 
1523 int
1524 kmem_walk_init(mdb_walk_state_t *wsp)
1525 {
1526 	if (wsp->walk_arg != NULL)
1527 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1528 
1529 	if (wsp->walk_addr == NULL)
1530 		KMEM_WALK_ALL("kmem", wsp);
1531 	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1532 }
1533 
1534 int
1535 bufctl_walk_init(mdb_walk_state_t *wsp)
1536 {
1537 	if (wsp->walk_addr == NULL)
1538 		KMEM_WALK_ALL("bufctl", wsp);
1539 	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1540 }
1541 
1542 int
1543 freemem_walk_init(mdb_walk_state_t *wsp)
1544 {
1545 	if (wsp->walk_addr == NULL)
1546 		KMEM_WALK_ALL("freemem", wsp);
1547 	return (kmem_walk_init_common(wsp, KM_FREE));
1548 }
1549 
1550 int
1551 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1552 {
1553 	if (wsp->walk_addr == NULL)
1554 		KMEM_WALK_ALL("freemem_constructed", wsp);
1555 	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1556 }
1557 
1558 int
1559 freectl_walk_init(mdb_walk_state_t *wsp)
1560 {
1561 	if (wsp->walk_addr == NULL)
1562 		KMEM_WALK_ALL("freectl", wsp);
1563 	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1564 }
1565 
1566 int
1567 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1568 {
1569 	if (wsp->walk_addr == NULL)
1570 		KMEM_WALK_ALL("freectl_constructed", wsp);
1571 	return (kmem_walk_init_common(wsp,
1572 	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1573 }
1574 
1575 typedef struct bufctl_history_walk {
1576 	void		*bhw_next;
1577 	kmem_cache_t	*bhw_cache;
1578 	kmem_slab_t	*bhw_slab;
1579 	hrtime_t	bhw_timestamp;
1580 } bufctl_history_walk_t;
1581 
1582 int
1583 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1584 {
1585 	bufctl_history_walk_t *bhw;
1586 	kmem_bufctl_audit_t bc;
1587 	kmem_bufctl_audit_t bcn;
1588 
1589 	if (wsp->walk_addr == NULL) {
1590 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1591 		return (WALK_ERR);
1592 	}
1593 
1594 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1595 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1596 		return (WALK_ERR);
1597 	}
1598 
1599 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1600 	bhw->bhw_timestamp = 0;
1601 	bhw->bhw_cache = bc.bc_cache;
1602 	bhw->bhw_slab = bc.bc_slab;
1603 
1604 	/*
1605 	 * sometimes the first log entry matches the base bufctl;  in that
1606 	 * case, skip the base bufctl.
1607 	 */
1608 	if (bc.bc_lastlog != NULL &&
1609 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1610 	    bc.bc_addr == bcn.bc_addr &&
1611 	    bc.bc_cache == bcn.bc_cache &&
1612 	    bc.bc_slab == bcn.bc_slab &&
1613 	    bc.bc_timestamp == bcn.bc_timestamp &&
1614 	    bc.bc_thread == bcn.bc_thread)
1615 		bhw->bhw_next = bc.bc_lastlog;
1616 	else
1617 		bhw->bhw_next = (void *)wsp->walk_addr;
1618 
1619 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1620 	wsp->walk_data = bhw;
1621 
1622 	return (WALK_NEXT);
1623 }
1624 
1625 int
1626 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1627 {
1628 	bufctl_history_walk_t *bhw = wsp->walk_data;
1629 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1630 	uintptr_t baseaddr = wsp->walk_addr;
1631 	kmem_bufctl_audit_t bc;
1632 
1633 	if (addr == NULL)
1634 		return (WALK_DONE);
1635 
1636 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1637 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1638 		return (WALK_ERR);
1639 	}
1640 
1641 	/*
1642 	 * The bufctl is only valid if the address, cache, and slab are
1643 	 * correct.  We also check that the timestamp is decreasing, to
1644 	 * prevent infinite loops.
1645 	 */
1646 	if ((uintptr_t)bc.bc_addr != baseaddr ||
1647 	    bc.bc_cache != bhw->bhw_cache ||
1648 	    bc.bc_slab != bhw->bhw_slab ||
1649 	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1650 		return (WALK_DONE);
1651 
1652 	bhw->bhw_next = bc.bc_lastlog;
1653 	bhw->bhw_timestamp = bc.bc_timestamp;
1654 
1655 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1656 }
1657 
1658 void
1659 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1660 {
1661 	bufctl_history_walk_t *bhw = wsp->walk_data;
1662 
1663 	mdb_free(bhw, sizeof (*bhw));
1664 }
1665 
1666 typedef struct kmem_log_walk {
1667 	kmem_bufctl_audit_t *klw_base;
1668 	kmem_bufctl_audit_t **klw_sorted;
1669 	kmem_log_header_t klw_lh;
1670 	size_t klw_size;
1671 	size_t klw_maxndx;
1672 	size_t klw_ndx;
1673 } kmem_log_walk_t;
1674 
1675 int
1676 kmem_log_walk_init(mdb_walk_state_t *wsp)
1677 {
1678 	uintptr_t lp = wsp->walk_addr;
1679 	kmem_log_walk_t *klw;
1680 	kmem_log_header_t *lhp;
1681 	int maxndx, i, j, k;
1682 
1683 	/*
1684 	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
1685 	 * read the log whose kmem_log_header_t is stored at walk_addr.
1686 	 */
1687 	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1688 		mdb_warn("failed to read 'kmem_transaction_log'");
1689 		return (WALK_ERR);
1690 	}
1691 
1692 	if (lp == NULL) {
1693 		mdb_warn("log is disabled\n");
1694 		return (WALK_ERR);
1695 	}
1696 
1697 	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1698 	lhp = &klw->klw_lh;
1699 
1700 	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1701 		mdb_warn("failed to read log header at %p", lp);
1702 		mdb_free(klw, sizeof (kmem_log_walk_t));
1703 		return (WALK_ERR);
1704 	}
1705 
1706 	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1707 	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1708 	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1709 
1710 	if (mdb_vread(klw->klw_base, klw->klw_size,
1711 	    (uintptr_t)lhp->lh_base) == -1) {
1712 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1713 		mdb_free(klw->klw_base, klw->klw_size);
1714 		mdb_free(klw, sizeof (kmem_log_walk_t));
1715 		return (WALK_ERR);
1716 	}
1717 
1718 	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1719 	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1720 
1721 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1722 		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1723 		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1724 
1725 		for (j = 0; j < maxndx; j++)
1726 			klw->klw_sorted[k++] = &chunk[j];
1727 	}
1728 
1729 	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1730 	    (int(*)(const void *, const void *))bufctlcmp);
1731 
1732 	klw->klw_maxndx = k;
1733 	wsp->walk_data = klw;
1734 
1735 	return (WALK_NEXT);
1736 }
1737 
1738 int
1739 kmem_log_walk_step(mdb_walk_state_t *wsp)
1740 {
1741 	kmem_log_walk_t *klw = wsp->walk_data;
1742 	kmem_bufctl_audit_t *bcp;
1743 
1744 	if (klw->klw_ndx == klw->klw_maxndx)
1745 		return (WALK_DONE);
1746 
1747 	bcp = klw->klw_sorted[klw->klw_ndx++];
1748 
1749 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1750 	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1751 }
1752 
1753 void
1754 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1755 {
1756 	kmem_log_walk_t *klw = wsp->walk_data;
1757 
1758 	mdb_free(klw->klw_base, klw->klw_size);
1759 	mdb_free(klw->klw_sorted, klw->klw_maxndx *
1760 	    sizeof (kmem_bufctl_audit_t *));
1761 	mdb_free(klw, sizeof (kmem_log_walk_t));
1762 }
1763 
1764 typedef struct allocdby_bufctl {
1765 	uintptr_t abb_addr;
1766 	hrtime_t abb_ts;
1767 } allocdby_bufctl_t;
1768 
1769 typedef struct allocdby_walk {
1770 	const char *abw_walk;
1771 	uintptr_t abw_thread;
1772 	size_t abw_nbufs;
1773 	size_t abw_size;
1774 	allocdby_bufctl_t *abw_buf;
1775 	size_t abw_ndx;
1776 } allocdby_walk_t;
1777 
1778 int
1779 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1780     allocdby_walk_t *abw)
1781 {
1782 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1783 		return (WALK_NEXT);
1784 
1785 	if (abw->abw_nbufs == abw->abw_size) {
1786 		allocdby_bufctl_t *buf;
1787 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1788 
1789 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1790 
1791 		bcopy(abw->abw_buf, buf, oldsize);
1792 		mdb_free(abw->abw_buf, oldsize);
1793 
1794 		abw->abw_size <<= 1;
1795 		abw->abw_buf = buf;
1796 	}
1797 
1798 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1799 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1800 	abw->abw_nbufs++;
1801 
1802 	return (WALK_NEXT);
1803 }
1804 
1805 /*ARGSUSED*/
1806 int
1807 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1808 {
1809 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1810 	    abw, addr) == -1) {
1811 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1812 		return (WALK_DONE);
1813 	}
1814 
1815 	return (WALK_NEXT);
1816 }
1817 
1818 static int
1819 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1820 {
1821 	if (lhs->abb_ts < rhs->abb_ts)
1822 		return (1);
1823 	if (lhs->abb_ts > rhs->abb_ts)
1824 		return (-1);
1825 	return (0);
1826 }
1827 
1828 static int
1829 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1830 {
1831 	allocdby_walk_t *abw;
1832 
1833 	if (wsp->walk_addr == NULL) {
1834 		mdb_warn("allocdby walk doesn't support global walks\n");
1835 		return (WALK_ERR);
1836 	}
1837 
1838 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1839 
1840 	abw->abw_thread = wsp->walk_addr;
1841 	abw->abw_walk = walk;
1842 	abw->abw_size = 128;	/* something reasonable */
1843 	abw->abw_buf =
1844 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1845 
1846 	wsp->walk_data = abw;
1847 
1848 	if (mdb_walk("kmem_cache",
1849 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1850 		mdb_warn("couldn't walk kmem_cache");
1851 		allocdby_walk_fini(wsp);
1852 		return (WALK_ERR);
1853 	}
1854 
1855 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1856 	    (int(*)(const void *, const void *))allocdby_cmp);
1857 
1858 	return (WALK_NEXT);
1859 }
1860 
1861 int
1862 allocdby_walk_init(mdb_walk_state_t *wsp)
1863 {
1864 	return (allocdby_walk_init_common(wsp, "bufctl"));
1865 }
1866 
1867 int
1868 freedby_walk_init(mdb_walk_state_t *wsp)
1869 {
1870 	return (allocdby_walk_init_common(wsp, "freectl"));
1871 }
1872 
1873 int
1874 allocdby_walk_step(mdb_walk_state_t *wsp)
1875 {
1876 	allocdby_walk_t *abw = wsp->walk_data;
1877 	kmem_bufctl_audit_t bc;
1878 	uintptr_t addr;
1879 
1880 	if (abw->abw_ndx == abw->abw_nbufs)
1881 		return (WALK_DONE);
1882 
1883 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1884 
1885 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1886 		mdb_warn("couldn't read bufctl at %p", addr);
1887 		return (WALK_DONE);
1888 	}
1889 
1890 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1891 }
1892 
1893 void
1894 allocdby_walk_fini(mdb_walk_state_t *wsp)
1895 {
1896 	allocdby_walk_t *abw = wsp->walk_data;
1897 
1898 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1899 	mdb_free(abw, sizeof (allocdby_walk_t));
1900 }
1901 
1902 /*ARGSUSED*/
1903 int
1904 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
1905 {
1906 	char c[MDB_SYM_NAMLEN];
1907 	GElf_Sym sym;
1908 	int i;
1909 
1910 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1911 	for (i = 0; i < bcp->bc_depth; i++) {
1912 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1913 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1914 			continue;
1915 		if (strncmp(c, "kmem_", 5) == 0)
1916 			continue;
1917 		mdb_printf("%s+0x%lx",
1918 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1919 		break;
1920 	}
1921 	mdb_printf("\n");
1922 
1923 	return (WALK_NEXT);
1924 }
1925 
1926 static int
1927 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1928 {
1929 	if (!(flags & DCMD_ADDRSPEC))
1930 		return (DCMD_USAGE);
1931 
1932 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1933 
1934 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1935 		mdb_warn("can't walk '%s' for %p", w, addr);
1936 		return (DCMD_ERR);
1937 	}
1938 
1939 	return (DCMD_OK);
1940 }
1941 
1942 /*ARGSUSED*/
1943 int
1944 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1945 {
1946 	return (allocdby_common(addr, flags, "allocdby"));
1947 }
1948 
1949 /*ARGSUSED*/
1950 int
1951 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1952 {
1953 	return (allocdby_common(addr, flags, "freedby"));
1954 }
1955 
1956 /*
1957  * Return a string describing the address in relation to the given thread's
1958  * stack.
1959  *
1960  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
1961  *
1962  * - If the address is above the stack pointer, return an empty string
1963  *   signifying that the address is active.
1964  *
1965  * - If the address is below the stack pointer, and the thread is not on proc,
1966  *   return " (below sp)".
1967  *
1968  * - If the address is below the stack pointer, and the thread is on proc,
1969  *   return " (possibly below sp)".  Depending on context, we may or may not
1970  *   have an accurate t_sp.
1971  */
1972 static const char *
1973 stack_active(const kthread_t *t, uintptr_t addr)
1974 {
1975 	uintptr_t panicstk;
1976 	GElf_Sym sym;
1977 
1978 	if (t->t_state == TS_FREE)
1979 		return (" (inactive interrupt thread)");
1980 
1981 	/*
1982 	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
1983 	 * no longer relates to the thread's real stack.
1984 	 */
1985 	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
1986 		panicstk = (uintptr_t)sym.st_value;
1987 
1988 		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
1989 			return ("");
1990 	}
1991 
1992 	if (addr >= t->t_sp + STACK_BIAS)
1993 		return ("");
1994 
1995 	if (t->t_state == TS_ONPROC)
1996 		return (" (possibly below sp)");
1997 
1998 	return (" (below sp)");
1999 }
2000 
2001 typedef struct whatis {
2002 	uintptr_t w_addr;
2003 	const kmem_cache_t *w_cache;
2004 	const vmem_t *w_vmem;
2005 	size_t w_slab_align;
2006 	int w_slab_found;
2007 	int w_found;
2008 	int w_kmem_lite_count;
2009 	uint_t w_verbose;
2010 	uint_t w_freemem;
2011 	uint_t w_all;
2012 	uint_t w_bufctl;
2013 	uint_t w_idspace;
2014 } whatis_t;
2015 
2016 static void
2017 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
2018 {
2019 	/* LINTED pointer cast may result in improper alignment */
2020 	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr);
2021 	intptr_t stat;
2022 	int count = 0;
2023 	int i;
2024 	pc_t callers[16];
2025 
2026 	if (w->w_cache->cache_flags & KMF_REDZONE) {
2027 		kmem_buftag_t bt;
2028 
2029 		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2030 			goto done;
2031 
2032 		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2033 
2034 		if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2035 			goto done;
2036 
2037 		/*
2038 		 * provide the bufctl ptr if it has useful information
2039 		 */
2040 		if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT))
2041 			baddr = (uintptr_t)bt.bt_bufctl;
2042 
2043 		if (w->w_cache->cache_flags & KMF_LITE) {
2044 			count = w->w_kmem_lite_count;
2045 
2046 			if (count * sizeof (pc_t) > sizeof (callers))
2047 				count = 0;
2048 
2049 			if (count > 0 &&
2050 			    mdb_vread(callers, count * sizeof (pc_t),
2051 			    btaddr +
2052 			    offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2053 				count = 0;
2054 
2055 			/*
2056 			 * skip unused callers
2057 			 */
2058 			while (count > 0 && callers[count - 1] ==
2059 			    (pc_t)KMEM_UNINITIALIZED_PATTERN)
2060 				count--;
2061 		}
2062 	}
2063 
2064 done:
2065 	if (baddr == 0)
2066 		mdb_printf("%p is %p+%p, %s from %s\n",
2067 		    w->w_addr, addr, w->w_addr - addr,
2068 		    w->w_freemem == FALSE ? "allocated" : "freed",
2069 		    w->w_cache->cache_name);
2070 	else
2071 		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
2072 		    w->w_addr, addr, w->w_addr - addr, baddr,
2073 		    w->w_freemem == FALSE ? "allocated" : "freed",
2074 		    w->w_cache->cache_name);
2075 
2076 	if (count > 0) {
2077 		mdb_inc_indent(8);
2078 		mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"",
2079 		    callers[0], (count != 1)? ", ":"\n");
2080 		for (i = 1; i < count; i++)
2081 			mdb_printf("%a%s", callers[i],
2082 			    (i + 1 < count)? ", ":"\n");
2083 		mdb_dec_indent(8);
2084 	}
2085 }
2086 
2087 /*ARGSUSED*/
2088 static int
2089 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w)
2090 {
2091 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2092 		return (WALK_NEXT);
2093 
2094 	whatis_print_kmem(addr, 0, w);
2095 	w->w_found++;
2096 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2097 }
2098 
2099 static int
2100 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
2101 {
2102 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
2103 		return (WALK_NEXT);
2104 
2105 	mdb_printf("%p is %p+%p ", w->w_addr,
2106 	    vs->vs_start, w->w_addr - vs->vs_start);
2107 
2108 	/*
2109 	 * Always provide the vmem_seg pointer if it has a stack trace.
2110 	 */
2111 	if (w->w_bufctl == TRUE ||
2112 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
2113 		mdb_printf("(vmem_seg %p) ", addr);
2114 	}
2115 
2116 	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
2117 	    "freed " : "", w->w_vmem->vm_name);
2118 
2119 	w->w_found++;
2120 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2121 }
2122 
2123 static int
2124 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
2125 {
2126 	const char *nm = vmem->vm_name;
2127 	w->w_vmem = vmem;
2128 	w->w_freemem = FALSE;
2129 
2130 	if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
2131 		return (WALK_NEXT);
2132 
2133 	if (w->w_verbose)
2134 		mdb_printf("Searching vmem arena %s...\n", nm);
2135 
2136 	if (mdb_pwalk("vmem_alloc",
2137 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2138 		mdb_warn("can't walk vmem seg for %p", addr);
2139 		return (WALK_NEXT);
2140 	}
2141 
2142 	if (w->w_found && w->w_all == FALSE)
2143 		return (WALK_DONE);
2144 
2145 	if (w->w_verbose)
2146 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
2147 
2148 	w->w_freemem = TRUE;
2149 
2150 	if (mdb_pwalk("vmem_free",
2151 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2152 		mdb_warn("can't walk vmem seg for %p", addr);
2153 		return (WALK_NEXT);
2154 	}
2155 
2156 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2157 }
2158 
2159 /*ARGSUSED*/
2160 static int
2161 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w)
2162 {
2163 	uintptr_t addr;
2164 
2165 	if (bcp == NULL)
2166 		return (WALK_NEXT);
2167 
2168 	addr = (uintptr_t)bcp->bc_addr;
2169 
2170 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2171 		return (WALK_NEXT);
2172 
2173 	whatis_print_kmem(addr, baddr, w);
2174 	w->w_found++;
2175 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2176 }
2177 
2178 /*ARGSUSED*/
2179 static int
2180 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w)
2181 {
2182 	uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align);
2183 
2184 	if ((w->w_addr - base) >= w->w_cache->cache_slabsize)
2185 		return (WALK_NEXT);
2186 
2187 	w->w_slab_found++;
2188 	return (WALK_DONE);
2189 }
2190 
2191 static int
2192 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2193 {
2194 	char *walk, *freewalk;
2195 	mdb_walk_cb_t func;
2196 	vmem_t *vmp = c->cache_arena;
2197 
2198 	if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
2199 		return (WALK_NEXT);
2200 
2201 	if (w->w_bufctl == FALSE) {
2202 		walk = "kmem";
2203 		freewalk = "freemem";
2204 		func = (mdb_walk_cb_t)whatis_walk_kmem;
2205 	} else {
2206 		walk = "bufctl";
2207 		freewalk = "freectl";
2208 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2209 	}
2210 
2211 	w->w_cache = c;
2212 
2213 	if (w->w_verbose)
2214 		mdb_printf("Searching %s's slabs...\n", c->cache_name);
2215 
2216 	/*
2217 	 * Verify that the address is in one of the cache's slabs.  If not,
2218 	 * we can skip the more expensive walkers.  (this is purely a
2219 	 * heuristic -- as long as there are no false-negatives, we'll be fine)
2220 	 *
2221 	 * We try to get the cache's arena's quantum, since to accurately
2222 	 * get the base of a slab, you have to align it to the quantum.  If
2223 	 * it doesn't look sensible, we fall back to not aligning.
2224 	 */
2225 	if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align),
2226 	    (uintptr_t)&vmp->vm_quantum) == -1) {
2227 		mdb_warn("unable to read %p->cache_arena->vm_quantum", c);
2228 		w->w_slab_align = 1;
2229 	}
2230 
2231 	if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 ||
2232 	    (w->w_slab_align & (w->w_slab_align - 1))) {
2233 		mdb_warn("%p's arena has invalid quantum (0x%p)\n", c,
2234 		    w->w_slab_align);
2235 		w->w_slab_align = 1;
2236 	}
2237 
2238 	w->w_slab_found = 0;
2239 	if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w,
2240 	    addr) == -1) {
2241 		mdb_warn("can't find kmem_slab walker");
2242 		return (WALK_DONE);
2243 	}
2244 	if (w->w_slab_found == 0)
2245 		return (WALK_NEXT);
2246 
2247 	if (c->cache_flags & KMF_LITE) {
2248 		if (mdb_readvar(&w->w_kmem_lite_count,
2249 		    "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16)
2250 			w->w_kmem_lite_count = 0;
2251 	}
2252 
2253 	if (w->w_verbose)
2254 		mdb_printf("Searching %s...\n", c->cache_name);
2255 
2256 	w->w_freemem = FALSE;
2257 
2258 	if (mdb_pwalk(walk, func, w, addr) == -1) {
2259 		mdb_warn("can't find %s walker", walk);
2260 		return (WALK_DONE);
2261 	}
2262 
2263 	if (w->w_found && w->w_all == FALSE)
2264 		return (WALK_DONE);
2265 
2266 	/*
2267 	 * We have searched for allocated memory; now search for freed memory.
2268 	 */
2269 	if (w->w_verbose)
2270 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2271 
2272 	w->w_freemem = TRUE;
2273 
2274 	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
2275 		mdb_warn("can't find %s walker", freewalk);
2276 		return (WALK_DONE);
2277 	}
2278 
2279 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2280 }
2281 
2282 static int
2283 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2284 {
2285 	if (c->cache_cflags & KMC_NOTOUCH)
2286 		return (WALK_NEXT);
2287 
2288 	return (whatis_walk_cache(addr, c, w));
2289 }
2290 
2291 static int
2292 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2293 {
2294 	if (!(c->cache_cflags & KMC_NOTOUCH))
2295 		return (WALK_NEXT);
2296 
2297 	return (whatis_walk_cache(addr, c, w));
2298 }
2299 
2300 static int
2301 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w)
2302 {
2303 	/*
2304 	 * Often, one calls ::whatis on an address from a thread structure.
	 * We use this opportunity to short-circuit this case...
2306 	 */
2307 	if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) {
2308 		mdb_printf("%p is %p+%p, allocated as a thread structure\n",
2309 		    w->w_addr, addr, w->w_addr - addr);
2310 		w->w_found++;
2311 		return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2312 	}
2313 
2314 	if (w->w_addr < (uintptr_t)t->t_stkbase ||
2315 	    w->w_addr > (uintptr_t)t->t_stk)
2316 		return (WALK_NEXT);
2317 
2318 	if (t->t_stkbase == NULL)
2319 		return (WALK_NEXT);
2320 
2321 	mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr,
2322 	    stack_active(t, w->w_addr));
2323 
2324 	w->w_found++;
2325 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2326 }
2327 
2328 static int
2329 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w)
2330 {
2331 	struct module mod;
2332 	char name[MODMAXNAMELEN], *where;
2333 	char c[MDB_SYM_NAMLEN];
2334 	Shdr shdr;
2335 	GElf_Sym sym;
2336 
2337 	if (m->mod_mp == NULL)
2338 		return (WALK_NEXT);
2339 
2340 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2341 		mdb_warn("couldn't read modctl %p's module", addr);
2342 		return (WALK_NEXT);
2343 	}
2344 
2345 	if (w->w_addr >= (uintptr_t)mod.text &&
2346 	    w->w_addr < (uintptr_t)mod.text + mod.text_size) {
2347 		where = "text segment";
2348 		goto found;
2349 	}
2350 
2351 	if (w->w_addr >= (uintptr_t)mod.data &&
2352 	    w->w_addr < (uintptr_t)mod.data + mod.data_size) {
2353 		where = "data segment";
2354 		goto found;
2355 	}
2356 
2357 	if (w->w_addr >= (uintptr_t)mod.bss &&
2358 	    w->w_addr < (uintptr_t)mod.bss + mod.bss_size) {
2359 		where = "bss";
2360 		goto found;
2361 	}
2362 
2363 	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2364 		mdb_warn("couldn't read symbol header for %p's module", addr);
2365 		return (WALK_NEXT);
2366 	}
2367 
2368 	if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr <
2369 	    (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) {
2370 		where = "symtab";
2371 		goto found;
2372 	}
2373 
2374 	if (w->w_addr >= (uintptr_t)mod.symspace &&
2375 	    w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) {
2376 		where = "symspace";
2377 		goto found;
2378 	}
2379 
2380 	return (WALK_NEXT);
2381 
2382 found:
2383 	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2384 		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2385 
2386 	mdb_printf("%p is ", w->w_addr);
2387 
2388 	/*
2389 	 * If we found this address in a module, then there's a chance that
2390 	 * it's actually a named symbol.  Try the symbol lookup.
2391 	 */
2392 	if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c),
2393 	    &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value &&
2394 	    w->w_addr < (uintptr_t)sym.st_value + sym.st_size) {
2395 		mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value);
2396 	}
2397 
2398 	mdb_printf("in %s's %s\n", name, where);
2399 
2400 	w->w_found++;
2401 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2402 }
2403 
2404 /*ARGSUSED*/
2405 static int
2406 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w)
2407 {
2408 	static int machsize = 0;
2409 	mdb_ctf_id_t id;
2410 
2411 	if (machsize == 0) {
2412 		if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0)
2413 			machsize = mdb_ctf_type_size(id);
2414 		else {
2415 			mdb_warn("could not get size of page_t");
2416 			machsize = sizeof (page_t);
2417 		}
2418 	}
2419 
2420 	if (w->w_addr < addr || w->w_addr >= addr + machsize)
2421 		return (WALK_NEXT);
2422 
2423 	mdb_printf("%p is %p+%p, allocated as a page structure\n",
2424 	    w->w_addr, addr, w->w_addr - addr);
2425 
2426 	w->w_found++;
2427 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2428 }
2429 
2430 int
2431 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2432 {
2433 	whatis_t w;
2434 
2435 	if (!(flags & DCMD_ADDRSPEC))
2436 		return (DCMD_USAGE);
2437 
2438 	w.w_verbose = FALSE;
2439 	w.w_bufctl = FALSE;
2440 	w.w_all = FALSE;
2441 	w.w_idspace = FALSE;
2442 
2443 	if (mdb_getopts(argc, argv,
2444 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
2445 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
2446 	    'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace,
2447 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
2448 		return (DCMD_USAGE);
2449 
2450 	w.w_addr = addr;
2451 	w.w_found = 0;
2452 
2453 	if (w.w_verbose)
2454 		mdb_printf("Searching modules...\n");
2455 
2456 	if (!w.w_idspace) {
2457 		if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w)
2458 		    == -1) {
2459 			mdb_warn("couldn't find modctl walker");
2460 			return (DCMD_ERR);
2461 		}
2462 
2463 		if (w.w_found && w.w_all == FALSE)
2464 			return (DCMD_OK);
2465 
2466 		/*
2467 		 * Now search all thread stacks.  Yes, this is a little weak; we
2468 		 * can save a lot of work by first checking to see if the
2469 		 * address is in segkp vs. segkmem.  But hey, computers are
2470 		 * fast.
2471 		 */
2472 		if (w.w_verbose)
2473 			mdb_printf("Searching threads...\n");
2474 
2475 		if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w)
2476 		    == -1) {
2477 			mdb_warn("couldn't find thread walker");
2478 			return (DCMD_ERR);
2479 		}
2480 
2481 		if (w.w_found && w.w_all == FALSE)
2482 			return (DCMD_OK);
2483 
2484 		if (w.w_verbose)
2485 			mdb_printf("Searching page structures...\n");
2486 
2487 		if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w)
2488 		    == -1) {
2489 			mdb_warn("couldn't find page walker");
2490 			return (DCMD_ERR);
2491 		}
2492 
2493 		if (w.w_found && w.w_all == FALSE)
2494 			return (DCMD_OK);
2495 	}
2496 
2497 	if (mdb_walk("kmem_cache",
2498 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2499 		mdb_warn("couldn't find kmem_cache walker");
2500 		return (DCMD_ERR);
2501 	}
2502 
2503 	if (w.w_found && w.w_all == FALSE)
2504 		return (DCMD_OK);
2505 
2506 	if (mdb_walk("kmem_cache",
2507 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2508 		mdb_warn("couldn't find kmem_cache walker");
2509 		return (DCMD_ERR);
2510 	}
2511 
2512 	if (w.w_found && w.w_all == FALSE)
2513 		return (DCMD_OK);
2514 
2515 	if (mdb_walk("vmem_postfix",
2516 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2517 		mdb_warn("couldn't find vmem_postfix walker");
2518 		return (DCMD_ERR);
2519 	}
2520 
2521 	if (w.w_found == 0)
2522 		mdb_printf("%p is unknown\n", addr);
2523 
2524 	return (DCMD_OK);
2525 }
2526 
2527 void
2528 whatis_help(void)
2529 {
2530 	mdb_printf(
2531 	    "Given a virtual address, attempt to determine where it came\n"
2532 	    "from.\n"
2533 	    "\n"
2534 	    "\t-v\tVerbose output; display caches/arenas/etc as they are\n"
2535 	    "\t\tsearched\n"
2536 	    "\t-a\tFind all possible sources.  Default behavior is to stop at\n"
2537 	    "\t\tthe first (most specific) source.\n"
2538 	    "\t-i\tSearch only identifier arenas and caches.  By default\n"
2539 	    "\t\tthese are ignored.\n"
2540 	    "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n"
2541 	    "\t\trespectively.  Warning: if the buffer exists, but does not\n"
2542 	    "\t\thave a bufctl, it will not be reported.\n");
2543 }
2544 
2545 typedef struct kmem_log_cpu {
2546 	uintptr_t kmc_low;
2547 	uintptr_t kmc_high;
2548 } kmem_log_cpu_t;
2549 
2550 typedef struct kmem_log_data {
2551 	uintptr_t kmd_addr;
2552 	kmem_log_cpu_t *kmd_cpu;
2553 } kmem_log_data_t;
2554 
2555 int
2556 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2557     kmem_log_data_t *kmd)
2558 {
2559 	int i;
2560 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2561 	size_t bufsize;
2562 
2563 	for (i = 0; i < NCPU; i++) {
2564 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2565 			break;
2566 	}
2567 
2568 	if (kmd->kmd_addr) {
2569 		if (b->bc_cache == NULL)
2570 			return (WALK_NEXT);
2571 
2572 		if (mdb_vread(&bufsize, sizeof (bufsize),
2573 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2574 			mdb_warn(
2575 			    "failed to read cache_bufsize for cache at %p",
2576 			    b->bc_cache);
2577 			return (WALK_ERR);
2578 		}
2579 
2580 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2581 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2582 			return (WALK_NEXT);
2583 	}
2584 
2585 	if (i == NCPU)
2586 		mdb_printf("   ");
2587 	else
2588 		mdb_printf("%3d", i);
2589 
2590 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2591 	    b->bc_timestamp, b->bc_thread);
2592 
2593 	return (WALK_NEXT);
2594 }
2595 
2596 /*ARGSUSED*/
2597 int
2598 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2599 {
2600 	kmem_log_header_t lh;
2601 	kmem_cpu_log_header_t clh;
2602 	uintptr_t lhp, clhp;
2603 	int ncpus;
2604 	uintptr_t *cpu;
2605 	GElf_Sym sym;
2606 	kmem_log_cpu_t *kmc;
2607 	int i;
2608 	kmem_log_data_t kmd;
2609 	uint_t opt_b = FALSE;
2610 
2611 	if (mdb_getopts(argc, argv,
2612 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2613 		return (DCMD_USAGE);
2614 
2615 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2616 		mdb_warn("failed to read 'kmem_transaction_log'");
2617 		return (DCMD_ERR);
2618 	}
2619 
2620 	if (lhp == NULL) {
2621 		mdb_warn("no kmem transaction log\n");
2622 		return (DCMD_ERR);
2623 	}
2624 
2625 	mdb_readvar(&ncpus, "ncpus");
2626 
2627 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2628 		mdb_warn("failed to read log header at %p", lhp);
2629 		return (DCMD_ERR);
2630 	}
2631 
2632 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2633 
2634 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2635 
2636 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2637 		mdb_warn("couldn't find 'cpu' array");
2638 		return (DCMD_ERR);
2639 	}
2640 
2641 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2642 		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2643 		    NCPU * sizeof (uintptr_t), sym.st_size);
2644 		return (DCMD_ERR);
2645 	}
2646 
2647 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2648 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2649 		return (DCMD_ERR);
2650 	}
2651 
2652 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2653 	kmd.kmd_addr = NULL;
2654 	kmd.kmd_cpu = kmc;
2655 
2656 	for (i = 0; i < NCPU; i++) {
2657 
2658 		if (cpu[i] == NULL)
2659 			continue;
2660 
2661 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2662 			mdb_warn("cannot read cpu %d's log header at %p",
2663 			    i, clhp);
2664 			return (DCMD_ERR);
2665 		}
2666 
2667 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2668 		    (uintptr_t)lh.lh_base;
2669 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2670 
2671 		clhp += sizeof (kmem_cpu_log_header_t);
2672 	}
2673 
2674 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2675 	    "TIMESTAMP", "THREAD");
2676 
2677 	/*
2678 	 * If we have been passed an address, print out only log entries
2679 	 * corresponding to that address.  If opt_b is specified, then interpret
2680 	 * the address as a bufctl.
2681 	 */
2682 	if (flags & DCMD_ADDRSPEC) {
2683 		kmem_bufctl_audit_t b;
2684 
2685 		if (opt_b) {
2686 			kmd.kmd_addr = addr;
2687 		} else {
2688 			if (mdb_vread(&b,
2689 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2690 				mdb_warn("failed to read bufctl at %p", addr);
2691 				return (DCMD_ERR);
2692 			}
2693 
2694 			(void) kmem_log_walk(addr, &b, &kmd);
2695 
2696 			return (DCMD_OK);
2697 		}
2698 	}
2699 
2700 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2701 		mdb_warn("can't find kmem log walker");
2702 		return (DCMD_ERR);
2703 	}
2704 
2705 	return (DCMD_OK);
2706 }
2707 
2708 typedef struct bufctl_history_cb {
2709 	int		bhc_flags;
2710 	int		bhc_argc;
2711 	const mdb_arg_t	*bhc_argv;
2712 	int		bhc_ret;
2713 } bufctl_history_cb_t;
2714 
2715 /*ARGSUSED*/
2716 static int
2717 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2718 {
2719 	bufctl_history_cb_t *bhc = arg;
2720 
2721 	bhc->bhc_ret =
2722 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2723 
2724 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2725 
2726 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2727 }
2728 
2729 void
2730 bufctl_help(void)
2731 {
2732 	mdb_printf("%s\n",
2733 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n");
2734 	mdb_dec_indent(2);
2735 	mdb_printf("%<b>OPTIONS%</b>\n");
2736 	mdb_inc_indent(2);
2737 	mdb_printf("%s",
2738 "  -v    Display the full content of the bufctl, including its stack trace\n"
2739 "  -h    retrieve the bufctl's transaction history, if available\n"
2740 "  -a addr\n"
2741 "        filter out bufctls not involving the buffer at addr\n"
2742 "  -c caller\n"
2743 "        filter out bufctls without the function/PC in their stack trace\n"
2744 "  -e earliest\n"
2745 "        filter out bufctls timestamped before earliest\n"
2746 "  -l latest\n"
2747 "        filter out bufctls timestamped after latest\n"
2748 "  -t thread\n"
2749 "        filter out bufctls not involving thread\n");
2750 }
2751 
2752 int
2753 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2754 {
2755 	kmem_bufctl_audit_t bc;
2756 	uint_t verbose = FALSE;
2757 	uint_t history = FALSE;
2758 	uint_t in_history = FALSE;
2759 	uintptr_t caller = NULL, thread = NULL;
2760 	uintptr_t laddr, haddr, baddr = NULL;
2761 	hrtime_t earliest = 0, latest = 0;
2762 	int i, depth;
2763 	char c[MDB_SYM_NAMLEN];
2764 	GElf_Sym sym;
2765 
2766 	if (mdb_getopts(argc, argv,
2767 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2768 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2769 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2770 	    'c', MDB_OPT_UINTPTR, &caller,
2771 	    't', MDB_OPT_UINTPTR, &thread,
2772 	    'e', MDB_OPT_UINT64, &earliest,
2773 	    'l', MDB_OPT_UINT64, &latest,
2774 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2775 		return (DCMD_USAGE);
2776 
2777 	if (!(flags & DCMD_ADDRSPEC))
2778 		return (DCMD_USAGE);
2779 
2780 	if (in_history && !history)
2781 		return (DCMD_USAGE);
2782 
2783 	if (history && !in_history) {
2784 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2785 		    UM_SLEEP | UM_GC);
2786 		bufctl_history_cb_t bhc;
2787 
2788 		nargv[0].a_type = MDB_TYPE_STRING;
2789 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2790 
2791 		for (i = 0; i < argc; i++)
2792 			nargv[i + 1] = argv[i];
2793 
2794 		/*
2795 		 * When in history mode, we treat each element as if it
		 * were in a separate loop, so that the headers group
2797 		 * bufctls with similar histories.
2798 		 */
2799 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2800 		bhc.bhc_argc = argc + 1;
2801 		bhc.bhc_argv = nargv;
2802 		bhc.bhc_ret = DCMD_OK;
2803 
2804 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2805 		    addr) == -1) {
2806 			mdb_warn("unable to walk bufctl_history");
2807 			return (DCMD_ERR);
2808 		}
2809 
2810 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2811 			mdb_printf("\n");
2812 
2813 		return (bhc.bhc_ret);
2814 	}
2815 
2816 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2817 		if (verbose) {
2818 			mdb_printf("%16s %16s %16s %16s\n"
2819 			    "%<u>%16s %16s %16s %16s%</u>\n",
2820 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2821 			    "", "CACHE", "LASTLOG", "CONTENTS");
2822 		} else {
2823 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2824 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2825 		}
2826 	}
2827 
2828 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2829 		mdb_warn("couldn't read bufctl at %p", addr);
2830 		return (DCMD_ERR);
2831 	}
2832 
2833 	/*
2834 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2835 	 * the address does not really refer to a bufctl.
2836 	 */
2837 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2838 
2839 	if (caller != NULL) {
2840 		laddr = caller;
2841 		haddr = caller + sizeof (caller);
2842 
2843 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2844 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2845 			/*
2846 			 * We were provided an exact symbol value; any
2847 			 * address in the function is valid.
2848 			 */
2849 			laddr = (uintptr_t)sym.st_value;
2850 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2851 		}
2852 
2853 		for (i = 0; i < depth; i++)
2854 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2855 				break;
2856 
2857 		if (i == depth)
2858 			return (DCMD_OK);
2859 	}
2860 
2861 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2862 		return (DCMD_OK);
2863 
2864 	if (earliest != 0 && bc.bc_timestamp < earliest)
2865 		return (DCMD_OK);
2866 
2867 	if (latest != 0 && bc.bc_timestamp > latest)
2868 		return (DCMD_OK);
2869 
2870 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2871 		return (DCMD_OK);
2872 
2873 	if (flags & DCMD_PIPE_OUT) {
2874 		mdb_printf("%#lr\n", addr);
2875 		return (DCMD_OK);
2876 	}
2877 
2878 	if (verbose) {
2879 		mdb_printf(
2880 		    "%<b>%16p%</b> %16p %16llx %16p\n"
2881 		    "%16s %16p %16p %16p\n",
2882 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2883 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2884 
2885 		mdb_inc_indent(17);
2886 		for (i = 0; i < depth; i++)
2887 			mdb_printf("%a\n", bc.bc_stack[i]);
2888 		mdb_dec_indent(17);
2889 		mdb_printf("\n");
2890 	} else {
2891 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2892 		    bc.bc_timestamp, bc.bc_thread);
2893 
2894 		for (i = 0; i < depth; i++) {
2895 			if (mdb_lookup_by_addr(bc.bc_stack[i],
2896 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2897 				continue;
2898 			if (strncmp(c, "kmem_", 5) == 0)
2899 				continue;
2900 			mdb_printf(" %a\n", bc.bc_stack[i]);
2901 			break;
2902 		}
2903 
2904 		if (i >= depth)
2905 			mdb_printf("\n");
2906 	}
2907 
2908 	return (DCMD_OK);
2909 }
2910 
2911 typedef struct kmem_verify {
2912 	uint64_t *kmv_buf;		/* buffer to read cache contents into */
2913 	size_t kmv_size;		/* number of bytes in kmv_buf */
2914 	int kmv_corruption;		/* > 0 if corruption found. */
2915 	int kmv_besilent;		/* report actual corruption sites */
2916 	struct kmem_cache kmv_cache;	/* the cache we're operating on */
2917 } kmem_verify_t;
2918 
2919 /*
2920  * verify_pattern()
2921  * 	verify that buf is filled with the pattern pat.
2922  */
2923 static int64_t
2924 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2925 {
2926 	/*LINTED*/
2927 	uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2928 	uint64_t *buf;
2929 
2930 	for (buf = buf_arg; buf < bufend; buf++)
2931 		if (*buf != pat)
2932 			return ((uintptr_t)buf - (uintptr_t)buf_arg);
2933 	return (-1);
2934 }
2935 
2936 /*
2937  * verify_buftag()
2938  *	verify that btp->bt_bxstat == (bcp ^ pat)
2939  */
2940 static int
2941 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
2942 {
2943 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2944 }
2945 
2946 /*
2947  * verify_free()
2948  * 	verify the integrity of a free block of memory by checking
2949  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
2950  */
2951 /*ARGSUSED1*/
2952 static int
2953 verify_free(uintptr_t addr, const void *data, void *private)
2954 {
2955 	kmem_verify_t *kmv = (kmem_verify_t *)private;
2956 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
2957 	int64_t corrupt;		/* corruption offset */
2958 	kmem_buftag_t *buftagp;		/* ptr to buftag */
2959 	kmem_cache_t *cp = &kmv->kmv_cache;
2960 	int besilent = kmv->kmv_besilent;
2961 
2962 	/*LINTED*/
2963 	buftagp = KMEM_BUFTAG(cp, buf);
2964 
2965 	/*
2966 	 * Read the buffer to check.
2967 	 */
2968 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
2969 		if (!besilent)
2970 			mdb_warn("couldn't read %p", addr);
2971 		return (WALK_NEXT);
2972 	}
2973 
2974 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2975 	    KMEM_FREE_PATTERN)) >= 0) {
2976 		if (!besilent)
2977 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2978 			    addr, (uintptr_t)addr + corrupt);
2979 		goto corrupt;
2980 	}
2981 	/*
2982 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
2983 	 * the first bytes of the buffer, hence we cannot check for red
2984 	 * zone corruption.
2985 	 */
2986 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
2987 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
2988 		if (!besilent)
2989 			mdb_printf("buffer %p (free) seems to "
2990 			    "have a corrupt redzone pattern\n", addr);
2991 		goto corrupt;
2992 	}
2993 
2994 	/*
2995 	 * confirm bufctl pointer integrity.
2996 	 */
2997 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
2998 		if (!besilent)
2999 			mdb_printf("buffer %p (free) has a corrupt "
3000 			    "buftag\n", addr);
3001 		goto corrupt;
3002 	}
3003 
3004 	return (WALK_NEXT);
3005 corrupt:
3006 	kmv->kmv_corruption++;
3007 	return (WALK_NEXT);
3008 }
3009 
3010 /*
3011  * verify_alloc()
3012  * 	Verify that the buftag of an allocated buffer makes sense with respect
3013  * 	to the buffer.
3014  */
3015 /*ARGSUSED1*/
3016 static int
3017 verify_alloc(uintptr_t addr, const void *data, void *private)
3018 {
3019 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3020 	kmem_cache_t *cp = &kmv->kmv_cache;
3021 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3022 	/*LINTED*/
3023 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3024 	uint32_t *ip = (uint32_t *)buftagp;
3025 	uint8_t *bp = (uint8_t *)buf;
3026 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
3027 	int besilent = kmv->kmv_besilent;
3028 
3029 	/*
3030 	 * Read the buffer to check.
3031 	 */
3032 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3033 		if (!besilent)
3034 			mdb_warn("couldn't read %p", addr);
3035 		return (WALK_NEXT);
3036 	}
3037 
3038 	/*
3039 	 * There are two cases to handle:
3040 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3041 	 *    0xfeedfacefeedface at the end of it
3042 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
3043 	 *    0xbb just past the end of the region in use.  At the buftag,
3044 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
3045 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3046 	 *    endianness), followed by 32 bits containing the offset of the
3047 	 *    0xbb byte in the buffer.
3048 	 *
3049 	 * Finally, the two 32-bit words that comprise the second half of the
3050 	 * buftag should xor to KMEM_BUFTAG_ALLOC
3051 	 */
3052 
3053 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3054 		looks_ok = 1;
3055 	else if (!KMEM_SIZE_VALID(ip[1]))
3056 		size_ok = 0;
3057 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3058 		looks_ok = 1;
3059 	else
3060 		size_ok = 0;
3061 
3062 	if (!size_ok) {
3063 		if (!besilent)
3064 			mdb_printf("buffer %p (allocated) has a corrupt "
3065 			    "redzone size encoding\n", addr);
3066 		goto corrupt;
3067 	}
3068 
3069 	if (!looks_ok) {
3070 		if (!besilent)
3071 			mdb_printf("buffer %p (allocated) has a corrupt "
3072 			    "redzone signature\n", addr);
3073 		goto corrupt;
3074 	}
3075 
3076 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3077 		if (!besilent)
3078 			mdb_printf("buffer %p (allocated) has a "
3079 			    "corrupt buftag\n", addr);
3080 		goto corrupt;
3081 	}
3082 
3083 	return (WALK_NEXT);
3084 corrupt:
3085 	kmv->kmv_corruption++;
3086 	return (WALK_NEXT);
3087 }
3088 
3089 /*ARGSUSED2*/
3090 int
3091 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3092 {
3093 	if (flags & DCMD_ADDRSPEC) {
3094 		int check_alloc = 0, check_free = 0;
3095 		kmem_verify_t kmv;
3096 
3097 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3098 		    addr) == -1) {
3099 			mdb_warn("couldn't read kmem_cache %p", addr);
3100 			return (DCMD_ERR);
3101 		}
3102 
3103 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3104 		    sizeof (kmem_buftag_t);
3105 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3106 		kmv.kmv_corruption = 0;
3107 
3108 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3109 			check_alloc = 1;
3110 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3111 				check_free = 1;
3112 		} else {
3113 			if (!(flags & DCMD_LOOP)) {
3114 				mdb_warn("cache %p (%s) does not have "
3115 				    "redzone checking enabled\n", addr,
3116 				    kmv.kmv_cache.cache_name);
3117 			}
3118 			return (DCMD_ERR);
3119 		}
3120 
3121 		if (flags & DCMD_LOOP) {
3122 			/*
3123 			 * table mode, don't print out every corrupt buffer
3124 			 */
3125 			kmv.kmv_besilent = 1;
3126 		} else {
3127 			mdb_printf("Summary for cache '%s'\n",
3128 			    kmv.kmv_cache.cache_name);
3129 			mdb_inc_indent(2);
3130 			kmv.kmv_besilent = 0;
3131 		}
3132 
3133 		if (check_alloc)
3134 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3135 		if (check_free)
3136 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3137 
3138 		if (flags & DCMD_LOOP) {
3139 			if (kmv.kmv_corruption == 0) {
3140 				mdb_printf("%-*s %?p clean\n",
3141 				    KMEM_CACHE_NAMELEN,
3142 				    kmv.kmv_cache.cache_name, addr);
3143 			} else {
3144 				char *s = "";	/* optional s in "buffer[s]" */
3145 				if (kmv.kmv_corruption > 1)
3146 					s = "s";
3147 
3148 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3149 				    KMEM_CACHE_NAMELEN,
3150 				    kmv.kmv_cache.cache_name, addr,
3151 				    kmv.kmv_corruption, s);
3152 			}
3153 		} else {
3154 			/*
3155 			 * This is the more verbose mode, when the user has
			 * typed addr::kmem_verify.  If the cache was clean,
3157 			 * nothing will have yet been printed. So say something.
3158 			 */
3159 			if (kmv.kmv_corruption == 0)
3160 				mdb_printf("clean\n");
3161 
3162 			mdb_dec_indent(2);
3163 		}
3164 	} else {
3165 		/*
		 * If the user didn't specify a cache to verify, we'll walk all
		 * kmem_caches, specifying ourselves as a callback for each...
		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3169 		 */
3170 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3171 		    "Cache Name", "Addr", "Cache Integrity");
3172 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3173 	}
3174 
3175 	return (DCMD_OK);
3176 }
3177 
3178 typedef struct vmem_node {
3179 	struct vmem_node *vn_next;
3180 	struct vmem_node *vn_parent;
3181 	struct vmem_node *vn_sibling;
3182 	struct vmem_node *vn_children;
3183 	uintptr_t vn_addr;
3184 	int vn_marked;
3185 	vmem_t vn_vmem;
3186 } vmem_node_t;
3187 
3188 typedef struct vmem_walk {
3189 	vmem_node_t *vw_root;
3190 	vmem_node_t *vw_current;
3191 } vmem_walk_t;
3192 
3193 int
3194 vmem_walk_init(mdb_walk_state_t *wsp)
3195 {
3196 	uintptr_t vaddr, paddr;
3197 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3198 	vmem_walk_t *vw;
3199 
3200 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3201 		mdb_warn("couldn't read 'vmem_list'");
3202 		return (WALK_ERR);
3203 	}
3204 
3205 	while (vaddr != NULL) {
3206 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3207 		vp->vn_addr = vaddr;
3208 		vp->vn_next = head;
3209 		head = vp;
3210 
3211 		if (vaddr == wsp->walk_addr)
3212 			current = vp;
3213 
3214 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3215 			mdb_warn("couldn't read vmem_t at %p", vaddr);
3216 			goto err;
3217 		}
3218 
3219 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3220 	}
3221 
3222 	for (vp = head; vp != NULL; vp = vp->vn_next) {
3223 
3224 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3225 			vp->vn_sibling = root;
3226 			root = vp;
3227 			continue;
3228 		}
3229 
3230 		for (parent = head; parent != NULL; parent = parent->vn_next) {
3231 			if (parent->vn_addr != paddr)
3232 				continue;
3233 			vp->vn_sibling = parent->vn_children;
3234 			parent->vn_children = vp;
3235 			vp->vn_parent = parent;
3236 			break;
3237 		}
3238 
3239 		if (parent == NULL) {
3240 			mdb_warn("couldn't find %p's parent (%p)\n",
3241 			    vp->vn_addr, paddr);
3242 			goto err;
3243 		}
3244 	}
3245 
3246 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3247 	vw->vw_root = root;
3248 
3249 	if (current != NULL)
3250 		vw->vw_current = current;
3251 	else
3252 		vw->vw_current = root;
3253 
3254 	wsp->walk_data = vw;
3255 	return (WALK_NEXT);
3256 err:
3257 	for (vp = head; head != NULL; vp = head) {
3258 		head = vp->vn_next;
3259 		mdb_free(vp, sizeof (vmem_node_t));
3260 	}
3261 
3262 	return (WALK_ERR);
3263 }
3264 
3265 int
3266 vmem_walk_step(mdb_walk_state_t *wsp)
3267 {
3268 	vmem_walk_t *vw = wsp->walk_data;
3269 	vmem_node_t *vp;
3270 	int rval;
3271 
3272 	if ((vp = vw->vw_current) == NULL)
3273 		return (WALK_DONE);
3274 
3275 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3276 
3277 	if (vp->vn_children != NULL) {
3278 		vw->vw_current = vp->vn_children;
3279 		return (rval);
3280 	}
3281 
3282 	do {
3283 		vw->vw_current = vp->vn_sibling;
3284 		vp = vp->vn_parent;
3285 	} while (vw->vw_current == NULL && vp != NULL);
3286 
3287 	return (rval);
3288 }
3289 
3290 /*
3291  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3292  * children are visited before their parent.  We perform the postfix walk
3293  * iteratively (rather than recursively) to allow mdb to regain control
3294  * after each callback.
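 *
 * For example (arena names are illustrative): if "kmem_default" imports from
 * "kmem_va", which in turn imports from "heap", the postfix order is
 * kmem_default, kmem_va, heap -- so consumers such as ::whatis report the
 * most specific arena first.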
3295  */
3296 int
3297 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3298 {
3299 	vmem_walk_t *vw = wsp->walk_data;
3300 	vmem_node_t *vp = vw->vw_current;
3301 	int rval;
3302 
3303 	/*
3304 	 * If this node is marked, then we know that we have already visited
3305 	 * all of its children.  If the node has any siblings, they need to
3306 	 * be visited next; otherwise, we need to visit the parent.  Note
3307 	 * that vp->vn_marked will only be zero on the first invocation of
3308 	 * the step function.
3309 	 */
3310 	if (vp->vn_marked) {
3311 		if (vp->vn_sibling != NULL)
3312 			vp = vp->vn_sibling;
3313 		else if (vp->vn_parent != NULL)
3314 			vp = vp->vn_parent;
3315 		else {
3316 			/*
3317 			 * We have neither a parent, nor a sibling, and we
3318 			 * have already been visited; we're done.
3319 			 */
3320 			return (WALK_DONE);
3321 		}
3322 	}
3323 
3324 	/*
3325 	 * Before we visit this node, visit its children.
3326 	 */
3327 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3328 		vp = vp->vn_children;
3329 
3330 	vp->vn_marked = 1;
3331 	vw->vw_current = vp;
3332 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3333 
3334 	return (rval);
3335 }
3336 
3337 void
3338 vmem_walk_fini(mdb_walk_state_t *wsp)
3339 {
3340 	vmem_walk_t *vw = wsp->walk_data;
3341 	vmem_node_t *root = vw->vw_root;
3342 	int done;
3343 
3344 	if (root == NULL)
3345 		return;
3346 
3347 	if ((vw->vw_root = root->vn_children) != NULL)
3348 		vmem_walk_fini(wsp);
3349 
3350 	vw->vw_root = root->vn_sibling;
3351 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3352 	mdb_free(root, sizeof (vmem_node_t));
3353 
3354 	if (done) {
3355 		mdb_free(vw, sizeof (vmem_walk_t));
3356 	} else {
3357 		vmem_walk_fini(wsp);
3358 	}
3359 }
3360 
3361 typedef struct vmem_seg_walk {
3362 	uint8_t vsw_type;
3363 	uintptr_t vsw_start;
3364 	uintptr_t vsw_current;
3365 } vmem_seg_walk_t;
3366 
3367 /*ARGSUSED*/
3368 int
3369 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3370 {
3371 	vmem_seg_walk_t *vsw;
3372 
3373 	if (wsp->walk_addr == NULL) {
3374 		mdb_warn("vmem_%s does not support global walks\n", name);
3375 		return (WALK_ERR);
3376 	}
3377 
3378 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3379 
3380 	vsw->vsw_type = type;
3381 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3382 	vsw->vsw_current = vsw->vsw_start;
3383 
3384 	return (WALK_NEXT);
3385 }
3386 
3387 /*
3388  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3389  */
3390 #define	VMEM_NONE	0
3391 
3392 int
3393 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3394 {
3395 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3396 }
3397 
3398 int
3399 vmem_free_walk_init(mdb_walk_state_t *wsp)
3400 {
3401 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3402 }
3403 
3404 int
3405 vmem_span_walk_init(mdb_walk_state_t *wsp)
3406 {
3407 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3408 }
3409 
3410 int
3411 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3412 {
3413 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3414 }
3415 
3416 int
3417 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3418 {
3419 	vmem_seg_t seg;
3420 	vmem_seg_walk_t *vsw = wsp->walk_data;
3421 	uintptr_t addr = vsw->vsw_current;
3422 	static size_t seg_size = 0;
3423 	int rval;
3424 
3425 	if (!seg_size) {
3426 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3427 			mdb_warn("failed to read 'vmem_seg_size'");
3428 			seg_size = sizeof (vmem_seg_t);
3429 		}
3430 	}
3431 
3432 	if (seg_size < sizeof (seg))
3433 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3434 
3435 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3436 		mdb_warn("couldn't read vmem_seg at %p", addr);
3437 		return (WALK_ERR);
3438 	}
3439 
3440 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3441 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3442 		rval = WALK_NEXT;
3443 	} else {
3444 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3445 	}
3446 
3447 	if (vsw->vsw_current == vsw->vsw_start)
3448 		return (WALK_DONE);
3449 
3450 	return (rval);
3451 }
3452 
3453 void
3454 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3455 {
3456 	vmem_seg_walk_t *vsw = wsp->walk_data;
3457 
3458 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3459 }
3460 
3461 #define	VMEM_NAMEWIDTH	22
3462 
3463 int
3464 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3465 {
3466 	vmem_t v, parent;
3467 	vmem_kstat_t *vkp = &v.vm_kstat;
3468 	uintptr_t paddr;
3469 	int ident = 0;
3470 	char c[VMEM_NAMEWIDTH];
3471 
3472 	if (!(flags & DCMD_ADDRSPEC)) {
3473 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3474 			mdb_warn("can't walk vmem");
3475 			return (DCMD_ERR);
3476 		}
3477 		return (DCMD_OK);
3478 	}
3479 
3480 	if (DCMD_HDRSPEC(flags))
3481 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3482 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3483 		    "TOTAL", "SUCCEED", "FAIL");
3484 
3485 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3486 		mdb_warn("couldn't read vmem at %p", addr);
3487 		return (DCMD_ERR);
3488 	}
3489 
3490 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3491 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3492 			mdb_warn("couldn't trace %p's ancestry", addr);
3493 			ident = 0;
3494 			break;
3495 		}
3496 		paddr = (uintptr_t)parent.vm_source;
3497 	}
3498 
3499 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3500 
3501 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3502 	    addr, VMEM_NAMEWIDTH, c,
3503 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3504 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3505 
3506 	return (DCMD_OK);
3507 }
3508 
3509 void
3510 vmem_seg_help(void)
3511 {
3512 	mdb_printf("%s\n",
3513 "Display the contents of vmem_seg_ts, with optional filtering.\n"
3514 "\n"
3515 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3516 "representing a single chunk of data.  Only ALLOC segments have debugging\n"
3517 "information.\n");
3518 	mdb_dec_indent(2);
3519 	mdb_printf("%<b>OPTIONS%</b>\n");
3520 	mdb_inc_indent(2);
3521 	mdb_printf("%s",
3522 "  -v    Display the full content of the vmem_seg, including its stack trace\n"
3523 "  -s    report the size of the segment, instead of the end address\n"
3524 "  -c caller\n"
3525 "        filter out segments without the function/PC in their stack trace\n"
3526 "  -e earliest\n"
3527 "        filter out segments timestamped before earliest\n"
3528 "  -l latest\n"
3529 "        filter out segments timestamped after latest\n"
3530 "  -m minsize\n"
3531 "        filer out segments smaller than minsize\n"
3532 "  -M maxsize\n"
3533 "        filer out segments larger than maxsize\n"
3534 "  -t thread\n"
3535 "        filter out segments not involving thread\n"
3536 "  -T type\n"
3537 "        filter out segments not of type 'type'\n"
3538 "        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3539 }
3540 
3541 /*ARGSUSED*/
3542 int
3543 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3544 {
3545 	vmem_seg_t vs;
3546 	pc_t *stk = vs.vs_stack;
3547 	uintptr_t sz;
3548 	uint8_t t;
3549 	const char *type = NULL;
3550 	GElf_Sym sym;
3551 	char c[MDB_SYM_NAMLEN];
3552 	int no_debug;
3553 	int i;
3554 	int depth;
3555 	uintptr_t laddr, haddr;
3556 
3557 	uintptr_t caller = NULL, thread = NULL;
3558 	uintptr_t minsize = 0, maxsize = 0;
3559 
3560 	hrtime_t earliest = 0, latest = 0;
3561 
3562 	uint_t size = 0;
3563 	uint_t verbose = 0;
3564 
3565 	if (!(flags & DCMD_ADDRSPEC))
3566 		return (DCMD_USAGE);
3567 
3568 	if (mdb_getopts(argc, argv,
3569 	    'c', MDB_OPT_UINTPTR, &caller,
3570 	    'e', MDB_OPT_UINT64, &earliest,
3571 	    'l', MDB_OPT_UINT64, &latest,
3572 	    's', MDB_OPT_SETBITS, TRUE, &size,
3573 	    'm', MDB_OPT_UINTPTR, &minsize,
3574 	    'M', MDB_OPT_UINTPTR, &maxsize,
3575 	    't', MDB_OPT_UINTPTR, &thread,
3576 	    'T', MDB_OPT_STR, &type,
3577 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3578 	    NULL) != argc)
3579 		return (DCMD_USAGE);
3580 
3581 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3582 		if (verbose) {
3583 			mdb_printf("%16s %4s %16s %16s %16s\n"
3584 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3585 			    "ADDR", "TYPE", "START", "END", "SIZE",
3586 			    "", "", "THREAD", "TIMESTAMP", "");
3587 		} else {
3588 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3589 			    "START", size? "SIZE" : "END", "WHO");
3590 		}
3591 	}
3592 
3593 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3594 		mdb_warn("couldn't read vmem_seg at %p", addr);
3595 		return (DCMD_ERR);
3596 	}
3597 
3598 	if (type != NULL) {
3599 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3600 			t = VMEM_ALLOC;
3601 		else if (strcmp(type, "FREE") == 0)
3602 			t = VMEM_FREE;
3603 		else if (strcmp(type, "SPAN") == 0)
3604 			t = VMEM_SPAN;
3605 		else if (strcmp(type, "ROTR") == 0 ||
3606 		    strcmp(type, "ROTOR") == 0)
3607 			t = VMEM_ROTOR;
3608 		else if (strcmp(type, "WLKR") == 0 ||
3609 		    strcmp(type, "WALKER") == 0)
3610 			t = VMEM_WALKER;
3611 		else {
3612 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3613 			    type);
3614 			return (DCMD_ERR);
3615 		}
3616 
3617 		if (vs.vs_type != t)
3618 			return (DCMD_OK);
3619 	}
3620 
3621 	sz = vs.vs_end - vs.vs_start;
3622 
3623 	if (minsize != 0 && sz < minsize)
3624 		return (DCMD_OK);
3625 
3626 	if (maxsize != 0 && sz > maxsize)
3627 		return (DCMD_OK);
3628 
3629 	t = vs.vs_type;
3630 	depth = vs.vs_depth;
3631 
3632 	/*
3633 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3634 	 */
3635 	no_debug = (t != VMEM_ALLOC) ||
3636 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3637 
3638 	if (no_debug) {
3639 		if (caller != NULL || thread != NULL || earliest != 0 ||
3640 		    latest != 0)
3641 			return (DCMD_OK);		/* not enough info */
3642 	} else {
3643 		if (caller != NULL) {
3644 			laddr = caller;
3645 			haddr = caller + sizeof (caller);
3646 
3647 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3648 			    sizeof (c), &sym) != -1 &&
3649 			    caller == (uintptr_t)sym.st_value) {
3650 				/*
3651 				 * We were provided an exact symbol value; any
3652 				 * address in the function is valid.
3653 				 */
3654 				laddr = (uintptr_t)sym.st_value;
3655 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3656 			}
3657 
3658 			for (i = 0; i < depth; i++)
3659 				if (vs.vs_stack[i] >= laddr &&
3660 				    vs.vs_stack[i] < haddr)
3661 					break;
3662 
3663 			if (i == depth)
3664 				return (DCMD_OK);
3665 		}
3666 
3667 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3668 			return (DCMD_OK);
3669 
3670 		if (earliest != 0 && vs.vs_timestamp < earliest)
3671 			return (DCMD_OK);
3672 
3673 		if (latest != 0 && vs.vs_timestamp > latest)
3674 			return (DCMD_OK);
3675 	}
3676 
3677 	type = (t == VMEM_ALLOC ? "ALLC" :
3678 	    t == VMEM_FREE ? "FREE" :
3679 	    t == VMEM_SPAN ? "SPAN" :
3680 	    t == VMEM_ROTOR ? "ROTR" :
3681 	    t == VMEM_WALKER ? "WLKR" :
3682 	    "????");
3683 
3684 	if (flags & DCMD_PIPE_OUT) {
3685 		mdb_printf("%#lr\n", addr);
3686 		return (DCMD_OK);
3687 	}
3688 
3689 	if (verbose) {
3690 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3691 		    addr, type, vs.vs_start, vs.vs_end, sz);
3692 
3693 		if (no_debug)
3694 			return (DCMD_OK);
3695 
3696 		mdb_printf("%16s %4s %16p %16llx\n",
3697 		    "", "", vs.vs_thread, vs.vs_timestamp);
3698 
3699 		mdb_inc_indent(17);
3700 		for (i = 0; i < depth; i++) {
3701 			mdb_printf("%a\n", stk[i]);
3702 		}
3703 		mdb_dec_indent(17);
3704 		mdb_printf("\n");
3705 	} else {
3706 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3707 		    vs.vs_start, size? sz : vs.vs_end);
3708 
3709 		if (no_debug) {
3710 			mdb_printf("\n");
3711 			return (DCMD_OK);
3712 		}
3713 
3714 		for (i = 0; i < depth; i++) {
3715 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3716 			    c, sizeof (c), &sym) == -1)
3717 				continue;
3718 			if (strncmp(c, "vmem_", 5) == 0)
3719 				continue;
3720 			break;
3721 		}
3722 		mdb_printf(" %a\n", stk[i]);
3723 	}
3724 	return (DCMD_OK);
3725 }
3726 
3727 typedef struct kmalog_data {
3728 	uintptr_t	kma_addr;
3729 	hrtime_t	kma_newest;
3730 } kmalog_data_t;
3731 
3732 /*ARGSUSED*/
3733 static int
3734 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3735 {
3736 	char name[KMEM_CACHE_NAMELEN + 1];
3737 	hrtime_t delta;
3738 	int i, depth;
3739 	size_t bufsize;
3740 
3741 	if (bcp->bc_timestamp == 0)
3742 		return (WALK_DONE);
3743 
3744 	if (kma->kma_newest == 0)
3745 		kma->kma_newest = bcp->bc_timestamp;
3746 
3747 	if (kma->kma_addr) {
3748 		if (mdb_vread(&bufsize, sizeof (bufsize),
3749 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3750 			mdb_warn(
3751 			    "failed to read cache_bufsize for cache at %p",
3752 			    bcp->bc_cache);
3753 			return (WALK_ERR);
3754 		}
3755 
3756 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3757 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3758 			return (WALK_NEXT);
3759 	}
3760 
3761 	delta = kma->kma_newest - bcp->bc_timestamp;
3762 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3763 
3764 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3765 	    &bcp->bc_cache->cache_name) <= 0)
3766 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3767 
3768 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3769 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3770 
3771 	for (i = 0; i < depth; i++)
3772 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3773 
3774 	return (WALK_NEXT);
3775 }
3776 
3777 int
3778 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3779 {
3780 	const char *logname = "kmem_transaction_log";
3781 	kmalog_data_t kma;
3782 
3783 	if (argc > 1)
3784 		return (DCMD_USAGE);
3785 
3786 	kma.kma_newest = 0;
3787 	if (flags & DCMD_ADDRSPEC)
3788 		kma.kma_addr = addr;
3789 	else
3790 		kma.kma_addr = NULL;
3791 
3792 	if (argc > 0) {
3793 		if (argv->a_type != MDB_TYPE_STRING)
3794 			return (DCMD_USAGE);
3795 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3796 			logname = "kmem_failure_log";
3797 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3798 			logname = "kmem_slab_log";
3799 		else
3800 			return (DCMD_USAGE);
3801 	}
3802 
3803 	if (mdb_readvar(&addr, logname) == -1) {
3804 		mdb_warn("failed to read %s log header pointer");
3805 		return (DCMD_ERR);
3806 	}
3807 
3808 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3809 		mdb_warn("failed to walk kmem log");
3810 		return (DCMD_ERR);
3811 	}
3812 
3813 	return (DCMD_OK);
3814 }
3815 
3816 /*
3817  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3818  * The first piece is a structure which we use to accumulate kmem_cache_t
3819  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3820  * walker; we either add all caches, or ones named explicitly as arguments.
3821  */
3822 
3823 typedef struct kmclist {
3824 	const char *kmc_name;			/* Name to match (or NULL) */
3825 	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
3826 	int kmc_nelems;				/* Num entries in kmc_caches */
3827 	int kmc_size;				/* Size of kmc_caches array */
3828 } kmclist_t;
3829 
3830 static int
3831 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3832 {
3833 	void *p;
3834 	int s;
3835 
3836 	if (kmc->kmc_name == NULL ||
3837 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3838 		/*
3839 		 * If we have a match, grow our array (if necessary), and then
3840 		 * add the virtual address of the matching cache to our list.
3841 		 */
3842 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3843 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3844 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3845 
3846 			bcopy(kmc->kmc_caches, p,
3847 			    sizeof (uintptr_t) * kmc->kmc_size);
3848 
3849 			kmc->kmc_caches = p;
3850 			kmc->kmc_size = s;
3851 		}
3852 
3853 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3854 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3855 	}
3856 
3857 	return (WALK_NEXT);
3858 }
3859 
3860 /*
3861  * The second piece of ::kmausers is a hash table of allocations.  Each
3862  * allocation owner is identified by its stack trace and data_size.  We then
3863  * track the total bytes of all such allocations, and the number of allocations
3864  * to report at the end.  Once we have a list of caches, we walk through the
3865  * allocated bufctls of each, and update our hash table accordingly.
3866  */
3867 
3868 typedef struct kmowner {
3869 	struct kmowner *kmo_head;		/* First hash elt in bucket */
3870 	struct kmowner *kmo_next;		/* Next hash elt in chain */
3871 	size_t kmo_signature;			/* Hash table signature */
3872 	uint_t kmo_num;				/* Number of allocations */
3873 	size_t kmo_data_size;			/* Size of each allocation */
3874 	size_t kmo_total_size;			/* Total bytes of allocation */
3875 	int kmo_depth;				/* Depth of stack trace */
3876 	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
3877 } kmowner_t;
3878 
3879 typedef struct kmusers {
3880 	uintptr_t kmu_addr;			/* address of interest */
3881 	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
3882 	kmowner_t *kmu_hash;			/* Hash table of owners */
3883 	int kmu_nelems;				/* Number of entries in use */
3884 	int kmu_size;				/* Total number of entries */
3885 } kmusers_t;
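
/*
 * The kmu_hash array does double duty: owners live contiguously in
 * kmu_hash[0 .. kmu_nelems - 1], while kmo_head/kmo_next thread those same
 * entries into per-bucket chains indexed by (signature & (kmu_size - 1)).
 * This is what allows kmu_add() below to both append new owners and rehash
 * the whole table in place after doubling its size.
 */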
3886 
3887 static void
3888 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3889     size_t size, size_t data_size)
3890 {
3891 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3892 	size_t bucket, signature = data_size;
3893 	kmowner_t *kmo, *kmoend;
3894 
3895 	/*
3896 	 * If the hash table is full, double its size and rehash everything.
3897 	 */
3898 	if (kmu->kmu_nelems >= kmu->kmu_size) {
3899 		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3900 
3901 		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
3902 		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
3903 		kmu->kmu_hash = kmo;
3904 		kmu->kmu_size = s;
3905 
3906 		kmoend = kmu->kmu_hash + kmu->kmu_size;
3907 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
3908 			kmo->kmo_head = NULL;
3909 
3910 		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
3911 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
3912 			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
3913 			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
3914 			kmu->kmu_hash[bucket].kmo_head = kmo;
3915 		}
3916 	}
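
	/*
	 * Note that kmu_size starts at 1024 and only ever doubles, so it
	 * stays a power of two and (kmu_size - 1) works as a bucket mask.
	 */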
3917 
3918 	/*
3919 	 * Finish computing the hash signature from the stack trace, and then
3920 	 * see if the owner is in the hash table.  If so, update our stats.
3921 	 */
3922 	for (i = 0; i < depth; i++)
3923 		signature += bcp->bc_stack[i];
3924 
3925 	bucket = signature & (kmu->kmu_size - 1);
3926 
3927 	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
3928 		if (kmo->kmo_signature == signature) {
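			/*
			 * OR together the difference of every field; the
			 * result is zero only if the data size, depth, and
			 * every stack frame match exactly.
			 */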
3929 			size_t difference = 0;
3930 
3931 			difference |= kmo->kmo_data_size - data_size;
3932 			difference |= kmo->kmo_depth - depth;
3933 
3934 			for (i = 0; i < depth; i++) {
3935 				difference |= kmo->kmo_stack[i] -
3936 				    bcp->bc_stack[i];
3937 			}
3938 
3939 			if (difference == 0) {
3940 				kmo->kmo_total_size += size;
3941 				kmo->kmo_num++;
3942 				return;
3943 			}
3944 		}
3945 	}
3946 
3947 	/*
3948 	 * If the owner is not yet hashed, grab the next element and fill it
3949 	 * in based on the allocation information.
3950 	 */
3951 	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
3952 	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
3953 	kmu->kmu_hash[bucket].kmo_head = kmo;
3954 
3955 	kmo->kmo_signature = signature;
3956 	kmo->kmo_num = 1;
3957 	kmo->kmo_data_size = data_size;
3958 	kmo->kmo_total_size = size;
3959 	kmo->kmo_depth = depth;
3960 
3961 	for (i = 0; i < depth; i++)
3962 		kmo->kmo_stack[i] = bcp->bc_stack[i];
3963 }
3964 
3965 /*
3966  * When ::kmausers is invoked without the -f flag, we simply update our hash
3967  * table with the information from each allocated bufctl.
3968  */
3969 /*ARGSUSED*/
3970 static int
3971 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
3972 {
3973 	const kmem_cache_t *cp = kmu->kmu_cache;
3974 
3975 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3976 	return (WALK_NEXT);
3977 }
3978 
3979 /*
3980  * When ::kmausers is invoked with the -f flag, we print out the information
3981  * for each bufctl as well as updating the hash table.
3982  */
3983 static int
3984 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
3985 {
3986 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3987 	const kmem_cache_t *cp = kmu->kmu_cache;
3988 	kmem_bufctl_t bufctl;
3989 
3990 	if (kmu->kmu_addr) {
3991 		if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
3992 			mdb_warn("couldn't read bufctl at %p", addr);
3993 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
3994 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
3995 		    cp->cache_bufsize)
3996 			return (WALK_NEXT);
3997 	}
3998 
3999 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4000 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4001 
4002 	for (i = 0; i < depth; i++)
4003 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
4004 
4005 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4006 	return (WALK_NEXT);
4007 }
4008 
4009 /*
4010  * We sort our results by allocation size before printing them.
4011  */
4012 static int
4013 kmownercmp(const void *lp, const void *rp)
4014 {
4015 	const kmowner_t *lhs = lp;
4016 	const kmowner_t *rhs = rp;
4017 
4018 	return (rhs->kmo_total_size - lhs->kmo_total_size);
4019 }
4020 
4021 /*
4022  * The main engine of ::kmausers is relatively straightforward: First we
4023  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4024  * iterate over the allocated bufctls of each cache in the list.  Finally,
4025  * we sort and print our results.
4026  */
4027 /*ARGSUSED*/
4028 int
4029 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4030 {
4031 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
4032 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
4033 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
4034 	int do_all_caches = 1;		/* Do all caches (no arguments) */
4035 	int opt_e = FALSE;		/* Include "small" users */
4036 	int opt_f = FALSE;		/* Print stack traces */
4037 
4038 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4039 	kmowner_t *kmo, *kmoend;
4040 	int i, oelems;
4041 
4042 	kmclist_t kmc;
4043 	kmusers_t kmu;
4044 
4045 	bzero(&kmc, sizeof (kmc));
4046 	bzero(&kmu, sizeof (kmu));
4047 
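	/*
	 * mdb_getopts() returns the number of arguments it has consumed,
	 * i.e. the index of the first non-option argument.  The loop below
	 * therefore alternates between parsing -e/-f options and collecting
	 * explicitly named caches until every argument has been processed.
	 */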
4048 	while ((i = mdb_getopts(argc, argv,
4049 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4050 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4051 
4052 		argv += i;	/* skip past options we just processed */
4053 		argc -= i;	/* adjust argc */
4054 
4055 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4056 			return (DCMD_USAGE);
4057 
4058 		oelems = kmc.kmc_nelems;
4059 		kmc.kmc_name = argv->a_un.a_str;
4060 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4061 
4062 		if (kmc.kmc_nelems == oelems) {
4063 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4064 			return (DCMD_ERR);
4065 		}
4066 
4067 		do_all_caches = 0;
4068 		argv++;
4069 		argc--;
4070 	}
4071 
4072 	if (flags & DCMD_ADDRSPEC) {
4073 		opt_f = TRUE;
4074 		kmu.kmu_addr = addr;
4075 	} else {
4076 		kmu.kmu_addr = NULL;
4077 	}
4078 
4079 	if (opt_e)
4080 		mem_threshold = cnt_threshold = 0;
4081 
4082 	if (opt_f)
4083 		callback = (mdb_walk_cb_t)kmause2;
4084 
4085 	if (do_all_caches) {
4086 		kmc.kmc_name = NULL; /* match all cache names */
4087 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4088 	}
4089 
4090 	for (i = 0; i < kmc.kmc_nelems; i++) {
4091 		uintptr_t cp = kmc.kmc_caches[i];
4092 		kmem_cache_t c;
4093 
4094 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
4095 			mdb_warn("failed to read cache at %p", cp);
4096 			continue;
4097 		}
4098 
4099 		if (!(c.cache_flags & KMF_AUDIT)) {
4100 			if (!do_all_caches) {
4101 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
4102 				    c.cache_name);
4103 			}
4104 			continue;
4105 		}
4106 
4107 		kmu.kmu_cache = &c;
4108 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
4109 		audited_caches++;
4110 	}
4111 
4112 	if (audited_caches == 0 && do_all_caches) {
4113 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4114 		return (DCMD_ERR);
4115 	}
4116 
4117 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4118 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4119 
4120 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4121 		if (kmo->kmo_total_size < mem_threshold &&
4122 		    kmo->kmo_num < cnt_threshold)
4123 			continue;
4124 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4125 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4126 		for (i = 0; i < kmo->kmo_depth; i++)
4127 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4128 	}
4129 
4130 	return (DCMD_OK);
4131 }
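
/*
 * Example invocations of ::kmausers (illustrative only; meaningful output
 * requires caches created with KMF_AUDIT set, e.g. by enabling auditing
 * via kmem_flags):
 *
 *	> ::kmausers				summarize the largest users
 *	> ::kmausers -e				include small users as well
 *	> ::kmausers -f kmem_alloc_256		per-allocation detail, one cache
 *	> addr::kmausers			only allocations containing addr
 */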
4132 
4133 void
4134 kmausers_help(void)
4135 {
4136 	mdb_printf(
4137 	    "Displays the largest users of the kmem allocator, grouped by\n"
4138 	    "stack trace and sorted by total bytes allocated.  If one or more\n"
4139 	    "caches are specified, only those caches are searched; by default,\n"
4140 	    "all caches are searched.  If an address is specified, then only\n"
4141 	    "those allocations which include the given address are displayed.\n"
4142 	    "Specifying an address implies -f.\n"
4143 	    "\n"
4144 	    "\t-e\tInclude all users, not just the largest\n"
4145 	    "\t-f\tDisplay individual allocations.  By default, users are\n"
4146 	    "\t\tgrouped by stack\n");
4147 }
4148 
4149 static int
4150 kmem_ready_check(void)
4151 {
4152 	int ready;
4153 
4154 	if (mdb_readvar(&ready, "kmem_ready") < 0)
4155 		return (-1); /* errno is set for us */
4156 
4157 	return (ready);
4158 }
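
/*
 * kmem_ready is the kernel's own flag indicating that the allocator has
 * been initialized; until it is nonzero there are no caches to walk, so the
 * statechange callback below waits for it before adding per-cache walkers.
 */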
4159 
4160 /*ARGSUSED*/
4161 static void
4162 kmem_statechange_cb(void *arg)
4163 {
4164 	static int been_ready = 0;
4165 
4166 	leaky_cleanup(1);	/* state changes invalidate leaky state */
4167 
4168 	if (been_ready)
4169 		return;
4170 
4171 	if (kmem_ready_check() <= 0)
4172 		return;
4173 
4174 	been_ready = 1;
4175 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4176 }
4177 
4178 void
4179 kmem_init(void)
4180 {
4181 	mdb_walker_t w = {
4182 		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4183 		kmem_cache_walk_step, kmem_cache_walk_fini
4184 	};
4185 
4186 	/*
4187 	 * If kmem is ready, we need to invoke the kmem_cache walker
4188 	 * immediately.  Walkers in the linkage structure won't be ready until
4189 	 * _mdb_init returns, so we add this one manually.  The statechange
4190 	 * callback registered below then uses that walker to set up the
4191 	 * per-cache walkers; if kmem isn't ready yet, the callback simply
4192 	 * defers that work until it is.
4193 	 */
4194 	if (mdb_add_walker(&w) != 0) {
4195 		mdb_warn("failed to add kmem_cache walker");
4196 		return;
4197 	}
4198 
4199 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, kmem_statechange_cb, NULL);
4200 	kmem_statechange_cb(NULL);
4201 }
4202 
4203 typedef struct whatthread {
4204 	uintptr_t	wt_target;
4205 	int		wt_verbose;
4206 } whatthread_t;
4207 
4208 static int
4209 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4210 {
4211 	uintptr_t current, data;
4212 
4213 	if (t->t_stkbase == NULL)
4214 		return (WALK_NEXT);
4215 
4216 	/*
4217 	 * Warn about swapped out threads, but drive on anyway
4218 	 */
4219 	if (!(t->t_schedflag & TS_LOAD)) {
4220 		mdb_warn("thread %p's stack swapped out\n", addr);
4221 		return (WALK_NEXT);
4222 	}
4223 
4224 	/*
4225 	 * Search the thread's stack for the given pointer.  Note that it would
4226 	 * be more efficient to follow ::kgrep's lead and read in page-sized
4227 	 * chunks, but this routine is already fast and simple.
4228 	 */
4229 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4230 	    current += sizeof (uintptr_t)) {
4231 		if (mdb_vread(&data, sizeof (data), current) == -1) {
4232 			mdb_warn("couldn't read thread %p's stack at %p",
4233 			    addr, current);
4234 			return (WALK_ERR);
4235 		}
4236 
4237 		if (data == w->wt_target) {
4238 			if (w->wt_verbose) {
4239 				mdb_printf("%p in thread %p's stack%s\n",
4240 				    current, addr, stack_active(t, current));
4241 			} else {
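				/*
				 * In the terse case each matching thread is
				 * printed only once, so stop scanning this
				 * stack after the first hit.
				 */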
4242 				mdb_printf("%#lr\n", addr);
4243 				return (WALK_NEXT);
4244 			}
4245 		}
4246 	}
4247 
4248 	return (WALK_NEXT);
4249 }
4250 
4251 int
4252 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4253 {
4254 	whatthread_t w;
4255 
4256 	if (!(flags & DCMD_ADDRSPEC))
4257 		return (DCMD_USAGE);
4258 
4259 	w.wt_verbose = FALSE;
4260 	w.wt_target = addr;
4261 
4262 	if (mdb_getopts(argc, argv,
4263 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4264 		return (DCMD_USAGE);
4265 
4266 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4267 	    == -1) {
4268 		mdb_warn("couldn't walk threads");
4269 		return (DCMD_ERR);
4270 	}
4271 
4272 	return (DCMD_OK);
4273 }
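
/*
 * Example invocation (illustrative): report every kernel thread whose stack
 * contains a reference to a given address; -v additionally shows the matching
 * stack location and whether it lies in the active portion of the stack:
 *
 *	> addr::whatthread -v
 */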
4274