1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2019 Joyent, Inc.
28  * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
29  */
30 
31 #include "umem.h"
32 
33 #include <sys/vmem_impl_user.h>
34 #include <umem_impl.h>
35 
36 #include <alloca.h>
37 #include <limits.h>
38 #include <mdb/mdb_whatis.h>
39 #include <thr_uberdata.h>
40 
41 #include "misc.h"
42 #include "leaky.h"
43 #include "dist.h"
44 
45 #include "umem_pagesize.h"
46 
/*
 * Walk-type flag bits.  umem_walk_init_common() masks UM_HASH out of the
 * type it is handed.  NOTE(review): the remaining bits presumably select
 * allocated vs. free buffers and buffer vs. bufctl reporting -- confirm
 * against the walker code that consumes them.
 */
#define	UM_ALLOCATED		0x1
#define	UM_FREE			0x2
#define	UM_BUFCTL		0x4
#define	UM_HASH			0x8

/* Local copy of the target's umem_ready; see umem_update_variables(). */
int umem_ready;

/* Nonzero once the "umem_stack_depth corrupted" warning has been printed. */
static int umem_stack_depth_warned;
/* Target's umem_max_ncpus (or, failing that, "max_ncpus"). */
static uint32_t umem_max_ncpus;
/* Target's umem_stack_depth; reset to 0 if above UMEM_MAX_STACK_DEPTH. */
uint32_t umem_stack_depth;

/* Target's "pagesize" variable, copied by umem_update_variables(). */
size_t umem_pagesize;

/*
 * Read the target variable whose name is the spelling of 'var' into the
 * debugger-side object 'var' using umem_readvar().  Evaluates to 1, after
 * issuing a warning, when the read fails, and to 0 otherwise -- so it can
 * be used directly as an "if failed" condition.
 */
#define	UMEM_READVAR(var)				\
	(umem_readvar(&(var), #var) == -1 &&		\
	    (mdb_warn("failed to read "#var), 1))
63 
64 int
umem_update_variables(void)65 umem_update_variables(void)
66 {
67 	size_t pagesize;
68 
69 	/*
70 	 * Figure out which type of umem is being used; if it's not there
71 	 * yet, succeed quietly.
72 	 */
73 	if (umem_set_standalone() == -1) {
74 		umem_ready = 0;
75 		return (0);		/* umem not there yet */
76 	}
77 
78 	/*
79 	 * Solaris 9 used a different name for umem_max_ncpus.  It's
80 	 * cheap backwards compatibility to check for both names.
81 	 */
82 	if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
83 	    umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
84 		mdb_warn("unable to read umem_max_ncpus or max_ncpus");
85 		return (-1);
86 	}
87 	if (UMEM_READVAR(umem_ready))
88 		return (-1);
89 	if (UMEM_READVAR(umem_stack_depth))
90 		return (-1);
91 	if (UMEM_READVAR(pagesize))
92 		return (-1);
93 
94 	if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
95 		if (umem_stack_depth_warned == 0) {
96 			mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
97 			    umem_stack_depth, UMEM_MAX_STACK_DEPTH);
98 			umem_stack_depth_warned = 1;
99 		}
100 		umem_stack_depth = 0;
101 	}
102 
103 	umem_pagesize = pagesize;
104 
105 	return (0);
106 }
107 
108 static int
umem_ptc_walk_init(mdb_walk_state_t * wsp)109 umem_ptc_walk_init(mdb_walk_state_t *wsp)
110 {
111 	if (wsp->walk_addr == 0) {
112 		if (mdb_layered_walk("ulwp", wsp) == -1) {
113 			mdb_warn("couldn't walk 'ulwp'");
114 			return (WALK_ERR);
115 		}
116 	}
117 
118 	return (WALK_NEXT);
119 }
120 
121 static int
umem_ptc_walk_step(mdb_walk_state_t * wsp)122 umem_ptc_walk_step(mdb_walk_state_t *wsp)
123 {
124 	uintptr_t this;
125 	int rval;
126 
127 	if (wsp->walk_layer != NULL) {
128 		this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self +
129 		    (uintptr_t)wsp->walk_arg;
130 	} else {
131 		this = wsp->walk_addr + (uintptr_t)wsp->walk_arg;
132 	}
133 
134 	for (;;) {
135 		if (mdb_vread(&this, sizeof (void *), this) == -1) {
136 			mdb_warn("couldn't read ptc buffer at %p", this);
137 			return (WALK_ERR);
138 		}
139 
140 		if (this == 0)
141 			break;
142 
143 		rval = wsp->walk_callback(this, &this, wsp->walk_cbdata);
144 
145 		if (rval != WALK_NEXT)
146 			return (rval);
147 	}
148 
149 	return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE);
150 }
151 
152 /*ARGSUSED*/
153 static int
umem_init_walkers(uintptr_t addr,const umem_cache_t * c,int * sizes)154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes)
155 {
156 	mdb_walker_t w;
157 	char descr[64];
158 	char name[64];
159 	int i;
160 
161 	(void) mdb_snprintf(descr, sizeof (descr),
162 	    "walk the %s cache", c->cache_name);
163 
164 	w.walk_name = c->cache_name;
165 	w.walk_descr = descr;
166 	w.walk_init = umem_walk_init;
167 	w.walk_step = umem_walk_step;
168 	w.walk_fini = umem_walk_fini;
169 	w.walk_init_arg = (void *)addr;
170 
171 	if (mdb_add_walker(&w) == -1)
172 		mdb_warn("failed to add %s walker", c->cache_name);
173 
174 	if (!(c->cache_flags & UMF_PTC))
175 		return (WALK_NEXT);
176 
177 	/*
178 	 * For the per-thread cache walker, the address is the offset in the
179 	 * tm_roots[] array of the ulwp_t.
180 	 */
181 	for (i = 0; sizes[i] != 0; i++) {
182 		if (sizes[i] == c->cache_bufsize)
183 			break;
184 	}
185 
186 	if (sizes[i] == 0) {
187 		mdb_warn("cache %s is cached per-thread, but could not find "
188 		    "size in umem_alloc_sizes\n", c->cache_name);
189 		return (WALK_NEXT);
190 	}
191 
192 	if (i >= NTMEMBASE) {
193 		mdb_warn("index for %s (%d) exceeds root slots (%d)\n",
194 		    c->cache_name, i, NTMEMBASE);
195 		return (WALK_NEXT);
196 	}
197 
198 	(void) mdb_snprintf(name, sizeof (name),
199 	    "umem_ptc_%d", c->cache_bufsize);
200 	(void) mdb_snprintf(descr, sizeof (descr),
201 	    "walk the per-thread cache for %s", c->cache_name);
202 
203 	w.walk_name = name;
204 	w.walk_descr = descr;
205 	w.walk_init = umem_ptc_walk_init;
206 	w.walk_step = umem_ptc_walk_step;
207 	w.walk_fini = NULL;
208 	w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]);
209 
210 	if (mdb_add_walker(&w) == -1)
211 		mdb_warn("failed to add %s walker", w.walk_name);
212 
213 	return (WALK_NEXT);
214 }
215 
216 /*ARGSUSED*/
217 static void
umem_statechange_cb(void * arg)218 umem_statechange_cb(void *arg)
219 {
220 	static int been_ready = 0;
221 	GElf_Sym sym;
222 	int *sizes;
223 
224 #ifndef _KMDB
225 	leaky_cleanup(1);	/* state changes invalidate leaky state */
226 #endif
227 
228 	if (umem_update_variables() == -1)
229 		return;
230 
231 	if (been_ready)
232 		return;
233 
234 	if (umem_ready != UMEM_READY)
235 		return;
236 
237 	been_ready = 1;
238 
239 	/*
240 	 * In order to determine the tm_roots offset of any cache that is
241 	 * cached per-thread, we need to have the umem_alloc_sizes array.
242 	 * Read this, assuring that it is zero-terminated.
243 	 */
244 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
245 		mdb_warn("unable to lookup 'umem_alloc_sizes'");
246 		return;
247 	}
248 
249 	sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC);
250 
251 	if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) {
252 		mdb_warn("couldn't read 'umem_alloc_sizes'");
253 		return;
254 	}
255 
256 	(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes);
257 }
258 
259 int
umem_abort_messages(void)260 umem_abort_messages(void)
261 {
262 	char *umem_error_buffer;
263 	uint_t umem_error_begin;
264 	GElf_Sym sym;
265 	size_t bufsize;
266 
267 	if (UMEM_READVAR(umem_error_begin))
268 		return (DCMD_ERR);
269 
270 	if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
271 		mdb_warn("unable to look up umem_error_buffer");
272 		return (DCMD_ERR);
273 	}
274 
275 	bufsize = (size_t)sym.st_size;
276 
277 	umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
278 
279 	if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
280 	    != bufsize) {
281 		mdb_warn("unable to read umem_error_buffer");
282 		return (DCMD_ERR);
283 	}
284 	/* put a zero after the end of the buffer to simplify printing */
285 	umem_error_buffer[bufsize] = 0;
286 
287 	if ((umem_error_begin % bufsize) == 0)
288 		mdb_printf("%s\n", umem_error_buffer);
289 	else {
290 		umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
291 		mdb_printf("%s%s\n",
292 		    &umem_error_buffer[umem_error_begin % bufsize],
293 		    umem_error_buffer);
294 	}
295 
296 	return (DCMD_OK);
297 }
298 
299 static void
umem_log_status(const char * name,umem_log_header_t * val)300 umem_log_status(const char *name, umem_log_header_t *val)
301 {
302 	umem_log_header_t my_lh;
303 	uintptr_t pos = (uintptr_t)val;
304 	size_t size;
305 
306 	if (pos == 0)
307 		return;
308 
309 	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
310 		mdb_warn("\nunable to read umem_%s_log pointer %p",
311 		    name, pos);
312 		return;
313 	}
314 
315 	size = my_lh.lh_chunksize * my_lh.lh_nchunks;
316 
317 	if (size % (1024 * 1024) == 0)
318 		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
319 	else if (size % 1024 == 0)
320 		mdb_printf("%s=%dk ", name, size / 1024);
321 	else
322 		mdb_printf("%s=%d ", name, size);
323 }
324 
/*
 * One named set of umem debugging flags; entries of this type make up the
 * umem_status_flags[] table.
 */
typedef struct umem_debug_flags {
	const char	*udf_name;	/* name of the flag set */
	uint_t		udf_flags;	/* UMF_* bits the name stands for */
	uint_t		udf_clear;	/* if 0, uses udf_flags */
} umem_debug_flags_t;
330 
331 umem_debug_flags_t umem_status_flags[] = {
332 	{ "random",	UMF_RANDOMIZE,	UMF_RANDOM },
333 	{ "default",	UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
334 	{ "audit",	UMF_AUDIT },
335 	{ "guards",	UMF_DEADBEEF | UMF_REDZONE },
336 	{ "nosignal",	UMF_CHECKSIGNAL },
337 	{ "firewall",	UMF_FIREWALL },
338 	{ "lite",	UMF_LITE },
339 	{ "checknull",	UMF_CHECKNULL },
340 	{ NULL }
341 };
342 
343 /*ARGSUSED*/
344 int
umem_status(uintptr_t addr,uint_t flags,int ac,const mdb_arg_t * argv)345 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
346 {
347 	int umem_logging;
348 
349 	umem_log_header_t *umem_transaction_log;
350 	umem_log_header_t *umem_content_log;
351 	umem_log_header_t *umem_failure_log;
352 	umem_log_header_t *umem_slab_log;
353 
354 	mdb_printf("Status:\t\t%s\n",
355 	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
356 	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
357 	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
358 	    umem_ready == UMEM_READY ? "ready and active" :
359 	    umem_ready == 0 ? "not loaded into address space" :
360 	    "unknown (umem_ready invalid)");
361 
362 	if (umem_ready == 0)
363 		return (DCMD_OK);
364 
365 	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
366 
367 	if (UMEM_READVAR(umem_logging))
368 		goto err;
369 	if (UMEM_READVAR(umem_transaction_log))
370 		goto err;
371 	if (UMEM_READVAR(umem_content_log))
372 		goto err;
373 	if (UMEM_READVAR(umem_failure_log))
374 		goto err;
375 	if (UMEM_READVAR(umem_slab_log))
376 		goto err;
377 
378 	mdb_printf("Logs:\t\t");
379 	umem_log_status("transaction", umem_transaction_log);
380 	umem_log_status("content", umem_content_log);
381 	umem_log_status("fail", umem_failure_log);
382 	umem_log_status("slab", umem_slab_log);
383 	if (!umem_logging)
384 		mdb_printf("(inactive)");
385 	mdb_printf("\n");
386 
387 	mdb_printf("Message buffer:\n");
388 	return (umem_abort_messages());
389 
390 err:
391 	mdb_printf("Message buffer:\n");
392 	(void) umem_abort_messages();
393 	return (DCMD_ERR);
394 }
395 
/*
 * State for the umem_cache walker: the cache list is circular, anchored
 * at umem_null_cache.
 */
typedef struct {
	uintptr_t ucw_first;	/* address of umem_null_cache (list anchor) */
	uintptr_t ucw_current;	/* address of the next cache to visit */
} umem_cache_walk_t;
400 
401 int
umem_cache_walk_init(mdb_walk_state_t * wsp)402 umem_cache_walk_init(mdb_walk_state_t *wsp)
403 {
404 	umem_cache_walk_t *ucw;
405 	umem_cache_t c;
406 	uintptr_t cp;
407 	GElf_Sym sym;
408 
409 	if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
410 		mdb_warn("couldn't find umem_null_cache");
411 		return (WALK_ERR);
412 	}
413 
414 	cp = (uintptr_t)sym.st_value;
415 
416 	if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
417 		mdb_warn("couldn't read cache at %p", cp);
418 		return (WALK_ERR);
419 	}
420 
421 	ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
422 
423 	ucw->ucw_first = cp;
424 	ucw->ucw_current = (uintptr_t)c.cache_next;
425 	wsp->walk_data = ucw;
426 
427 	return (WALK_NEXT);
428 }
429 
430 int
umem_cache_walk_step(mdb_walk_state_t * wsp)431 umem_cache_walk_step(mdb_walk_state_t *wsp)
432 {
433 	umem_cache_walk_t *ucw = wsp->walk_data;
434 	umem_cache_t c;
435 	int status;
436 
437 	if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
438 		mdb_warn("couldn't read cache at %p", ucw->ucw_current);
439 		return (WALK_DONE);
440 	}
441 
442 	status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
443 
444 	if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
445 		return (WALK_DONE);
446 
447 	return (status);
448 }
449 
450 void
umem_cache_walk_fini(mdb_walk_state_t * wsp)451 umem_cache_walk_fini(mdb_walk_state_t *wsp)
452 {
453 	umem_cache_walk_t *ucw = wsp->walk_data;
454 	mdb_free(ucw, sizeof (umem_cache_walk_t));
455 }
456 
/*
 * State for the umem_cpu walker, which steps through the target's
 * umem_cpus array by index.
 */
typedef struct {
	umem_cpu_t *ucw_cpus;	/* target address of the umem_cpus array */
	uint32_t ucw_current;	/* index of the next element to visit */
	uint32_t ucw_max;	/* element count (umem_max_ncpus at init) */
} umem_cpu_walk_state_t;
462 
463 int
umem_cpu_walk_init(mdb_walk_state_t * wsp)464 umem_cpu_walk_init(mdb_walk_state_t *wsp)
465 {
466 	umem_cpu_t *umem_cpus;
467 
468 	umem_cpu_walk_state_t *ucw;
469 
470 	if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
471 		mdb_warn("failed to read 'umem_cpus'");
472 		return (WALK_ERR);
473 	}
474 
475 	ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
476 
477 	ucw->ucw_cpus = umem_cpus;
478 	ucw->ucw_current = 0;
479 	ucw->ucw_max = umem_max_ncpus;
480 
481 	wsp->walk_data = ucw;
482 	return (WALK_NEXT);
483 }
484 
485 int
umem_cpu_walk_step(mdb_walk_state_t * wsp)486 umem_cpu_walk_step(mdb_walk_state_t *wsp)
487 {
488 	umem_cpu_t cpu;
489 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
490 
491 	uintptr_t caddr;
492 
493 	if (ucw->ucw_current >= ucw->ucw_max)
494 		return (WALK_DONE);
495 
496 	caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
497 
498 	if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
499 		mdb_warn("failed to read cpu %d", ucw->ucw_current);
500 		return (WALK_ERR);
501 	}
502 
503 	ucw->ucw_current++;
504 
505 	return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
506 }
507 
508 void
umem_cpu_walk_fini(mdb_walk_state_t * wsp)509 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
510 {
511 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
512 
513 	mdb_free(ucw, sizeof (*ucw));
514 }
515 
516 int
umem_cpu_cache_walk_init(mdb_walk_state_t * wsp)517 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
518 {
519 	if (wsp->walk_addr == 0) {
520 		mdb_warn("umem_cpu_cache doesn't support global walks");
521 		return (WALK_ERR);
522 	}
523 
524 	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
525 		mdb_warn("couldn't walk 'umem_cpu'");
526 		return (WALK_ERR);
527 	}
528 
529 	wsp->walk_data = (void *)wsp->walk_addr;
530 
531 	return (WALK_NEXT);
532 }
533 
534 int
umem_cpu_cache_walk_step(mdb_walk_state_t * wsp)535 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
536 {
537 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
538 	const umem_cpu_t *cpu = wsp->walk_layer;
539 	umem_cpu_cache_t cc;
540 
541 	caddr += cpu->cpu_cache_offset;
542 
543 	if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
544 		mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
545 		return (WALK_ERR);
546 	}
547 
548 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
549 }
550 
551 int
umem_slab_walk_init(mdb_walk_state_t * wsp)552 umem_slab_walk_init(mdb_walk_state_t *wsp)
553 {
554 	uintptr_t caddr = wsp->walk_addr;
555 	umem_cache_t c;
556 
557 	if (caddr == 0) {
558 		mdb_warn("umem_slab doesn't support global walks\n");
559 		return (WALK_ERR);
560 	}
561 
562 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
563 		mdb_warn("couldn't read umem_cache at %p", caddr);
564 		return (WALK_ERR);
565 	}
566 
567 	wsp->walk_data =
568 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
569 	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
570 
571 	return (WALK_NEXT);
572 }
573 
574 int
umem_slab_walk_partial_init(mdb_walk_state_t * wsp)575 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
576 {
577 	uintptr_t caddr = wsp->walk_addr;
578 	umem_cache_t c;
579 
580 	if (caddr == 0) {
581 		mdb_warn("umem_slab_partial doesn't support global walks\n");
582 		return (WALK_ERR);
583 	}
584 
585 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
586 		mdb_warn("couldn't read umem_cache at %p", caddr);
587 		return (WALK_ERR);
588 	}
589 
590 	wsp->walk_data =
591 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
592 	wsp->walk_addr = (uintptr_t)c.cache_freelist;
593 
594 	/*
595 	 * Some consumers (umem_walk_step(), in particular) require at
596 	 * least one callback if there are any buffers in the cache.  So
597 	 * if there are *no* partial slabs, report the last full slab, if
598 	 * any.
599 	 *
600 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
601 	 */
602 	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
603 		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
604 
605 	return (WALK_NEXT);
606 }
607 
608 int
umem_slab_walk_step(mdb_walk_state_t * wsp)609 umem_slab_walk_step(mdb_walk_state_t *wsp)
610 {
611 	umem_slab_t s;
612 	uintptr_t addr = wsp->walk_addr;
613 	uintptr_t saddr = (uintptr_t)wsp->walk_data;
614 	uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
615 
616 	if (addr == saddr)
617 		return (WALK_DONE);
618 
619 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
620 		mdb_warn("failed to read slab at %p", wsp->walk_addr);
621 		return (WALK_ERR);
622 	}
623 
624 	if ((uintptr_t)s.slab_cache != caddr) {
625 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
626 		    addr, caddr, s.slab_cache);
627 		return (WALK_ERR);
628 	}
629 
630 	wsp->walk_addr = (uintptr_t)s.slab_next;
631 
632 	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
633 }
634 
635 int
umem_cache(uintptr_t addr,uint_t flags,int ac,const mdb_arg_t * argv)636 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
637 {
638 	umem_cache_t c;
639 
640 	if (!(flags & DCMD_ADDRSPEC)) {
641 		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
642 			mdb_warn("can't walk umem_cache");
643 			return (DCMD_ERR);
644 		}
645 		return (DCMD_OK);
646 	}
647 
648 	if (DCMD_HDRSPEC(flags))
649 		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
650 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
651 
652 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
653 		mdb_warn("couldn't read umem_cache at %p", addr);
654 		return (DCMD_ERR);
655 	}
656 
657 	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
658 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
659 
660 	return (DCMD_OK);
661 }
662 
/*
 * qsort(3C)-style comparison of two uintptr_t values, ascending.
 * Returns -1, 0 or 1 when *lhs is below, equal to or above *rhs.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t a = *(const uintptr_t *)lhs;
	const uintptr_t b = *(const uintptr_t *)rhs;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return ((a > b) - (a < b));
}
675 
676 static int
bufctlcmp(const umem_bufctl_audit_t ** lhs,const umem_bufctl_audit_t ** rhs)677 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
678 {
679 	const umem_bufctl_audit_t *bcp1 = *lhs;
680 	const umem_bufctl_audit_t *bcp2 = *rhs;
681 
682 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
683 		return (-1);
684 
685 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
686 		return (1);
687 
688 	return (0);
689 }
690 
/*
 * State for the umem_hash walker, which visits every bufctl reachable
 * from a cache's hash table.
 */
typedef struct umem_hash_walk {
	uintptr_t *umhw_table;	/* local copy of the cache's hash table */
	size_t umhw_nelems;	/* bucket count (cache_hash_mask + 1) */
	size_t umhw_pos;	/* index of the next bucket to examine */
	umem_bufctl_t umhw_cur;	/* last bufctl read; bc_next is tried first */
} umem_hash_walk_t;
697 
698 int
umem_hash_walk_init(mdb_walk_state_t * wsp)699 umem_hash_walk_init(mdb_walk_state_t *wsp)
700 {
701 	umem_hash_walk_t *umhw;
702 	uintptr_t *hash;
703 	umem_cache_t c;
704 	uintptr_t haddr, addr = wsp->walk_addr;
705 	size_t nelems;
706 	size_t hsize;
707 
708 	if (addr == 0) {
709 		mdb_warn("umem_hash doesn't support global walks\n");
710 		return (WALK_ERR);
711 	}
712 
713 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
714 		mdb_warn("couldn't read cache at addr %p", addr);
715 		return (WALK_ERR);
716 	}
717 
718 	if (!(c.cache_flags & UMF_HASH)) {
719 		mdb_warn("cache %p doesn't have a hash table\n", addr);
720 		return (WALK_DONE);		/* nothing to do */
721 	}
722 
723 	umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
724 	umhw->umhw_cur.bc_next = NULL;
725 	umhw->umhw_pos = 0;
726 
727 	umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
728 	hsize = nelems * sizeof (uintptr_t);
729 	haddr = (uintptr_t)c.cache_hash_table;
730 
731 	umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
732 	if (mdb_vread(hash, hsize, haddr) == -1) {
733 		mdb_warn("failed to read hash table at %p", haddr);
734 		mdb_free(hash, hsize);
735 		mdb_free(umhw, sizeof (umem_hash_walk_t));
736 		return (WALK_ERR);
737 	}
738 
739 	wsp->walk_data = umhw;
740 
741 	return (WALK_NEXT);
742 }
743 
744 int
umem_hash_walk_step(mdb_walk_state_t * wsp)745 umem_hash_walk_step(mdb_walk_state_t *wsp)
746 {
747 	umem_hash_walk_t *umhw = wsp->walk_data;
748 	uintptr_t addr = 0;
749 
750 	if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == 0) {
751 		while (umhw->umhw_pos < umhw->umhw_nelems) {
752 			if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != 0)
753 				break;
754 		}
755 	}
756 	if (addr == 0)
757 		return (WALK_DONE);
758 
759 	if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
760 		mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
761 		return (WALK_ERR);
762 	}
763 
764 	return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
765 }
766 
767 void
umem_hash_walk_fini(mdb_walk_state_t * wsp)768 umem_hash_walk_fini(mdb_walk_state_t *wsp)
769 {
770 	umem_hash_walk_t *umhw = wsp->walk_data;
771 
772 	if (umhw == NULL)
773 		return;
774 
775 	mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
776 	mdb_free(umhw, sizeof (umem_hash_walk_t));
777 }
778 
779 /*
780  * Find the address of the bufctl structure for the address 'buf' in cache
781  * 'cp', which is at address caddr, and place it in *out.
782  */
783 static int
umem_hash_lookup(umem_cache_t * cp,uintptr_t caddr,void * buf,uintptr_t * out)784 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
785 {
786 	uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
787 	umem_bufctl_t *bcp;
788 	umem_bufctl_t bc;
789 
790 	if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
791 		mdb_warn("unable to read hash bucket for %p in cache %p",
792 		    buf, caddr);
793 		return (-1);
794 	}
795 
796 	while (bcp != NULL) {
797 		if (mdb_vread(&bc, sizeof (umem_bufctl_t),
798 		    (uintptr_t)bcp) == -1) {
799 			mdb_warn("unable to read bufctl at %p", bcp);
800 			return (-1);
801 		}
802 		if (bc.bc_addr == buf) {
803 			*out = (uintptr_t)bcp;
804 			return (0);
805 		}
806 		bcp = bc.bc_next;
807 	}
808 
809 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
810 	return (-1);
811 }
812 
813 int
umem_get_magsize(const umem_cache_t * cp)814 umem_get_magsize(const umem_cache_t *cp)
815 {
816 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
817 	GElf_Sym mt_sym;
818 	umem_magtype_t mt;
819 	int res;
820 
821 	/*
822 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
823 	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
824 	 * it is okay to return 0 for them.
825 	 */
826 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
827 	    (cp->cache_flags & UMF_NOMAGAZINE))
828 		return (res);
829 
830 	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
831 		mdb_warn("unable to read 'umem_magtype'");
832 	} else if (addr < mt_sym.st_value ||
833 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
834 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
835 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
836 		    cp->cache_name, addr);
837 		return (0);
838 	}
839 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
840 		mdb_warn("unable to read magtype at %a", addr);
841 		return (0);
842 	}
843 	return (mt.mt_magsize);
844 }
845 
846 /*ARGSUSED*/
847 static int
umem_estimate_slab(uintptr_t addr,const umem_slab_t * sp,size_t * est)848 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
849 {
850 	*est -= (sp->slab_chunks - sp->slab_refcnt);
851 
852 	return (WALK_NEXT);
853 }
854 
855 /*
856  * Returns an upper bound on the number of allocated buffers in a given
857  * cache.
858  */
859 size_t
umem_estimate_allocated(uintptr_t addr,const umem_cache_t * cp)860 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
861 {
862 	int magsize;
863 	size_t cache_est;
864 
865 	cache_est = cp->cache_buftotal;
866 
867 	(void) mdb_pwalk("umem_slab_partial",
868 	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
869 
870 	if ((magsize = umem_get_magsize(cp)) != 0) {
871 		size_t mag_est = cp->cache_full.ml_total * magsize;
872 
873 		if (cache_est >= mag_est) {
874 			cache_est -= mag_est;
875 		} else {
876 			mdb_warn("cache %p's magazine layer holds more buffers "
877 			    "than the slab layer.\n", addr);
878 		}
879 	}
880 	return (cache_est);
881 }
882 
/*
 * Helper for umem_read_magazines(): read the magazine at target address
 * 'ump' into the local buffer 'mp' and append its first 'rounds' entries
 * to maglist[].  Jumps to the enclosing function's 'fail' label if the
 * magazine cannot be read or if maglist[] fills up.
 *
 * The macro relies on these names in the invoking function: mp, ump,
 * magbsize, maglist, magcnt, magmax, i and the label 'fail'.  'rounds'
 * is evaluated once per loop iteration.  The body is wrapped in
 * do { } while (0) so that an invocation followed by a semicolon forms
 * exactly one statement.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
} while (0)
897 
898 static int
umem_read_magazines(umem_cache_t * cp,uintptr_t addr,void *** maglistp,size_t * magcntp,size_t * magmaxp)899 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
900     void ***maglistp, size_t *magcntp, size_t *magmaxp)
901 {
902 	umem_magazine_t *ump, *mp;
903 	void **maglist = NULL;
904 	int i, cpu;
905 	size_t magsize, magmax, magbsize;
906 	size_t magcnt = 0;
907 
908 	/*
909 	 * Read the magtype out of the cache, after verifying the pointer's
910 	 * correctness.
911 	 */
912 	magsize = umem_get_magsize(cp);
913 	if (magsize == 0) {
914 		*maglistp = NULL;
915 		*magcntp = 0;
916 		*magmaxp = 0;
917 		return (0);
918 	}
919 
920 	/*
921 	 * There are several places where we need to go buffer hunting:
922 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
923 	 * and the full magazine list in the depot.
924 	 *
925 	 * For an upper bound on the number of buffers in the magazine
926 	 * layer, we have the number of magazines on the cache_full
927 	 * list plus at most two magazines per CPU (the loaded and the
928 	 * spare).  Toss in 100 magazines as a fudge factor in case this
929 	 * is live (the number "100" comes from the same fudge factor in
930 	 * crash(8)).
931 	 */
932 	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
933 	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
934 
935 	if (magbsize >= PAGESIZE / 2) {
936 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
937 		    addr, magbsize);
938 		return (-1);
939 	}
940 
941 	maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP);
942 	mp = mdb_alloc(magbsize, UM_SLEEP);
943 	if (mp == NULL || maglist == NULL)
944 		goto fail;
945 
946 	/*
947 	 * First up: the magazines in the depot (i.e. on the cache_full list).
948 	 */
949 	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
950 		READMAG_ROUNDS(magsize);
951 		ump = mp->mag_next;
952 
953 		if (ump == cp->cache_full.ml_list)
954 			break; /* cache_full list loop detected */
955 	}
956 
957 	dprintf(("cache_full list done\n"));
958 
959 	/*
960 	 * Now whip through the CPUs, snagging the loaded magazines
961 	 * and full spares.
962 	 */
963 	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
964 		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
965 
966 		dprintf(("reading cpu cache %p\n",
967 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
968 
969 		if (ccp->cc_rounds > 0 &&
970 		    (ump = ccp->cc_loaded) != NULL) {
971 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
972 			READMAG_ROUNDS(ccp->cc_rounds);
973 		}
974 
975 		if (ccp->cc_prounds > 0 &&
976 		    (ump = ccp->cc_ploaded) != NULL) {
977 			dprintf(("reading %d previously loaded rounds\n",
978 			    ccp->cc_prounds));
979 			READMAG_ROUNDS(ccp->cc_prounds);
980 		}
981 	}
982 
983 	dprintf(("magazine layer: %d buffers\n", magcnt));
984 
985 	mdb_free(mp, magbsize);
986 
987 	*maglistp = maglist;
988 	*magcntp = magcnt;
989 	*magmaxp = magmax;
990 
991 	return (0);
992 
993 fail:
994 	if (mp)
995 		mdb_free(mp, magbsize);
996 	if (maglist)
997 		mdb_free(maglist, magmax * sizeof (void *));
998 
999 	return (-1);
1000 }
1001 
/*
 * Accumulator used by umem_read_ptc(): a growable array of buffer
 * addresses filled in by umem_read_ptc_walk_buf().
 */
typedef struct umem_read_ptc_walk {
	void **urpw_buf;	/* array of buffer addresses */
	size_t urpw_cnt;	/* number of entries in use */
	size_t urpw_max;	/* number of entries allocated */
} umem_read_ptc_walk_t;
1007 
1008 /*ARGSUSED*/
1009 static int
umem_read_ptc_walk_buf(uintptr_t addr,const void * ignored,umem_read_ptc_walk_t * urpw)1010 umem_read_ptc_walk_buf(uintptr_t addr,
1011     const void *ignored, umem_read_ptc_walk_t *urpw)
1012 {
1013 	if (urpw->urpw_cnt == urpw->urpw_max) {
1014 		size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1;
1015 		void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP);
1016 
1017 		if (nmax > 1) {
1018 			size_t osize = urpw->urpw_max * sizeof (void *);
1019 			bcopy(urpw->urpw_buf, new, osize);
1020 			mdb_free(urpw->urpw_buf, osize);
1021 		}
1022 
1023 		urpw->urpw_buf = new;
1024 		urpw->urpw_max = nmax;
1025 	}
1026 
1027 	urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr;
1028 
1029 	return (WALK_NEXT);
1030 }
1031 
1032 static int
umem_read_ptc(umem_cache_t * cp,void *** buflistp,size_t * bufcntp,size_t * bufmaxp)1033 umem_read_ptc(umem_cache_t *cp,
1034     void ***buflistp, size_t *bufcntp, size_t *bufmaxp)
1035 {
1036 	umem_read_ptc_walk_t urpw;
1037 	char walk[60];
1038 	int rval;
1039 
1040 	if (!(cp->cache_flags & UMF_PTC))
1041 		return (0);
1042 
1043 	(void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d",
1044 	    cp->cache_bufsize);
1045 
1046 	urpw.urpw_buf = *buflistp;
1047 	urpw.urpw_cnt = *bufcntp;
1048 	urpw.urpw_max = *bufmaxp;
1049 
1050 	if ((rval = mdb_walk(walk,
1051 	    (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) {
1052 		mdb_warn("couldn't walk %s", walk);
1053 	}
1054 
1055 	*buflistp = urpw.urpw_buf;
1056 	*bufcntp = urpw.urpw_cnt;
1057 	*bufmaxp = urpw.urpw_max;
1058 
1059 	return (rval);
1060 }
1061 
1062 static int
umem_walk_callback(mdb_walk_state_t * wsp,uintptr_t buf)1063 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1064 {
1065 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1066 }
1067 
1068 static int
bufctl_walk_callback(umem_cache_t * cp,mdb_walk_state_t * wsp,uintptr_t buf)1069 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1070 {
1071 	umem_bufctl_audit_t *b;
1072 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1073 
1074 	/*
1075 	 * if UMF_AUDIT is not set, we know that we're looking at a
1076 	 * umem_bufctl_t.
1077 	 */
1078 	if (!(cp->cache_flags & UMF_AUDIT) ||
1079 	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
1080 		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
1081 		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
1082 			mdb_warn("unable to read bufctl at %p", buf);
1083 			return (WALK_ERR);
1084 		}
1085 	}
1086 
1087 	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
1088 }
1089 
/*
 * Per-walk state for the umem buffer walkers; umem_walk_init_common()
 * works with a pointer to one of these.  NOTE(review): the code that
 * fills in and consumes the fields lies outside this view, so the field
 * notes below that go beyond the original comments are assumptions to be
 * confirmed.
 */
typedef struct umem_walk {
	int umw_type;		/* presumably the UM_* walk-type bits */

	uintptr_t umw_addr;		/* cache address */
	umem_cache_t *umw_cp;	/* presumably a local copy of the cache */
	size_t umw_csize;	/* presumably the size of that copy */

	/*
	 * magazine layer
	 */
	void **umw_maglist;
	size_t umw_max;
	size_t umw_count;
	size_t umw_pos;

	/*
	 * slab layer
	 */
	char *umw_valid;	/* to keep track of freed buffers */
	char *umw_ubase;	/* buffer for slab data */
} umem_walk_t;
1111 
1112 static int
umem_walk_init_common(mdb_walk_state_t * wsp,int type)1113 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
1114 {
1115 	umem_walk_t *umw;
1116 	int csize;
1117 	umem_cache_t *cp;
1118 	size_t vm_quantum;
1119 
1120 	size_t magmax, magcnt;
1121 	void **maglist = NULL;
1122 	uint_t chunksize = 1, slabsize = 1;
1123 	int status = WALK_ERR;
1124 	uintptr_t addr = wsp->walk_addr;
1125 	const char *layered;
1126 
1127 	type &= ~UM_HASH;
1128 
1129 	if (addr == 0) {
1130 		mdb_warn("umem walk doesn't support global walks\n");
1131 		return (WALK_ERR);
1132 	}
1133 
1134 	dprintf(("walking %p\n", addr));
1135 
1136 	/*
1137 	 * The number of "cpus" determines how large the cache is.
1138 	 */
1139 	csize = UMEM_CACHE_SIZE(umem_max_ncpus);
1140 	cp = mdb_alloc(csize, UM_SLEEP);
1141 
1142 	if (mdb_vread(cp, csize, addr) == -1) {
1143 		mdb_warn("couldn't read cache at addr %p", addr);
1144 		goto out2;
1145 	}
1146 
1147 	/*
1148 	 * It's easy for someone to hand us an invalid cache address.
1149 	 * Unfortunately, it is hard for this walker to survive an
1150 	 * invalid cache cleanly.  So we make sure that:
1151 	 *
1152 	 *	1. the vmem arena for the cache is readable,
1153 	 *	2. the vmem arena's quantum is a power of 2,
1154 	 *	3. our slabsize is a multiple of the quantum, and
1155 	 *	4. our chunksize is >0 and less than our slabsize.
1156 	 */
1157 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1158 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1159 	    vm_quantum == 0 ||
1160 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1161 	    cp->cache_slabsize < vm_quantum ||
1162 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1163 	    cp->cache_chunksize == 0 ||
1164 	    cp->cache_chunksize > cp->cache_slabsize) {
1165 		mdb_warn("%p is not a valid umem_cache_t\n", addr);
1166 		goto out2;
1167 	}
1168 
1169 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1170 
1171 	if (cp->cache_buftotal == 0) {
1172 		mdb_free(cp, csize);
1173 		return (WALK_DONE);
1174 	}
1175 
1176 	/*
1177 	 * If they ask for bufctls, but it's a small-slab cache,
1178 	 * there is nothing to report.
1179 	 */
1180 	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1181 		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1182 		    cp->cache_flags));
1183 		mdb_free(cp, csize);
1184 		return (WALK_DONE);
1185 	}
1186 
1187 	/*
1188 	 * Read in the contents of the magazine layer
1189 	 */
1190 	if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0)
1191 		goto out2;
1192 
1193 	/*
1194 	 * Read in the contents of the per-thread caches, if any
1195 	 */
1196 	if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0)
1197 		goto out2;
1198 
1199 	/*
1200 	 * We have all of the buffers from the magazines and from the
1201 	 * per-thread cache (if any);  if we are walking allocated buffers,
1202 	 * sort them so we can bsearch them later.
1203 	 */
1204 	if (type & UM_ALLOCATED)
1205 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1206 
1207 	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1208 
1209 	umw->umw_type = type;
1210 	umw->umw_addr = addr;
1211 	umw->umw_cp = cp;
1212 	umw->umw_csize = csize;
1213 	umw->umw_maglist = maglist;
1214 	umw->umw_max = magmax;
1215 	umw->umw_count = magcnt;
1216 	umw->umw_pos = 0;
1217 
1218 	/*
1219 	 * When walking allocated buffers in a UMF_HASH cache, we walk the
1220 	 * hash table instead of the slab layer.
1221 	 */
1222 	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1223 		layered = "umem_hash";
1224 
1225 		umw->umw_type |= UM_HASH;
1226 	} else {
1227 		/*
1228 		 * If we are walking freed buffers, we only need the
1229 		 * magazine layer plus the partially allocated slabs.
1230 		 * To walk allocated buffers, we need all of the slabs.
1231 		 */
1232 		if (type & UM_ALLOCATED)
1233 			layered = "umem_slab";
1234 		else
1235 			layered = "umem_slab_partial";
1236 
1237 		/*
1238 		 * for small-slab caches, we read in the entire slab.  For
1239 		 * freed buffers, we can just walk the freelist.  For
1240 		 * allocated buffers, we use a 'valid' array to track
1241 		 * the freed buffers.
1242 		 */
1243 		if (!(cp->cache_flags & UMF_HASH)) {
1244 			chunksize = cp->cache_chunksize;
1245 			slabsize = cp->cache_slabsize;
1246 
1247 			umw->umw_ubase = mdb_alloc(slabsize +
1248 			    sizeof (umem_bufctl_t), UM_SLEEP);
1249 
1250 			if (type & UM_ALLOCATED)
1251 				umw->umw_valid =
1252 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1253 		}
1254 	}
1255 
1256 	status = WALK_NEXT;
1257 
1258 	if (mdb_layered_walk(layered, wsp) == -1) {
1259 		mdb_warn("unable to start layered '%s' walk", layered);
1260 		status = WALK_ERR;
1261 	}
1262 
1263 out1:
1264 	if (status == WALK_ERR) {
1265 		if (umw->umw_valid)
1266 			mdb_free(umw->umw_valid, slabsize / chunksize);
1267 
1268 		if (umw->umw_ubase)
1269 			mdb_free(umw->umw_ubase, slabsize +
1270 			    sizeof (umem_bufctl_t));
1271 
1272 		if (umw->umw_maglist)
1273 			mdb_free(umw->umw_maglist, umw->umw_max *
1274 			    sizeof (uintptr_t));
1275 
1276 		mdb_free(umw, sizeof (umem_walk_t));
1277 		wsp->walk_data = NULL;
1278 	}
1279 
1280 out2:
1281 	if (status == WALK_ERR)
1282 		mdb_free(cp, csize);
1283 
1284 	return (status);
1285 }
1286 
1287 int
umem_walk_step(mdb_walk_state_t * wsp)1288 umem_walk_step(mdb_walk_state_t *wsp)
1289 {
1290 	umem_walk_t *umw = wsp->walk_data;
1291 	int type = umw->umw_type;
1292 	umem_cache_t *cp = umw->umw_cp;
1293 
1294 	void **maglist = umw->umw_maglist;
1295 	int magcnt = umw->umw_count;
1296 
1297 	uintptr_t chunksize, slabsize;
1298 	uintptr_t addr;
1299 	const umem_slab_t *sp;
1300 	const umem_bufctl_t *bcp;
1301 	umem_bufctl_t bc;
1302 
1303 	int chunks;
1304 	char *kbase;
1305 	void *buf;
1306 	int i, ret;
1307 
1308 	char *valid, *ubase;
1309 
1310 	/*
1311 	 * first, handle the 'umem_hash' layered walk case
1312 	 */
1313 	if (type & UM_HASH) {
1314 		/*
1315 		 * We have a buffer which has been allocated out of the
1316 		 * global layer. We need to make sure that it's not
1317 		 * actually sitting in a magazine before we report it as
1318 		 * an allocated buffer.
1319 		 */
1320 		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1321 
1322 		if (magcnt > 0 &&
1323 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1324 		    addrcmp) != NULL)
1325 			return (WALK_NEXT);
1326 
1327 		if (type & UM_BUFCTL)
1328 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1329 
1330 		return (umem_walk_callback(wsp, (uintptr_t)buf));
1331 	}
1332 
1333 	ret = WALK_NEXT;
1334 
1335 	addr = umw->umw_addr;
1336 
1337 	/*
1338 	 * If we're walking freed buffers, report everything in the
1339 	 * magazine layer before processing the first slab.
1340 	 */
1341 	if ((type & UM_FREE) && magcnt != 0) {
1342 		umw->umw_count = 0;		/* only do this once */
1343 		for (i = 0; i < magcnt; i++) {
1344 			buf = maglist[i];
1345 
1346 			if (type & UM_BUFCTL) {
1347 				uintptr_t out;
1348 
1349 				if (cp->cache_flags & UMF_BUFTAG) {
1350 					umem_buftag_t *btp;
1351 					umem_buftag_t tag;
1352 
1353 					/* LINTED - alignment */
1354 					btp = UMEM_BUFTAG(cp, buf);
1355 					if (mdb_vread(&tag, sizeof (tag),
1356 					    (uintptr_t)btp) == -1) {
1357 						mdb_warn("reading buftag for "
1358 						    "%p at %p", buf, btp);
1359 						continue;
1360 					}
1361 					out = (uintptr_t)tag.bt_bufctl;
1362 				} else {
1363 					if (umem_hash_lookup(cp, addr, buf,
1364 					    &out) == -1)
1365 						continue;
1366 				}
1367 				ret = bufctl_walk_callback(cp, wsp, out);
1368 			} else {
1369 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1370 			}
1371 
1372 			if (ret != WALK_NEXT)
1373 				return (ret);
1374 		}
1375 	}
1376 
1377 	/*
1378 	 * Handle the buffers in the current slab
1379 	 */
1380 	chunksize = cp->cache_chunksize;
1381 	slabsize = cp->cache_slabsize;
1382 
1383 	sp = wsp->walk_layer;
1384 	chunks = sp->slab_chunks;
1385 	kbase = sp->slab_base;
1386 
1387 	dprintf(("kbase is %p\n", kbase));
1388 
1389 	if (!(cp->cache_flags & UMF_HASH)) {
1390 		valid = umw->umw_valid;
1391 		ubase = umw->umw_ubase;
1392 
1393 		if (mdb_vread(ubase, chunks * chunksize,
1394 		    (uintptr_t)kbase) == -1) {
1395 			mdb_warn("failed to read slab contents at %p", kbase);
1396 			return (WALK_ERR);
1397 		}
1398 
1399 		/*
1400 		 * Set up the valid map as fully allocated -- we'll punch
1401 		 * out the freelist.
1402 		 */
1403 		if (type & UM_ALLOCATED)
1404 			(void) memset(valid, 1, chunks);
1405 	} else {
1406 		valid = NULL;
1407 		ubase = NULL;
1408 	}
1409 
1410 	/*
1411 	 * walk the slab's freelist
1412 	 */
1413 	bcp = sp->slab_head;
1414 
1415 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1416 
1417 	/*
1418 	 * since we could be in the middle of allocating a buffer,
1419 	 * our refcnt could be one higher than it aught.  So we
1420 	 * check one further on the freelist than the count allows.
1421 	 */
1422 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1423 		uint_t ndx;
1424 
1425 		dprintf(("bcp is %p\n", bcp));
1426 
1427 		if (bcp == NULL) {
1428 			if (i == chunks)
1429 				break;
1430 			mdb_warn(
1431 			    "slab %p in cache %p freelist too short by %d\n",
1432 			    sp, addr, chunks - i);
1433 			break;
1434 		}
1435 
1436 		if (cp->cache_flags & UMF_HASH) {
1437 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1438 				mdb_warn("failed to read bufctl ptr at %p",
1439 				    bcp);
1440 				break;
1441 			}
1442 			buf = bc.bc_addr;
1443 		} else {
1444 			/*
1445 			 * Otherwise the buffer is (or should be) in the slab
1446 			 * that we've read in; determine its offset in the
1447 			 * slab, validate that it's not corrupt, and add to
1448 			 * our base address to find the umem_bufctl_t.  (Note
1449 			 * that we don't need to add the size of the bufctl
1450 			 * to our offset calculation because of the slop that's
1451 			 * allocated for the buffer at ubase.)
1452 			 */
1453 			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1454 
1455 			if (offs > chunks * chunksize) {
1456 				mdb_warn("found corrupt bufctl ptr %p"
1457 				    " in slab %p in cache %p\n", bcp,
1458 				    wsp->walk_addr, addr);
1459 				break;
1460 			}
1461 
1462 			bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1463 			buf = UMEM_BUF(cp, bcp);
1464 		}
1465 
1466 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1467 
1468 		if (ndx > slabsize / cp->cache_bufsize) {
1469 			/*
1470 			 * This is very wrong; we have managed to find
1471 			 * a buffer in the slab which shouldn't
1472 			 * actually be here.  Emit a warning, and
1473 			 * try to continue.
1474 			 */
1475 			mdb_warn("buf %p is out of range for "
1476 			    "slab %p, cache %p\n", buf, sp, addr);
1477 		} else if (type & UM_ALLOCATED) {
1478 			/*
1479 			 * we have found a buffer on the slab's freelist;
1480 			 * clear its entry
1481 			 */
1482 			valid[ndx] = 0;
1483 		} else {
1484 			/*
1485 			 * Report this freed buffer
1486 			 */
1487 			if (type & UM_BUFCTL) {
1488 				ret = bufctl_walk_callback(cp, wsp,
1489 				    (uintptr_t)bcp);
1490 			} else {
1491 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1492 			}
1493 			if (ret != WALK_NEXT)
1494 				return (ret);
1495 		}
1496 
1497 		bcp = bc.bc_next;
1498 	}
1499 
1500 	if (bcp != NULL) {
1501 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1502 		    sp, addr, bcp));
1503 	}
1504 
1505 	/*
1506 	 * If we are walking freed buffers, the loop above handled reporting
1507 	 * them.
1508 	 */
1509 	if (type & UM_FREE)
1510 		return (WALK_NEXT);
1511 
1512 	if (type & UM_BUFCTL) {
1513 		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1514 		    "cache %p\n", addr);
1515 		return (WALK_ERR);
1516 	}
1517 
1518 	/*
1519 	 * Report allocated buffers, skipping buffers in the magazine layer.
1520 	 * We only get this far for small-slab caches.
1521 	 */
1522 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1523 		buf = (char *)kbase + i * chunksize;
1524 
1525 		if (!valid[i])
1526 			continue;		/* on slab freelist */
1527 
1528 		if (magcnt > 0 &&
1529 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1530 		    addrcmp) != NULL)
1531 			continue;		/* in magazine layer */
1532 
1533 		ret = umem_walk_callback(wsp, (uintptr_t)buf);
1534 	}
1535 	return (ret);
1536 }
1537 
1538 void
umem_walk_fini(mdb_walk_state_t * wsp)1539 umem_walk_fini(mdb_walk_state_t *wsp)
1540 {
1541 	umem_walk_t *umw = wsp->walk_data;
1542 	uintptr_t chunksize;
1543 	uintptr_t slabsize;
1544 
1545 	if (umw == NULL)
1546 		return;
1547 
1548 	if (umw->umw_maglist != NULL)
1549 		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1550 
1551 	chunksize = umw->umw_cp->cache_chunksize;
1552 	slabsize = umw->umw_cp->cache_slabsize;
1553 
1554 	if (umw->umw_valid != NULL)
1555 		mdb_free(umw->umw_valid, slabsize / chunksize);
1556 	if (umw->umw_ubase != NULL)
1557 		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1558 
1559 	mdb_free(umw->umw_cp, umw->umw_csize);
1560 	mdb_free(umw, sizeof (umem_walk_t));
1561 }
1562 
1563 /*ARGSUSED*/
1564 static int
umem_walk_all(uintptr_t addr,const umem_cache_t * c,mdb_walk_state_t * wsp)1565 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1566 {
1567 	/*
1568 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1569 	 * memory in other caches.  This can be a little confusing, so we
1570 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1571 	 * that "::walk umem" and "::walk freemem" yield disjoint output).
1572 	 */
1573 	if (c->cache_cflags & UMC_NOTOUCH)
1574 		return (WALK_NEXT);
1575 
1576 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1577 	    wsp->walk_cbdata, addr) == -1)
1578 		return (WALK_DONE);
1579 
1580 	return (WALK_NEXT);
1581 }
1582 
/*
 * Perform a global walk: stash the per-cache walker's name in walk_data and
 * apply umem_walk_all() to every cache found by the "umem_cache" walker.
 *
 * NOTE: this macro returns from the *enclosing* function on every path
 * (WALK_ERR if the cache walk fails, WALK_DONE otherwise), so it may only
 * be used inside a walker init function returning int.
 */
#define	UMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, \
	    (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1589 
1590 int
umem_walk_init(mdb_walk_state_t * wsp)1591 umem_walk_init(mdb_walk_state_t *wsp)
1592 {
1593 	if (wsp->walk_arg != NULL)
1594 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1595 
1596 	if (wsp->walk_addr == 0)
1597 		UMEM_WALK_ALL("umem", wsp);
1598 	return (umem_walk_init_common(wsp, UM_ALLOCATED));
1599 }
1600 
1601 int
bufctl_walk_init(mdb_walk_state_t * wsp)1602 bufctl_walk_init(mdb_walk_state_t *wsp)
1603 {
1604 	if (wsp->walk_addr == 0)
1605 		UMEM_WALK_ALL("bufctl", wsp);
1606 	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1607 }
1608 
1609 int
freemem_walk_init(mdb_walk_state_t * wsp)1610 freemem_walk_init(mdb_walk_state_t *wsp)
1611 {
1612 	if (wsp->walk_addr == 0)
1613 		UMEM_WALK_ALL("freemem", wsp);
1614 	return (umem_walk_init_common(wsp, UM_FREE));
1615 }
1616 
1617 int
freectl_walk_init(mdb_walk_state_t * wsp)1618 freectl_walk_init(mdb_walk_state_t *wsp)
1619 {
1620 	if (wsp->walk_addr == 0)
1621 		UMEM_WALK_ALL("freectl", wsp);
1622 	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1623 }
1624 
/*
 * State for the bufctl_history walker, which follows a bufctl's bc_lastlog
 * chain.  The cache and slab of the starting bufctl are remembered so that
 * the step function can tell when a log entry no longer describes the same
 * buffer.
 */
typedef struct bufctl_history_walk {
	void		*bhw_next;	/* target address of next bufctl */
	umem_cache_t	*bhw_cache;	/* bc_cache of the starting bufctl */
	umem_slab_t	*bhw_slab;	/* bc_slab of the starting bufctl */
	hrtime_t	bhw_timestamp;	/* last reported timestamp; 0 if none */
} bufctl_history_walk_t;
1631 
1632 int
bufctl_history_walk_init(mdb_walk_state_t * wsp)1633 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1634 {
1635 	bufctl_history_walk_t *bhw;
1636 	umem_bufctl_audit_t bc;
1637 	umem_bufctl_audit_t bcn;
1638 
1639 	if (wsp->walk_addr == 0) {
1640 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1641 		return (WALK_ERR);
1642 	}
1643 
1644 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1645 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1646 		return (WALK_ERR);
1647 	}
1648 
1649 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1650 	bhw->bhw_timestamp = 0;
1651 	bhw->bhw_cache = bc.bc_cache;
1652 	bhw->bhw_slab = bc.bc_slab;
1653 
1654 	/*
1655 	 * sometimes the first log entry matches the base bufctl;  in that
1656 	 * case, skip the base bufctl.
1657 	 */
1658 	if (bc.bc_lastlog != NULL &&
1659 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1660 	    bc.bc_addr == bcn.bc_addr &&
1661 	    bc.bc_cache == bcn.bc_cache &&
1662 	    bc.bc_slab == bcn.bc_slab &&
1663 	    bc.bc_timestamp == bcn.bc_timestamp &&
1664 	    bc.bc_thread == bcn.bc_thread)
1665 		bhw->bhw_next = bc.bc_lastlog;
1666 	else
1667 		bhw->bhw_next = (void *)wsp->walk_addr;
1668 
1669 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1670 	wsp->walk_data = bhw;
1671 
1672 	return (WALK_NEXT);
1673 }
1674 
1675 int
bufctl_history_walk_step(mdb_walk_state_t * wsp)1676 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1677 {
1678 	bufctl_history_walk_t *bhw = wsp->walk_data;
1679 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1680 	uintptr_t baseaddr = wsp->walk_addr;
1681 	umem_bufctl_audit_t *b;
1682 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1683 
1684 	if (addr == 0)
1685 		return (WALK_DONE);
1686 
1687 	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1688 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1689 		return (WALK_ERR);
1690 	}
1691 
1692 	/*
1693 	 * The bufctl is only valid if the address, cache, and slab are
1694 	 * correct.  We also check that the timestamp is decreasing, to
1695 	 * prevent infinite loops.
1696 	 */
1697 	if ((uintptr_t)b->bc_addr != baseaddr ||
1698 	    b->bc_cache != bhw->bhw_cache ||
1699 	    b->bc_slab != bhw->bhw_slab ||
1700 	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1701 		return (WALK_DONE);
1702 
1703 	bhw->bhw_next = b->bc_lastlog;
1704 	bhw->bhw_timestamp = b->bc_timestamp;
1705 
1706 	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1707 }
1708 
1709 void
bufctl_history_walk_fini(mdb_walk_state_t * wsp)1710 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1711 {
1712 	bufctl_history_walk_t *bhw = wsp->walk_data;
1713 
1714 	mdb_free(bhw, sizeof (*bhw));
1715 }
1716 
/*
 * State for the umem_log walker: a local copy of an entire transaction log
 * plus an array of pointers into that copy, sorted with bufctlcmp().
 */
typedef struct umem_log_walk {
	umem_bufctl_audit_t *ulw_base;		/* local copy of the log */
	umem_bufctl_audit_t **ulw_sorted;	/* sorted entry pointers */
	umem_log_header_t ulw_lh;		/* local copy of log header */
	size_t ulw_size;			/* bytes allocated at ulw_base */
	size_t ulw_maxndx;			/* entries in ulw_sorted */
	size_t ulw_ndx;				/* next entry to report */
} umem_log_walk_t;
1725 
1726 int
umem_log_walk_init(mdb_walk_state_t * wsp)1727 umem_log_walk_init(mdb_walk_state_t *wsp)
1728 {
1729 	uintptr_t lp = wsp->walk_addr;
1730 	umem_log_walk_t *ulw;
1731 	umem_log_header_t *lhp;
1732 	int maxndx, i, j, k;
1733 
1734 	/*
1735 	 * By default (global walk), walk the umem_transaction_log.  Otherwise
1736 	 * read the log whose umem_log_header_t is stored at walk_addr.
1737 	 */
1738 	if (lp == 0 && umem_readvar(&lp, "umem_transaction_log") == -1) {
1739 		mdb_warn("failed to read 'umem_transaction_log'");
1740 		return (WALK_ERR);
1741 	}
1742 
1743 	if (lp == 0) {
1744 		mdb_warn("log is disabled\n");
1745 		return (WALK_ERR);
1746 	}
1747 
1748 	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1749 	lhp = &ulw->ulw_lh;
1750 
1751 	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1752 		mdb_warn("failed to read log header at %p", lp);
1753 		mdb_free(ulw, sizeof (umem_log_walk_t));
1754 		return (WALK_ERR);
1755 	}
1756 
1757 	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1758 	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1759 	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1760 
1761 	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1762 	    (uintptr_t)lhp->lh_base) == -1) {
1763 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1764 		mdb_free(ulw->ulw_base, ulw->ulw_size);
1765 		mdb_free(ulw, sizeof (umem_log_walk_t));
1766 		return (WALK_ERR);
1767 	}
1768 
1769 	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1770 	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1771 
1772 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1773 		caddr_t chunk = (caddr_t)
1774 		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1775 
1776 		for (j = 0; j < maxndx; j++) {
1777 			/* LINTED align */
1778 			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1779 			chunk += UMEM_BUFCTL_AUDIT_SIZE;
1780 		}
1781 	}
1782 
1783 	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1784 	    (int(*)(const void *, const void *))bufctlcmp);
1785 
1786 	ulw->ulw_maxndx = k;
1787 	wsp->walk_data = ulw;
1788 
1789 	return (WALK_NEXT);
1790 }
1791 
1792 int
umem_log_walk_step(mdb_walk_state_t * wsp)1793 umem_log_walk_step(mdb_walk_state_t *wsp)
1794 {
1795 	umem_log_walk_t *ulw = wsp->walk_data;
1796 	umem_bufctl_audit_t *bcp;
1797 
1798 	if (ulw->ulw_ndx == ulw->ulw_maxndx)
1799 		return (WALK_DONE);
1800 
1801 	bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1802 
1803 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1804 	    (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1805 }
1806 
1807 void
umem_log_walk_fini(mdb_walk_state_t * wsp)1808 umem_log_walk_fini(mdb_walk_state_t *wsp)
1809 {
1810 	umem_log_walk_t *ulw = wsp->walk_data;
1811 
1812 	mdb_free(ulw->ulw_base, ulw->ulw_size);
1813 	mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1814 	    sizeof (umem_bufctl_audit_t *));
1815 	mdb_free(ulw, sizeof (umem_log_walk_t));
1816 }
1817 
/*
 * One bufctl collected by the allocdby/freedby walkers.
 */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;	/* target address of the bufctl */
	hrtime_t abb_ts;	/* its bc_timestamp, used as the sort key */
} allocdby_bufctl_t;
1822 
/*
 * State for the allocdby/freedby walkers: the thread being searched for
 * and a growable array of the matching bufctls.
 */
typedef struct allocdby_walk {
	const char *abw_walk;		/* per-cache walker name to run */
	uintptr_t abw_thread;		/* thread whose bufctls we collect */
	size_t abw_nbufs;		/* entries in use in abw_buf */
	size_t abw_size;		/* capacity of abw_buf (entries) */
	allocdby_bufctl_t *abw_buf;	/* collected bufctls */
	size_t abw_ndx;			/* next entry for the step to report */
} allocdby_walk_t;
1831 
1832 int
allocdby_walk_bufctl(uintptr_t addr,const umem_bufctl_audit_t * bcp,allocdby_walk_t * abw)1833 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1834     allocdby_walk_t *abw)
1835 {
1836 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1837 		return (WALK_NEXT);
1838 
1839 	if (abw->abw_nbufs == abw->abw_size) {
1840 		allocdby_bufctl_t *buf;
1841 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1842 
1843 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1844 
1845 		bcopy(abw->abw_buf, buf, oldsize);
1846 		mdb_free(abw->abw_buf, oldsize);
1847 
1848 		abw->abw_size <<= 1;
1849 		abw->abw_buf = buf;
1850 	}
1851 
1852 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1853 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1854 	abw->abw_nbufs++;
1855 
1856 	return (WALK_NEXT);
1857 }
1858 
1859 /*ARGSUSED*/
1860 int
allocdby_walk_cache(uintptr_t addr,const umem_cache_t * c,allocdby_walk_t * abw)1861 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1862 {
1863 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1864 	    abw, addr) == -1) {
1865 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1866 		return (WALK_DONE);
1867 	}
1868 
1869 	return (WALK_NEXT);
1870 }
1871 
1872 static int
allocdby_cmp(const allocdby_bufctl_t * lhs,const allocdby_bufctl_t * rhs)1873 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1874 {
1875 	if (lhs->abb_ts < rhs->abb_ts)
1876 		return (1);
1877 	if (lhs->abb_ts > rhs->abb_ts)
1878 		return (-1);
1879 	return (0);
1880 }
1881 
1882 static int
allocdby_walk_init_common(mdb_walk_state_t * wsp,const char * walk)1883 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1884 {
1885 	allocdby_walk_t *abw;
1886 
1887 	if (wsp->walk_addr == 0) {
1888 		mdb_warn("allocdby walk doesn't support global walks\n");
1889 		return (WALK_ERR);
1890 	}
1891 
1892 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1893 
1894 	abw->abw_thread = wsp->walk_addr;
1895 	abw->abw_walk = walk;
1896 	abw->abw_size = 128;	/* something reasonable */
1897 	abw->abw_buf =
1898 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1899 
1900 	wsp->walk_data = abw;
1901 
1902 	if (mdb_walk("umem_cache",
1903 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1904 		mdb_warn("couldn't walk umem_cache");
1905 		allocdby_walk_fini(wsp);
1906 		return (WALK_ERR);
1907 	}
1908 
1909 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1910 	    (int(*)(const void *, const void *))allocdby_cmp);
1911 
1912 	return (WALK_NEXT);
1913 }
1914 
1915 int
allocdby_walk_init(mdb_walk_state_t * wsp)1916 allocdby_walk_init(mdb_walk_state_t *wsp)
1917 {
1918 	return (allocdby_walk_init_common(wsp, "bufctl"));
1919 }
1920 
1921 int
freedby_walk_init(mdb_walk_state_t * wsp)1922 freedby_walk_init(mdb_walk_state_t *wsp)
1923 {
1924 	return (allocdby_walk_init_common(wsp, "freectl"));
1925 }
1926 
1927 int
allocdby_walk_step(mdb_walk_state_t * wsp)1928 allocdby_walk_step(mdb_walk_state_t *wsp)
1929 {
1930 	allocdby_walk_t *abw = wsp->walk_data;
1931 	uintptr_t addr;
1932 	umem_bufctl_audit_t *bcp;
1933 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1934 
1935 	if (abw->abw_ndx == abw->abw_nbufs)
1936 		return (WALK_DONE);
1937 
1938 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1939 
1940 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1941 		mdb_warn("couldn't read bufctl at %p", addr);
1942 		return (WALK_DONE);
1943 	}
1944 
1945 	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1946 }
1947 
1948 void
allocdby_walk_fini(mdb_walk_state_t * wsp)1949 allocdby_walk_fini(mdb_walk_state_t *wsp)
1950 {
1951 	allocdby_walk_t *abw = wsp->walk_data;
1952 
1953 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1954 	mdb_free(abw, sizeof (allocdby_walk_t));
1955 }
1956 
1957 /*ARGSUSED*/
1958 int
allocdby_walk(uintptr_t addr,const umem_bufctl_audit_t * bcp,void * ignored)1959 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1960 {
1961 	char c[MDB_SYM_NAMLEN];
1962 	GElf_Sym sym;
1963 	int i;
1964 
1965 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1966 	for (i = 0; i < bcp->bc_depth; i++) {
1967 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1968 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1969 			continue;
1970 		if (is_umem_sym(c, "umem_"))
1971 			continue;
1972 		mdb_printf("%s+0x%lx",
1973 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1974 		break;
1975 	}
1976 	mdb_printf("\n");
1977 
1978 	return (WALK_NEXT);
1979 }
1980 
1981 static int
allocdby_common(uintptr_t addr,uint_t flags,const char * w)1982 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1983 {
1984 	if (!(flags & DCMD_ADDRSPEC))
1985 		return (DCMD_USAGE);
1986 
1987 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1988 
1989 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1990 		mdb_warn("can't walk '%s' for %p", w, addr);
1991 		return (DCMD_ERR);
1992 	}
1993 
1994 	return (DCMD_OK);
1995 }
1996 
1997 /*ARGSUSED*/
1998 int
allocdby(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)1999 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2000 {
2001 	return (allocdby_common(addr, flags, "allocdby"));
2002 }
2003 
2004 /*ARGSUSED*/
2005 int
freedby(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2006 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2007 {
2008 	return (allocdby_common(addr, flags, "freedby"));
2009 }
2010 
/*
 * Context threaded through the ::whatis walk callbacks in this file.
 */
typedef struct whatis_info {
	mdb_whatis_t *wi_w;		/* the whatis handle being serviced */
	const umem_cache_t *wi_cache;	/* cache currently being searched */
	const vmem_t *wi_vmem;		/* vmem arena currently being searched */
	vmem_t *wi_msb_arena;		/* NOTE(review): not used in this section */
	size_t wi_slab_size;		/* size used for slab overlap checks */
	int wi_slab_found;		/* count of overlapping slabs found */
	uint_t wi_freemem;		/* FALSE: report "allocated"; else "freed" */
} whatis_info_t;
2020 
2021 /* call one of our dcmd functions with "-v" and the provided address */
2022 static void
whatis_call_printer(mdb_dcmd_f * dcmd,uintptr_t addr)2023 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2024 {
2025 	mdb_arg_t a;
2026 	a.a_type = MDB_TYPE_STRING;
2027 	a.a_un.a_str = "-v";
2028 
2029 	mdb_printf(":\n");
2030 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2031 }
2032 
2033 static void
whatis_print_umem(whatis_info_t * wi,uintptr_t maddr,uintptr_t addr,uintptr_t baddr)2034 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2035     uintptr_t baddr)
2036 {
2037 	mdb_whatis_t *w = wi->wi_w;
2038 	const umem_cache_t *cp = wi->wi_cache;
2039 	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2040 
2041 	int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
2042 
2043 	mdb_whatis_report_object(w, maddr, addr, "");
2044 
2045 	if (baddr != 0 && !call_printer)
2046 		mdb_printf("bufctl %p ", baddr);
2047 
2048 	mdb_printf("%s from %s",
2049 	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2050 
2051 	if (call_printer && baddr != 0) {
2052 		whatis_call_printer(bufctl, baddr);
2053 		return;
2054 	}
2055 	mdb_printf("\n");
2056 }
2057 
2058 /*ARGSUSED*/
2059 static int
whatis_walk_umem(uintptr_t addr,void * ignored,whatis_info_t * wi)2060 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2061 {
2062 	mdb_whatis_t *w = wi->wi_w;
2063 
2064 	uintptr_t cur;
2065 	size_t size = wi->wi_cache->cache_bufsize;
2066 
2067 	while (mdb_whatis_match(w, addr, size, &cur))
2068 		whatis_print_umem(wi, cur, addr, 0);
2069 
2070 	return (WHATIS_WALKRET(w));
2071 }
2072 
2073 /*ARGSUSED*/
2074 static int
whatis_walk_bufctl(uintptr_t baddr,const umem_bufctl_t * bcp,whatis_info_t * wi)2075 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
2076 {
2077 	mdb_whatis_t *w = wi->wi_w;
2078 
2079 	uintptr_t cur;
2080 	uintptr_t addr = (uintptr_t)bcp->bc_addr;
2081 	size_t size = wi->wi_cache->cache_bufsize;
2082 
2083 	while (mdb_whatis_match(w, addr, size, &cur))
2084 		whatis_print_umem(wi, cur, addr, baddr);
2085 
2086 	return (WHATIS_WALKRET(w));
2087 }
2088 
2089 
2090 static int
whatis_walk_seg(uintptr_t addr,const vmem_seg_t * vs,whatis_info_t * wi)2091 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2092 {
2093 	mdb_whatis_t *w = wi->wi_w;
2094 
2095 	size_t size = vs->vs_end - vs->vs_start;
2096 	uintptr_t cur;
2097 
2098 	/* We're not interested in anything but alloc and free segments */
2099 	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2100 		return (WALK_NEXT);
2101 
2102 	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2103 		mdb_whatis_report_object(w, cur, vs->vs_start, "");
2104 
2105 		/*
2106 		 * If we're not printing it seperately, provide the vmem_seg
2107 		 * pointer if it has a stack trace.
2108 		 */
2109 		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2110 		    ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
2111 		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2112 			mdb_printf("vmem_seg %p ", addr);
2113 		}
2114 
2115 		mdb_printf("%s from %s vmem arena",
2116 		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2117 		    wi->wi_vmem->vm_name);
2118 
2119 		if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2120 			whatis_call_printer(vmem_seg, addr);
2121 		else
2122 			mdb_printf("\n");
2123 	}
2124 
2125 	return (WHATIS_WALKRET(w));
2126 }
2127 
2128 static int
whatis_walk_vmem(uintptr_t addr,const vmem_t * vmem,whatis_info_t * wi)2129 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2130 {
2131 	mdb_whatis_t *w = wi->wi_w;
2132 	const char *nm = vmem->vm_name;
2133 	wi->wi_vmem = vmem;
2134 
2135 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2136 		mdb_printf("Searching vmem arena %s...\n", nm);
2137 
2138 	if (mdb_pwalk("vmem_seg",
2139 	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2140 		mdb_warn("can't walk vmem seg for %p", addr);
2141 		return (WALK_NEXT);
2142 	}
2143 
2144 	return (WHATIS_WALKRET(w));
2145 }
2146 
2147 /*ARGSUSED*/
2148 static int
whatis_walk_slab(uintptr_t saddr,const umem_slab_t * sp,whatis_info_t * wi)2149 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
2150 {
2151 	mdb_whatis_t *w = wi->wi_w;
2152 
2153 	/* It must overlap with the slab data, or it's not interesting */
2154 	if (mdb_whatis_overlaps(w,
2155 	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2156 		wi->wi_slab_found++;
2157 		return (WALK_DONE);
2158 	}
2159 	return (WALK_NEXT);
2160 }
2161 
2162 static int
whatis_walk_cache(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2163 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2164 {
2165 	mdb_whatis_t *w = wi->wi_w;
2166 	char *walk, *freewalk;
2167 	mdb_walk_cb_t func;
2168 	int do_bufctl;
2169 
2170 	/* Override the '-b' flag as necessary */
2171 	if (!(c->cache_flags & UMF_HASH))
2172 		do_bufctl = FALSE;	/* no bufctls to walk */
2173 	else if (c->cache_flags & UMF_AUDIT)
2174 		do_bufctl = TRUE;	/* we always want debugging info */
2175 	else
2176 		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2177 
2178 	if (do_bufctl) {
2179 		walk = "bufctl";
2180 		freewalk = "freectl";
2181 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2182 	} else {
2183 		walk = "umem";
2184 		freewalk = "freemem";
2185 		func = (mdb_walk_cb_t)whatis_walk_umem;
2186 	}
2187 
2188 	wi->wi_cache = c;
2189 
2190 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2191 		mdb_printf("Searching %s...\n", c->cache_name);
2192 
2193 	/*
2194 	 * If more then two buffers live on each slab, figure out if we're
2195 	 * interested in anything in any slab before doing the more expensive
2196 	 * umem/freemem (bufctl/freectl) walkers.
2197 	 */
2198 	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2199 	if (!(c->cache_flags & UMF_HASH))
2200 		wi->wi_slab_size -= sizeof (umem_slab_t);
2201 
2202 	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2203 		wi->wi_slab_found = 0;
2204 		if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2205 		    addr) == -1) {
2206 			mdb_warn("can't find umem_slab walker");
2207 			return (WALK_DONE);
2208 		}
2209 		if (wi->wi_slab_found == 0)
2210 			return (WALK_NEXT);
2211 	}
2212 
2213 	wi->wi_freemem = FALSE;
2214 	if (mdb_pwalk(walk, func, wi, addr) == -1) {
2215 		mdb_warn("can't find %s walker", walk);
2216 		return (WALK_DONE);
2217 	}
2218 
2219 	if (mdb_whatis_done(w))
2220 		return (WALK_DONE);
2221 
2222 	/*
2223 	 * We have searched for allocated memory; now search for freed memory.
2224 	 */
2225 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2226 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2227 
2228 	wi->wi_freemem = TRUE;
2229 
2230 	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2231 		mdb_warn("can't find %s walker", freewalk);
2232 		return (WALK_DONE);
2233 	}
2234 
2235 	return (WHATIS_WALKRET(w));
2236 }
2237 
2238 static int
whatis_walk_touch(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2239 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2240 {
2241 	if (c->cache_arena == wi->wi_msb_arena ||
2242 	    (c->cache_cflags & UMC_NOTOUCH))
2243 		return (WALK_NEXT);
2244 
2245 	return (whatis_walk_cache(addr, c, wi));
2246 }
2247 
2248 static int
whatis_walk_metadata(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2249 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2250 {
2251 	if (c->cache_arena != wi->wi_msb_arena)
2252 		return (WALK_NEXT);
2253 
2254 	return (whatis_walk_cache(addr, c, wi));
2255 }
2256 
2257 static int
whatis_walk_notouch(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2258 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2259 {
2260 	if (c->cache_arena == wi->wi_msb_arena ||
2261 	    !(c->cache_cflags & UMC_NOTOUCH))
2262 		return (WALK_NEXT);
2263 
2264 	return (whatis_walk_cache(addr, c, wi));
2265 }
2266 
2267 /*ARGSUSED*/
2268 static int
whatis_run_umem(mdb_whatis_t * w,void * ignored)2269 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2270 {
2271 	whatis_info_t wi;
2272 
2273 	bzero(&wi, sizeof (wi));
2274 	wi.wi_w = w;
2275 
2276 	/* umem's metadata is allocated from the umem_internal_arena */
2277 	if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2278 		mdb_warn("unable to readvar \"umem_internal_arena\"");
2279 
2280 	/*
2281 	 * We process umem caches in the following order:
2282 	 *
2283 	 *	non-UMC_NOTOUCH, non-metadata	(typically the most interesting)
2284 	 *	metadata			(can be huge with UMF_AUDIT)
2285 	 *	UMC_NOTOUCH, non-metadata	(see umem_walk_all())
2286 	 */
2287 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2288 	    &wi) == -1 ||
2289 	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2290 	    &wi) == -1 ||
2291 	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2292 	    &wi) == -1) {
2293 		mdb_warn("couldn't find umem_cache walker");
2294 		return (1);
2295 	}
2296 	return (0);
2297 }
2298 
2299 /*ARGSUSED*/
2300 static int
whatis_run_vmem(mdb_whatis_t * w,void * ignored)2301 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2302 {
2303 	whatis_info_t wi;
2304 
2305 	bzero(&wi, sizeof (wi));
2306 	wi.wi_w = w;
2307 
2308 	if (mdb_walk("vmem_postfix",
2309 	    (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2310 		mdb_warn("couldn't find vmem_postfix walker");
2311 		return (1);
2312 	}
2313 	return (0);
2314 }
2315 
2316 int
umem_init(void)2317 umem_init(void)
2318 {
2319 	mdb_walker_t w = {
2320 		"umem_cache", "walk list of umem caches", umem_cache_walk_init,
2321 		umem_cache_walk_step, umem_cache_walk_fini
2322 	};
2323 
2324 	if (mdb_add_walker(&w) == -1) {
2325 		mdb_warn("failed to add umem_cache walker");
2326 		return (-1);
2327 	}
2328 
2329 	if (umem_update_variables() == -1)
2330 		return (-1);
2331 
2332 	/* install a callback so that our variables are always up-to-date */
2333 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2334 	umem_statechange_cb(NULL);
2335 
2336 	/*
2337 	 * Register our ::whatis callbacks.
2338 	 */
2339 	mdb_whatis_register("umem", whatis_run_umem, NULL,
2340 	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2341 	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2342 	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2343 
2344 	return (0);
2345 }
2346 
/*
 * Per-CPU address range within the transaction log, as computed by
 * umem_log(); umem_log_walk() attributes a log record to the CPU whose
 * [umc_low, umc_high) range contains the record's address.
 */
typedef struct umem_log_cpu {
	uintptr_t umc_low;	/* lh_base + clh_chunk * lh_chunksize */
	uintptr_t umc_high;	/* clh_current */
} umem_log_cpu_t;
2351 
2352 int
umem_log_walk(uintptr_t addr,const umem_bufctl_audit_t * b,umem_log_cpu_t * umc)2353 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2354 {
2355 	int i;
2356 
2357 	for (i = 0; i < umem_max_ncpus; i++) {
2358 		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2359 			break;
2360 	}
2361 
2362 	if (i == umem_max_ncpus)
2363 		mdb_printf("   ");
2364 	else
2365 		mdb_printf("%3d", i);
2366 
2367 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2368 	    b->bc_timestamp, b->bc_thread);
2369 
2370 	return (WALK_NEXT);
2371 }
2372 
2373 /*ARGSUSED*/
2374 int
umem_log(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2375 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2376 {
2377 	umem_log_header_t lh;
2378 	umem_cpu_log_header_t clh;
2379 	uintptr_t lhp, clhp;
2380 	umem_log_cpu_t *umc;
2381 	int i;
2382 
2383 	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2384 		mdb_warn("failed to read 'umem_transaction_log'");
2385 		return (DCMD_ERR);
2386 	}
2387 
2388 	if (lhp == 0) {
2389 		mdb_warn("no umem transaction log\n");
2390 		return (DCMD_ERR);
2391 	}
2392 
2393 	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2394 		mdb_warn("failed to read log header at %p", lhp);
2395 		return (DCMD_ERR);
2396 	}
2397 
2398 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2399 
2400 	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2401 	    UM_SLEEP | UM_GC);
2402 
2403 	for (i = 0; i < umem_max_ncpus; i++) {
2404 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2405 			mdb_warn("cannot read cpu %d's log header at %p",
2406 			    i, clhp);
2407 			return (DCMD_ERR);
2408 		}
2409 
2410 		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2411 		    (uintptr_t)lh.lh_base;
2412 		umc[i].umc_high = (uintptr_t)clh.clh_current;
2413 
2414 		clhp += sizeof (umem_cpu_log_header_t);
2415 	}
2416 
2417 	if (DCMD_HDRSPEC(flags)) {
2418 		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2419 		    "BUFADDR", "TIMESTAMP", "THREAD");
2420 	}
2421 
2422 	/*
2423 	 * If we have been passed an address, we'll just print out that
2424 	 * log entry.
2425 	 */
2426 	if (flags & DCMD_ADDRSPEC) {
2427 		umem_bufctl_audit_t *bp;
2428 		UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2429 
2430 		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2431 			mdb_warn("failed to read bufctl at %p", addr);
2432 			return (DCMD_ERR);
2433 		}
2434 
2435 		(void) umem_log_walk(addr, bp, umc);
2436 
2437 		return (DCMD_OK);
2438 	}
2439 
2440 	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2441 		mdb_warn("can't find umem log walker");
2442 		return (DCMD_ERR);
2443 	}
2444 
2445 	return (DCMD_OK);
2446 }
2447 
/*
 * State shared between bufctl() in history mode and
 * bufctl_history_callback().
 */
typedef struct bufctl_history_cb {
	int		bhc_flags;	/* dcmd flags passed to bufctl() */
	int		bhc_argc;	/* number of entries in bhc_argv */
	const mdb_arg_t	*bhc_argv;	/* arguments passed to bufctl() */
	int		bhc_ret;	/* result of the last bufctl() call */
} bufctl_history_cb_t;
2454 
2455 /*ARGSUSED*/
2456 static int
bufctl_history_callback(uintptr_t addr,const void * ign,void * arg)2457 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2458 {
2459 	bufctl_history_cb_t *bhc = arg;
2460 
2461 	bhc->bhc_ret =
2462 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2463 
2464 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2465 
2466 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2467 }
2468 
/*
 * Print the help text for the ::bufctl dcmd.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s\n", summary);

	/* the OPTIONS heading is printed outdented relative to the body */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2491 
2492 int
bufctl(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2493 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2494 {
2495 	uint_t verbose = FALSE;
2496 	uint_t history = FALSE;
2497 	uint_t in_history = FALSE;
2498 	uintptr_t caller = 0, thread = 0;
2499 	uintptr_t laddr, haddr, baddr = 0;
2500 	hrtime_t earliest = 0, latest = 0;
2501 	int i, depth;
2502 	char c[MDB_SYM_NAMLEN];
2503 	GElf_Sym sym;
2504 	umem_bufctl_audit_t *bcp;
2505 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2506 
2507 	if (mdb_getopts(argc, argv,
2508 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2509 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2510 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2511 	    'c', MDB_OPT_UINTPTR, &caller,
2512 	    't', MDB_OPT_UINTPTR, &thread,
2513 	    'e', MDB_OPT_UINT64, &earliest,
2514 	    'l', MDB_OPT_UINT64, &latest,
2515 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2516 		return (DCMD_USAGE);
2517 
2518 	if (!(flags & DCMD_ADDRSPEC))
2519 		return (DCMD_USAGE);
2520 
2521 	if (in_history && !history)
2522 		return (DCMD_USAGE);
2523 
2524 	if (history && !in_history) {
2525 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2526 		    UM_SLEEP | UM_GC);
2527 		bufctl_history_cb_t bhc;
2528 
2529 		nargv[0].a_type = MDB_TYPE_STRING;
2530 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2531 
2532 		for (i = 0; i < argc; i++)
2533 			nargv[i + 1] = argv[i];
2534 
2535 		/*
2536 		 * When in history mode, we treat each element as if it
2537 		 * were in a seperate loop, so that the headers group
2538 		 * bufctls with similar histories.
2539 		 */
2540 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2541 		bhc.bhc_argc = argc + 1;
2542 		bhc.bhc_argv = nargv;
2543 		bhc.bhc_ret = DCMD_OK;
2544 
2545 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2546 		    addr) == -1) {
2547 			mdb_warn("unable to walk bufctl_history");
2548 			return (DCMD_ERR);
2549 		}
2550 
2551 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2552 			mdb_printf("\n");
2553 
2554 		return (bhc.bhc_ret);
2555 	}
2556 
2557 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2558 		if (verbose) {
2559 			mdb_printf("%16s %16s %16s %16s\n"
2560 			    "%<u>%16s %16s %16s %16s%</u>\n",
2561 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2562 			    "", "CACHE", "LASTLOG", "CONTENTS");
2563 		} else {
2564 			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2565 			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2566 		}
2567 	}
2568 
2569 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2570 		mdb_warn("couldn't read bufctl at %p", addr);
2571 		return (DCMD_ERR);
2572 	}
2573 
2574 	/*
2575 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2576 	 * the address does not really refer to a bufctl.
2577 	 */
2578 	depth = MIN(bcp->bc_depth, umem_stack_depth);
2579 
2580 	if (caller != 0) {
2581 		laddr = caller;
2582 		haddr = caller + sizeof (caller);
2583 
2584 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2585 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2586 			/*
2587 			 * We were provided an exact symbol value; any
2588 			 * address in the function is valid.
2589 			 */
2590 			laddr = (uintptr_t)sym.st_value;
2591 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2592 		}
2593 
2594 		for (i = 0; i < depth; i++)
2595 			if (bcp->bc_stack[i] >= laddr &&
2596 			    bcp->bc_stack[i] < haddr)
2597 				break;
2598 
2599 		if (i == depth)
2600 			return (DCMD_OK);
2601 	}
2602 
2603 	if (thread != 0 && (uintptr_t)bcp->bc_thread != thread)
2604 		return (DCMD_OK);
2605 
2606 	if (earliest != 0 && bcp->bc_timestamp < earliest)
2607 		return (DCMD_OK);
2608 
2609 	if (latest != 0 && bcp->bc_timestamp > latest)
2610 		return (DCMD_OK);
2611 
2612 	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2613 		return (DCMD_OK);
2614 
2615 	if (flags & DCMD_PIPE_OUT) {
2616 		mdb_printf("%#r\n", addr);
2617 		return (DCMD_OK);
2618 	}
2619 
2620 	if (verbose) {
2621 		mdb_printf(
2622 		    "%<b>%16p%</b> %16p %16llx %16d\n"
2623 		    "%16s %16p %16p %16p\n",
2624 		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2625 		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2626 
2627 		mdb_inc_indent(17);
2628 		for (i = 0; i < depth; i++)
2629 			mdb_printf("%a\n", bcp->bc_stack[i]);
2630 		mdb_dec_indent(17);
2631 		mdb_printf("\n");
2632 	} else {
2633 		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2634 		    bcp->bc_timestamp, bcp->bc_thread);
2635 
2636 		for (i = 0; i < depth; i++) {
2637 			if (mdb_lookup_by_addr(bcp->bc_stack[i],
2638 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2639 				continue;
2640 			if (is_umem_sym(c, "umem_"))
2641 				continue;
2642 			mdb_printf(" %a\n", bcp->bc_stack[i]);
2643 			break;
2644 		}
2645 
2646 		if (i >= depth)
2647 			mdb_printf("\n");
2648 	}
2649 
2650 	return (DCMD_OK);
2651 }
2652 
2653 /*ARGSUSED*/
2654 int
bufctl_audit(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2655 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2656 {
2657 	mdb_arg_t a;
2658 
2659 	if (!(flags & DCMD_ADDRSPEC))
2660 		return (DCMD_USAGE);
2661 
2662 	if (argc != 0)
2663 		return (DCMD_USAGE);
2664 
2665 	a.a_type = MDB_TYPE_STRING;
2666 	a.a_un.a_str = "-v";
2667 
2668 	return (bufctl(addr, flags, 1, &a));
2669 }
2670 
/*
 * State handed to the verify_alloc() and verify_free() walk callbacks by
 * the ::umem_verify dcmd.
 */
typedef struct umem_verify {
	uint64_t *umv_buf;		/* buffer to read cache contents into */
	size_t umv_size;		/* number of bytes in umv_buf */
	int umv_corruption;		/* > 0 if corruption found. */
	int umv_besilent;		/* nonzero: don't report each site */
	struct umem_cache umv_cache;	/* the cache we're operating on */
} umem_verify_t;
2678 
/*
 * verify_pattern()
 *	Check that the size bytes starting at buf_arg consist of repetitions
 *	of the 64-bit pattern pat.  Returns the byte offset of the first
 *	64-bit word that differs, or -1 if every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	size_t off;
	size_t word = 0;

	for (off = 0; off < size; off += sizeof (uint64_t), word++) {
		if (buf_arg[word] != pat)
			return ((int64_t)off);
	}

	return (-1);
}
2695 
2696 /*
2697  * verify_buftag()
2698  *	verify that btp->bt_bxstat == (bcp ^ pat)
2699  */
2700 static int
verify_buftag(umem_buftag_t * btp,uintptr_t pat)2701 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2702 {
2703 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2704 }
2705 
2706 /*
2707  * verify_free()
2708  *	verify the integrity of a free block of memory by checking
2709  *	that it is filled with 0xdeadbeef and that its buftag is sane.
2710  */
2711 /*ARGSUSED1*/
2712 static int
verify_free(uintptr_t addr,const void * data,void * private)2713 verify_free(uintptr_t addr, const void *data, void *private)
2714 {
2715 	umem_verify_t *umv = (umem_verify_t *)private;
2716 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2717 	int64_t corrupt;		/* corruption offset */
2718 	umem_buftag_t *buftagp;		/* ptr to buftag */
2719 	umem_cache_t *cp = &umv->umv_cache;
2720 	int besilent = umv->umv_besilent;
2721 
2722 	/*LINTED*/
2723 	buftagp = UMEM_BUFTAG(cp, buf);
2724 
2725 	/*
2726 	 * Read the buffer to check.
2727 	 */
2728 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2729 		if (!besilent)
2730 			mdb_warn("couldn't read %p", addr);
2731 		return (WALK_NEXT);
2732 	}
2733 
2734 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2735 	    UMEM_FREE_PATTERN)) >= 0) {
2736 		if (!besilent)
2737 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2738 			    addr, (uintptr_t)addr + corrupt);
2739 		goto corrupt;
2740 	}
2741 
2742 	if ((cp->cache_flags & UMF_HASH) &&
2743 	    buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2744 		if (!besilent)
2745 			mdb_printf("buffer %p (free) seems to "
2746 			    "have a corrupt redzone pattern\n", addr);
2747 		goto corrupt;
2748 	}
2749 
2750 	/*
2751 	 * confirm bufctl pointer integrity.
2752 	 */
2753 	if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2754 		if (!besilent)
2755 			mdb_printf("buffer %p (free) has a corrupt "
2756 			    "buftag\n", addr);
2757 		goto corrupt;
2758 	}
2759 
2760 	return (WALK_NEXT);
2761 corrupt:
2762 	umv->umv_corruption++;
2763 	return (WALK_NEXT);
2764 }
2765 
2766 /*
2767  * verify_alloc()
2768  *	Verify that the buftag of an allocated buffer makes sense with respect
2769  *	to the buffer.
2770  */
2771 /*ARGSUSED1*/
2772 static int
verify_alloc(uintptr_t addr,const void * data,void * private)2773 verify_alloc(uintptr_t addr, const void *data, void *private)
2774 {
2775 	umem_verify_t *umv = (umem_verify_t *)private;
2776 	umem_cache_t *cp = &umv->umv_cache;
2777 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2778 	/*LINTED*/
2779 	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2780 	uint32_t *ip = (uint32_t *)buftagp;
2781 	uint8_t *bp = (uint8_t *)buf;
2782 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2783 	int besilent = umv->umv_besilent;
2784 
2785 	/*
2786 	 * Read the buffer to check.
2787 	 */
2788 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2789 		if (!besilent)
2790 			mdb_warn("couldn't read %p", addr);
2791 		return (WALK_NEXT);
2792 	}
2793 
2794 	/*
2795 	 * There are two cases to handle:
2796 	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2797 	 *    0xfeedfacefeedface at the end of it
2798 	 * 2. If the buf was alloc'd using umem_alloc, it will have
2799 	 *    0xbb just past the end of the region in use.  At the buftag,
2800 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2801 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2802 	 *    endianness), followed by 32 bits containing the offset of the
2803 	 *    0xbb byte in the buffer.
2804 	 *
2805 	 * Finally, the two 32-bit words that comprise the second half of the
2806 	 * buftag should xor to UMEM_BUFTAG_ALLOC
2807 	 */
2808 
2809 	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2810 		looks_ok = 1;
2811 	else if (!UMEM_SIZE_VALID(ip[1]))
2812 		size_ok = 0;
2813 	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2814 		looks_ok = 1;
2815 	else
2816 		size_ok = 0;
2817 
2818 	if (!size_ok) {
2819 		if (!besilent)
2820 			mdb_printf("buffer %p (allocated) has a corrupt "
2821 			    "redzone size encoding\n", addr);
2822 		goto corrupt;
2823 	}
2824 
2825 	if (!looks_ok) {
2826 		if (!besilent)
2827 			mdb_printf("buffer %p (allocated) has a corrupt "
2828 			    "redzone signature\n", addr);
2829 		goto corrupt;
2830 	}
2831 
2832 	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2833 		if (!besilent)
2834 			mdb_printf("buffer %p (allocated) has a "
2835 			    "corrupt buftag\n", addr);
2836 		goto corrupt;
2837 	}
2838 
2839 	return (WALK_NEXT);
2840 corrupt:
2841 	umv->umv_corruption++;
2842 	return (WALK_NEXT);
2843 }
2844 
2845 /*ARGSUSED2*/
2846 int
umem_verify(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2847 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2848 {
2849 	if (flags & DCMD_ADDRSPEC) {
2850 		int check_alloc = 0, check_free = 0;
2851 		umem_verify_t umv;
2852 
2853 		if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2854 		    addr) == -1) {
2855 			mdb_warn("couldn't read umem_cache %p", addr);
2856 			return (DCMD_ERR);
2857 		}
2858 
2859 		umv.umv_size = umv.umv_cache.cache_buftag +
2860 		    sizeof (umem_buftag_t);
2861 		umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2862 		umv.umv_corruption = 0;
2863 
2864 		if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2865 			check_alloc = 1;
2866 			if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2867 				check_free = 1;
2868 		} else {
2869 			if (!(flags & DCMD_LOOP)) {
2870 				mdb_warn("cache %p (%s) does not have "
2871 				    "redzone checking enabled\n", addr,
2872 				    umv.umv_cache.cache_name);
2873 			}
2874 			return (DCMD_ERR);
2875 		}
2876 
2877 		if (flags & DCMD_LOOP) {
2878 			/*
2879 			 * table mode, don't print out every corrupt buffer
2880 			 */
2881 			umv.umv_besilent = 1;
2882 		} else {
2883 			mdb_printf("Summary for cache '%s'\n",
2884 			    umv.umv_cache.cache_name);
2885 			mdb_inc_indent(2);
2886 			umv.umv_besilent = 0;
2887 		}
2888 
2889 		if (check_alloc)
2890 			(void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2891 		if (check_free)
2892 			(void) mdb_pwalk("freemem", verify_free, &umv, addr);
2893 
2894 		if (flags & DCMD_LOOP) {
2895 			if (umv.umv_corruption == 0) {
2896 				mdb_printf("%-*s %?p clean\n",
2897 				    UMEM_CACHE_NAMELEN,
2898 				    umv.umv_cache.cache_name, addr);
2899 			} else {
2900 				char *s = "";	/* optional s in "buffer[s]" */
2901 				if (umv.umv_corruption > 1)
2902 					s = "s";
2903 
2904 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2905 				    UMEM_CACHE_NAMELEN,
2906 				    umv.umv_cache.cache_name, addr,
2907 				    umv.umv_corruption, s);
2908 			}
2909 		} else {
2910 			/*
2911 			 * This is the more verbose mode, when the user has
2912 			 * type addr::umem_verify.  If the cache was clean,
2913 			 * nothing will have yet been printed. So say something.
2914 			 */
2915 			if (umv.umv_corruption == 0)
2916 				mdb_printf("clean\n");
2917 
2918 			mdb_dec_indent(2);
2919 		}
2920 	} else {
2921 		/*
2922 		 * If the user didn't specify a cache to verify, we'll walk all
2923 		 * umem_cache's, specifying ourself as a callback for each...
2924 		 * this is the equivalent of '::walk umem_cache .::umem_verify'
2925 		 */
2926 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2927 		    "Cache Name", "Addr", "Cache Integrity");
2928 		(void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2929 	}
2930 
2931 	return (DCMD_OK);
2932 }
2933 
/*
 * One vmem arena in the tree that vmem_walk_init() builds from the
 * target's vmem_list.
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next node in the flat list */
	struct vmem_node *vn_parent;	/* node of the vm_source arena */
	struct vmem_node *vn_sibling;	/* next node with the same parent */
	struct vmem_node *vn_children;	/* head of this node's child list */
	uintptr_t vn_addr;		/* target address of the vmem_t */
	int vn_marked;			/* set once the postfix walk visits */
	vmem_t vn_vmem;			/* local copy of the arena */
} vmem_node_t;
2943 
/*
 * Walk state shared by the vmem walkers.
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* head of the list of root arenas */
	vmem_node_t *vw_current;	/* node the next step starts from */
} vmem_walk_t;
2948 
2949 int
vmem_walk_init(mdb_walk_state_t * wsp)2950 vmem_walk_init(mdb_walk_state_t *wsp)
2951 {
2952 	uintptr_t vaddr, paddr;
2953 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2954 	vmem_walk_t *vw;
2955 
2956 	if (umem_readvar(&vaddr, "vmem_list") == -1) {
2957 		mdb_warn("couldn't read 'vmem_list'");
2958 		return (WALK_ERR);
2959 	}
2960 
2961 	while (vaddr != 0) {
2962 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2963 		vp->vn_addr = vaddr;
2964 		vp->vn_next = head;
2965 		head = vp;
2966 
2967 		if (vaddr == wsp->walk_addr)
2968 			current = vp;
2969 
2970 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2971 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2972 			goto err;
2973 		}
2974 
2975 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2976 	}
2977 
2978 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2979 
2980 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == 0) {
2981 			vp->vn_sibling = root;
2982 			root = vp;
2983 			continue;
2984 		}
2985 
2986 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2987 			if (parent->vn_addr != paddr)
2988 				continue;
2989 			vp->vn_sibling = parent->vn_children;
2990 			parent->vn_children = vp;
2991 			vp->vn_parent = parent;
2992 			break;
2993 		}
2994 
2995 		if (parent == NULL) {
2996 			mdb_warn("couldn't find %p's parent (%p)\n",
2997 			    vp->vn_addr, paddr);
2998 			goto err;
2999 		}
3000 	}
3001 
3002 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3003 	vw->vw_root = root;
3004 
3005 	if (current != NULL)
3006 		vw->vw_current = current;
3007 	else
3008 		vw->vw_current = root;
3009 
3010 	wsp->walk_data = vw;
3011 	return (WALK_NEXT);
3012 err:
3013 	for (vp = head; head != NULL; vp = head) {
3014 		head = vp->vn_next;
3015 		mdb_free(vp, sizeof (vmem_node_t));
3016 	}
3017 
3018 	return (WALK_ERR);
3019 }
3020 
3021 int
vmem_walk_step(mdb_walk_state_t * wsp)3022 vmem_walk_step(mdb_walk_state_t *wsp)
3023 {
3024 	vmem_walk_t *vw = wsp->walk_data;
3025 	vmem_node_t *vp;
3026 	int rval;
3027 
3028 	if ((vp = vw->vw_current) == NULL)
3029 		return (WALK_DONE);
3030 
3031 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3032 
3033 	if (vp->vn_children != NULL) {
3034 		vw->vw_current = vp->vn_children;
3035 		return (rval);
3036 	}
3037 
3038 	do {
3039 		vw->vw_current = vp->vn_sibling;
3040 		vp = vp->vn_parent;
3041 	} while (vw->vw_current == NULL && vp != NULL);
3042 
3043 	return (rval);
3044 }
3045 
3046 /*
3047  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3048  * children are visited before their parent.  We perform the postfix walk
3049  * iteratively (rather than recursively) to allow mdb to regain control
3050  * after each callback.
3051  */
3052 int
vmem_postfix_walk_step(mdb_walk_state_t * wsp)3053 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3054 {
3055 	vmem_walk_t *vw = wsp->walk_data;
3056 	vmem_node_t *vp = vw->vw_current;
3057 	int rval;
3058 
3059 	/*
3060 	 * If this node is marked, then we know that we have already visited
3061 	 * all of its children.  If the node has any siblings, they need to
3062 	 * be visited next; otherwise, we need to visit the parent.  Note
3063 	 * that vp->vn_marked will only be zero on the first invocation of
3064 	 * the step function.
3065 	 */
3066 	if (vp->vn_marked) {
3067 		if (vp->vn_sibling != NULL)
3068 			vp = vp->vn_sibling;
3069 		else if (vp->vn_parent != NULL)
3070 			vp = vp->vn_parent;
3071 		else {
3072 			/*
3073 			 * We have neither a parent, nor a sibling, and we
3074 			 * have already been visited; we're done.
3075 			 */
3076 			return (WALK_DONE);
3077 		}
3078 	}
3079 
3080 	/*
3081 	 * Before we visit this node, visit its children.
3082 	 */
3083 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3084 		vp = vp->vn_children;
3085 
3086 	vp->vn_marked = 1;
3087 	vw->vw_current = vp;
3088 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3089 
3090 	return (rval);
3091 }
3092 
3093 void
vmem_walk_fini(mdb_walk_state_t * wsp)3094 vmem_walk_fini(mdb_walk_state_t *wsp)
3095 {
3096 	vmem_walk_t *vw = wsp->walk_data;
3097 	vmem_node_t *root = vw->vw_root;
3098 	int done;
3099 
3100 	if (root == NULL)
3101 		return;
3102 
3103 	if ((vw->vw_root = root->vn_children) != NULL)
3104 		vmem_walk_fini(wsp);
3105 
3106 	vw->vw_root = root->vn_sibling;
3107 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3108 	mdb_free(root, sizeof (vmem_node_t));
3109 
3110 	if (done) {
3111 		mdb_free(vw, sizeof (vmem_walk_t));
3112 	} else {
3113 		vmem_walk_fini(wsp);
3114 	}
3115 }
3116 
/*
 * State for the vmem segment walkers (see vmem_seg_walk_common_init()
 * and vmem_seg_walk_step()).
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report; 0 reports all */
	uintptr_t vsw_start;	/* target address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* target address of the next segment */
} vmem_seg_walk_t;
3122 
3123 /*ARGSUSED*/
3124 int
vmem_seg_walk_common_init(mdb_walk_state_t * wsp,uint8_t type,char * name)3125 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3126 {
3127 	vmem_seg_walk_t *vsw;
3128 
3129 	if (wsp->walk_addr == 0) {
3130 		mdb_warn("vmem_%s does not support global walks\n", name);
3131 		return (WALK_ERR);
3132 	}
3133 
3134 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3135 
3136 	vsw->vsw_type = type;
3137 	vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
3138 	vsw->vsw_current = vsw->vsw_start;
3139 
3140 	return (WALK_NEXT);
3141 }
3142 
/*
 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
 * The segment walkers use VMEM_NONE as the walk type that matches
 * segments of every type.
 */
#define	VMEM_NONE	0
3147 
3148 int
vmem_alloc_walk_init(mdb_walk_state_t * wsp)3149 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3150 {
3151 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3152 }
3153 
3154 int
vmem_free_walk_init(mdb_walk_state_t * wsp)3155 vmem_free_walk_init(mdb_walk_state_t *wsp)
3156 {
3157 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3158 }
3159 
3160 int
vmem_span_walk_init(mdb_walk_state_t * wsp)3161 vmem_span_walk_init(mdb_walk_state_t *wsp)
3162 {
3163 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3164 }
3165 
3166 int
vmem_seg_walk_init(mdb_walk_state_t * wsp)3167 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3168 {
3169 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3170 }
3171 
3172 int
vmem_seg_walk_step(mdb_walk_state_t * wsp)3173 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3174 {
3175 	vmem_seg_t seg;
3176 	vmem_seg_walk_t *vsw = wsp->walk_data;
3177 	uintptr_t addr = vsw->vsw_current;
3178 	static size_t seg_size = 0;
3179 	int rval;
3180 
3181 	if (!seg_size) {
3182 		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3183 			mdb_warn("failed to read 'vmem_seg_size'");
3184 			seg_size = sizeof (vmem_seg_t);
3185 		}
3186 	}
3187 
3188 	if (seg_size < sizeof (seg))
3189 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3190 
3191 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3192 		mdb_warn("couldn't read vmem_seg at %p", addr);
3193 		return (WALK_ERR);
3194 	}
3195 
3196 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3197 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3198 		rval = WALK_NEXT;
3199 	} else {
3200 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3201 	}
3202 
3203 	if (vsw->vsw_current == vsw->vsw_start)
3204 		return (WALK_DONE);
3205 
3206 	return (rval);
3207 }
3208 
3209 void
vmem_seg_walk_fini(mdb_walk_state_t * wsp)3210 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3211 {
3212 	vmem_seg_walk_t *vsw = wsp->walk_data;
3213 
3214 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3215 }
3216 
3217 #define	VMEM_NAMEWIDTH	22
3218 
3219 int
vmem(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3220 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3221 {
3222 	vmem_t v, parent;
3223 	uintptr_t paddr;
3224 	int ident = 0;
3225 	char c[VMEM_NAMEWIDTH];
3226 
3227 	if (!(flags & DCMD_ADDRSPEC)) {
3228 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3229 			mdb_warn("can't walk vmem");
3230 			return (DCMD_ERR);
3231 		}
3232 		return (DCMD_OK);
3233 	}
3234 
3235 	if (DCMD_HDRSPEC(flags))
3236 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3237 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3238 		    "TOTAL", "SUCCEED", "FAIL");
3239 
3240 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3241 		mdb_warn("couldn't read vmem at %p", addr);
3242 		return (DCMD_ERR);
3243 	}
3244 
3245 	for (paddr = (uintptr_t)v.vm_source; paddr != 0; ident += 2) {
3246 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3247 			mdb_warn("couldn't trace %p's ancestry", addr);
3248 			ident = 0;
3249 			break;
3250 		}
3251 		paddr = (uintptr_t)parent.vm_source;
3252 	}
3253 
3254 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3255 
3256 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3257 	    addr, VMEM_NAMEWIDTH, c,
3258 	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3259 	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3260 
3261 	return (DCMD_OK);
3262 }
3263 
/*
 * Help text for the ::vmem_seg dcmd: a short description followed by the
 * list of supported options.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3295 
3296 
3297 /*ARGSUSED*/
3298 int
vmem_seg(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3299 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3300 {
3301 	vmem_seg_t vs;
3302 	uintptr_t *stk = vs.vs_stack;
3303 	uintptr_t sz;
3304 	uint8_t t;
3305 	const char *type = NULL;
3306 	GElf_Sym sym;
3307 	char c[MDB_SYM_NAMLEN];
3308 	int no_debug;
3309 	int i;
3310 	int depth;
3311 	uintptr_t laddr, haddr;
3312 
3313 	uintptr_t caller = 0, thread = 0;
3314 	uintptr_t minsize = 0, maxsize = 0;
3315 
3316 	hrtime_t earliest = 0, latest = 0;
3317 
3318 	uint_t size = 0;
3319 	uint_t verbose = 0;
3320 
3321 	if (!(flags & DCMD_ADDRSPEC))
3322 		return (DCMD_USAGE);
3323 
3324 	if (mdb_getopts(argc, argv,
3325 	    'c', MDB_OPT_UINTPTR, &caller,
3326 	    'e', MDB_OPT_UINT64, &earliest,
3327 	    'l', MDB_OPT_UINT64, &latest,
3328 	    's', MDB_OPT_SETBITS, TRUE, &size,
3329 	    'm', MDB_OPT_UINTPTR, &minsize,
3330 	    'M', MDB_OPT_UINTPTR, &maxsize,
3331 	    't', MDB_OPT_UINTPTR, &thread,
3332 	    'T', MDB_OPT_STR, &type,
3333 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3334 	    NULL) != argc)
3335 		return (DCMD_USAGE);
3336 
3337 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3338 		if (verbose) {
3339 			mdb_printf("%16s %4s %16s %16s %16s\n"
3340 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3341 			    "ADDR", "TYPE", "START", "END", "SIZE",
3342 			    "", "", "THREAD", "TIMESTAMP", "");
3343 		} else {
3344 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3345 			    "START", size? "SIZE" : "END", "WHO");
3346 		}
3347 	}
3348 
3349 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3350 		mdb_warn("couldn't read vmem_seg at %p", addr);
3351 		return (DCMD_ERR);
3352 	}
3353 
3354 	if (type != NULL) {
3355 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3356 			t = VMEM_ALLOC;
3357 		else if (strcmp(type, "FREE") == 0)
3358 			t = VMEM_FREE;
3359 		else if (strcmp(type, "SPAN") == 0)
3360 			t = VMEM_SPAN;
3361 		else if (strcmp(type, "ROTR") == 0 ||
3362 		    strcmp(type, "ROTOR") == 0)
3363 			t = VMEM_ROTOR;
3364 		else if (strcmp(type, "WLKR") == 0 ||
3365 		    strcmp(type, "WALKER") == 0)
3366 			t = VMEM_WALKER;
3367 		else {
3368 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3369 			    type);
3370 			return (DCMD_ERR);
3371 		}
3372 
3373 		if (vs.vs_type != t)
3374 			return (DCMD_OK);
3375 	}
3376 
3377 	sz = vs.vs_end - vs.vs_start;
3378 
3379 	if (minsize != 0 && sz < minsize)
3380 		return (DCMD_OK);
3381 
3382 	if (maxsize != 0 && sz > maxsize)
3383 		return (DCMD_OK);
3384 
3385 	t = vs.vs_type;
3386 	depth = vs.vs_depth;
3387 
3388 	/*
3389 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3390 	 */
3391 	no_debug = (t != VMEM_ALLOC) ||
3392 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3393 
3394 	if (no_debug) {
3395 		if (caller != 0 || thread != 0 || earliest != 0 || latest != 0)
3396 			return (DCMD_OK);		/* not enough info */
3397 	} else {
3398 		if (caller != 0) {
3399 			laddr = caller;
3400 			haddr = caller + sizeof (caller);
3401 
3402 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3403 			    sizeof (c), &sym) != -1 &&
3404 			    caller == (uintptr_t)sym.st_value) {
3405 				/*
3406 				 * We were provided an exact symbol value; any
3407 				 * address in the function is valid.
3408 				 */
3409 				laddr = (uintptr_t)sym.st_value;
3410 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3411 			}
3412 
3413 			for (i = 0; i < depth; i++)
3414 				if (vs.vs_stack[i] >= laddr &&
3415 				    vs.vs_stack[i] < haddr)
3416 					break;
3417 
3418 			if (i == depth)
3419 				return (DCMD_OK);
3420 		}
3421 
3422 		if (thread != 0 && (uintptr_t)vs.vs_thread != thread)
3423 			return (DCMD_OK);
3424 
3425 		if (earliest != 0 && vs.vs_timestamp < earliest)
3426 			return (DCMD_OK);
3427 
3428 		if (latest != 0 && vs.vs_timestamp > latest)
3429 			return (DCMD_OK);
3430 	}
3431 
3432 	type = (t == VMEM_ALLOC ? "ALLC" :
3433 	    t == VMEM_FREE ? "FREE" :
3434 	    t == VMEM_SPAN ? "SPAN" :
3435 	    t == VMEM_ROTOR ? "ROTR" :
3436 	    t == VMEM_WALKER ? "WLKR" :
3437 	    "????");
3438 
3439 	if (flags & DCMD_PIPE_OUT) {
3440 		mdb_printf("%#r\n", addr);
3441 		return (DCMD_OK);
3442 	}
3443 
3444 	if (verbose) {
3445 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3446 		    addr, type, vs.vs_start, vs.vs_end, sz);
3447 
3448 		if (no_debug)
3449 			return (DCMD_OK);
3450 
3451 		mdb_printf("%16s %4s %16d %16llx\n",
3452 		    "", "", vs.vs_thread, vs.vs_timestamp);
3453 
3454 		mdb_inc_indent(17);
3455 		for (i = 0; i < depth; i++) {
3456 			mdb_printf("%a\n", stk[i]);
3457 		}
3458 		mdb_dec_indent(17);
3459 		mdb_printf("\n");
3460 	} else {
3461 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3462 		    vs.vs_start, size? sz : vs.vs_end);
3463 
3464 		if (no_debug) {
3465 			mdb_printf("\n");
3466 			return (DCMD_OK);
3467 		}
3468 
3469 		for (i = 0; i < depth; i++) {
3470 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3471 			    c, sizeof (c), &sym) == -1)
3472 				continue;
3473 			if (is_umem_sym(c, "vmem_"))
3474 				continue;
3475 			break;
3476 		}
3477 		mdb_printf(" %a\n", stk[i]);
3478 	}
3479 	return (DCMD_OK);
3480 }
3481 
3482 /*ARGSUSED*/
3483 static int
showbc(uintptr_t addr,const umem_bufctl_audit_t * bcp,hrtime_t * newest)3484 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3485 {
3486 	char name[UMEM_CACHE_NAMELEN + 1];
3487 	hrtime_t delta;
3488 	int i, depth;
3489 
3490 	if (bcp->bc_timestamp == 0)
3491 		return (WALK_DONE);
3492 
3493 	if (*newest == 0)
3494 		*newest = bcp->bc_timestamp;
3495 
3496 	delta = *newest - bcp->bc_timestamp;
3497 	depth = MIN(bcp->bc_depth, umem_stack_depth);
3498 
3499 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3500 	    &bcp->bc_cache->cache_name) <= 0)
3501 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3502 
3503 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3504 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3505 
3506 	for (i = 0; i < depth; i++)
3507 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3508 
3509 	return (WALK_NEXT);
3510 }
3511 
3512 int
umalog(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3513 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3514 {
3515 	const char *logname = "umem_transaction_log";
3516 	hrtime_t newest = 0;
3517 
3518 	if ((flags & DCMD_ADDRSPEC) || argc > 1)
3519 		return (DCMD_USAGE);
3520 
3521 	if (argc > 0) {
3522 		if (argv->a_type != MDB_TYPE_STRING)
3523 			return (DCMD_USAGE);
3524 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3525 			logname = "umem_failure_log";
3526 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3527 			logname = "umem_slab_log";
3528 		else
3529 			return (DCMD_USAGE);
3530 	}
3531 
3532 	if (umem_readvar(&addr, logname) == -1) {
3533 		mdb_warn("failed to read %s log header pointer");
3534 		return (DCMD_ERR);
3535 	}
3536 
3537 	if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3538 		mdb_warn("failed to walk umem log");
3539 		return (DCMD_ERR);
3540 	}
3541 
3542 	return (DCMD_OK);
3543 }
3544 
/*
 * As the final lure for die-hard crash(8) users, we provide ::umausers here.
 * The first piece is a structure which we use to accumulate umem_cache_t
 * addresses of interest.  The umc_add is used as a callback for the umem_cache
 * walker; we either add all caches, or ones named explicitly as arguments.
 *
 * umc_caches is a growable array: umc_size is its current capacity and
 * umc_nelems the number of slots filled so far.  A zeroed umclist_t is a
 * valid empty list.
 */

typedef struct umclist {
	const char *umc_name;			/* Name to match (or NULL) */
	uintptr_t *umc_caches;			/* List of umem_cache_t addrs */
	int umc_nelems;				/* Num entries in umc_caches */
	int umc_size;				/* Size of umc_caches array */
} umclist_t;
3558 
3559 static int
umc_add(uintptr_t addr,const umem_cache_t * cp,umclist_t * umc)3560 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3561 {
3562 	void *p;
3563 	int s;
3564 
3565 	if (umc->umc_name == NULL ||
3566 	    strcmp(cp->cache_name, umc->umc_name) == 0) {
3567 		/*
3568 		 * If we have a match, grow our array (if necessary), and then
3569 		 * add the virtual address of the matching cache to our list.
3570 		 */
3571 		if (umc->umc_nelems >= umc->umc_size) {
3572 			s = umc->umc_size ? umc->umc_size * 2 : 256;
3573 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3574 
3575 			bcopy(umc->umc_caches, p,
3576 			    sizeof (uintptr_t) * umc->umc_size);
3577 
3578 			umc->umc_caches = p;
3579 			umc->umc_size = s;
3580 		}
3581 
3582 		umc->umc_caches[umc->umc_nelems++] = addr;
3583 		return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3584 	}
3585 
3586 	return (WALK_NEXT);
3587 }
3588 
/*
 * The second piece of ::umausers is a hash table of allocations.  Each
 * allocation owner is identified by its stack trace and data_size.  We then
 * track the total bytes of all such allocations, and the number of allocations
 * to report at the end.  Once we have a list of caches, we walk through the
 * allocated bufctls of each, and update our hash table accordingly.
 *
 * The umowner_t array does double duty: element N stores the N'th owner
 * that was added, while umo_head of element N is the head of the chain for
 * hash bucket N (the owners on that chain live elsewhere in the array and
 * are linked through umo_next).
 */

typedef struct umowner {
	struct umowner *umo_head;		/* First hash elt in bucket */
	struct umowner *umo_next;		/* Next hash elt in chain */
	size_t umo_signature;			/* Hash table signature */
	uint_t umo_num;				/* Number of allocations */
	size_t umo_data_size;			/* Size of each allocation */
	size_t umo_total_size;			/* Total bytes of allocation */
	int umo_depth;				/* Depth of stack trace */
	uintptr_t *umo_stack;			/* Stack trace */
} umowner_t;
3607 
/*
 * Accumulated state for one ::umausers run.  umu_hash and umu_stacks are
 * parallel arrays of umu_size entries; each owner's umo_stack points at
 * its slice of umu_stacks.  A zeroed umusers_t is a valid empty table.
 */
typedef struct umusers {
	const umem_cache_t *umu_cache;		/* Current umem cache */
	umowner_t *umu_hash;			/* Hash table of owners */
	uintptr_t *umu_stacks;			/* stacks for owners */
	int umu_nelems;				/* Number of entries in use */
	int umu_size;				/* Total number of entries */
} umusers_t;
3615 
3616 static void
umu_add(umusers_t * umu,const umem_bufctl_audit_t * bcp,size_t size,size_t data_size)3617 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3618     size_t size, size_t data_size)
3619 {
3620 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3621 	size_t bucket, signature = data_size;
3622 	umowner_t *umo, *umoend;
3623 
3624 	/*
3625 	 * If the hash table is full, double its size and rehash everything.
3626 	 */
3627 	if (umu->umu_nelems >= umu->umu_size) {
3628 		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3629 		size_t umowner_size = sizeof (umowner_t);
3630 		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3631 		uintptr_t *new_stacks;
3632 
3633 		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3634 		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3635 
3636 		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3637 		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3638 		umu->umu_hash = umo;
3639 		umu->umu_stacks = new_stacks;
3640 		umu->umu_size = s;
3641 
3642 		umoend = umu->umu_hash + umu->umu_size;
3643 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3644 			umo->umo_head = NULL;
3645 			umo->umo_stack = &umu->umu_stacks[
3646 			    umem_stack_depth * (umo - umu->umu_hash)];
3647 		}
3648 
3649 		umoend = umu->umu_hash + umu->umu_nelems;
3650 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3651 			bucket = umo->umo_signature & (umu->umu_size - 1);
3652 			umo->umo_next = umu->umu_hash[bucket].umo_head;
3653 			umu->umu_hash[bucket].umo_head = umo;
3654 		}
3655 	}
3656 
3657 	/*
3658 	 * Finish computing the hash signature from the stack trace, and then
3659 	 * see if the owner is in the hash table.  If so, update our stats.
3660 	 */
3661 	for (i = 0; i < depth; i++)
3662 		signature += bcp->bc_stack[i];
3663 
3664 	bucket = signature & (umu->umu_size - 1);
3665 
3666 	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3667 		if (umo->umo_signature == signature) {
3668 			size_t difference = 0;
3669 
3670 			difference |= umo->umo_data_size - data_size;
3671 			difference |= umo->umo_depth - depth;
3672 
3673 			for (i = 0; i < depth; i++) {
3674 				difference |= umo->umo_stack[i] -
3675 				    bcp->bc_stack[i];
3676 			}
3677 
3678 			if (difference == 0) {
3679 				umo->umo_total_size += size;
3680 				umo->umo_num++;
3681 				return;
3682 			}
3683 		}
3684 	}
3685 
3686 	/*
3687 	 * If the owner is not yet hashed, grab the next element and fill it
3688 	 * in based on the allocation information.
3689 	 */
3690 	umo = &umu->umu_hash[umu->umu_nelems++];
3691 	umo->umo_next = umu->umu_hash[bucket].umo_head;
3692 	umu->umu_hash[bucket].umo_head = umo;
3693 
3694 	umo->umo_signature = signature;
3695 	umo->umo_num = 1;
3696 	umo->umo_data_size = data_size;
3697 	umo->umo_total_size = size;
3698 	umo->umo_depth = depth;
3699 
3700 	for (i = 0; i < depth; i++)
3701 		umo->umo_stack[i] = bcp->bc_stack[i];
3702 }
3703 
3704 /*
3705  * When ::umausers is invoked without the -f flag, we simply update our hash
3706  * table with the information from each allocated bufctl.
3707  */
3708 /*ARGSUSED*/
3709 static int
umause1(uintptr_t addr,const umem_bufctl_audit_t * bcp,umusers_t * umu)3710 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3711 {
3712 	const umem_cache_t *cp = umu->umu_cache;
3713 
3714 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3715 	return (WALK_NEXT);
3716 }
3717 
3718 /*
3719  * When ::umausers is invoked with the -f flag, we print out the information
3720  * for each bufctl as well as updating the hash table.
3721  */
3722 static int
umause2(uintptr_t addr,const umem_bufctl_audit_t * bcp,umusers_t * umu)3723 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3724 {
3725 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3726 	const umem_cache_t *cp = umu->umu_cache;
3727 
3728 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3729 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3730 
3731 	for (i = 0; i < depth; i++)
3732 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3733 
3734 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3735 	return (WALK_NEXT);
3736 }
3737 
3738 /*
3739  * We sort our results by allocation size before printing them.
3740  */
3741 static int
umownercmp(const void * lp,const void * rp)3742 umownercmp(const void *lp, const void *rp)
3743 {
3744 	const umowner_t *lhs = lp;
3745 	const umowner_t *rhs = rp;
3746 
3747 	return (rhs->umo_total_size - lhs->umo_total_size);
3748 }
3749 
3750 /*
3751  * The main engine of ::umausers is relatively straightforward: First we
3752  * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3753  * iterate over the allocated bufctls of each cache in the list.  Finally,
3754  * we sort and print our results.
3755  */
3756 /*ARGSUSED*/
3757 int
umausers(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3758 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3759 {
3760 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3761 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3762 	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
3763 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3764 	int opt_e = FALSE;		/* Include "small" users */
3765 	int opt_f = FALSE;		/* Print stack traces */
3766 
3767 	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3768 	umowner_t *umo, *umoend;
3769 	int i, oelems;
3770 
3771 	umclist_t umc;
3772 	umusers_t umu;
3773 
3774 	if (flags & DCMD_ADDRSPEC)
3775 		return (DCMD_USAGE);
3776 
3777 	bzero(&umc, sizeof (umc));
3778 	bzero(&umu, sizeof (umu));
3779 
3780 	while ((i = mdb_getopts(argc, argv,
3781 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3782 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3783 
3784 		argv += i;	/* skip past options we just processed */
3785 		argc -= i;	/* adjust argc */
3786 
3787 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3788 			return (DCMD_USAGE);
3789 
3790 		oelems = umc.umc_nelems;
3791 		umc.umc_name = argv->a_un.a_str;
3792 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3793 
3794 		if (umc.umc_nelems == oelems) {
3795 			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3796 			return (DCMD_ERR);
3797 		}
3798 
3799 		do_all_caches = 0;
3800 		argv++;
3801 		argc--;
3802 	}
3803 
3804 	if (opt_e)
3805 		mem_threshold = cnt_threshold = 0;
3806 
3807 	if (opt_f)
3808 		callback = (mdb_walk_cb_t)umause2;
3809 
3810 	if (do_all_caches) {
3811 		umc.umc_name = NULL; /* match all cache names */
3812 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3813 	}
3814 
3815 	for (i = 0; i < umc.umc_nelems; i++) {
3816 		uintptr_t cp = umc.umc_caches[i];
3817 		umem_cache_t c;
3818 
3819 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3820 			mdb_warn("failed to read cache at %p", cp);
3821 			continue;
3822 		}
3823 
3824 		if (!(c.cache_flags & UMF_AUDIT)) {
3825 			if (!do_all_caches) {
3826 				mdb_warn("UMF_AUDIT is not enabled for %s\n",
3827 				    c.cache_name);
3828 			}
3829 			continue;
3830 		}
3831 
3832 		umu.umu_cache = &c;
3833 		(void) mdb_pwalk("bufctl", callback, &umu, cp);
3834 		audited_caches++;
3835 	}
3836 
3837 	if (audited_caches == 0 && do_all_caches) {
3838 		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3839 		return (DCMD_ERR);
3840 	}
3841 
3842 	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3843 	umoend = umu.umu_hash + umu.umu_nelems;
3844 
3845 	for (umo = umu.umu_hash; umo < umoend; umo++) {
3846 		if (umo->umo_total_size < mem_threshold &&
3847 		    umo->umo_num < cnt_threshold)
3848 			continue;
3849 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3850 		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3851 		for (i = 0; i < umo->umo_depth; i++)
3852 			mdb_printf("\t %a\n", umo->umo_stack[i]);
3853 	}
3854 
3855 	return (DCMD_OK);
3856 }
3857 
/*
 * The tag that sits immediately in front of the pointer handed back by
 * malloc(3C).  um_umem_buffer_cb() reads one of these from each buffer and
 * uses UMEM_MALLOC_DECODE(malloc_stat, malloc_size) to decide whether the
 * buffer really came from malloc().
 */
struct malloc_data {
	uint32_t malloc_size;
	uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
};

/*
 * Largest malloc size tracked in a um_bucket histogram; the histograms are
 * allocated with UMI_MAX_BUCKET + 1 entries and indexed by malloc size.
 * It is UMEM_MAXBUF less the tag overhead, which is counted as two
 * malloc_data structures under _LP64 and one otherwise.
 */
#ifdef _LP64
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - 2*sizeof (struct malloc_data))
#else
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - sizeof (struct malloc_data))
#endif
3868 
/*
 * Totals accumulated by um_umem_buffer_cb() while walking the allocated
 * buffers of one or more caches.
 */
typedef struct umem_malloc_info {
	size_t um_total;	/* total allocated buffers */
	size_t um_malloc;	/* malloc buffers */
	size_t um_malloc_size;	/* sum of malloc buffer sizes */
	size_t um_malloc_overhead; /* sum of in-chunk overheads */

	umem_cache_t *um_cp;	/* local copy of cache being walked */

	uint_t *um_bucket;	/* per-size malloc counts, or NULL */
} umem_malloc_info_t;
3879 
3880 static void
umem_malloc_print_dist(uint_t * um_bucket,size_t minmalloc,size_t maxmalloc,size_t maxbuckets,size_t minbucketsize,int geometric)3881 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3882     size_t maxbuckets, size_t minbucketsize, int geometric)
3883 {
3884 	uint64_t um_malloc;
3885 	int minb = -1;
3886 	int maxb = -1;
3887 	int buckets;
3888 	int nbucks;
3889 	int i;
3890 	int b;
3891 	const int *distarray;
3892 
3893 	minb = (int)minmalloc;
3894 	maxb = (int)maxmalloc;
3895 
3896 	nbucks = buckets = maxb - minb + 1;
3897 
3898 	um_malloc = 0;
3899 	for (b = minb; b <= maxb; b++)
3900 		um_malloc += um_bucket[b];
3901 
3902 	if (maxbuckets != 0)
3903 		buckets = MIN(buckets, maxbuckets);
3904 
3905 	if (minbucketsize > 1) {
3906 		buckets = MIN(buckets, nbucks/minbucketsize);
3907 		if (buckets == 0) {
3908 			buckets = 1;
3909 			minbucketsize = nbucks;
3910 		}
3911 	}
3912 
3913 	if (geometric)
3914 		distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3915 	else
3916 		distarray = dist_linear(buckets, minb, maxb);
3917 
3918 	dist_print_header("malloc size", 11, "count");
3919 	for (i = 0; i < buckets; i++) {
3920 		dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3921 	}
3922 	mdb_printf("\n");
3923 }
3924 
3925 /*
3926  * A malloc()ed buffer looks like:
3927  *
3928  *	<----------- mi.malloc_size --->
3929  *	<----------- cp.cache_bufsize ------------------>
3930  *	<----------- cp.cache_chunksize -------------------------------->
3931  *	+-------+-----------------------+---------------+---------------+
3932  *	|/tag///| mallocsz		|/round-off/////|/debug info////|
3933  *	+-------+---------------------------------------+---------------+
3934  *		<-- usable space ------>
3935  *
3936  * mallocsz is the argument to malloc(3C).
3937  * mi.malloc_size is the actual size passed to umem_alloc(), which
3938  * is rounded up to the smallest available cache size, which is
3939  * cache_bufsize.  If there is debugging or alignment overhead in
3940  * the cache, that is reflected in a larger cache_chunksize.
3941  *
3942  * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3943  * depending upon the ISA's alignment requirements.  For 32-bit allocations,
3944  * it is always a 8-byte tag.  For 64-bit allocations larger than 8 bytes,
3945  * the tag has 8 bytes of padding before it.
3946  *
 * 32-bit, 64-bit buffers <= 8 bytes:
3948  *	+-------+-------+--------- ...
3949  *	|/size//|/stat//| mallocsz ...
3950  *	+-------+-------+--------- ...
3951  *			^
3952  *			pointer returned from malloc(3C)
3953  *
 * 64-bit buffers > 8 bytes:
3955  *	+---------------+-------+-------+--------- ...
3956  *	|/padding///////|/size//|/stat//| mallocsz ...
3957  *	+---------------+-------+-------+--------- ...
3958  *					^
3959  *					pointer returned from malloc(3C)
3960  *
3961  * The "size" field is "malloc_size", which is mallocsz + the padding.
3962  * The "stat" field is derived from malloc_size, and functions as a
3963  * validation that this buffer is actually from malloc(3C).
3964  */
3965 /*ARGSUSED*/
3966 static int
um_umem_buffer_cb(uintptr_t addr,void * buf,umem_malloc_info_t * ump)3967 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3968 {
3969 	struct malloc_data md;
3970 	size_t m_addr = addr;
3971 	size_t overhead = sizeof (md);
3972 	size_t mallocsz;
3973 
3974 	ump->um_total++;
3975 
3976 #ifdef _LP64
3977 	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3978 		m_addr += overhead;
3979 		overhead += sizeof (md);
3980 	}
3981 #endif
3982 
3983 	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3984 		mdb_warn("unable to read malloc header at %p", m_addr);
3985 		return (WALK_NEXT);
3986 	}
3987 
3988 	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3989 	case MALLOC_MAGIC:
3990 #ifdef _LP64
3991 	case MALLOC_SECOND_MAGIC:
3992 #endif
3993 		mallocsz = md.malloc_size - overhead;
3994 
3995 		ump->um_malloc++;
3996 		ump->um_malloc_size += mallocsz;
3997 		ump->um_malloc_overhead += overhead;
3998 
3999 		/* include round-off and debug overhead */
4000 		ump->um_malloc_overhead +=
4001 		    ump->um_cp->cache_chunksize - md.malloc_size;
4002 
4003 		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
4004 			ump->um_bucket[mallocsz]++;
4005 
4006 		break;
4007 	default:
4008 		break;
4009 	}
4010 
4011 	return (WALK_NEXT);
4012 }
4013 
4014 int
get_umem_alloc_sizes(int ** out,size_t * out_num)4015 get_umem_alloc_sizes(int **out, size_t *out_num)
4016 {
4017 	GElf_Sym sym;
4018 
4019 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
4020 		mdb_warn("unable to look up umem_alloc_sizes");
4021 		return (-1);
4022 	}
4023 
4024 	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
4025 	*out_num = sym.st_size / sizeof (int);
4026 
4027 	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
4028 		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
4029 		*out = NULL;
4030 		return (-1);
4031 	}
4032 
4033 	return (0);
4034 }
4035 
4036 
4037 static int
um_umem_cache_cb(uintptr_t addr,umem_cache_t * cp,umem_malloc_info_t * ump)4038 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
4039 {
4040 	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
4041 		return (WALK_NEXT);
4042 
4043 	ump->um_cp = cp;
4044 
4045 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
4046 	    -1) {
4047 		mdb_warn("can't walk 'umem' for cache %p", addr);
4048 		return (WALK_ERR);
4049 	}
4050 
4051 	return (WALK_NEXT);
4052 }
4053 
/*
 * Help text for the ::umem_malloc_dist dcmd.
 */
void
umem_malloc_dist_help(void)
{
	static const char summary[] =
	    "report distribution of outstanding malloc()s";
	static const char options[] =
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw data out, without binning\n"
"  -g    use geometric binning instead of linear binning\n";

	mdb_printf("%s\n", summary);

	/* the OPTIONS heading is printed two columns to the left */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
4070 
4071 /*ARGSUSED*/
4072 int
umem_malloc_dist(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)4073 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4074 {
4075 	umem_malloc_info_t mi;
4076 	uint_t geometric = 0;
4077 	uint_t dump = 0;
4078 	size_t maxbuckets = 0;
4079 	size_t minbucketsize = 0;
4080 
4081 	size_t minalloc = 0;
4082 	size_t maxalloc = UMI_MAX_BUCKET;
4083 
4084 	if (flags & DCMD_ADDRSPEC)
4085 		return (DCMD_USAGE);
4086 
4087 	if (mdb_getopts(argc, argv,
4088 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
4089 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
4090 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
4091 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
4092 	    NULL) != argc)
4093 		return (DCMD_USAGE);
4094 
4095 	bzero(&mi, sizeof (mi));
4096 	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4097 	    UM_SLEEP | UM_GC);
4098 
4099 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
4100 	    &mi) == -1) {
4101 		mdb_warn("unable to walk 'umem_cache'");
4102 		return (DCMD_ERR);
4103 	}
4104 
4105 	if (dump) {
4106 		int i;
4107 		for (i = minalloc; i <= maxalloc; i++)
4108 			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
4109 
4110 		return (DCMD_OK);
4111 	}
4112 
4113 	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
4114 	    maxbuckets, minbucketsize, geometric);
4115 
4116 	return (DCMD_OK);
4117 }
4118 
/*
 * Help text for the ::umem_malloc_info dcmd.  The -g option is left out of
 * the text when building with _KMDB defined.
 */
void
umem_malloc_info_help(void)
{
	mdb_printf("%s\n", "report information about malloc()s by cache.  ");

	/* the OPTIONS heading is printed two columns to the left */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s",
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw distribution data without binning\n");
#ifndef _KMDB
	mdb_printf("%s",
"  -g    use geometric binning instead of linear binning\n");
#endif
}
4138 int
umem_malloc_info(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)4139 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4140 {
4141 	umem_cache_t c;
4142 	umem_malloc_info_t mi;
4143 
4144 	int skip = 0;
4145 
4146 	size_t maxmalloc;
4147 	size_t overhead;
4148 	size_t allocated;
4149 	size_t avg_malloc;
4150 	size_t overhead_pct;	/* 1000 * overhead_percent */
4151 
4152 	uint_t verbose = 0;
4153 	uint_t dump = 0;
4154 	uint_t geometric = 0;
4155 	size_t maxbuckets = 0;
4156 	size_t minbucketsize = 0;
4157 
4158 	int *alloc_sizes;
4159 	int idx;
4160 	size_t num;
4161 	size_t minmalloc;
4162 
4163 	if (mdb_getopts(argc, argv,
4164 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
4165 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
4166 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
4167 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
4168 	    NULL) != argc)
4169 		return (DCMD_USAGE);
4170 
4171 	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4172 		verbose = 1;
4173 
4174 	if (!(flags & DCMD_ADDRSPEC)) {
4175 		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4176 		    argc, argv) == -1) {
4177 			mdb_warn("can't walk umem_cache");
4178 			return (DCMD_ERR);
4179 		}
4180 		return (DCMD_OK);
4181 	}
4182 
4183 	if (!mdb_vread(&c, sizeof (c), addr)) {
4184 		mdb_warn("unable to read cache at %p", addr);
4185 		return (DCMD_ERR);
4186 	}
4187 
4188 	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4189 		if (!(flags & DCMD_LOOP))
4190 			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4191 			    "by malloc()\n", c.cache_name);
4192 		skip = 1;
4193 	}
4194 
4195 	/*
4196 	 * normally, print the header only the first time.  In verbose mode,
4197 	 * print the header on every non-skipped buffer
4198 	 */
4199 	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4200 		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4201 		    "CACHE", "BUFSZ", "MAXMAL",
4202 		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4203 
4204 	if (skip)
4205 		return (DCMD_OK);
4206 
4207 	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4208 #ifdef _LP64
4209 	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4210 		maxmalloc -= sizeof (struct malloc_data);
4211 #endif
4212 
4213 	bzero(&mi, sizeof (mi));
4214 	mi.um_cp = &c;
4215 	if (verbose)
4216 		mi.um_bucket =
4217 		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4218 		    UM_SLEEP | UM_GC);
4219 
4220 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4221 	    -1) {
4222 		mdb_warn("can't walk 'umem'");
4223 		return (DCMD_ERR);
4224 	}
4225 
4226 	overhead = mi.um_malloc_overhead;
4227 	allocated = mi.um_malloc_size;
4228 
4229 	/* do integer round off for the average */
4230 	if (mi.um_malloc != 0)
4231 		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4232 	else
4233 		avg_malloc = 0;
4234 
4235 	/*
4236 	 * include per-slab overhead
4237 	 *
4238 	 * Each slab in a given cache is the same size, and has the same
4239 	 * number of chunks in it;  we read in the first slab on the
4240 	 * slab list to get the number of chunks for all slabs.  To
4241 	 * compute the per-slab overhead, we just subtract the chunk usage
4242 	 * from the slabsize:
4243 	 *
4244 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4245 	 * |////////////|	|	| ...	|	|///////|///////|
4246 	 * |////color///| chunk	| chunk	| ...	| chunk	|/color/|/slab//|
4247 	 * |////////////|	|	| ...	|	|///////|///////|
4248 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4249 	 * |		\_______chunksize * chunks_____/		|
4250 	 * \__________________________slabsize__________________________/
4251 	 *
4252 	 * For UMF_HASH caches, there is an additional source of overhead;
4253 	 * the external umem_slab_t and per-chunk bufctl structures.  We
4254 	 * include those in our per-slab overhead.
4255 	 *
4256 	 * Once we have a number for the per-slab overhead, we estimate
4257 	 * the actual overhead by treating the malloc()ed buffers as if
4258 	 * they were densely packed:
4259 	 *
4260 	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
4261 	 *
4262 	 * carefully ordering the multiply before the divide, to avoid
4263 	 * round-off error.
4264 	 */
4265 	if (mi.um_malloc != 0) {
4266 		umem_slab_t slab;
4267 		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4268 
4269 		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4270 			mdb_warn("unable to read slab at %p\n", saddr);
4271 		} else {
4272 			long chunks = slab.slab_chunks;
4273 			if (chunks != 0 && c.cache_chunksize != 0 &&
4274 			    chunks <= c.cache_slabsize / c.cache_chunksize) {
4275 				uintmax_t perslab =
4276 				    c.cache_slabsize -
4277 				    (c.cache_chunksize * chunks);
4278 
4279 				if (c.cache_flags & UMF_HASH) {
4280 					perslab += sizeof (umem_slab_t) +
4281 					    chunks *
4282 					    ((c.cache_flags & UMF_AUDIT) ?
4283 					    sizeof (umem_bufctl_audit_t) :
4284 					    sizeof (umem_bufctl_t));
4285 				}
4286 				overhead +=
4287 				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
4288 			} else {
4289 				mdb_warn("invalid #chunks (%d) in slab %p\n",
4290 				    chunks, saddr);
4291 			}
4292 		}
4293 	}
4294 
4295 	if (allocated != 0)
4296 		overhead_pct = (1000ULL * overhead) / allocated;
4297 	else
4298 		overhead_pct = 0;
4299 
4300 	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4301 	    addr, c.cache_bufsize, maxmalloc,
4302 	    mi.um_malloc, avg_malloc, allocated, overhead,
4303 	    overhead_pct / 10, overhead_pct % 10);
4304 
4305 	if (!verbose)
4306 		return (DCMD_OK);
4307 
4308 	if (!dump)
4309 		mdb_printf("\n");
4310 
4311 	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4312 		return (DCMD_ERR);
4313 
4314 	for (idx = 0; idx < num; idx++) {
4315 		if (alloc_sizes[idx] == c.cache_bufsize)
4316 			break;
4317 		if (alloc_sizes[idx] == 0) {
4318 			idx = num;	/* 0-terminated array */
4319 			break;
4320 		}
4321 	}
4322 	if (idx == num) {
4323 		mdb_warn(
4324 		    "cache %p's size (%d) not in umem_alloc_sizes\n",
4325 		    addr, c.cache_bufsize);
4326 		return (DCMD_ERR);
4327 	}
4328 
4329 	minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4330 	if (minmalloc > 0) {
4331 #ifdef _LP64
4332 		if (minmalloc > UMEM_SECOND_ALIGN)
4333 			minmalloc -= sizeof (struct malloc_data);
4334 #endif
4335 		minmalloc -= sizeof (struct malloc_data);
4336 		minmalloc += 1;
4337 	}
4338 
4339 	if (dump) {
4340 		for (idx = minmalloc; idx <= maxmalloc; idx++)
4341 			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4342 		mdb_printf("\n");
4343 	} else {
4344 		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4345 		    maxbuckets, minbucketsize, geometric);
4346 	}
4347 
4348 	return (DCMD_OK);
4349 }
4350