xref: /illumos-gate/usr/src/cmd/mdb/common/modules/ip/ip.c (revision ffaa671a64d4e4369c7b65e6d155e9fd2211bf8a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stropts.h>
30 #include <sys/stream.h>
31 #include <sys/socket.h>
32 #include <sys/avl_impl.h>
33 #include <net/if.h>
34 #include <net/route.h>
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 #include <netinet/udp.h>
38 #include <netinet/sctp.h>
39 #include <inet/mib2.h>
40 #include <inet/common.h>
41 #include <inet/ip.h>
42 #include <inet/ip_ire.h>
43 #include <inet/ip6.h>
44 #include <inet/ipclassifier.h>
45 #include <inet/mi.h>
46 #include <sys/squeue_impl.h>
47 #include <sys/modhash_impl.h>
48 #include <inet/ip_ndp.h>
49 #include <inet/ip_if.h>
50 #include <sys/dlpi.h>
51 
52 #include <mdb/mdb_modapi.h>
53 #include <mdb/mdb_ks.h>
54 
/*
 * Formatting limits.  None of these is referenced in the part of the file
 * reviewed here, so the per-macro notes below are inferred from the names
 * only -- TODO confirm against the users further down the file.
 */
#define	ADDR_WIDTH 11		/* presumably an address column width */
#define	L2MAXADDRSTRLEN	255	/* presumably max link-layer address string */
#define	MAX_SAP_LEN	255	/* presumably max SAP string length */
58 
/*
 * Pairs the symbolic name of a flag bit with a human-readable explanation.
 */
typedef struct {
	const char *bit_name;	/* name of bit */
	const char *bit_descr;	/* description of bit's purpose */
} bitname_t;

/*
 * Names and descriptions of the squeue state bits, indexed by bit number:
 * entry i describes bit (1 << i) of sq_state (see squeue()).  The table is
 * terminated by an entry whose bit_name is NULL.
 */
static const bitname_t squeue_states[] = {
	{ "SQS_PROC",		"being processed" },
	{ "SQS_WORKER",		"... by a worker thread" },
	{ "SQS_ENTER",		"... by an squeue_enter() thread" },
	{ "SQS_FAST",		"... in fast-path mode" },
	{ "SQS_USER",		"A non interrupt user" },
	{ "SQS_BOUND",		"worker thread bound to CPU" },
	{ "SQS_PROFILE",	"profiling enabled" },
	{ "SQS_REENTER",	"re-entered thread" },
	{ NULL }
};
75 
/*
 * Private state of the illif_stack walker.  It is allocated in
 * illif_stack_walk_init(), handed to the walk callback as its data
 * argument, and released in illif_stack_walk_fini().
 */
typedef struct illif_walk_data {
	/* local copy of the array read from wsp->walk_addr at init time */
	ill_g_head_t ill_g_heads[MAX_G_HEADS];
	/* index into ill_g_heads[] of the list currently being walked */
	int ill_list;
	/* local copy of the ill_if_t most recently read by the step function */
	ill_if_t ill_if;
} illif_walk_data_t;
81 
/*
 * Walk state for the nce walkers; it is the data argument type of
 * nce_cb().  The walkers themselves are outside the part of the file
 * reviewed here, so the field notes are inferred from the names and types
 * -- TODO confirm.
 */
typedef struct nce_walk_data_s {
	struct ndp_g_s	nce_ip_ndp;	/* presumably a local ndp_g_s copy */
	int		nce_hash_tbl_index; /* presumably current hash bucket */
	nce_t 		nce;		/* presumably the current nce_t copy */
} nce_walk_data_t;
87 
/*
 * Callback argument type of nce_cb().  The code that fills it in is not
 * in the part of the file reviewed here; the notes below are inferred from
 * the names -- TODO confirm.
 */
typedef struct nce_cbdata_s {
	uintptr_t nce_addr;		/* presumably the address to match */
	int	  nce_ipversion;	/* presumably an IP version filter */
} nce_cbdata_t;
92 
/*
 * Options handed to ire_format() through the walk callback argument.
 */
typedef struct ire_cbdata_s {
	/* IP version to show; 0 disables the filter (see ire_format()) */
	int		ire_ipversion;
	/* selects the multi-line output that decodes type, marks and flags */
	boolean_t	verbose;
} ire_cbdata_t;
97 
/*
 * Private argument consumed by modent_summary().
 */
typedef struct th_walk_data {
	/* non-zero: skip entries whose th_refcnt is 0 */
	uint_t		thw_non_zero_only;
	/* false: print a one-line summary of every entry; true: see below */
	boolean_t	thw_match;
	/* when thw_match is set, only the entry with this key is detailed */
	uintptr_t	thw_matchkey;
	/* IP stack address echoed in the output */
	uintptr_t	thw_ipst;
	/* subtracted from each tr_time so times print relative to it */
	clock_t		thw_lbolt;
} th_walk_data_t;
105 
/*
 * Forward declarations.  iphdr() and ip6hdr() call each other when the
 * version nibble of the header belongs to the other protocol, and
 * ire_format() is used as a walk callback before it is defined.
 */
static int iphdr(uintptr_t, uint_t, int, const mdb_arg_t *);
static int ip6hdr(uintptr_t, uint_t, int, const mdb_arg_t *);

static int ire_format(uintptr_t addr, const void *, void *);
/* The nce routines are defined beyond the part of the file reviewed here. */
static int nce_format(uintptr_t addr, const nce_t *nce, int ipversion);
static int nce(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv);
static int nce_walk_step(mdb_walk_state_t *wsp);
static int nce_stack_walk_init(mdb_walk_state_t *wsp);
static int nce_stack_walk_step(mdb_walk_state_t *wsp);
static void nce_stack_walk_fini(mdb_walk_state_t *wsp);
static int nce_cb(uintptr_t addr, const nce_walk_data_t *iw, nce_cbdata_t *id);
117 
118 /*
119  * Given the kernel address of an ip_stack_t, return the stackid
120  */
121 static int
122 ips_to_stackid(uintptr_t kaddr)
123 {
124 	ip_stack_t ipss;
125 	netstack_t nss;
126 
127 	if (mdb_vread(&ipss, sizeof (ipss), kaddr) == -1) {
128 		mdb_warn("failed to read ip_stack_t %p", kaddr);
129 		return (0);
130 	}
131 	kaddr = (uintptr_t)ipss.ips_netstack;
132 	if (mdb_vread(&nss, sizeof (nss), kaddr) == -1) {
133 		mdb_warn("failed to read netstack_t %p", kaddr);
134 		return (0);
135 	}
136 	return (nss.netstack_stackid);
137 }
138 
139 int
140 ip_stacks_walk_init(mdb_walk_state_t *wsp)
141 {
142 	if (mdb_layered_walk("netstack", wsp) == -1) {
143 		mdb_warn("can't walk 'netstack'");
144 		return (WALK_ERR);
145 	}
146 	return (WALK_NEXT);
147 }
148 
149 int
150 ip_stacks_walk_step(mdb_walk_state_t *wsp)
151 {
152 	uintptr_t kaddr;
153 	netstack_t nss;
154 
155 	if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) {
156 		mdb_warn("can't read netstack at %p", wsp->walk_addr);
157 		return (WALK_ERR);
158 	}
159 	kaddr = (uintptr_t)nss.netstack_modules[NS_IP];
160 
161 	return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata));
162 }
163 
164 int
165 th_hash_walk_init(mdb_walk_state_t *wsp)
166 {
167 	GElf_Sym sym;
168 	list_node_t *next;
169 
170 	if (wsp->walk_addr == NULL) {
171 		if (mdb_lookup_by_obj("ip", "ip_thread_list", &sym) == 0) {
172 			wsp->walk_addr = sym.st_value;
173 		} else {
174 			mdb_warn("unable to locate ip_thread_list\n");
175 			return (WALK_ERR);
176 		}
177 	}
178 
179 	if (mdb_vread(&next, sizeof (next),
180 	    wsp->walk_addr + offsetof(list_t, list_head) +
181 	    offsetof(list_node_t, list_next)) == -1 ||
182 	    next == NULL) {
183 		mdb_warn("non-DEBUG image; cannot walk th_hash list\n");
184 		return (WALK_ERR);
185 	}
186 
187 	if (mdb_layered_walk("list", wsp) == -1) {
188 		mdb_warn("can't walk 'list'");
189 		return (WALK_ERR);
190 	} else {
191 		return (WALK_NEXT);
192 	}
193 }
194 
195 int
196 th_hash_walk_step(mdb_walk_state_t *wsp)
197 {
198 	return (wsp->walk_callback(wsp->walk_addr, wsp->walk_layer,
199 	    wsp->walk_cbdata));
200 }
201 
202 /*
203  * Called with walk_addr being the address of ips_ill_g_heads
204  */
205 int
206 illif_stack_walk_init(mdb_walk_state_t *wsp)
207 {
208 	illif_walk_data_t *iw;
209 
210 	if (wsp->walk_addr == NULL) {
211 		mdb_warn("illif_stack supports only local walks\n");
212 		return (WALK_ERR);
213 	}
214 
215 	iw = mdb_alloc(sizeof (illif_walk_data_t), UM_SLEEP);
216 
217 	if (mdb_vread(iw->ill_g_heads, MAX_G_HEADS * sizeof (ill_g_head_t),
218 	    wsp->walk_addr) == -1) {
219 		mdb_warn("failed to read 'ips_ill_g_heads' at %p",
220 		    wsp->walk_addr);
221 		mdb_free(iw, sizeof (illif_walk_data_t));
222 		return (WALK_ERR);
223 	}
224 
225 	iw->ill_list = 0;
226 	wsp->walk_addr = (uintptr_t)iw->ill_g_heads[0].ill_g_list_head;
227 	wsp->walk_data = iw;
228 
229 	return (WALK_NEXT);
230 }
231 
232 int
233 illif_stack_walk_step(mdb_walk_state_t *wsp)
234 {
235 	uintptr_t addr = wsp->walk_addr;
236 	illif_walk_data_t *iw = wsp->walk_data;
237 	int list = iw->ill_list;
238 
239 	if (mdb_vread(&iw->ill_if, sizeof (ill_if_t), addr) == -1) {
240 		mdb_warn("failed to read ill_if_t at %p", addr);
241 		return (WALK_ERR);
242 	}
243 
244 	wsp->walk_addr = (uintptr_t)iw->ill_if.illif_next;
245 
246 	if (wsp->walk_addr ==
247 	    (uintptr_t)iw->ill_g_heads[list].ill_g_list_head) {
248 
249 		if (++list >= MAX_G_HEADS)
250 			return (WALK_DONE);
251 
252 		iw->ill_list = list;
253 		wsp->walk_addr =
254 		    (uintptr_t)iw->ill_g_heads[list].ill_g_list_head;
255 		return (WALK_NEXT);
256 	}
257 
258 	return (wsp->walk_callback(addr, iw, wsp->walk_cbdata));
259 }
260 
261 void
262 illif_stack_walk_fini(mdb_walk_state_t *wsp)
263 {
264 	mdb_free(wsp->walk_data, sizeof (illif_walk_data_t));
265 }
266 
/*
 * Argument passed from the ::illif dcmd to illif_cb().
 */
typedef struct illif_cbdata {
	uint_t ill_flags;	/* dcmd flags (DCMD_ADDRSPEC, DCMD_PIPE_OUT) */
	uintptr_t ill_addr;	/* address given to the dcmd, if any */
	int ill_printlist;	/* list to be printed (MAX_G_HEADS for all) */
	boolean_t ill_printed;	/* set once illif_cb() has printed a row */
} illif_cbdata_t;
273 
274 static int
275 illif_cb(uintptr_t addr, const illif_walk_data_t *iw, illif_cbdata_t *id)
276 {
277 	const char *version;
278 
279 	if (id->ill_printlist < MAX_G_HEADS &&
280 	    id->ill_printlist != iw->ill_list)
281 		return (WALK_NEXT);
282 
283 	if (id->ill_flags & DCMD_ADDRSPEC && id->ill_addr != addr)
284 		return (WALK_NEXT);
285 
286 	if (id->ill_flags & DCMD_PIPE_OUT) {
287 		mdb_printf("%p\n", addr);
288 		return (WALK_NEXT);
289 	}
290 
291 	switch (iw->ill_list) {
292 		case IP_V4_G_HEAD:	version = "v4";	break;
293 		case IP_V6_G_HEAD:	version = "v6";	break;
294 		default:		version = "??"; break;
295 	}
296 
297 	mdb_printf("%?p %2s %?p %10d %?p %s\n",
298 	    addr, version, addr + offsetof(ill_if_t, illif_avl_by_ppa),
299 	    iw->ill_if.illif_avl_by_ppa.avl_numnodes,
300 	    iw->ill_if.illif_ppa_arena, iw->ill_if.illif_name);
301 
302 	id->ill_printed = TRUE;
303 
304 	return (WALK_NEXT);
305 }
306 
307 int
308 ip_stacks_common_walk_init(mdb_walk_state_t *wsp)
309 {
310 	if (mdb_layered_walk("ip_stacks", wsp) == -1) {
311 		mdb_warn("can't walk 'ip_stacks'");
312 		return (WALK_ERR);
313 	}
314 
315 	return (WALK_NEXT);
316 }
317 
318 int
319 illif_walk_step(mdb_walk_state_t *wsp)
320 {
321 	uintptr_t kaddr;
322 
323 	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ill_g_heads);
324 
325 	if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) {
326 		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
327 		return (WALK_ERR);
328 	}
329 
330 	if (mdb_pwalk("illif_stack", wsp->walk_callback,
331 	    wsp->walk_cbdata, kaddr) == -1) {
332 		mdb_warn("couldn't walk 'illif_stack' for ips_ill_g_heads %p",
333 		    kaddr);
334 		return (WALK_ERR);
335 	}
336 	return (WALK_NEXT);
337 }
338 
339 int
340 illif(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
341 {
342 	illif_cbdata_t id;
343 	ill_if_t ill_if;
344 	const char *opt_P = NULL;
345 	int printlist = MAX_G_HEADS;
346 
347 	if (mdb_getopts(argc, argv,
348 	    'P', MDB_OPT_STR, &opt_P, NULL) != argc)
349 		return (DCMD_USAGE);
350 
351 	if (opt_P != NULL) {
352 		if (strcmp("v4", opt_P) == 0) {
353 			printlist = IP_V4_G_HEAD;
354 		} else if (strcmp("v6", opt_P) == 0) {
355 			printlist = IP_V6_G_HEAD;
356 		} else {
357 			mdb_warn("invalid protocol '%s'\n", opt_P);
358 			return (DCMD_USAGE);
359 		}
360 	}
361 
362 	if (DCMD_HDRSPEC(flags) && (flags & DCMD_PIPE_OUT) == 0) {
363 		mdb_printf("%<u>%?s %2s %?s %10s %?s %-10s%</u>\n",
364 		    "ADDR", "IP", "AVLADDR", "NUMNODES", "ARENA", "NAME");
365 	}
366 
367 	id.ill_flags = flags;
368 	id.ill_addr = addr;
369 	id.ill_printlist = printlist;
370 	id.ill_printed = FALSE;
371 
372 	if (mdb_walk("illif", (mdb_walk_cb_t)illif_cb, &id) == -1) {
373 		mdb_warn("can't walk ill_if_t structures");
374 		return (DCMD_ERR);
375 	}
376 
377 	if (!(flags & DCMD_ADDRSPEC) || opt_P != NULL || id.ill_printed)
378 		return (DCMD_OK);
379 
380 	/*
381 	 * If an address is specified and the walk doesn't find it,
382 	 * print it anyway.
383 	 */
384 	if (mdb_vread(&ill_if, sizeof (ill_if_t), addr) == -1) {
385 		mdb_warn("failed to read ill_if_t at %p", addr);
386 		return (DCMD_ERR);
387 	}
388 
389 	mdb_printf("%?p %2s %?p %10d %?p %s\n",
390 	    addr, "??", addr + offsetof(ill_if_t, illif_avl_by_ppa),
391 	    ill_if.illif_avl_by_ppa.avl_numnodes,
392 	    ill_if.illif_ppa_arena, ill_if.illif_name);
393 
394 	return (DCMD_OK);
395 }
396 
/*
 * Help text for ::illif.
 */
static void
illif_help(void)
{
	mdb_printf("Options:\n"
	    "\t-P v4 | v6"
	    "\tfilter interface structures for the specified protocol\n");
}
404 
405 int
406 ire_walk_init(mdb_walk_state_t *wsp)
407 {
408 	if (mdb_layered_walk("ire_cache", wsp) == -1) {
409 		mdb_warn("can't walk 'ire_cache'");
410 		return (WALK_ERR);
411 	}
412 
413 	return (WALK_NEXT);
414 }
415 
416 int
417 ire_walk_step(mdb_walk_state_t *wsp)
418 {
419 	ire_t ire;
420 
421 	if (mdb_vread(&ire, sizeof (ire), wsp->walk_addr) == -1) {
422 		mdb_warn("can't read ire at %p", wsp->walk_addr);
423 		return (WALK_ERR);
424 	}
425 
426 	return (wsp->walk_callback(wsp->walk_addr, &ire, wsp->walk_cbdata));
427 }
428 
429 
430 int
431 ire_ctable_walk_step(mdb_walk_state_t *wsp)
432 {
433 	uintptr_t kaddr;
434 	irb_t *irb;
435 	uint32_t cache_table_size;
436 	int i;
437 	ire_cbdata_t ire_cb;
438 
439 	ire_cb.verbose = B_FALSE;
440 	ire_cb.ire_ipversion = 0;
441 
442 
443 	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table_size);
444 
445 	if (mdb_vread(&cache_table_size, sizeof (uint32_t), kaddr) == -1) {
446 		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
447 		return (WALK_ERR);
448 	}
449 
450 	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table);
451 	if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) {
452 		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
453 		return (WALK_ERR);
454 	}
455 
456 	irb = mdb_alloc(sizeof (irb_t) * cache_table_size, UM_SLEEP|UM_GC);
457 	if (mdb_vread(irb, sizeof (irb_t) * cache_table_size, kaddr) == -1) {
458 		mdb_warn("can't read irb at %p", kaddr);
459 		return (WALK_ERR);
460 	}
461 	for (i = 0; i < cache_table_size; i++) {
462 		kaddr = (uintptr_t)irb[i].irb_ire;
463 
464 		if (mdb_pwalk("ire_next", ire_format, &ire_cb,
465 		    kaddr) == -1) {
466 			mdb_warn("can't walk 'ire_next' for ire %p", kaddr);
467 			return (WALK_ERR);
468 		}
469 	}
470 	return (WALK_NEXT);
471 }
472 
/*
 * Init routine of the ire_next walker.  No state is set up; the walk
 * simply starts from whatever address is already in wsp->walk_addr.
 */
/* ARGSUSED */
int
ire_next_walk_init(mdb_walk_state_t *wsp)
{
	return (WALK_NEXT);
}
479 
480 int
481 ire_next_walk_step(mdb_walk_state_t *wsp)
482 {
483 	ire_t ire;
484 	int status;
485 
486 
487 	if (wsp->walk_addr == NULL)
488 		return (WALK_DONE);
489 
490 	if (mdb_vread(&ire, sizeof (ire), wsp->walk_addr) == -1) {
491 		mdb_warn("can't read ire at %p", wsp->walk_addr);
492 		return (WALK_ERR);
493 	}
494 	status = wsp->walk_callback(wsp->walk_addr, &ire,
495 	    wsp->walk_cbdata);
496 
497 	if (status != WALK_NEXT)
498 		return (status);
499 
500 	wsp->walk_addr = (uintptr_t)ire.ire_next;
501 	return (status);
502 }
503 
504 static int
505 ire_format(uintptr_t addr, const void *ire_arg, void *ire_cb_arg)
506 {
507 	const ire_t *irep = ire_arg;
508 	ire_cbdata_t *ire_cb = ire_cb_arg;
509 	boolean_t verbose = ire_cb->verbose;
510 
511 	static const mdb_bitmask_t tmasks[] = {
512 		{ "BROADCAST",	IRE_BROADCAST,		IRE_BROADCAST	},
513 		{ "DEFAULT",	IRE_DEFAULT,		IRE_DEFAULT	},
514 		{ "LOCAL",	IRE_LOCAL,		IRE_LOCAL	},
515 		{ "LOOPBACK",	IRE_LOOPBACK,		IRE_LOOPBACK	},
516 		{ "PREFIX",	IRE_PREFIX,		IRE_PREFIX	},
517 		{ "CACHE",	IRE_CACHE,		IRE_CACHE	},
518 		{ "IF_NORESOLVER", IRE_IF_NORESOLVER,	IRE_IF_NORESOLVER },
519 		{ "IF_RESOLVER", IRE_IF_RESOLVER,	IRE_IF_RESOLVER	},
520 		{ "HOST",	IRE_HOST,		IRE_HOST	},
521 		{ "HOST_REDIRECT", IRE_HOST_REDIRECT,	IRE_HOST_REDIRECT },
522 		{ NULL,		0,			0		}
523 	};
524 
525 	static const mdb_bitmask_t mmasks[] = {
526 		{ "CONDEMNED",	IRE_MARK_CONDEMNED,	IRE_MARK_CONDEMNED },
527 		{ "NORECV",	IRE_MARK_NORECV,	IRE_MARK_NORECV	},
528 		{ "HIDDEN",	IRE_MARK_HIDDEN,	IRE_MARK_HIDDEN	},
529 		{ "NOADD",	IRE_MARK_NOADD,		IRE_MARK_NOADD	},
530 		{ "TEMPORARY",	IRE_MARK_TEMPORARY,	IRE_MARK_TEMPORARY },
531 		{ "USESRC",	IRE_MARK_USESRC_CHECK,	IRE_MARK_USESRC_CHECK },
532 		{ "PRIVATE",	IRE_MARK_PRIVATE_ADDR,	IRE_MARK_PRIVATE_ADDR },
533 		{ "UNCACHED",	IRE_MARK_UNCACHED,	IRE_MARK_UNCACHED },
534 		{ NULL,		0,			0		}
535 	};
536 
537 	static const mdb_bitmask_t fmasks[] = {
538 		{ "UP",		RTF_UP,			RTF_UP		},
539 		{ "GATEWAY",	RTF_GATEWAY,		RTF_GATEWAY	},
540 		{ "HOST",	RTF_HOST,		RTF_HOST	},
541 		{ "REJECT",	RTF_REJECT,		RTF_REJECT	},
542 		{ "DYNAMIC",	RTF_DYNAMIC,		RTF_DYNAMIC	},
543 		{ "MODIFIED",	RTF_MODIFIED,		RTF_MODIFIED	},
544 		{ "DONE",	RTF_DONE,		RTF_DONE	},
545 		{ "MASK",	RTF_MASK,		RTF_MASK	},
546 		{ "CLONING",	RTF_CLONING,		RTF_CLONING	},
547 		{ "XRESOLVE",	RTF_XRESOLVE,		RTF_XRESOLVE	},
548 		{ "LLINFO",	RTF_LLINFO,		RTF_LLINFO	},
549 		{ "STATIC",	RTF_STATIC,		RTF_STATIC	},
550 		{ "BLACKHOLE",	RTF_BLACKHOLE,		RTF_BLACKHOLE	},
551 		{ "PRIVATE",	RTF_PRIVATE,		RTF_PRIVATE	},
552 		{ "PROTO2",	RTF_PROTO2,		RTF_PROTO2	},
553 		{ "PROTO1",	RTF_PROTO1,		RTF_PROTO1	},
554 		{ "MULTIRT",	RTF_MULTIRT,		RTF_MULTIRT	},
555 		{ "SETSRC",	RTF_SETSRC,		RTF_SETSRC	},
556 		{ NULL,		0,			0		}
557 	};
558 
559 	if (ire_cb->ire_ipversion != 0 &&
560 	    irep->ire_ipversion != ire_cb->ire_ipversion)
561 		return (WALK_NEXT);
562 
563 	if (irep->ire_ipversion == IPV6_VERSION && verbose) {
564 
565 		mdb_printf("%<b>%?p%</b> %40N <%hb>\n"
566 		    "%?s %40N <%hb>\n"
567 		    "%?s %40d %4d <%hb>\n",
568 		    addr, &irep->ire_src_addr_v6, irep->ire_type, tmasks,
569 		    "", &irep->ire_addr_v6, (ushort_t)irep->ire_marks, mmasks,
570 		    "", ips_to_stackid((uintptr_t)irep->ire_ipst),
571 		    irep->ire_zoneid,
572 		    irep->ire_flags, fmasks);
573 
574 	} else if (irep->ire_ipversion == IPV6_VERSION) {
575 
576 		mdb_printf("%?p %30N %30N %5d %4d\n",
577 		    addr, &irep->ire_src_addr_v6,
578 		    &irep->ire_addr_v6,
579 		    ips_to_stackid((uintptr_t)irep->ire_ipst),
580 		    irep->ire_zoneid);
581 
582 	} else if (verbose) {
583 
584 		mdb_printf("%<b>%?p%</b> %40I <%hb>\n"
585 		    "%?s %40I <%hb>\n"
586 		    "%?s %40d %4d <%hb>\n",
587 		    addr, irep->ire_src_addr, irep->ire_type, tmasks,
588 		    "", irep->ire_addr, (ushort_t)irep->ire_marks, mmasks,
589 		    "", ips_to_stackid((uintptr_t)irep->ire_ipst),
590 		    irep->ire_zoneid, irep->ire_flags, fmasks);
591 
592 	} else {
593 
594 		mdb_printf("%?p %30I %30I %5d %4d\n", addr, irep->ire_src_addr,
595 		    irep->ire_addr, ips_to_stackid((uintptr_t)irep->ire_ipst),
596 		    irep->ire_zoneid);
597 	}
598 
599 	return (WALK_NEXT);
600 }
601 
/*
 * Compute the 16-bit one's complement checksum of `len' bytes at `p'.
 *
 * The buffer is summed as 16-bit words in host memory order, so the result
 * is directly comparable with a checksum field read from the same buffer
 * without byte swapping.  A trailing odd byte is treated as the first byte
 * of a word whose second byte is zero.  A buffer that already contains a
 * correct checksum yields 0.
 *
 * There are faster ways to do this.  Given the interactive nature of this
 * use it is not worth much effort.
 */
static unsigned short
ipcksum(void *p, int len)
{
	const unsigned char *bytes = p;
	uint32_t sum = 0;	/* unsigned: no overflow or sign-extension */
	uint16_t word;
	unsigned char *wp = (unsigned char *)&word;

	while (len > 1) {
		/* assemble bytewise: p need not be 16-bit aligned */
		wp[0] = bytes[0];
		wp[1] = bytes[1];
		sum += word;
		bytes += sizeof (uint16_t);
		/* fold early so that long buffers cannot wrap the sum */
		if (sum & 0x80000000U)
			sum = (sum & 0xFFFF) + (sum >> 16);
		len -= 2;
	}

	if (len > 0) {
		/* pad the last byte with a zero in memory order */
		word = 0;
		wp[0] = bytes[0];
		sum += word;
	}

	while (sum >> 16)
		sum = (sum & 0xFFFF) + (sum >> 16);

	return ((unsigned short)~sum);
}
628 
/*
 * Symbolic names for the TCP header flag bits; used by tcphdr_print() with
 * the %b conversion to decode th_flags.
 */
static const mdb_bitmask_t tcp_flags[] = {
	{ "SYN",	TH_SYN,		TH_SYN	},
	{ "ACK",	TH_ACK,		TH_ACK	},
	{ "FIN",	TH_FIN,		TH_FIN	},
	{ "RST",	TH_RST,		TH_RST	},
	{ "PSH",	TH_PUSH,	TH_PUSH	},
	{ "ECE",	TH_ECE,		TH_ECE	},
	{ "CWR",	TH_CWR,		TH_CWR	},
	{ NULL,		0,		0	}
};
639 
640 static void
641 tcphdr_print(struct tcphdr *tcph)
642 {
643 	in_port_t	sport, dport;
644 	tcp_seq		seq, ack;
645 	uint16_t	win, urp;
646 
647 	mdb_printf("%<b>TCP header%</b>\n");
648 
649 	mdb_nhconvert(&sport, &tcph->th_sport, sizeof (sport));
650 	mdb_nhconvert(&dport, &tcph->th_dport, sizeof (dport));
651 	mdb_nhconvert(&seq, &tcph->th_seq, sizeof (seq));
652 	mdb_nhconvert(&ack, &tcph->th_ack, sizeof (ack));
653 	mdb_nhconvert(&win, &tcph->th_win, sizeof (win));
654 	mdb_nhconvert(&urp, &tcph->th_urp, sizeof (urp));
655 
656 	mdb_printf("%<u>%6s %6s %10s %10s %4s %5s %5s %5s %-15s%</u>\n",
657 	    "SPORT", "DPORT", "SEQ", "ACK", "HLEN", "WIN", "CSUM", "URP",
658 	    "FLAGS");
659 	mdb_printf("%6hu %6hu %10u %10u %4d %5hu %5hu %5hu <%b>\n",
660 	    sport, dport, seq, ack, tcph->th_off << 2, win,
661 	    tcph->th_sum, urp, tcph->th_flags, tcp_flags);
662 	mdb_printf("0x%04x 0x%04x 0x%08x 0x%08x\n\n",
663 	    sport, dport, seq, ack);
664 }
665 
666 /* ARGSUSED */
667 static int
668 tcphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
669 {
670 	struct tcphdr	tcph;
671 
672 	if (!(flags & DCMD_ADDRSPEC))
673 		return (DCMD_USAGE);
674 
675 	if (mdb_vread(&tcph, sizeof (tcph), addr) == -1) {
676 		mdb_warn("failed to read TCP header at %p", addr);
677 		return (DCMD_ERR);
678 	}
679 	tcphdr_print(&tcph);
680 	return (DCMD_OK);
681 }
682 
683 static void
684 udphdr_print(struct udphdr *udph)
685 {
686 	in_port_t	sport, dport;
687 	uint16_t	hlen;
688 
689 	mdb_printf("%<b>UDP header%</b>\n");
690 
691 	mdb_nhconvert(&sport, &udph->uh_sport, sizeof (sport));
692 	mdb_nhconvert(&dport, &udph->uh_dport, sizeof (dport));
693 	mdb_nhconvert(&hlen, &udph->uh_ulen, sizeof (hlen));
694 
695 	mdb_printf("%<u>%14s %14s %5s %6s%</u>\n",
696 	    "SPORT", "DPORT", "LEN", "CSUM");
697 	mdb_printf("%5hu (0x%04x) %5hu (0x%04x) %5hu 0x%04hx\n\n", sport, sport,
698 	    dport, dport, hlen, udph->uh_sum);
699 }
700 
701 /* ARGSUSED */
702 static int
703 udphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
704 {
705 	struct udphdr	udph;
706 
707 	if (!(flags & DCMD_ADDRSPEC))
708 		return (DCMD_USAGE);
709 
710 	if (mdb_vread(&udph, sizeof (udph), addr) == -1) {
711 		mdb_warn("failed to read UDP header at %p", addr);
712 		return (DCMD_ERR);
713 	}
714 	udphdr_print(&udph);
715 	return (DCMD_OK);
716 }
717 
718 static void
719 sctphdr_print(sctp_hdr_t *sctph)
720 {
721 	in_port_t sport, dport;
722 
723 	mdb_printf("%<b>SCTP header%</b>\n");
724 	mdb_nhconvert(&sport, &sctph->sh_sport, sizeof (sport));
725 	mdb_nhconvert(&dport, &sctph->sh_dport, sizeof (dport));
726 
727 	mdb_printf("%<u>%14s %14s %10s %10s%</u>\n",
728 	    "SPORT", "DPORT", "VTAG", "CHKSUM");
729 	mdb_printf("%5hu (0x%04x) %5hu (0x%04x) %10u 0x%08x\n\n", sport, sport,
730 	    dport, dport, sctph->sh_verf, sctph->sh_chksum);
731 }
732 
733 /* ARGSUSED */
734 static int
735 sctphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
736 {
737 	sctp_hdr_t sctph;
738 
739 	if (!(flags & DCMD_ADDRSPEC))
740 		return (DCMD_USAGE);
741 
742 	if (mdb_vread(&sctph, sizeof (sctph), addr) == -1) {
743 		mdb_warn("failed to read SCTP header at %p", addr);
744 		return (DCMD_ERR);
745 	}
746 
747 	sctphdr_print(&sctph);
748 	return (DCMD_OK);
749 }
750 
751 static int
752 transport_hdr(int proto, uintptr_t addr)
753 {
754 	mdb_printf("\n");
755 	switch (proto) {
756 	case IPPROTO_TCP: {
757 		struct tcphdr tcph;
758 
759 		if (mdb_vread(&tcph, sizeof (tcph), addr) == -1) {
760 			mdb_warn("failed to read TCP header at %p", addr);
761 			return (DCMD_ERR);
762 		}
763 		tcphdr_print(&tcph);
764 		break;
765 	}
766 	case IPPROTO_UDP:  {
767 		struct udphdr udph;
768 
769 		if (mdb_vread(&udph, sizeof (udph), addr) == -1) {
770 			mdb_warn("failed to read UDP header at %p", addr);
771 			return (DCMD_ERR);
772 		}
773 		udphdr_print(&udph);
774 		break;
775 	}
776 	case IPPROTO_SCTP: {
777 		sctp_hdr_t sctph;
778 
779 		if (mdb_vread(&sctph, sizeof (sctph), addr) == -1) {
780 			mdb_warn("failed to read SCTP header at %p", addr);
781 			return (DCMD_ERR);
782 		}
783 		sctphdr_print(&sctph);
784 		break;
785 	}
786 	default:
787 		break;
788 	}
789 
790 	return (DCMD_OK);
791 }
792 
/*
 * Symbolic names for the IPv4 fragmentation flag bits; used by iphdr()
 * with the %b conversion to decode the flags/offset field.
 */
static const mdb_bitmask_t ip_flags[] = {
	{ "DF",	IPH_DF, IPH_DF	},
	{ "MF", IPH_MF,	IPH_MF	},
	{ NULL, 0,	0	}
};
798 
799 /* ARGSUSED */
800 static int
801 iphdr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
802 {
803 	uint_t		verbose = FALSE, force = FALSE;
804 	ipha_t		iph[1];
805 	uint16_t	ver, totlen, hdrlen, ipid, off, csum;
806 	uintptr_t	nxt_proto;
807 	char		exp_csum[8];
808 
809 	if (mdb_getopts(argc, argv,
810 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
811 	    'f', MDB_OPT_SETBITS, TRUE, &force, NULL) != argc)
812 		return (DCMD_USAGE);
813 
814 	if (mdb_vread(iph, sizeof (*iph), addr) == -1) {
815 		mdb_warn("failed to read IPv4 header at %p", addr);
816 		return (DCMD_ERR);
817 	}
818 
819 	ver = (iph->ipha_version_and_hdr_length & 0xf0) >> 4;
820 	if (ver != IPV4_VERSION) {
821 		if (ver == IPV6_VERSION) {
822 			return (ip6hdr(addr, flags, argc, argv));
823 		} else if (!force) {
824 			mdb_warn("unknown IP version: %d\n", ver);
825 			return (DCMD_ERR);
826 		}
827 	}
828 
829 	mdb_printf("%<b>IPv4 header%</b>\n");
830 	mdb_printf("%-34s %-34s\n"
831 	    "%<u>%-4s %-4s %-5s %-5s %-6s %-5s %-5s %-6s %-8s %-6s%</u>\n",
832 	    "SRC", "DST",
833 	    "HLEN", "TOS", "LEN", "ID", "OFFSET", "TTL", "PROTO", "CHKSUM",
834 	    "EXP-CSUM", "FLGS");
835 
836 	hdrlen = (iph->ipha_version_and_hdr_length & 0x0f) << 2;
837 	mdb_nhconvert(&totlen, &iph->ipha_length, sizeof (totlen));
838 	mdb_nhconvert(&ipid, &iph->ipha_ident, sizeof (ipid));
839 	mdb_nhconvert(&off, &iph->ipha_fragment_offset_and_flags, sizeof (off));
840 	if (hdrlen == IP_SIMPLE_HDR_LENGTH) {
841 		if ((csum = ipcksum(iph, sizeof (*iph))) != 0)
842 			csum = ~(~csum + ~iph->ipha_hdr_checksum);
843 		else
844 			csum = iph->ipha_hdr_checksum;
845 		mdb_snprintf(exp_csum, 8, "%u", csum);
846 	} else {
847 		mdb_snprintf(exp_csum, 8, "<n/a>");
848 	}
849 
850 	mdb_printf("%-34I %-34I%\n"
851 	    "%-4d %-4d %-5hu %-5hu %-6hu %-5hu %-5hu %-6u %-8s <%5hb>\n",
852 	    iph->ipha_src, iph->ipha_dst,
853 	    hdrlen, iph->ipha_type_of_service, totlen, ipid,
854 	    (off << 3) & 0xffff, iph->ipha_ttl, iph->ipha_protocol,
855 	    iph->ipha_hdr_checksum, exp_csum, off, ip_flags);
856 
857 	if (verbose) {
858 		nxt_proto = addr + hdrlen;
859 		return (transport_hdr(iph->ipha_protocol, nxt_proto));
860 	} else {
861 		return (DCMD_OK);
862 	}
863 }
864 
865 /* ARGSUSED */
866 static int
867 ip6hdr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
868 {
869 	uint_t		verbose = FALSE, force = FALSE;
870 	ip6_t		iph[1];
871 	int		ver, class, flow;
872 	uint16_t	plen;
873 	uintptr_t	nxt_proto;
874 
875 	if (mdb_getopts(argc, argv,
876 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
877 	    'f', MDB_OPT_SETBITS, TRUE, &force, NULL) != argc)
878 		return (DCMD_USAGE);
879 
880 	if (mdb_vread(iph, sizeof (*iph), addr) == -1) {
881 		mdb_warn("failed to read IPv6 header at %p", addr);
882 		return (DCMD_ERR);
883 	}
884 
885 	ver = (iph->ip6_vfc & 0xf0) >> 4;
886 	if (ver != IPV6_VERSION) {
887 		if (ver == IPV4_VERSION) {
888 			return (iphdr(addr, flags, argc, argv));
889 		} else if (!force) {
890 			mdb_warn("unknown IP version: %d\n", ver);
891 			return (DCMD_ERR);
892 		}
893 	}
894 
895 	mdb_printf("%<b>IPv6 header%</b>\n");
896 	mdb_printf("%<u>%-26s %-26s %4s %7s %5s %3s %3s%</u>\n",
897 	    "SRC", "DST", "TCLS", "FLOW-ID", "PLEN", "NXT", "HOP");
898 
899 	class = (iph->ip6_vcf & IPV6_FLOWINFO_TCLASS) >> 20;
900 	mdb_nhconvert(&class, &class, sizeof (class));
901 	flow = iph->ip6_vcf & IPV6_FLOWINFO_FLOWLABEL;
902 	mdb_nhconvert(&flow, &flow, sizeof (flow));
903 	mdb_nhconvert(&plen, &iph->ip6_plen, sizeof (plen));
904 
905 	mdb_printf("%-26N %-26N %4d %7d %5hu %3d %3d\n",
906 	    &iph->ip6_src, &iph->ip6_dst,
907 	    class, flow, plen, iph->ip6_nxt, iph->ip6_hlim);
908 
909 	if (verbose) {
910 		nxt_proto = addr + sizeof (ip6_t);
911 		return (transport_hdr(iph->ip6_nxt, nxt_proto));
912 	} else {
913 		return (DCMD_OK);
914 	}
915 }
916 
917 int
918 ire(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
919 {
920 	uint_t verbose = FALSE;
921 	ire_t ire;
922 	ire_cbdata_t ire_cb;
923 	int ipversion = 0;
924 	const char *opt_P = NULL;
925 
926 	if (mdb_getopts(argc, argv,
927 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
928 	    'P', MDB_OPT_STR, &opt_P, NULL) != argc)
929 		return (DCMD_USAGE);
930 
931 	if (opt_P != NULL) {
932 		if (strcmp("v4", opt_P) == 0) {
933 			ipversion = IPV4_VERSION;
934 		} else if (strcmp("v6", opt_P) == 0) {
935 			ipversion = IPV6_VERSION;
936 		} else {
937 			mdb_warn("invalid protocol '%s'\n", opt_P);
938 			return (DCMD_USAGE);
939 		}
940 	}
941 
942 	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
943 
944 		if (verbose) {
945 			mdb_printf("%?s %40s %-20s%\n"
946 			    "%?s %40s %-20s%\n"
947 			    "%<u>%?s %40s %4s %-20s%</u>\n",
948 			    "ADDR", "SRC", "TYPE",
949 			    "", "DST", "MARKS",
950 			    "", "STACK", "ZONE", "FLAGS");
951 		} else {
952 			mdb_printf("%<u>%?s %30s %30s %5s %4s%</u>\n",
953 			    "ADDR", "SRC", "DST", "STACK", "ZONE");
954 		}
955 	}
956 
957 	ire_cb.verbose = (verbose == TRUE);
958 	ire_cb.ire_ipversion = ipversion;
959 
960 	if (flags & DCMD_ADDRSPEC) {
961 		(void) mdb_vread(&ire, sizeof (ire_t), addr);
962 		(void) ire_format(addr, &ire, &ire_cb);
963 	} else if (mdb_walk("ire", (mdb_walk_cb_t)ire_format, &ire_cb) == -1) {
964 		mdb_warn("failed to walk ire table");
965 		return (DCMD_ERR);
966 	}
967 
968 	return (DCMD_OK);
969 }
970 
971 static size_t
972 mi_osize(const queue_t *q)
973 {
974 	/*
975 	 * The code in common/inet/mi.c allocates an extra word to store the
976 	 * size of the allocation.  An mi_o_s is thus a size_t plus an mi_o_s.
977 	 */
978 	struct mi_block {
979 		size_t mi_nbytes;
980 		struct mi_o_s mi_o;
981 	} m;
982 
983 	if (mdb_vread(&m, sizeof (m), (uintptr_t)q->q_ptr -
984 	    sizeof (m)) == sizeof (m))
985 		return (m.mi_nbytes - sizeof (m));
986 
987 	return (0);
988 }
989 
990 static void
991 ip_ill_qinfo(const queue_t *q, char *buf, size_t nbytes)
992 {
993 	char name[32];
994 	ill_t ill;
995 
996 	if (mdb_vread(&ill, sizeof (ill),
997 	    (uintptr_t)q->q_ptr) == sizeof (ill) &&
998 	    mdb_readstr(name, sizeof (name), (uintptr_t)ill.ill_name) > 0)
999 		(void) mdb_snprintf(buf, nbytes, "if: %s", name);
1000 }
1001 
1002 void
1003 ip_qinfo(const queue_t *q, char *buf, size_t nbytes)
1004 {
1005 	size_t size = mi_osize(q);
1006 
1007 	if (size == sizeof (ill_t))
1008 		ip_ill_qinfo(q, buf, nbytes);
1009 }
1010 
1011 uintptr_t
1012 ip_rnext(const queue_t *q)
1013 {
1014 	size_t size = mi_osize(q);
1015 	ill_t ill;
1016 
1017 	if (size == sizeof (ill_t) && mdb_vread(&ill, sizeof (ill),
1018 	    (uintptr_t)q->q_ptr) == sizeof (ill))
1019 		return ((uintptr_t)ill.ill_rq);
1020 
1021 	return (NULL);
1022 }
1023 
1024 uintptr_t
1025 ip_wnext(const queue_t *q)
1026 {
1027 	size_t size = mi_osize(q);
1028 	ill_t ill;
1029 
1030 	if (size == sizeof (ill_t) && mdb_vread(&ill, sizeof (ill),
1031 	    (uintptr_t)q->q_ptr) == sizeof (ill))
1032 		return ((uintptr_t)ill.ill_wq);
1033 
1034 	return (NULL);
1035 }
1036 
1037 /*
1038  * Print the core fields in an squeue_t.  With the "-v" argument,
1039  * provide more verbose output.
1040  */
1041 static int
1042 squeue(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1043 {
1044 	unsigned int	i;
1045 	unsigned int	verbose = FALSE;
1046 	const int	SQUEUE_STATEDELT = (int)(sizeof (uintptr_t) + 9);
1047 	boolean_t	arm;
1048 	squeue_t	squeue;
1049 
1050 	if (!(flags & DCMD_ADDRSPEC)) {
1051 		if (mdb_walk_dcmd("genunix`squeue_cache", "ip`squeue",
1052 		    argc, argv) == -1) {
1053 			mdb_warn("failed to walk squeue cache");
1054 			return (DCMD_ERR);
1055 		}
1056 		return (DCMD_OK);
1057 	}
1058 
1059 	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL)
1060 	    != argc)
1061 		return (DCMD_USAGE);
1062 
1063 	if (!DCMD_HDRSPEC(flags) && verbose)
1064 		mdb_printf("\n\n");
1065 
1066 	if (DCMD_HDRSPEC(flags) || verbose) {
1067 		mdb_printf("%?s %-5s %-3s %?s %?s %?s\n",
1068 		    "ADDR", "STATE", "CPU",
1069 		    "FIRST", "LAST", "WORKER");
1070 	}
1071 
1072 	if (mdb_vread(&squeue, sizeof (squeue_t), addr) == -1) {
1073 		mdb_warn("cannot read squeue_t at %p", addr);
1074 		return (DCMD_ERR);
1075 	}
1076 
1077 	mdb_printf("%0?p %05x %3d %0?p %0?p %0?p\n",
1078 	    addr, squeue.sq_state, squeue.sq_bind,
1079 	    squeue.sq_first, squeue.sq_last, squeue.sq_worker);
1080 
1081 	if (!verbose)
1082 		return (DCMD_OK);
1083 
1084 	arm = B_TRUE;
1085 	for (i = 0; squeue_states[i].bit_name != NULL; i++) {
1086 		if (((squeue.sq_state) & (1 << i)) == 0)
1087 			continue;
1088 
1089 		if (arm) {
1090 			mdb_printf("%*s|\n", SQUEUE_STATEDELT, "");
1091 			mdb_printf("%*s+-->  ", SQUEUE_STATEDELT, "");
1092 			arm = B_FALSE;
1093 		} else
1094 			mdb_printf("%*s      ", SQUEUE_STATEDELT, "");
1095 
1096 		mdb_printf("%-12s %s\n", squeue_states[i].bit_name,
1097 		    squeue_states[i].bit_descr);
1098 	}
1099 
1100 	return (DCMD_OK);
1101 }
1102 
/*
 * Help text for the ::squeue dcmd.
 */
static void
ip_squeue_help(void)
{
	mdb_printf("Print the core information for a given NCA squeue_t.\n\n"
	    "Options:\n"
	    "\t-v\tbe verbose (more descriptive)\n");
}
1110 
1111 /*
1112  * This is called by ::th_trace (via a callback) when walking the th_hash
1113  * list.  It calls modent to find the entries.
1114  */
1115 /* ARGSUSED */
1116 static int
1117 modent_summary(uintptr_t addr, const void *data, void *private)
1118 {
1119 	th_walk_data_t *thw = private;
1120 	const struct mod_hash_entry *mhe = data;
1121 	th_trace_t th;
1122 
1123 	if (mdb_vread(&th, sizeof (th), (uintptr_t)mhe->mhe_val) == -1) {
1124 		mdb_warn("failed to read th_trace_t %p", mhe->mhe_val);
1125 		return (WALK_ERR);
1126 	}
1127 
1128 	if (th.th_refcnt == 0 && thw->thw_non_zero_only)
1129 		return (WALK_NEXT);
1130 
1131 	if (!thw->thw_match) {
1132 		mdb_printf("%?p %?p %?p %8d %?p\n", thw->thw_ipst, mhe->mhe_key,
1133 		    mhe->mhe_val, th.th_refcnt, th.th_id);
1134 	} else if (thw->thw_matchkey == (uintptr_t)mhe->mhe_key) {
1135 		int i, j, k;
1136 		tr_buf_t *tr;
1137 
1138 		mdb_printf("Object %p in IP stack %p:\n", mhe->mhe_key,
1139 		    thw->thw_ipst);
1140 		i = th.th_trace_lastref;
1141 		mdb_printf("\tThread %p refcnt %d:\n", th.th_id,
1142 		    th.th_refcnt);
1143 		for (j = TR_BUF_MAX; j > 0; j--) {
1144 			tr = th.th_trbuf + i;
1145 			if (tr->tr_depth == 0 || tr->tr_depth > TR_STACK_DEPTH)
1146 				break;
1147 			mdb_printf("\t  T%+ld:\n", tr->tr_time -
1148 			    thw->thw_lbolt);
1149 			for (k = 0; k < tr->tr_depth; k++)
1150 				mdb_printf("\t\t%a\n", tr->tr_stack[k]);
1151 			if (--i < 0)
1152 				i = TR_BUF_MAX - 1;
1153 		}
1154 	}
1155 	return (WALK_NEXT);
1156 }
1157 
1158 /*
1159  * This is called by ::th_trace (via a callback) when walking the th_hash
1160  * list.  It calls modent to find the entries.
1161  */
1162 /* ARGSUSED */
1163 static int
1164 th_hash_summary(uintptr_t addr, const void *data, void *private)
1165 {
1166 	const th_hash_t *thh = data;
1167 	th_walk_data_t *thw = private;
1168 
1169 	thw->thw_ipst = (uintptr_t)thh->thh_ipst;
1170 	return (mdb_pwalk("modent", modent_summary, private,
1171 	    (uintptr_t)thh->thh_hash));
1172 }
1173 
1174 /*
1175  * Print or summarize the th_trace_t structures.
1176  */
1177 static int
1178 th_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1179 {
1180 	th_walk_data_t thw;
1181 
1182 	(void) memset(&thw, 0, sizeof (thw));
1183 
1184 	if (mdb_getopts(argc, argv,
1185 	    'n', MDB_OPT_SETBITS, TRUE, &thw.thw_non_zero_only,
1186 	    NULL) != argc)
1187 		return (DCMD_USAGE);
1188 
1189 	if (!(flags & DCMD_ADDRSPEC)) {
1190 		/*
1191 		 * No address specified.  Walk all of the th_hash_t in the
1192 		 * system, and summarize the th_trace_t entries in each.
1193 		 */
1194 		mdb_printf("%?s %?s %?s %8s %?s\n",
1195 		    "IPSTACK", "OBJECT", "TRACE", "REFCNT", "THREAD");
1196 		thw.thw_match = B_FALSE;
1197 	} else {
1198 		thw.thw_match = B_TRUE;
1199 		thw.thw_matchkey = addr;
1200 		if (mdb_readvar(&thw.thw_lbolt,
1201 		    mdb_prop_postmortem ? "panic_lbolt" : "lbolt") == -1) {
1202 			mdb_warn("failed to read lbolt");
1203 			return (DCMD_ERR);
1204 		}
1205 	}
1206 	if (mdb_pwalk("th_hash", th_hash_summary, &thw, NULL) == -1) {
1207 		mdb_warn("can't walk th_hash entries");
1208 		return (DCMD_ERR);
1209 	}
1210 	return (DCMD_OK);
1211 }
1212 
/*
 * Help text for the ::th_trace dcmd.
 */
static void
th_trace_help(void)
{
	mdb_printf(
	    "If given an address of an ill_t, ipif_t, ire_t, or nce_t, "
	    "print the\ncorresponding th_trace_t structure in detail.  "
	    "Otherwise, if no address is\ngiven, then summarize all "
	    "th_trace_t structures.\n\n"
	    "Options:\n"
	    "\t-n\tdisplay only entries with non-zero th_refcnt\n");
}
1224 
1225 static const mdb_dcmd_t dcmds[] = {
1226 	{ "illif", "?[-P v4 | v6]",
1227 	    "display or filter IP Lower Level InterFace structures", illif,
1228 	    illif_help },
1229 	{ "iphdr", ":[-vf]", "display an IPv4 header", iphdr },
1230 	{ "ip6hdr", ":[-vf]", "display an IPv6 header", ip6hdr },
1231 	{ "ire", "?[-v] [-P v4|v6]",
1232 	    "display Internet Route Entry structures", ire },
1233 	{ "nce", "?[-P v4 | v6]", "display Neighbor Cache Entry structures",
1234 	    nce },
1235 	{ "squeue", ":[-v]", "print core squeue_t info", squeue,
1236 	    ip_squeue_help },
1237 	{ "tcphdr", ":", "display a TCP header", tcphdr },
1238 	{ "udphdr", ":", "display an UDP header", udphdr },
1239 	{ "sctphdr", ":", "display an SCTP header", sctphdr },
1240 	{ "th_trace", "?[-n]", "display th_trace_t structures", th_trace,
1241 	    th_trace_help },
1242 	{ NULL }
1243 };
1244 
1245 static const mdb_walker_t walkers[] = {
1246 	{ "illif", "walk list of ill interface types for all stacks",
1247 		ip_stacks_common_walk_init, illif_walk_step, NULL },
1248 	{ "illif_stack", "walk list of ill interface types",
1249 		illif_stack_walk_init, illif_stack_walk_step,
1250 		illif_stack_walk_fini },
1251 	{ "ire", "walk active ire_t structures",
1252 		ire_walk_init, ire_walk_step, NULL },
1253 	{ "ire_ctable", "walk ire_t structures in the ctable",
1254 		ip_stacks_common_walk_init, ire_ctable_walk_step, NULL },
1255 	{ "ire_next", "walk ire_t structures in the ctable",
1256 		ire_next_walk_init, ire_next_walk_step, NULL },
1257 	{ "ip_stacks", "walk all the ip_stack_t",
1258 		ip_stacks_walk_init, ip_stacks_walk_step, NULL },
1259 	{ "th_hash", "walk all the th_hash_t entries",
1260 		th_hash_walk_init, th_hash_walk_step, NULL },
1261 	{ "nce", "walk list of nce structures for all stacks",
1262 		ip_stacks_common_walk_init, nce_walk_step, NULL },
1263 	{ "nce_stack", "walk list of nce structures",
1264 		nce_stack_walk_init, nce_stack_walk_step,
1265 		nce_stack_walk_fini},
1266 	{ NULL }
1267 };
1268 
1269 static const mdb_qops_t ip_qops = { ip_qinfo, ip_rnext, ip_wnext };
1270 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1271 
1272 const mdb_modinfo_t *
1273 _mdb_init(void)
1274 {
1275 	GElf_Sym sym;
1276 
1277 	if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0)
1278 		mdb_qops_install(&ip_qops, (uintptr_t)sym.st_value);
1279 
1280 	return (&modinfo);
1281 }
1282 
1283 void
1284 _mdb_fini(void)
1285 {
1286 	GElf_Sym sym;
1287 
1288 	if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0)
1289 		mdb_qops_remove(&ip_qops, (uintptr_t)sym.st_value);
1290 }
1291 
1292 static char *
1293 nce_state(int nce_state)
1294 {
1295 	switch (nce_state) {
1296 	case ND_UNCHANGED:
1297 		return ("unchanged");
1298 	case ND_INCOMPLETE:
1299 		return ("incomplete");
1300 	case ND_REACHABLE:
1301 		return ("reachable");
1302 	case ND_STALE:
1303 		return ("stale");
1304 	case ND_DELAY:
1305 		return ("delay");
1306 	case ND_PROBE:
1307 		return ("probe");
1308 	case ND_UNREACHABLE:
1309 		return ("unreach");
1310 	case ND_INITIAL:
1311 		return ("initial");
1312 	default:
1313 		return ("??");
1314 	}
1315 }
1316 
1317 static char *
1318 nce_l2_addr(const nce_t *nce, const ill_t *ill)
1319 {
1320 	uchar_t *h;
1321 	static char addr_buf[L2MAXADDRSTRLEN];
1322 	mblk_t mp;
1323 	size_t mblen;
1324 
1325 	if (ill->ill_flags & ILLF_XRESOLV) {
1326 		return ("XRESOLV");
1327 	}
1328 
1329 	if (nce->nce_res_mp == NULL) {
1330 		return ("None");
1331 	}
1332 
1333 	if (ill->ill_net_type == IRE_IF_RESOLVER) {
1334 
1335 		if (mdb_vread(&mp, sizeof (mblk_t),
1336 		    (uintptr_t)nce->nce_res_mp) == -1) {
1337 			mdb_warn("failed to read nce_res_mp at %p",
1338 			    nce->nce_res_mp);
1339 		}
1340 
1341 		if (ill->ill_nd_lla_len == 0)
1342 			return ("None");
1343 		mblen = mp.b_wptr - mp.b_rptr;
1344 		if (mblen > (sizeof (dl_unitdata_req_t) + MAX_SAP_LEN) ||
1345 		    ill->ill_nd_lla_len > MAX_SAP_LEN ||
1346 		    NCE_LL_ADDR_OFFSET(ill) + ill->ill_nd_lla_len > mblen) {
1347 			return ("Truncated");
1348 		}
1349 		h = mdb_zalloc(mblen, UM_SLEEP);
1350 		if (mdb_vread(h, mblen, (uintptr_t)(mp.b_rptr)) == -1) {
1351 			mdb_warn("failed to read hwaddr at %p",
1352 			    mp.b_rptr + NCE_LL_ADDR_OFFSET(ill));
1353 			return ("Unknown");
1354 		}
1355 		mdb_mac_addr(h + NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len,
1356 		    addr_buf, sizeof (addr_buf));
1357 	} else {
1358 		return ("None");
1359 	}
1360 	mdb_free(h, mblen);
1361 	return (addr_buf);
1362 }
1363 
1364 static void
1365 nce_header(uint_t flags)
1366 {
1367 	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
1368 
1369 		mdb_printf("%<u>%?s %-20s %-10s %-8s %-5s %s%</u>\n",
1370 		    "ADDR", "HW_ADDR", "STATE", "FLAGS", "ILL", "IP ADDR");
1371 	}
1372 }
1373 
1374 int
1375 nce(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1376 {
1377 	nce_t nce;
1378 	nce_cbdata_t id;
1379 	int ipversion = 0;
1380 	const char *opt_P = NULL;
1381 
1382 	if (mdb_getopts(argc, argv,
1383 	    'P', MDB_OPT_STR, &opt_P, NULL) != argc)
1384 		return (DCMD_USAGE);
1385 
1386 	if (opt_P != NULL) {
1387 		if (strcmp("v4", opt_P) == 0) {
1388 			ipversion = IPV4_VERSION;
1389 		} else if (strcmp("v6", opt_P) == 0) {
1390 			ipversion = IPV6_VERSION;
1391 		} else {
1392 			mdb_warn("invalid protocol '%s'\n", opt_P);
1393 			return (DCMD_USAGE);
1394 		}
1395 	}
1396 
1397 	if (flags & DCMD_ADDRSPEC) {
1398 
1399 		if (mdb_vread(&nce, sizeof (nce_t), addr) == -1) {
1400 			mdb_warn("failed to read nce at %p\n", addr);
1401 			return (DCMD_ERR);
1402 		}
1403 		if (ipversion != 0 && nce.nce_ipversion != ipversion) {
1404 			mdb_printf("IP Version mismatch\n");
1405 			return (DCMD_ERR);
1406 		}
1407 		nce_header(flags);
1408 		return (nce_format(addr, &nce, ipversion));
1409 
1410 	} else {
1411 		id.nce_addr = addr;
1412 		id.nce_ipversion = ipversion;
1413 		nce_header(flags);
1414 		if (mdb_walk("nce", (mdb_walk_cb_t)nce_cb, &id) == -1) {
1415 			mdb_warn("failed to walk nce table\n");
1416 			return (DCMD_ERR);
1417 		}
1418 	}
1419 	return (DCMD_OK);
1420 }
1421 
1422 static int
1423 nce_format(uintptr_t addr, const nce_t *nce, int ipversion)
1424 {
1425 	static const mdb_bitmask_t nce_flags[] = {
1426 		{ "P",	NCE_F_PERMANENT,	NCE_F_PERMANENT },
1427 		{ "R",	NCE_F_ISROUTER,		NCE_F_ISROUTER	},
1428 		{ "N",	NCE_F_NONUD,		NCE_F_NONUD	},
1429 		{ "A",	NCE_F_ANYCAST,		NCE_F_ANYCAST	},
1430 		{ "C",	NCE_F_CONDEMNED,	NCE_F_CONDEMNED	},
1431 		{ "U",	NCE_F_UNSOL_ADV,	NCE_F_UNSOL_ADV },
1432 		{ "B",	NCE_F_BCAST,		NCE_F_BCAST	},
1433 		{ NULL,	0,			0		}
1434 	};
1435 #define	NCE_MAX_FLAGS	(sizeof (nce_flags) / sizeof (mdb_bitmask_t))
1436 	struct in_addr nceaddr;
1437 	ill_t ill;
1438 	char ill_name[LIFNAMSIZ];
1439 	char flagsbuf[NCE_MAX_FLAGS];
1440 
1441 	if (mdb_vread(&ill, sizeof (ill), (uintptr_t)nce->nce_ill) == -1) {
1442 		mdb_warn("failed to read nce_ill at %p",
1443 		    nce->nce_ill);
1444 		return (DCMD_ERR);
1445 	}
1446 
1447 	(void) mdb_readstr(ill_name, MIN(LIFNAMSIZ, ill.ill_name_length),
1448 	    (uintptr_t)ill.ill_name);
1449 
1450 	mdb_snprintf(flagsbuf, sizeof (flagsbuf), "%hb",
1451 	    nce->nce_flags, nce_flags);
1452 
1453 	if (ipversion != 0 && nce->nce_ipversion != ipversion)
1454 		return (DCMD_OK);
1455 
1456 	if (nce->nce_ipversion == IPV4_VERSION) {
1457 		IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr);
1458 		mdb_printf("%?p %-20s %-10s "
1459 		    "%-8s "
1460 		    "%-5s %I\n",
1461 		    addr, nce_l2_addr(nce, &ill),
1462 		    nce_state(nce->nce_state),
1463 		    flagsbuf,
1464 		    ill_name, nceaddr.s_addr);
1465 	} else {
1466 		mdb_printf("%?p %-20s %-10s %-8s %-5s %N\n",
1467 		    addr,  nce_l2_addr(nce, &ill),
1468 		    nce_state(nce->nce_state),
1469 		    flagsbuf,
1470 		    ill_name, &nce->nce_addr);
1471 	}
1472 
1473 	return (DCMD_OK);
1474 }
1475 
1476 static uintptr_t
1477 nce_get_next_hash_tbl(uintptr_t start, int *index, struct ndp_g_s ndp)
1478 {
1479 	uintptr_t addr = start;
1480 	int i = *index;
1481 
1482 	while (addr == NULL) {
1483 
1484 		if (++i >= NCE_TABLE_SIZE)
1485 			break;
1486 		addr = (uintptr_t)ndp.nce_hash_tbl[i];
1487 	}
1488 	*index = i;
1489 	return (addr);
1490 }
1491 
1492 static int
1493 nce_walk_step(mdb_walk_state_t *wsp)
1494 {
1495 	uintptr_t kaddr4, kaddr6;
1496 
1497 	kaddr4 = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ndp4);
1498 	kaddr6 = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ndp6);
1499 
1500 	if (mdb_vread(&kaddr4, sizeof (kaddr4), kaddr4) == -1) {
1501 		mdb_warn("can't read ips_ip_cache_table at %p", kaddr4);
1502 		return (WALK_ERR);
1503 	}
1504 	if (mdb_vread(&kaddr6, sizeof (kaddr6), kaddr6) == -1) {
1505 		mdb_warn("can't read ips_ip_cache_table at %p", kaddr6);
1506 		return (WALK_ERR);
1507 	}
1508 	if (mdb_pwalk("nce_stack", wsp->walk_callback, wsp->walk_cbdata,
1509 	    kaddr4) == -1) {
1510 		mdb_warn("couldn't walk 'nce_stack' for ips_ndp4 %p",
1511 		    kaddr4);
1512 		return (WALK_ERR);
1513 	}
1514 	if (mdb_pwalk("nce_stack", wsp->walk_callback,
1515 	    wsp->walk_cbdata, kaddr6) == -1) {
1516 		mdb_warn("couldn't walk 'nce_stack' for ips_ndp6 %p",
1517 		    kaddr6);
1518 		return (WALK_ERR);
1519 	}
1520 	return (WALK_NEXT);
1521 }
1522 
1523 /*
1524  * Called with walk_addr being the address of ips_ndp{4,6}
1525  */
1526 static int
1527 nce_stack_walk_init(mdb_walk_state_t *wsp)
1528 {
1529 	nce_walk_data_t *nw;
1530 
1531 	if (wsp->walk_addr == NULL) {
1532 		mdb_warn("nce_stack requires ndp_g_s address\n");
1533 		return (WALK_ERR);
1534 	}
1535 
1536 	nw = mdb_alloc(sizeof (nce_walk_data_t), UM_SLEEP);
1537 
1538 	if (mdb_vread(&nw->nce_ip_ndp, sizeof (struct ndp_g_s),
1539 	    wsp->walk_addr) == -1) {
1540 		mdb_warn("failed to read 'ip_ndp' at %p",
1541 		    wsp->walk_addr);
1542 		mdb_free(nw, sizeof (nce_walk_data_t));
1543 		return (WALK_ERR);
1544 	}
1545 
1546 	nw->nce_hash_tbl_index = 0;
1547 	wsp->walk_addr = nce_get_next_hash_tbl(NULL,
1548 	    &nw->nce_hash_tbl_index, nw->nce_ip_ndp);
1549 	wsp->walk_data = nw;
1550 
1551 	return (WALK_NEXT);
1552 }
1553 
1554 static int
1555 nce_stack_walk_step(mdb_walk_state_t *wsp)
1556 {
1557 	uintptr_t addr = wsp->walk_addr;
1558 	nce_walk_data_t *nw = wsp->walk_data;
1559 
1560 	if (addr == NULL)
1561 		return (WALK_DONE);
1562 
1563 	if (mdb_vread(&nw->nce, sizeof (nce_t), addr) == -1) {
1564 		mdb_warn("failed to read nce_t at %p", addr);
1565 		return (WALK_ERR);
1566 	}
1567 
1568 	wsp->walk_addr = (uintptr_t)nw->nce.nce_next;
1569 
1570 	wsp->walk_addr = nce_get_next_hash_tbl(wsp->walk_addr,
1571 	    &nw->nce_hash_tbl_index, nw->nce_ip_ndp);
1572 
1573 	return (wsp->walk_callback(addr, nw, wsp->walk_cbdata));
1574 }
1575 
1576 static void
1577 nce_stack_walk_fini(mdb_walk_state_t *wsp)
1578 {
1579 	mdb_free(wsp->walk_data, sizeof (nce_walk_data_t));
1580 }
1581 
1582 /* ARGSUSED */
1583 static int
1584 nce_cb(uintptr_t addr, const nce_walk_data_t *iw, nce_cbdata_t *id)
1585 {
1586 	nce_t nce;
1587 
1588 	if (mdb_vread(&nce, sizeof (nce_t), addr) == -1) {
1589 		mdb_warn("failed to read nce at %p", addr);
1590 		return (WALK_NEXT);
1591 	}
1592 	(void) nce_format(addr, &nce, id->nce_ipversion);
1593 	return (WALK_NEXT);
1594 }
1595