1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <assert.h>
28#include <dtrace.h>
29#include <limits.h>
30#include <link.h>
31#include <priv.h>
32#include <signal.h>
33#include <stdlib.h>
34#include <stdarg.h>
35#include <stdio.h>
36#include <string.h>
37#include <strings.h>
38#include <errno.h>
39#include <sys/wait.h>
40#include <libgen.h>
41#include <libproc.h>
42
/* Program name (basename of argv[0]); prefixes every diagnostic message. */
static char *g_pname;
/* libdtrace handle; opened in main() and used by all DTrace calls. */
static dtrace_hdl_t *g_dtp;
/* Handle on the traced process, set in main() by grabbing or creating it. */
struct ps_prochandle *g_pr;

/* Process exit codes. */
#define	E_SUCCESS	0
#define	E_ERROR		1
#define	E_USAGE		2
50
/*
 * For hold times we use a global associative array for mutexes since, in
 * user-land, it's not invalid to release a synchronization primitive that
 * another thread acquired; rwlocks require a thread-local associative array
 * since multiple threads can hold the same lock for reading. Note that we
 * ignore recursive mutex acquisitions and releases as they don't truly
 * affect lock contention.
 *
 * g_hold_init is the D fragment that records the acquisition timestamp,
 * keyed by arg0: rw-acquire stores it in self->rwhold[], and mutex-acquire
 * (only when arg1 == 0) stores it in the global mtxhold[].  It is added to
 * the program ahead of g_hold_times or g_hold_histogram, which consume
 * these timestamps.
 */
static const char *g_hold_init =
"plockstat$target:::rw-acquire\n"
"{\n"
"	self->rwhold[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-acquire\n"
"/arg1 == 0/\n"
"{\n"
"	mtxhold[arg0] = timestamp;\n"
"}\n";
69
/*
 * D fragment for hold-time reporting in histogram mode (main() selects it
 * when -s was given).  On each release it quantize()s the elapsed time since
 * the timestamp stored by g_hold_init into an aggregation keyed by
 * [arg0, ustack()], clears the stored timestamp and raises a *_found flag.
 *
 * There are two rw-release clauses: the first (arg1 == 1) feeds @rw_w_hold
 * and zeroes self->rwhold[arg0]; the second feeds @rw_r_hold and is
 * predicated only on self->rwhold[arg0], so it is meant to catch the
 * releases the first clause did not handle.
 *
 * Each END clause runs only if its *_found flag was set; it trace()s a
 * title string and printa()s the single matching aggregation.
 */
static const char *g_hold_histogram =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack()] = quantize(timestamp - mtxhold[arg0]);\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold);\n"
"}\n";
113
/*
 * D fragment for hold-time reporting in the default (non-histogram) mode;
 * main() selects it when -s was not given.  It has the same clause
 * structure as g_hold_histogram, but each release updates two aggregations
 * keyed by [arg0, ustack(5)]: a sum() of the elapsed time and a count() of
 * events.  Each END clause printa()s the sum/count pair together so the
 * consumer receives both values for the same key.
 */
static const char *g_hold_times =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_w_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_r_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack(5)] = sum(timestamp - mtxhold[arg0]);\n"
"	@mtx_hold_count[arg0, ustack(5)] = count();\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold, @mtx_hold_count);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold, @rw_r_hold_count);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold, @rw_w_hold_count);\n"
"}\n";
158
159
/*
 * For contention, we use thread-local associative arrays since we're tracing
 * a single thread's activity in libc and multiple threads can be blocking or
 * spinning on the same synchronization primitive.
 *
 * g_ctnd_init is the D fragment that records, per thread and keyed by arg0,
 * the timestamp at which the thread started to block on an rwlock
 * (rw-block), block on a mutex (mutex-block) or spin on a mutex
 * (mutex-spin).  It is added to the program ahead of g_ctnd_times or
 * g_ctnd_histogram, which consume these timestamps.
 */
static const char *g_ctnd_init =
"plockstat$target:::rw-block\n"
"{\n"
"	self->rwblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-block\n"
"{\n"
"	self->mtxblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-spin\n"
"{\n"
"	self->mtxspin[arg0] = timestamp;\n"
"}\n";
178
/*
 * D fragment for contention reporting in histogram mode (main() selects it
 * when -s was given).  When a block or spin ends it quantize()s the elapsed
 * time since the timestamp stored by g_ctnd_init into an aggregation keyed
 * by [arg0, ustack()], clears the stored timestamp and raises a *_found
 * flag.
 *
 * Clauses for the same probe are listed from most to least specific, and
 * each one clears the stored timestamp that the following clauses test:
 *   rw-blocked:    arg1 == 1 && arg2 != 0 -> @rw_w_block;
 *                  arg2 != 0              -> @rw_r_block;
 *                  otherwise the timestamp is just discarded.
 *   mutex-spun:    arg1 != 0 -> @mtx_spin; otherwise -> @mtx_vain_spin.
 *   mutex-blocked: arg1 != 0 -> @mtx_block; otherwise the timestamp is
 *                  just discarded.
 *
 * Each END clause runs only if its *_found flag was set; it trace()s a
 * title string and printa()s the single matching aggregation.
 */
static const char *g_ctnd_histogram =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxblock[arg0]);\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block);\n"
"}\n";
261
262
/*
 * D fragment for contention reporting in the default (non-histogram) mode;
 * main() selects it when -s was not given.  It has the same clause
 * structure as g_ctnd_histogram, but each event updates two aggregations
 * keyed by [arg0, ustack(5)]: a sum() of the elapsed time and a count() of
 * events.  Each END clause printa()s the sum/count pair together so the
 * consumer receives both values for the same key.
 */
static const char *g_ctnd_times =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_w_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_r_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_vain_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxblock[arg0]);\n"
"	@mtx_block_count[arg0, ustack(5)] = count();\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block, @mtx_block_count);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin, @mtx_spin_count);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin, @mtx_vain_spin_count);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block, @rw_r_block_count);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block, @rw_w_block_count);\n"
"}\n";
350
351static char g_prog[4096];
352static size_t g_proglen;
353static int g_opt_V, g_opt_s;
354static int g_intr;
355static int g_exited;
356static dtrace_optval_t g_nframes;
357static ulong_t g_nent = ULONG_MAX;
358
359#define	PLOCKSTAT_OPTSTR	"n:ps:e:vx:ACHV"
360
361static void
362usage(void)
363{
364	(void) fprintf(stderr, "Usage:\n"
365	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
366	    "\t    command [arg...]\n"
367	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
368	    "\t    -p pid\n", g_pname, g_pname);
369
370	exit(E_USAGE);
371}
372
373static void
374verror(const char *fmt, va_list ap)
375{
376	int error = errno;
377
378	(void) fprintf(stderr, "%s: ", g_pname);
379	(void) vfprintf(stderr, fmt, ap);
380
381	if (fmt[strlen(fmt) - 1] != '\n')
382		(void) fprintf(stderr, ": %s\n", strerror(error));
383}
384
385/*PRINTFLIKE1*/
386static void
387fatal(const char *fmt, ...)
388{
389	va_list ap;
390
391	va_start(ap, fmt);
392	verror(fmt, ap);
393	va_end(ap);
394
395	if (g_pr != NULL && g_dtp != NULL)
396		dtrace_proc_release(g_dtp, g_pr);
397
398	exit(E_ERROR);
399}
400
401/*PRINTFLIKE1*/
402static void
403dfatal(const char *fmt, ...)
404{
405	va_list ap;
406
407	va_start(ap, fmt);
408
409	(void) fprintf(stderr, "%s: ", g_pname);
410	if (fmt != NULL)
411		(void) vfprintf(stderr, fmt, ap);
412
413	va_end(ap);
414
415	if (fmt != NULL && fmt[strlen(fmt) - 1] != '\n') {
416		(void) fprintf(stderr, ": %s\n",
417		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
418	} else if (fmt == NULL) {
419		(void) fprintf(stderr, "%s\n",
420		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
421	}
422
423	if (g_pr != NULL) {
424		dtrace_proc_continue(g_dtp, g_pr);
425		dtrace_proc_release(g_dtp, g_pr);
426	}
427
428	exit(E_ERROR);
429}
430
/*
 * Print a non-fatal diagnostic through verror() and return to the caller.
 */
/*PRINTFLIKE1*/
static void
notice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror(fmt, args);
	va_end(args);
}
441
442static void
443dprog_add(const char *prog)
444{
445	size_t len = strlen(prog);
446	bcopy(prog, g_prog + g_proglen, len + 1);
447	g_proglen += len;
448	assert(g_proglen < sizeof (g_prog));
449}
450
451static void
452dprog_compile(void)
453{
454	dtrace_prog_t *prog;
455	dtrace_proginfo_t info;
456
457	if (g_opt_V) {
458		(void) fprintf(stderr, "%s: vvvv D program vvvv\n", g_pname);
459		(void) fputs(g_prog, stderr);
460		(void) fprintf(stderr, "%s: ^^^^ D program ^^^^\n", g_pname);
461	}
462
463	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
464	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
465		dfatal("failed to compile program");
466
467	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
468		dfatal("failed to enable probes");
469}
470
/*
 * Print the column headings of the per-lock summary table to stdout.
 */
void
print_legend(void)
{
	static const char *const headings[] = {
		"Count", "nsec", "Lock", "Caller"
	};

	(void) printf("%5s %8s %-28s %s\n",
	    headings[0], headings[1], headings[2], headings[3]);
}
476
/*
 * Print the horizontal rule (a line of dashes) used to delimit table
 * sections on stdout.
 */
void
print_bar(void)
{
	static const char rule[] =
	    "---------------------------------------"
	    "----------------------------------------\n";

	/* The rule contains no conversion specifications; emit it verbatim. */
	(void) fputs(rule, stdout);
}
483
/*
 * Print the heading line that precedes a time-distribution histogram
 * (a blank line followed by the column titles) to stdout.
 */
void
print_histogram_header(void)
{
	const char *value_title = "nsec";
	const char *count_title = "count";
	const char *stack_title = "Stack";

	(void) printf("\n%10s ---- Time Distribution --- %5s %s\n",
	    value_title, count_title, stack_title);
}
490
491/*
492 * Convert an address to a symbolic string or a numeric string. If nolocks
493 * is set, we return an error code if this symbol appears to be a mutex- or
494 * rwlock-related symbol in libc so the caller has a chance to find a more
495 * helpful symbol.
496 */
497static int
498getsym(struct ps_prochandle *P, uintptr_t addr, char *buf, size_t size,
499    int nolocks)
500{
501	char name[256];
502	GElf_Sym sym;
503	prsyminfo_t info;
504	size_t len;
505
506	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
507	    &sym, &info) != 0) {
508		(void) snprintf(buf, size, "%#lx", addr);
509		return (0);
510	}
511	if (info.prs_object == NULL)
512		info.prs_object = "<unknown>";
513
514	if (info.prs_lmid != LM_ID_BASE) {
515		len = snprintf(buf, size, "LM%lu`", info.prs_lmid);
516		buf += len;
517		size -= len;
518	}
519
520	len = snprintf(buf, size, "%s`%s", info.prs_object, info.prs_name);
521	buf += len;
522	size -= len;
523
524	if (sym.st_value != addr)
525		len = snprintf(buf, size, "+%#lx", addr - sym.st_value);
526
527	if (nolocks && strcmp("libc.so.1", info.prs_object) == 0 &&
528	    (strstr("mutex", info.prs_name) == 0 ||
529	    strstr("rw", info.prs_name) == 0))
530		return (-1);
531
532	return (0);
533}
534
535/*ARGSUSED*/
536static int
537process_aggregate(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
538{
539	const dtrace_recdesc_t *rec;
540	uintptr_t lock;
541	uint64_t *stack;
542	caddr_t data;
543	pid_t pid;
544	struct ps_prochandle *P;
545	char buf[256];
546	int i, j;
547	uint64_t sum, count, avg;
548
549	if ((*(uint_t *)arg)++ >= g_nent)
550		return (DTRACE_AGGWALK_NEXT);
551
552	rec = aggsdata[0]->dtada_desc->dtagd_rec;
553	data = aggsdata[0]->dtada_data;
554
555	/*LINTED - alignment*/
556	lock = (uintptr_t)*(uint64_t *)(data + rec[1].dtrd_offset);
557	/*LINTED - alignment*/
558	stack = (uint64_t *)(data + rec[2].dtrd_offset);
559
560	if (!g_opt_s) {
561		/*LINTED - alignment*/
562		sum = *(uint64_t *)(aggsdata[1]->dtada_data +
563		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
564		/*LINTED - alignment*/
565		count = *(uint64_t *)(aggsdata[2]->dtada_data +
566		    aggsdata[2]->dtada_desc->dtagd_rec[3].dtrd_offset);
567	} else {
568		uint64_t *a;
569
570		/*LINTED - alignment*/
571		a = (uint64_t *)(aggsdata[1]->dtada_data +
572		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
573
574		print_bar();
575		print_legend();
576
577		for (count = sum = 0, i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
578		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) {
579			count += a[i];
580			sum += a[i] << (j - 64);
581		}
582	}
583
584	avg = sum / count;
585	(void) printf("%5llu %8llu ", (u_longlong_t)count, (u_longlong_t)avg);
586
587	pid = stack[0];
588	P = dtrace_proc_grab(g_dtp, pid, PGRAB_RDONLY);
589
590	(void) getsym(P, lock, buf, sizeof (buf), 0);
591	(void) printf("%-28s ", buf);
592
593	for (i = 2; i <= 5; i++) {
594		if (getsym(P, stack[i], buf, sizeof (buf), 1) == 0)
595			break;
596	}
597	(void) printf("%s\n", buf);
598
599	if (g_opt_s) {
600		int stack_done = 0;
601		int quant_done = 0;
602		int first_bin, last_bin;
603		uint64_t bin_size, *a;
604
605		/*LINTED - alignment*/
606		a = (uint64_t *)(aggsdata[1]->dtada_data +
607		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
608
609		print_histogram_header();
610
611		for (first_bin = DTRACE_QUANTIZE_ZEROBUCKET;
612		    a[first_bin] == 0; first_bin++)
613			continue;
614		for (last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 63;
615		    a[last_bin] == 0; last_bin--)
616			continue;
617
618		for (i = 0; !stack_done || !quant_done; i++) {
619			if (!stack_done) {
620				(void) getsym(P, stack[i + 2], buf,
621				    sizeof (buf), 0);
622			} else {
623				buf[0] = '\0';
624			}
625
626			if (!quant_done) {
627				bin_size = a[first_bin];
628
629				(void) printf("%10llu |%-24.*s| %5llu %s\n",
630				    1ULL <<
631				    (first_bin - DTRACE_QUANTIZE_ZEROBUCKET),
632				    (int)(24.0 * bin_size / count),
633				    "@@@@@@@@@@@@@@@@@@@@@@@@@@",
634				    (u_longlong_t)bin_size, buf);
635			} else {
636				(void) printf("%43s %s\n", "", buf);
637			}
638
639			if (i + 1 >= g_nframes || stack[i + 3] == 0)
640				stack_done = 1;
641
642			if (first_bin++ == last_bin)
643				quant_done = 1;
644		}
645	}
646
647	dtrace_proc_release(g_dtp, P);
648
649	return (DTRACE_AGGWALK_NEXT);
650}
651
652/*ARGSUSED*/
653static void
654prochandler(struct ps_prochandle *P, const char *msg, void *arg)
655{
656	const psinfo_t *prp = Ppsinfo(P);
657	int pid = Pstatus(P)->pr_pid;
658	char name[SIG2STR_MAX];
659
660	if (msg != NULL) {
661		notice("pid %d: %s\n", pid, msg);
662		return;
663	}
664
665	switch (Pstate(P)) {
666	case PS_UNDEAD:
667		/*
668		 * Ideally we would like to always report pr_wstat here, but it
669		 * isn't possible given current /proc semantics.  If we grabbed
670		 * the process, Ppsinfo() will either fail or return a zeroed
671		 * psinfo_t depending on how far the parent is in reaping it.
672		 * When /proc provides a stable pr_wstat in the status file,
673		 * this code can be improved by examining this new pr_wstat.
674		 */
675		if (prp != NULL && WIFSIGNALED(prp->pr_wstat)) {
676			notice("pid %d terminated by %s\n", pid,
677			    proc_signame(WTERMSIG(prp->pr_wstat),
678			    name, sizeof (name)));
679		} else if (prp != NULL && WEXITSTATUS(prp->pr_wstat) != 0) {
680			notice("pid %d exited with status %d\n",
681			    pid, WEXITSTATUS(prp->pr_wstat));
682		} else {
683			notice("pid %d has exited\n", pid);
684		}
685		g_exited = 1;
686		break;
687
688	case PS_LOST:
689		notice("pid %d exec'd a set-id or unobservable program\n", pid);
690		g_exited = 1;
691		break;
692	}
693}
694
695/*ARGSUSED*/
696static int
697chewrec(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg)
698{
699	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
700	dtrace_aggvarid_t aggvars[2];
701	const void *buf;
702	int i, nagv;
703
704	/*
705	 * A NULL rec indicates that we've processed the last record.
706	 */
707	if (rec == NULL)
708		return (DTRACE_CONSUME_NEXT);
709
710	buf = data->dtpda_data - rec->dtrd_offset;
711
712	switch (rec->dtrd_action) {
713	case DTRACEACT_DIFEXPR:
714		(void) printf("\n%s\n\n", (char *)buf + rec->dtrd_offset);
715		if (!g_opt_s) {
716			print_legend();
717			print_bar();
718		}
719		return (DTRACE_CONSUME_NEXT);
720
721	case DTRACEACT_PRINTA:
722		for (nagv = 0, i = 0; i < epd->dtepd_nrecs - 1; i++) {
723			const dtrace_recdesc_t *nrec = &rec[i];
724
725			if (nrec->dtrd_uarg != rec->dtrd_uarg)
726				break;
727
728			/*LINTED - alignment*/
729			aggvars[nagv++] = *(dtrace_aggvarid_t *)((caddr_t)buf +
730			    nrec->dtrd_offset);
731		}
732
733		if (nagv == (g_opt_s ? 1 : 2)) {
734			uint_t nent = 0;
735			if (dtrace_aggregate_walk_joined(g_dtp, aggvars, nagv,
736			    process_aggregate, &nent) != 0)
737				dfatal("failed to walk aggregate");
738		}
739
740		return (DTRACE_CONSUME_NEXT);
741	}
742
743	return (DTRACE_CONSUME_THIS);
744}
745
746/*ARGSUSED*/
747static void
748intr(int signo)
749{
750	g_intr = 1;
751}
752
753int
754main(int argc, char **argv)
755{
756	ucred_t *ucp;
757	int err;
758	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
759	char c, *p, *end;
760	struct sigaction act;
761	int done = 0;
762
763	g_pname = basename(argv[0]);
764	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
765
766	/*
767	 * Make sure we have the required dtrace_proc privilege.
768	 */
769	if ((ucp = ucred_get(getpid())) != NULL) {
770		const priv_set_t *psp;
771		if ((psp = ucred_getprivset(ucp, PRIV_EFFECTIVE)) != NULL &&
772		    !priv_ismember(psp, PRIV_DTRACE_PROC)) {
773			fatal("dtrace_proc privilege required\n");
774		}
775
776		ucred_free(ucp);
777	}
778
779	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
780		switch (c) {
781		case 'n':
782			errno = 0;
783			g_nent = strtoul(optarg, &end, 10);
784			if (*end != '\0' || errno != 0) {
785				(void) fprintf(stderr, "%s: invalid count "
786				    "'%s'\n", g_pname, optarg);
787				usage();
788			}
789			break;
790
791		case 'p':
792			opt_p = 1;
793			break;
794
795		case 'v':
796			opt_v = 1;
797			break;
798
799		case 'A':
800			opt_C = opt_H = 1;
801			break;
802
803		case 'C':
804			opt_C = 1;
805			break;
806
807		case 'H':
808			opt_H = 1;
809			break;
810
811		case 'V':
812			g_opt_V = 1;
813			break;
814
815		default:
816			if (strchr(PLOCKSTAT_OPTSTR, c) == NULL)
817				usage();
818		}
819	}
820
821	/*
822	 * We need a command or at least one pid.
823	 */
824	if (argc == optind)
825		usage();
826
827	if (opt_C == 0 && opt_H == 0)
828		opt_C = 1;
829
830	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL)
831		fatal("failed to initialize dtrace: %s\n",
832		    dtrace_errmsg(NULL, err));
833
834	/*
835	 * The longest string we trace is 23 bytes long -- so 32 is plenty.
836	 */
837	if (dtrace_setopt(g_dtp, "strsize", "32") == -1)
838		dfatal("failed to set 'strsize'");
839
840	/*
841	 * 1k should be more than enough for all trace() and printa() actions.
842	 */
843	if (dtrace_setopt(g_dtp, "bufsize", "1k") == -1)
844		dfatal("failed to set 'bufsize'");
845
846	/*
847	 * The table we produce has the hottest locks at the top.
848	 */
849	if (dtrace_setopt(g_dtp, "aggsortrev", NULL) == -1)
850		dfatal("failed to set 'aggsortrev'");
851
852	/*
853	 * These are two reasonable defaults which should suffice.
854	 */
855	if (dtrace_setopt(g_dtp, "aggsize", "256k") == -1)
856		dfatal("failed to set 'aggsize'");
857	if (dtrace_setopt(g_dtp, "aggrate", "1sec") == -1)
858		dfatal("failed to set 'aggrate'");
859
860	/*
861	 * Take a second pass through to look for options that set options now
862	 * that we have an open dtrace handle.
863	 */
864	optind = 1;
865	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
866		switch (c) {
867		case 's':
868			g_opt_s = 1;
869			if (dtrace_setopt(g_dtp, "ustackframes", optarg) == -1)
870				dfatal("failed to set 'ustackframes'");
871			break;
872
873		case 'x':
874			if ((p = strchr(optarg, '=')) != NULL)
875				*p++ = '\0';
876
877			if (dtrace_setopt(g_dtp, optarg, p) != 0)
878				dfatal("failed to set -x %s", optarg);
879			break;
880
881		case 'e':
882			errno = 0;
883			(void) strtoul(optarg, &end, 10);
884			if (*optarg == '-' || *end != '\0' || errno != 0) {
885				(void) fprintf(stderr, "%s: invalid timeout "
886				    "'%s'\n", g_pname, optarg);
887				usage();
888			}
889
890			/*
891			 * Construct a DTrace enabling that will exit after
892			 * the specified number of seconds.
893			 */
894			dprog_add("BEGIN\n{\n\tend = timestamp + ");
895			dprog_add(optarg);
896			dprog_add(" * 1000000000;\n}\n");
897			dprog_add("tick-10hz\n/timestamp >= end/\n");
898			dprog_add("{\n\texit(0);\n}\n");
899			break;
900		}
901	}
902
903	argc -= optind;
904	argv += optind;
905
906	if (opt_H) {
907		dprog_add(g_hold_init);
908		if (g_opt_s == 0)
909			dprog_add(g_hold_times);
910		else
911			dprog_add(g_hold_histogram);
912	}
913
914	if (opt_C) {
915		dprog_add(g_ctnd_init);
916		if (g_opt_s == 0)
917			dprog_add(g_ctnd_times);
918		else
919			dprog_add(g_ctnd_histogram);
920	}
921
922	if (opt_p) {
923		ulong_t pid;
924
925		if (argc > 1) {
926			(void) fprintf(stderr, "%s: only one pid is allowed\n",
927			    g_pname);
928			usage();
929		}
930
931		errno = 0;
932		pid = strtoul(argv[0], &end, 10);
933		if (*end != '\0' || errno != 0 || (pid_t)pid != pid) {
934			(void) fprintf(stderr, "%s: invalid pid '%s'\n",
935			    g_pname, argv[0]);
936			usage();
937		}
938
939		if ((g_pr = dtrace_proc_grab(g_dtp, (pid_t)pid, 0)) == NULL)
940			dfatal(NULL);
941	} else {
942		if ((g_pr = dtrace_proc_create(g_dtp, argv[0], argv)) == NULL)
943			dfatal(NULL);
944	}
945
946	dprog_compile();
947
948	if (dtrace_handle_proc(g_dtp, &prochandler, NULL) == -1)
949		dfatal("failed to establish proc handler");
950
951	(void) sigemptyset(&act.sa_mask);
952	act.sa_flags = 0;
953	act.sa_handler = intr;
954	(void) sigaction(SIGINT, &act, NULL);
955	(void) sigaction(SIGTERM, &act, NULL);
956
957	if (dtrace_go(g_dtp) != 0)
958		dfatal("dtrace_go()");
959
960	if (dtrace_getopt(g_dtp, "ustackframes", &g_nframes) != 0)
961		dfatal("failed to get 'ustackframes'");
962
963	dtrace_proc_continue(g_dtp, g_pr);
964
965	if (opt_v)
966		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
967		    (int)Pstatus(g_pr)->pr_pid);
968
969	do {
970		if (!g_intr && !done)
971			dtrace_sleep(g_dtp);
972
973		if (done || g_intr || g_exited) {
974			done = 1;
975			if (dtrace_stop(g_dtp) == -1)
976				dfatal("couldn't stop tracing");
977		}
978
979		switch (dtrace_work(g_dtp, stdout, NULL, chewrec, NULL)) {
980		case DTRACE_WORKSTATUS_DONE:
981			done = 1;
982			break;
983		case DTRACE_WORKSTATUS_OKAY:
984			break;
985		default:
986			dfatal("processing aborted");
987		}
988
989	} while (!done);
990
991	dtrace_close(g_dtp);
992
993	return (0);
994}
995