1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <fcntl.h>
30#include <errno.h>
31#include <unistd.h>
32#include <signal.h>
33#include <strings.h>
34#include <limits.h>
35#include <sys/mman.h>
36#include <sys/pset.h>
37#include <sys/varargs.h>
38#include <sys/trapstat.h>
39#include <sys/wait.h>
40#include <stddef.h>
41#include <termio.h>
42#include "_trapstat.h"
43
#define	TSTAT_DEVICE	"/dev/trapstat"
#define	TSTAT_COMMAND	"trapstat"

/*
 * Value to display for counter `member' given the new (data) and old (old)
 * records.  With g_absolute set this is the raw counter; otherwise it is the
 * change in the counter scaled to g_interval nanoseconds and rounded to the
 * nearest integer.  The whole expansion is parenthesized so that the macro
 * can be used safely as an operand of a larger expression.
 */
#define	TSTAT_DELTA(data, old, member) \
	(g_absolute ? (data)->member : \
	(uint64_t)(0.5 + (g_interval / (double)((data)->tdata_snapts - \
	(old)->tdata_snapts)) * (double)((data)->member - (old)->member)))

/*
 * Print one miss count / percent-time pair.  The expansion deliberately has
 * no trailing semicolon (the caller supplies it), and the count is cast to
 * match the %lld conversion.
 */
#define	TSTAT_PRINT_MISSDATA(diff, time) \
	(void) printf(" %9lld %4.1f", (long long)(diff), (time))

#define	TSTAT_PAGESIZE_MODIFIERS	" kmgtp"
#define	TSTAT_PAGESIZE_STRLEN		10
#define	TSTAT_MAX_RATE			5000
#define	TSTAT_COLUMN_OFFS	26
#define	TSTAT_COLUMNS_PER_CPU	9
58
59static tstat_data_t *g_data[2];
60static tstat_data_t *g_ndata, *g_odata;
61static processorid_t g_max_cpus;
62static int8_t *g_selected;
63static timer_t g_tid;
64static int g_interval = NANOSEC;
65static int g_peffect = 1;
66static int g_absolute = 0;
67static sigset_t g_oset;
68
69static psetid_t g_pset = PS_NONE;
70static processorid_t *g_pset_cpus;
71static uint_t g_pset_ncpus;
72
73static int g_cpus_per_line = (80 - TSTAT_COLUMN_OFFS) / TSTAT_COLUMNS_PER_CPU;
74static int g_winch;
75
76static int g_pgsizes;
77static size_t *g_pgsize;
78static char **g_pgnames;
79static size_t g_datasize;
80
81static int g_gen;
82static int g_fd;
83static uint8_t g_active[TSTAT_NENT];
84
85static hrtime_t g_start;
86
87static int g_exec_errno;
88static int g_child_exited;
89static int g_child_status;
90
91static void (*g_process)(void *, uint64_t, double);
92static void *g_arg;
93
94typedef struct tstat_sum {
95	uint64_t	tsum_diff;
96	double		tsum_time;
97} tstat_sum_t;
98
99/*
100 * Define a dummy g_traps reader to establish a symbol capabilities lead.
101 * This routine should never be called, as the sun4u and sun4v variants
102 * will be used as appropriate.
103 */
104/* ARGSUSED0 */
105tstat_ent_t *
106get_trap_ent(int ndx)
107{
108	return (NULL);
109}
110
/*
 * Write the usage summary to stderr and terminate with EXIT_FAILURE.
 */
static void
usage(void)
{
	static const char text[] =
	    "\nusage:  trapstat [ -t | -T | -e entrylist ]\n"
	    "   [ -C psrset | -c cpulist ]\n"
	    "   [ -P ] [ -a ] [ -r rate ] [[ interval [ count ] ] | "
	    "command [ args ] ]\n\n"
	    "Trap selection options:\n\n"
	    " -t             TLB statistics\n"
	    " -T             TLB statistics, with pagesize information\n"
	    " -e entrylist   Enable statistics only for entries specified "
	    "by entrylist\n\n"
	    "CPU selection options:\n\n"
	    " -c cpulist     Enable statistics only for specified CPU list\n"
	    " -C psrset      Enable statistics only for specified processor "
	    "set\n\n"
	    "Other options:\n\n"
	    " -a             Display trap values as accumulating values "
	    "instead of rates\n"
	    " -l             List trap table entries and exit\n"
	    " -P             Display output in parsable format\n"
	    " -r hz          Set sampling rate to be hz samples "
	    "per second\n\n";

	/* The text contains no conversions, so it can be written verbatim. */
	(void) fputs(text, stderr);

	exit(EXIT_FAILURE);
}
138
139static void
140fatal(char *fmt, ...)
141{
142	va_list ap;
143	int error = errno;
144
145	va_start(ap, fmt);
146
147	(void) fprintf(stderr, TSTAT_COMMAND ": ");
148	(void) vfprintf(stderr, fmt, ap);
149
150	if (fmt[strlen(fmt) - 1] != '\n')
151		(void) fprintf(stderr, ": %s\n", strerror(error));
152
153	exit(EXIT_FAILURE);
154}
155
156static void
157set_width(void)
158{
159	struct winsize win;
160
161	if (!isatty(fileno(stdout)))
162		return;
163
164	if (ioctl(fileno(stdout), TIOCGWINSZ, &win) == -1)
165		return;
166
167	if (win.ws_col == 0) {
168		/*
169		 * If TIOCGWINSZ returned 0 for the columns, just return --
170		 * thereby using the default value of g_cpus_per_line.  (This
171		 * happens, e.g., when running over a tip line.)
172		 */
173		return;
174	}
175
176	g_cpus_per_line = (win.ws_col - TSTAT_COLUMN_OFFS) /
177	    TSTAT_COLUMNS_PER_CPU;
178
179	if (g_cpus_per_line < 1)
180		g_cpus_per_line = 1;
181}
182
183static void
184intr(int signo)
185{
186	int error = errno;
187
188	switch (signo) {
189	case SIGWINCH:
190		g_winch = 1;
191		set_width();
192		break;
193
194	case SIGCHLD:
195		g_child_exited = 1;
196
197		while (wait(&g_child_status) == -1 && errno == EINTR)
198			continue;
199		break;
200
201	default:
202		break;
203	}
204
205	errno = error;
206}
207
208static void
209setup(void)
210{
211	struct sigaction act;
212	struct sigevent ev;
213	sigset_t set;
214	int i;
215
216	for (i = 0; i < TSTAT_NENT; i++) {
217		tstat_ent_t	*gtp;
218
219		if ((gtp = get_trap_ent(i)) == NULL)
220			continue;
221
222		if (gtp->tent_type == TSTAT_ENT_RESERVED)
223			gtp->tent_name = "reserved";
224
225		if (gtp->tent_type == TSTAT_ENT_UNUSED)
226			gtp->tent_name = "unused";
227	}
228
229	g_max_cpus = (processorid_t)sysconf(_SC_CPUID_MAX) + 1;
230
231	if ((g_selected = malloc(sizeof (int8_t) * g_max_cpus)) == NULL)
232		fatal("could not allocate g_selected");
233
234	bzero(g_selected, sizeof (int8_t) * g_max_cpus);
235
236	g_pset_cpus = malloc(sizeof (processorid_t) * g_max_cpus);
237	if (g_pset_cpus == NULL)
238		fatal("could not allocate g_pset_cpus");
239
240	bzero(g_pset_cpus, sizeof (processorid_t) * g_max_cpus);
241
242	if ((g_pgsizes = getpagesizes(NULL, 0)) == -1)
243		fatal("getpagesizes()");
244
245	if ((g_pgsize = malloc(sizeof (size_t) * g_pgsizes)) == NULL)
246		fatal("could not allocate g_pgsize array");
247
248	if (getpagesizes(g_pgsize, g_pgsizes) == -1)
249		fatal("getpagesizes(%d)", g_pgsizes);
250
251	if ((g_pgnames = malloc(sizeof (char *) * g_pgsizes)) == NULL)
252		fatal("could not allocate g_pgnames");
253
254	for (i = 0; i < g_pgsizes; i++) {
255		size_t j, mul;
256		size_t sz = g_pgsize[i];
257
258		if ((g_pgnames[i] = malloc(TSTAT_PAGESIZE_STRLEN)) == NULL)
259			fatal("could not allocate g_pgnames[%d]", i);
260
261		for (j = 0, mul = 10; (1 << mul) <= sz; j++, mul += 10)
262			continue;
263
264		(void) snprintf(g_pgnames[i], TSTAT_PAGESIZE_STRLEN,
265		    "%d%c", sz >> (mul - 10), " kmgtpe"[j]);
266	}
267
268	g_datasize =
269	    sizeof (tstat_data_t) + (g_pgsizes - 1) * sizeof (tstat_pgszdata_t);
270
271	if ((g_data[0] = malloc(g_datasize * g_max_cpus)) == NULL)
272		fatal("could not allocate data buffer 0");
273
274	if ((g_data[1] = malloc(g_datasize * g_max_cpus)) == NULL)
275		fatal("could not allocate data buffer 1");
276
277	(void) sigemptyset(&act.sa_mask);
278	act.sa_flags = 0;
279	act.sa_handler = intr;
280	(void) sigaction(SIGUSR1, &act, NULL);
281	(void) sigaction(SIGCHLD, &act, NULL);
282
283	(void) sigaddset(&act.sa_mask, SIGCHLD);
284	(void) sigaddset(&act.sa_mask, SIGUSR1);
285	(void) sigaction(SIGWINCH, &act, NULL);
286	set_width();
287
288	(void) sigemptyset(&set);
289	(void) sigaddset(&set, SIGCHLD);
290	(void) sigaddset(&set, SIGUSR1);
291	(void) sigaddset(&set, SIGWINCH);
292	(void) sigprocmask(SIG_BLOCK, &set, &g_oset);
293
294	ev.sigev_notify = SIGEV_SIGNAL;
295	ev.sigev_signo = SIGUSR1;
296
297	if (timer_create(CLOCK_HIGHRES, &ev, &g_tid) == -1)
298		fatal("cannot create CLOCK_HIGHRES timer");
299}
300
301static void
302set_interval(hrtime_t nsec)
303{
304	struct itimerspec ts;
305
306	/*
307	 * If the interval is less than one second, we'll report the
308	 * numbers in terms of rate-per-interval.  If the interval is
309	 * greater than one second, we'll report numbers in terms of
310	 * rate-per-second.
311	 */
312	g_interval = nsec < NANOSEC ? nsec : NANOSEC;
313
314	ts.it_value.tv_sec = nsec / NANOSEC;
315	ts.it_value.tv_nsec = nsec % NANOSEC;
316	ts.it_interval.tv_sec = nsec / NANOSEC;
317	ts.it_interval.tv_nsec = nsec % NANOSEC;
318
319	if (timer_settime(g_tid, TIMER_RELTIME, &ts, NULL) == -1)
320		fatal("cannot set time on CLOCK_HIGHRES timer");
321}
322
323static void
324print_entries(FILE *stream, int parsable)
325{
326	int entno;
327
328	if (!parsable) {
329		(void) fprintf(stream, "  %3s %3s | %-20s | %s\n", "hex",
330		    "dec", "entry name", "description");
331
332		(void) fprintf(stream, "----------+----------------------"
333		    "+-----------------------\n");
334	}
335
336	for (entno = 0; entno < TSTAT_NENT; entno++) {
337		tstat_ent_t	*gtp;
338
339		if ((gtp = get_trap_ent(entno)) == NULL)
340			continue;
341
342		if (gtp->tent_type != TSTAT_ENT_USED)
343			continue;
344
345		(void) fprintf(stream, "0x%03x %3d %s%-20s %s%s\n",
346		    entno, entno,
347		    parsable ? "" : "| ", gtp->tent_name,
348		    parsable ? "" : "| ", gtp->tent_descr);
349	}
350}
351
352static void
353select_entry(char *entry)
354{
355	ulong_t entno;
356	char *end;
357
358	/*
359	 * The entry may be specified as a number (e.g., "0x68", "104") or
360	 * as a name ("dtlb-miss").
361	 */
362	entno = strtoul(entry, &end, 0);
363
364	if (*end == '\0') {
365		if (entno >= TSTAT_NENT)
366			goto bad_entry;
367	} else {
368		for (entno = 0; entno < TSTAT_NENT; entno++) {
369			tstat_ent_t	*gtp;
370
371			if ((gtp = get_trap_ent(entno)) == NULL)
372				continue;
373
374			if (gtp->tent_type != TSTAT_ENT_USED)
375				continue;
376
377			if (strcmp(entry, gtp->tent_name) == 0)
378				break;
379		}
380
381		if (entno == TSTAT_NENT)
382			goto bad_entry;
383	}
384
385	if (ioctl(g_fd, TSTATIOC_ENTRY, entno) == -1)
386		fatal("TSTATIOC_ENTRY failed for entry 0x%x", entno);
387
388	g_active[entno] = 1;
389	return;
390
391bad_entry:
392	(void) fprintf(stderr, TSTAT_COMMAND ": invalid entry '%s'", entry);
393	(void) fprintf(stderr, "; valid entries:\n\n");
394	print_entries(stderr, 0);
395	exit(EXIT_FAILURE);
396}
397
398static void
399select_cpu(processorid_t cpu)
400{
401	if (g_pset != PS_NONE)
402		fatal("cannot specify both a processor set and a processor\n");
403
404	if (cpu < 0 || cpu >= g_max_cpus)
405		fatal("cpu %d out of range\n", cpu);
406
407	if (p_online(cpu, P_STATUS) == -1) {
408		if (errno != EINVAL)
409			fatal("could not get status for cpu %d", cpu);
410		fatal("cpu %d not present\n", cpu);
411	}
412
413	g_selected[cpu] = 1;
414}
415
416static void
417select_cpus(processorid_t low, processorid_t high)
418{
419	if (g_pset != PS_NONE)
420		fatal("cannot specify both a processor set and processors\n");
421
422	if (low < 0 || low >= g_max_cpus)
423		fatal("invalid cpu '%d'\n", low);
424
425	if (high < 0 || high >= g_max_cpus)
426		fatal("invalid cpu '%d'\n", high);
427
428	if (low >= high)
429		fatal("invalid range '%d' to '%d'\n", low, high);
430
431	do {
432		if (p_online(low, P_STATUS) != -1)
433			g_selected[low] = 1;
434	} while (++low <= high);
435}
436
437static void
438select_pset(psetid_t pset)
439{
440	processorid_t i;
441
442	if (pset < 0)
443		fatal("processor set %d is out of range\n", pset);
444
445	/*
446	 * Only one processor set can be specified.
447	 */
448	if (g_pset != PS_NONE)
449		fatal("at most one processor set may be specified\n");
450
451	/*
452	 * One cannot select processors _and_ a processor set.
453	 */
454	for (i = 0; i < g_max_cpus; i++)
455		if (g_selected[i])
456			break;
457
458	if (i != g_max_cpus)
459		fatal("cannot specify both a processor and a processor set\n");
460
461	g_pset = pset;
462	g_pset_ncpus = g_max_cpus;
463
464	if (pset_info(g_pset, NULL, &g_pset_ncpus, g_pset_cpus) == -1)
465		fatal("invalid processor set: %d\n", g_pset);
466
467	if (g_pset_ncpus == 0)
468		fatal("processor set %d empty\n", g_pset);
469
470	if (ioctl(g_fd, TSTATIOC_NOCPU) == -1)
471		fatal("TSTATIOC_NOCPU failed");
472
473	for (i = 0; i < g_pset_ncpus; i++)
474		g_selected[g_pset_cpus[i]] = 1;
475}
476
477static void
478check_pset(void)
479{
480	uint_t ncpus = g_max_cpus;
481	processorid_t i;
482
483	if (g_pset == PS_NONE)
484		return;
485
486	if (pset_info(g_pset, NULL, &ncpus, g_pset_cpus) == -1) {
487		if (errno == EINVAL)
488			fatal("processor set %d destroyed\n", g_pset);
489
490		fatal("couldn't get info for processor set %d", g_pset);
491	}
492
493	if (ncpus == 0)
494		fatal("processor set %d empty\n", g_pset);
495
496	if (ncpus == g_pset_ncpus) {
497		for (i = 0; i < g_pset_ncpus; i++) {
498			if (!g_selected[g_pset_cpus[i]])
499				break;
500		}
501
502		/*
503		 * If the number of CPUs hasn't changed, and every CPU
504		 * in the processor set is also selected, we know that the
505		 * processor set itself hasn't changed.
506		 */
507		if (i == g_pset_ncpus)
508			return;
509	}
510
511	/*
512	 * If we're here, we have a new processor set.  First, we need
513	 * to zero out the selection array.
514	 */
515	bzero(g_selected, sizeof (int8_t) * g_max_cpus);
516
517	g_pset_ncpus = ncpus;
518
519	if (ioctl(g_fd, TSTATIOC_STOP) == -1)
520		fatal("TSTATIOC_STOP failed");
521
522	if (ioctl(g_fd, TSTATIOC_NOCPU) == -1)
523		fatal("TSATIOC_NOCPU failed");
524
525	for (i = 0; i < g_pset_ncpus; i++) {
526		g_selected[g_pset_cpus[i]] = 1;
527		if (ioctl(g_fd, TSTATIOC_CPU, g_pset_cpus[i]) == -1)
528			fatal("TSTATIOC_CPU failed for cpu %d", i);
529	}
530
531	/*
532	 * Now that we have selected the CPUs, we're going to reenable
533	 * trapstat, and reread the data for the current generation.
534	 */
535	if (ioctl(g_fd, TSTATIOC_GO) == -1)
536		fatal("TSTATIOC_GO failed");
537
538	if (ioctl(g_fd, TSTATIOC_READ, g_data[g_gen]) == -1)
539		fatal("TSTATIOC_READ failed");
540}
541
542static void
543missdata(tstat_missdata_t *miss, tstat_missdata_t *omiss)
544{
545	hrtime_t ts = g_ndata->tdata_snapts - g_odata->tdata_snapts;
546	hrtime_t tick = g_ndata->tdata_snaptick - g_odata->tdata_snaptick;
547	uint64_t raw = miss->tmiss_count - omiss->tmiss_count;
548	uint64_t diff = g_absolute ? miss->tmiss_count :
549	    (uint64_t)(0.5 + g_interval /
550	    (double)ts * (double)(miss->tmiss_count - omiss->tmiss_count));
551	hrtime_t peffect = raw * g_ndata->tdata_peffect * g_peffect, time;
552	double p;
553
554	/*
555	 * Now we need to account for the trapstat probe effect.  Take
556	 * the amount of time spent in the handler, and add the
557	 * amount of time known to be due to the trapstat probe effect.
558	 */
559	time = miss->tmiss_time - omiss->tmiss_time + peffect;
560
561	if (time >= tick) {
562		/*
563		 * This really shouldn't happen unless our calculation of
564		 * the probe effect was vastly incorrect.  In any case,
565		 * print 99.9 for the time instead of printing negative
566		 * values...
567		 */
568		time = tick / 1000 * 999;
569	}
570
571	p = (double)time / (double)tick * (double)100.0;
572
573	(*g_process)(g_arg, diff, p);
574}
575
576static void
577tlbdata(tstat_tlbdata_t *tlb, tstat_tlbdata_t *otlb)
578{
579	missdata(&tlb->ttlb_tlb, &otlb->ttlb_tlb);
580	missdata(&tlb->ttlb_tsb, &otlb->ttlb_tsb);
581}
582
/*
 * g_process callback that prints one miss count / percent-time pair.  If
 * ttl is non-NULL it points to a double to which the percentage is added.
 *
 * The first parameter is a void * so that the function's type matches the
 * g_process pointer type through which it is called.
 */
static void
print_missdata(void *ttl, uint64_t diff, double p)
{
	double *total = ttl;

	TSTAT_PRINT_MISSDATA(diff, p);

	if (total != NULL)
		*total += p;
}
591
592static void
593print_modepgsz(char *prefix, tstat_modedata_t *data, tstat_modedata_t *odata)
594{
595	int ps;
596	size_t incr = sizeof (tstat_pgszdata_t);
597
598	for (ps = 0; ps < g_pgsizes; ps++) {
599		double ttl = 0.0;
600
601		g_process = (void(*)(void *, uint64_t, double))print_missdata;
602		g_arg = &ttl;
603
604		(void) printf("%s %4s|", prefix, g_pgnames[ps]);
605		tlbdata(&data->tmode_itlb, &odata->tmode_itlb);
606		(void) printf(" |");
607		tlbdata(&data->tmode_dtlb, &odata->tmode_dtlb);
608
609		(void) printf(" |%4.1f\n", ttl);
610
611		data = (tstat_modedata_t *)((uintptr_t)data + incr);
612		odata = (tstat_modedata_t *)((uintptr_t)odata + incr);
613	}
614}
615
616static void
617parsable_modepgsz(char *prefix, tstat_modedata_t *data, tstat_modedata_t *odata)
618{
619	int ps;
620	size_t incr = sizeof (tstat_pgszdata_t);
621
622	g_process = (void(*)(void *, uint64_t, double))print_missdata;
623	g_arg = NULL;
624
625	for (ps = 0; ps < g_pgsizes; ps++) {
626		(void) printf("%s %7d", prefix, g_pgsize[ps]);
627		tlbdata(&data->tmode_itlb, &odata->tmode_itlb);
628		tlbdata(&data->tmode_dtlb, &odata->tmode_dtlb);
629		(void) printf("\n");
630
631		data = (tstat_modedata_t *)((uintptr_t)data + incr);
632		odata = (tstat_modedata_t *)((uintptr_t)odata + incr);
633	}
634}
635
636static void
637sum_missdata(void *sump, uint64_t diff, double p)
638{
639	tstat_sum_t *sum = *((tstat_sum_t **)sump);
640
641	sum->tsum_diff += diff;
642	sum->tsum_time += p;
643
644	(*(tstat_sum_t **)sump)++;
645}
646
647static void
648sum_modedata(tstat_modedata_t *data, tstat_modedata_t *odata, tstat_sum_t *sum)
649{
650	int ps, incr = sizeof (tstat_pgszdata_t);
651	tstat_sum_t *sump;
652
653	for (ps = 0; ps < g_pgsizes; ps++) {
654		sump = sum;
655
656		g_process = sum_missdata;
657		g_arg = &sump;
658
659		tlbdata(&data->tmode_itlb, &odata->tmode_itlb);
660		tlbdata(&data->tmode_dtlb, &odata->tmode_dtlb);
661
662		data = (tstat_modedata_t *)((uintptr_t)data + incr);
663		odata = (tstat_modedata_t *)((uintptr_t)odata + incr);
664	}
665}
666
667static void
668print_sum(tstat_sum_t *sum, int divisor)
669{
670	int i;
671	double ttl = 0.0;
672
673	for (i = 0; i < 4; i++) {
674		if (i == 2)
675			(void) printf(" |");
676
677		sum[i].tsum_time /= divisor;
678
679		TSTAT_PRINT_MISSDATA(sum[i].tsum_diff, sum[i].tsum_time);
680		ttl += sum[i].tsum_time;
681	}
682
683	(void) printf(" |%4.1f\n", ttl);
684}
685
686static void
687print_tlbpgsz(tstat_data_t *data, tstat_data_t *odata)
688{
689	int i, cpu, ncpus = 0;
690	char pre[12];
691	tstat_sum_t sum[4];
692
693	(void) printf("cpu m size| %9s %4s %9s %4s | %9s %4s %9s %4s |%4s\n"
694	    "----------+-------------------------------+-----------------------"
695	    "--------+----\n", "itlb-miss", "%tim", "itsb-miss", "%tim",
696	    "dtlb-miss", "%tim", "dtsb-miss", "%tim", "%tim");
697
698	bzero(sum, sizeof (sum));
699
700	for (i = 0; i < g_max_cpus; i++) {
701		tstat_pgszdata_t *pgsz = data->tdata_pgsz;
702		tstat_pgszdata_t *opgsz = odata->tdata_pgsz;
703
704		if ((cpu = data->tdata_cpuid) == -1)
705			break;
706
707		if (i != 0)
708			(void) printf("----------+-----------------------------"
709			    "--+-------------------------------+----\n");
710
711		g_ndata = data;
712		g_odata = odata;
713
714		(void) sprintf(pre, "%3d u", cpu);
715		print_modepgsz(pre, &pgsz->tpgsz_user, &opgsz->tpgsz_user);
716		sum_modedata(&pgsz->tpgsz_user, &opgsz->tpgsz_user, sum);
717
718		(void) printf("- - - - - + - - - - - - - - - - - - - -"
719		    " - + - - - - - - - - - - - - - - - + - -\n");
720
721		(void) sprintf(pre, "%3d k", cpu);
722		print_modepgsz(pre, &pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel);
723		sum_modedata(&pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel, sum);
724
725		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
726		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
727		ncpus++;
728	}
729
730	(void) printf("==========+===============================+========="
731	    "======================+====\n");
732	(void) printf("      ttl |");
733	print_sum(sum, ncpus);
734	(void) printf("\n");
735}
736
737static void
738parsable_tlbpgsz(tstat_data_t *data, tstat_data_t *odata)
739{
740	int i, cpu;
741	char pre[30];
742
743	for (i = 0; i < g_max_cpus; i++) {
744		tstat_pgszdata_t *pgsz = data->tdata_pgsz;
745		tstat_pgszdata_t *opgsz = odata->tdata_pgsz;
746
747		if ((cpu = data->tdata_cpuid) == -1)
748			break;
749
750		g_ndata = data;
751		g_odata = odata;
752
753		(void) sprintf(pre, "%lld %3d u",
754		    data->tdata_snapts - g_start, cpu);
755		parsable_modepgsz(pre, &pgsz->tpgsz_user, &opgsz->tpgsz_user);
756
757		pre[strlen(pre) - 1] = 'k';
758		parsable_modepgsz(pre, &pgsz->tpgsz_kernel,
759		    &opgsz->tpgsz_kernel);
760
761		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
762		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
763	}
764}
765
766static void
767print_modedata(tstat_modedata_t *data, tstat_modedata_t *odata, int parsable)
768{
769	int ps, i;
770	size_t incr = sizeof (tstat_pgszdata_t);
771	tstat_sum_t sum[4], *sump = sum;
772	double ttl = 0.0;
773
774	bzero(sum, sizeof (sum));
775	g_process = sum_missdata;
776	g_arg = &sump;
777
778	for (ps = 0; ps < g_pgsizes; ps++) {
779		tlbdata(&data->tmode_itlb, &odata->tmode_itlb);
780		tlbdata(&data->tmode_dtlb, &odata->tmode_dtlb);
781
782		data = (tstat_modedata_t *)((uintptr_t)data + incr);
783		odata = (tstat_modedata_t *)((uintptr_t)odata + incr);
784		sump = sum;
785	}
786
787	for (i = 0; i < 4; i++) {
788		if (i == 2 && !parsable)
789			(void) printf(" |");
790
791		TSTAT_PRINT_MISSDATA(sum[i].tsum_diff, sum[i].tsum_time);
792		ttl += sum[i].tsum_time;
793	}
794
795	if (parsable) {
796		(void) printf("\n");
797		return;
798	}
799
800	(void) printf(" |%4.1f\n", ttl);
801}
802
803static void
804print_tlb(tstat_data_t *data, tstat_data_t *odata)
805{
806	int i, cpu, ncpus = 0;
807	tstat_sum_t sum[4];
808
809	(void) printf("cpu m| %9s %4s %9s %4s | %9s %4s %9s %4s |%4s\n"
810	    "-----+-------------------------------+-----------------------"
811	    "--------+----\n", "itlb-miss", "%tim", "itsb-miss", "%tim",
812	    "dtlb-miss", "%tim", "dtsb-miss", "%tim", "%tim");
813
814	bzero(sum, sizeof (sum));
815
816	for (i = 0; i < g_max_cpus; i++) {
817		tstat_pgszdata_t *pgsz = data->tdata_pgsz;
818		tstat_pgszdata_t *opgsz = odata->tdata_pgsz;
819
820		if ((cpu = data->tdata_cpuid) == -1)
821			break;
822
823		if (i != 0)
824			(void) printf("-----+-------------------------------+-"
825			    "------------------------------+----\n");
826
827		g_ndata = data;
828		g_odata = odata;
829
830		(void) printf("%3d u|", cpu);
831		print_modedata(&pgsz->tpgsz_user, &opgsz->tpgsz_user, 0);
832		sum_modedata(&pgsz->tpgsz_user, &opgsz->tpgsz_user, sum);
833
834		(void) printf("%3d k|", cpu);
835		print_modedata(&pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel, 0);
836		sum_modedata(&pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel, sum);
837
838		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
839		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
840		ncpus++;
841	}
842
843	(void) printf("=====+===============================+========="
844	    "======================+====\n");
845
846	(void) printf(" ttl |");
847	print_sum(sum, ncpus);
848	(void) printf("\n");
849}
850
851static void
852parsable_tlb(tstat_data_t *data, tstat_data_t *odata)
853{
854	int i, cpu;
855
856	for (i = 0; i < g_max_cpus; i++) {
857		tstat_pgszdata_t *pgsz = data->tdata_pgsz;
858		tstat_pgszdata_t *opgsz = odata->tdata_pgsz;
859
860		if ((cpu = data->tdata_cpuid) == -1)
861			break;
862
863		g_ndata = data;
864		g_odata = odata;
865
866		(void) printf("%lld %3d u ", data->tdata_snapts - g_start, cpu);
867		print_modedata(&pgsz->tpgsz_user, &opgsz->tpgsz_user, 1);
868		(void) printf("%lld %3d k ", data->tdata_snapts - g_start, cpu);
869		print_modedata(&pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel, 1);
870
871		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
872		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
873	}
874}
875
876static void
877print_stats(tstat_data_t *data, tstat_data_t *odata)
878{
879	int i, j, k, done;
880	processorid_t id;
881	tstat_data_t *base = data;
882
883	/*
884	 * First, blast through all of the data updating our array
885	 * of active traps.  We keep an array of active traps to prevent
886	 * printing lines for traps that are never seen -- while still printing
887	 * lines for traps that have been seen only once on some CPU.
888	 */
889	for (i = 0; i < g_max_cpus; i++) {
890		if (data[i].tdata_cpuid == -1)
891			break;
892
893		for (j = 0; j < TSTAT_NENT; j++) {
894			if (!data[i].tdata_traps[j] || g_active[j])
895				continue;
896
897			g_active[j] = 1;
898		}
899	}
900
901	data = base;
902
903	for (done = 0; !done; data += g_cpus_per_line) {
904		for (i = 0; i < g_cpus_per_line; i++) {
905			if (&data[i] - base >= g_max_cpus)
906				break;
907
908			if ((id = data[i].tdata_cpuid) == -1)
909				break;
910
911			if (i == 0)
912				(void) printf("vct name                |");
913
914			(void) printf("   %scpu%d", id >= 100 ? "" :
915			    id >= 10 ? " " : "  ", id);
916		}
917
918		if (i == 0)
919			break;
920
921		if (i != g_cpus_per_line)
922			done = 1;
923
924		(void) printf("\n------------------------+");
925
926		for (j = 0; j < i; j++)
927			(void) printf("---------");
928		(void) printf("\n");
929
930		for (j = 0; j < TSTAT_NENT; j++) {
931			tstat_ent_t	*gtp;
932
933			if ((!g_active[j]) || ((gtp = get_trap_ent(j)) == NULL))
934				continue;
935
936			(void) printf("%3x %-20s|", j, gtp->tent_name);
937			for (k = 0; k < i; k++) {
938				(void) printf(" %8lld", TSTAT_DELTA(&data[k],
939				    &odata[data - base + k], tdata_traps[j]));
940			}
941			(void) printf("\n");
942		}
943		(void) printf("\n");
944	}
945}
946
947static void
948parsable_stats(tstat_data_t *data, tstat_data_t *odata)
949{
950	tstat_data_t *base;
951	int i;
952
953	for (base = data; data - base < g_max_cpus; data++, odata++) {
954		if (data->tdata_cpuid == -1)
955			break;
956
957		for (i = 0; i < TSTAT_NENT; i++) {
958			tstat_ent_t	*gtp;
959
960			if ((!data->tdata_traps[i] && !g_active[i]) ||
961			    ((gtp = get_trap_ent(i)) == NULL))
962				continue;
963
964			(void) printf("%lld %d %x %s ",
965			    data->tdata_snapts - g_start, data->tdata_cpuid, i,
966			    gtp->tent_name);
967
968			(void) printf("%lld\n", TSTAT_DELTA(data, odata,
969			    tdata_traps[i]));
970		}
971	}
972}
973
974static void
975check_data(tstat_data_t *data, tstat_data_t *odata)
976{
977	tstat_data_t *ndata;
978	int i;
979
980	if (data->tdata_cpuid == -1) {
981		/*
982		 * The last CPU we were watching must have been DR'd out
983		 * of the system.  Print a vaguely useful message and exit.
984		 */
985		fatal("all initially selected CPUs have been unconfigured\n");
986	}
987
988	/*
989	 * If a CPU is DR'd out of the system, we'll stop receiving data
990	 * for it.  CPUs are never added, however (that is, if a CPU is
991	 * DR'd into the system, we won't automatically start receiving
992	 * data for it).  We check for this by making sure that all of
993	 * the CPUs present in the old data are present in the new data.
994	 * If we find one missing in the new data, we correct the old data
995	 * by removing the old CPU.  This assures that delta are printed
996	 * correctly.
997	 */
998	for (i = 0; i < g_max_cpus; i++) {
999		if (odata->tdata_cpuid == -1)
1000			return;
1001
1002		if (data->tdata_cpuid != odata->tdata_cpuid)
1003			break;
1004
1005		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
1006		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
1007	}
1008
1009	if (i == g_max_cpus)
1010		return;
1011
1012	/*
1013	 * If we're here, we know that the odata is a CPU which has been
1014	 * DR'd out.  We'll now smoosh it out of the old data.
1015	 */
1016	for (odata->tdata_cpuid = -1; i < g_max_cpus - 1; i++) {
1017		ndata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
1018		bcopy(ndata, odata, g_datasize);
1019		ndata->tdata_cpuid = -1;
1020	}
1021
1022	/*
1023	 * There may be other CPUs DR'd out; tail-call recurse.
1024	 */
1025	check_data(data, odata);
1026}
1027
1028int
1029main(int argc, char **argv)
1030{
1031	processorid_t id;
1032	char c, *end;
1033	ulong_t indefinite;
1034	long count = 0, rate = 0;
1035	int list = 0, parsable = 0;
1036	void (*print)(tstat_data_t *, tstat_data_t *);
1037	sigset_t set;
1038
1039	struct {
1040		char opt;
1041		void (*print)(tstat_data_t *, tstat_data_t *);
1042		void (*parsable)(tstat_data_t *, tstat_data_t *);
1043		int repeat;
1044	} tab[] = {
1045		{ '\0',	print_stats,	parsable_stats,		0 },
1046		{ 'e',	print_stats,	parsable_stats,		1 },
1047		{ 't',	print_tlb,	parsable_tlb,		0 },
1048		{ 'T',	print_tlbpgsz,	parsable_tlbpgsz,	0 },
1049		{ -1,	NULL,		NULL,			0 }
1050	}, *tabent = NULL, *iter;
1051
1052	uintptr_t offs = (uintptr_t)&tab->print - (uintptr_t)tab;
1053
1054	/*
1055	 * If argv[0] is non-NULL, set argv[0] to keep any getopt(3C) output
1056	 * consistent with other error output.
1057	 */
1058	if (argv[0] != NULL)
1059		argv[0] = TSTAT_COMMAND;
1060
1061	if ((g_fd = open(TSTAT_DEVICE, O_RDWR)) == -1)
1062		fatal("couldn't open " TSTAT_DEVICE);
1063
1064	setup();
1065
1066	while ((c = getopt(argc, argv, "alnNtTc:C:r:e:P")) != EOF) {
1067		/*
1068		 * First, check to see if this option changes our printing
1069		 * function.
1070		 */
1071		for (iter = tab; iter->opt >= 0; iter++) {
1072			if (c != iter->opt)
1073				continue;
1074
1075			if (tabent != NULL) {
1076				if (tabent == iter) {
1077					if (tabent->repeat) {
1078						/*
1079						 * This option is allowed to
1080						 * have repeats; break out.
1081						 */
1082						break;
1083					}
1084
1085					fatal("expected -%c at most once\n", c);
1086				}
1087
1088				fatal("only one of -%c, -%c expected\n",
1089				    tabent->opt, c);
1090			}
1091
1092			tabent = iter;
1093			break;
1094		}
1095
1096		switch (c) {
1097		case 'a':
1098			g_absolute = 1;
1099			break;
1100
1101		case 'e': {
1102			char *s = strtok(optarg, ",");
1103
1104			while (s != NULL) {
1105				select_entry(s);
1106				s = strtok(NULL, ",");
1107			}
1108
1109			break;
1110		}
1111
1112		case 'l':
1113			list = 1;
1114			break;
1115
1116		case 'n':
1117			/*
1118			 * This undocumented option prevents trapstat from
1119			 * actually switching the %tba to point to the
1120			 * interposing trap table.  It's very useful when
1121			 * debugging trapstat bugs:  one can specify "-n"
1122			 * and then examine the would-be interposing trap
1123			 * table without running the risk of RED stating.
1124			 */
1125			if (ioctl(g_fd, TSTATIOC_NOGO) == -1)
1126				fatal("TSTATIOC_NOGO");
1127			break;
1128
1129		case 'N':
1130			/*
1131			 * This undocumented option forces trapstat to ignore
1132			 * its determined probe effect.  This may be useful
1133			 * if it is believed that the probe effect has been
1134			 * grossly overestimated.
1135			 */
1136			g_peffect = 0;
1137			break;
1138
1139		case 't':
1140		case 'T':
1141			/*
1142			 * When running with TLB statistics, we want to
1143			 * minimize probe effect by running with all other
1144			 * entries explicitly disabled.
1145			 */
1146			if (ioctl(g_fd, TSTATIOC_NOENTRY) == -1)
1147				fatal("TSTATIOC_NOENTRY");
1148
1149			if (ioctl(g_fd, TSTATIOC_TLBDATA) == -1)
1150				fatal("TSTATIOC_TLBDATA");
1151			break;
1152
1153		case 'c': {
1154			/*
1155			 * We allow CPUs to be specified as an optionally
1156			 * comma separated list of either CPU IDs or ranges
1157			 * of CPU IDs.
1158			 */
1159			char *s = strtok(optarg, ",");
1160
1161			while (s != NULL) {
1162				id = strtoul(s, &end, 0);
1163
1164				if (id == ULONG_MAX && errno == ERANGE) {
1165					*end = '\0';
1166					fatal("invalid cpu '%s'\n", s);
1167				}
1168
1169				if (*(s = end) != '\0') {
1170					processorid_t p;
1171
1172					if (*s != '-')
1173						fatal("invalid cpu '%s'\n", s);
1174					p = strtoul(++s, &end, 0);
1175
1176					if (*end != '\0' ||
1177					    (p == ULONG_MAX && errno == ERANGE))
1178						fatal("invalid cpu '%s'\n", s);
1179
1180					select_cpus(id, p);
1181				} else {
1182					select_cpu(id);
1183				}
1184
1185				s = strtok(NULL, ",");
1186			}
1187
1188			break;
1189		}
1190
1191		case 'C': {
1192			psetid_t pset = strtoul(optarg, &end, 0);
1193
1194			if (*end != '\0' ||
1195			    (pset == ULONG_MAX && errno == ERANGE))
1196				fatal("invalid processor set '%s'\n", optarg);
1197
1198			select_pset(pset);
1199			break;
1200		}
1201
1202		case 'r': {
1203			rate = strtol(optarg, &end, 0);
1204
1205			if (*end != '\0' ||
1206			    (rate == LONG_MAX && errno == ERANGE))
1207				fatal("invalid rate '%s'\n", optarg);
1208
1209			if (rate <= 0)
1210				fatal("rate must be greater than zero\n");
1211
1212			if (rate > TSTAT_MAX_RATE)
1213				fatal("rate may not exceed %d\n",
1214				    TSTAT_MAX_RATE);
1215
1216			set_interval(NANOSEC / rate);
1217			break;
1218		}
1219
1220		case 'P':
1221			offs = (uintptr_t)&tab->parsable - (uintptr_t)tab;
1222			parsable = 1;
1223			break;
1224
1225		default:
1226			usage();
1227		}
1228	}
1229
1230	if (list) {
1231		print_entries(stdout, parsable);
1232		exit(EXIT_SUCCESS);
1233	}
1234
1235	if (optind != argc) {
1236
1237		int interval = strtol(argv[optind], &end, 0);
1238
1239		if (*end != '\0') {
1240			/*
1241			 * That wasn't a valid number.  It must be that we're
1242			 * to execute this command.
1243			 */
1244			switch (vfork()) {
1245			case 0:
1246				(void) close(g_fd);
1247				(void) sigprocmask(SIG_SETMASK, &g_oset, NULL);
1248				(void) execvp(argv[optind], &argv[optind]);
1249
1250				/*
1251				 * No luck.  Save errno in g_exec_errno.
1252				 */
1253				g_exec_errno = errno;
1254				_exit(EXIT_FAILURE);
1255				/*NOTREACHED*/
1256			case -1:
1257				fatal("cannot fork");
1258				/*NOTREACHED*/
1259			default:
1260				break;
1261			}
1262		} else {
1263			if (interval <= 0)
1264				fatal("interval must be greater than zero.\n");
1265
1266			if (interval == LONG_MAX && errno == ERANGE)
1267				fatal("invalid interval '%s'\n", argv[optind]);
1268
1269			set_interval(NANOSEC * (hrtime_t)interval);
1270
1271			if (++optind != argc) {
1272				char *s = argv[optind];
1273
1274				count = strtol(s, &end, 0);
1275
1276				if (*end != '\0' || count <= 0 ||
1277				    (count == LONG_MAX && errno == ERANGE))
1278					fatal("invalid count '%s'\n", s);
1279			}
1280		}
1281	} else {
1282		if (!rate)
1283			set_interval(NANOSEC);
1284	}
1285
1286	if (tabent == NULL)
1287		tabent = tab;
1288
1289	print = *(void(**)(tstat_data_t *, tstat_data_t *))
1290	    ((uintptr_t)tabent + offs);
1291
1292	for (id = 0; id < g_max_cpus; id++) {
1293		if (!g_selected[id])
1294			continue;
1295
1296		if (ioctl(g_fd, TSTATIOC_CPU, id) == -1)
1297			fatal("TSTATIOC_CPU failed for cpu %d", id);
1298	}
1299
1300	g_start = gethrtime();
1301
1302	if (ioctl(g_fd, TSTATIOC_GO) == -1)
1303		fatal("TSTATIOC_GO failed");
1304
1305	if (ioctl(g_fd, TSTATIOC_READ, g_data[g_gen ^ 1]) == -1)
1306		fatal("initial TSTATIOC_READ failed");
1307
1308	(void) sigemptyset(&set);
1309
1310	for (indefinite = (count == 0); indefinite || count; count--) {
1311
1312		(void) sigsuspend(&set);
1313
1314		if (g_winch) {
1315			g_winch = 0;
1316			continue;
1317		}
1318
1319		if (g_child_exited && g_exec_errno != 0) {
1320			errno = g_exec_errno;
1321			fatal("could not execute %s", argv[optind]);
1322		}
1323
1324		if (ioctl(g_fd, TSTATIOC_READ, g_data[g_gen]) == -1)
1325			fatal("TSTATIOC_READ failed");
1326
1327		/*
1328		 * Before we blithely print the data, we need to
1329		 * make sure that we haven't lost a CPU.
1330		 */
1331		check_data(g_data[g_gen], g_data[g_gen ^ 1]);
1332		(*print)(g_data[g_gen], g_data[g_gen ^ 1]);
1333		(void) fflush(stdout);
1334
1335		if (g_child_exited) {
1336			if (WIFEXITED(g_child_status)) {
1337				if (WEXITSTATUS(g_child_status) == 0)
1338					break;
1339
1340				(void) fprintf(stderr, TSTAT_COMMAND ": "
1341				    "warning: %s exited with code %d\n",
1342				    argv[optind], WEXITSTATUS(g_child_status));
1343			} else {
1344				(void) fprintf(stderr, TSTAT_COMMAND ": "
1345				    "warning: %s died on signal %d\n",
1346				    argv[optind], WTERMSIG(g_child_status));
1347			}
1348			break;
1349		}
1350
1351		check_pset();
1352
1353		g_gen ^= 1;
1354	}
1355
1356	return (0);
1357}
1358