1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015, Joyent, Inc.
25 */
26
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28
29#include <stdio.h>
30#include <stdio_ext.h>
31#include <stdlib.h>
32#include <unistd.h>
33#include <fcntl.h>
34#include <ctype.h>
35#include <string.h>
36#include <memory.h>
37#include <signal.h>
38#include <wait.h>
39#include <limits.h>
40#include <errno.h>
41#include <sys/types.h>
42#include <sys/time.h>
43#include <sys/times.h>
44#include <sys/fstyp.h>
45#include <sys/fsid.h>
46#include <sys/stat.h>
47#include <sys/mman.h>
48#include <sys/resource.h>
49#include <libproc.h>
50#include <priv.h>
51#include "ramdata.h"
52#include "proto.h"
53#include "htbl.h"
54
/*
 * The user can trace individual threads by using the 'pid/1,3-6,8-' syntax.
 * This structure keeps track of pid/lwp specifications.  If there are no LWPs
 * specified, then 'lwps' will be NULL.
 *
 * main() allocates an array of these (one per command-line operand when -p
 * is given), hands each operand to pids() and later passes the entries to
 * grabit().
 */
typedef struct proc_set {
	pid_t		pid;	/* process-id part of the specification */
	const char 	*lwps;	/* LWP specification, or NULL if none given */
} proc_set_t;
64
/*
 * Function prototypes for routines in this file.
 * NOTE(review): the historical wording here was "static routines", but
 * none of these declarations actually carries the 'static' storage class.
 */
void	setup_basetime(hrtime_t, struct timeval *);
int	xcreat(char *);
void	setoutput(int);
void	report(private_t *, time_t);
void	prtim(timestruc_t *);
void	pids(char *, proc_set_t *);
void	psargs(private_t *);
int	control(private_t *, pid_t);
int	grabit(private_t *, proc_set_t *);
void	release(private_t *, pid_t);
void	intr(int);
int	wait4all(void);
void	letgo(private_t *);
void	child_to_file();
void	file_to_parent();
void	per_proc_init();
int	lib_sort(const void *, const void *);
int	key_sort(const void *, const void *);

/*
 * worker_thread() is the start routine handed to thr_create() by
 * create_thread(); main_thread() is entered from main().
 */
void	*worker_thread(void *);
void	main_thread(int);
89
/*
 * Test for empty set.
 * isemptyset(sp) views *sp as an array of 32-bit words and passes
 * is_empty() a pointer to it together with the number of words,
 * computed as sizeof (*(sp)) / sizeof (uint32_t).
 * is_empty() should not be called directly.
 */
int	is_empty(const uint32_t *, size_t);
#define	isemptyset(sp) \
	is_empty((uint32_t *)(sp), sizeof (*(sp)) / sizeof (uint32_t))

/*
 * OR the second set into the first set.
 * prorset(sp1, sp2) views both sets as arrays of 32-bit words; the word
 * count is derived from *sp1 only, so both arguments are presumably
 * expected to be sets of the same type (as in the calls made by main()).
 * or_set() should not be called directly.
 */
void	or_set(uint32_t *, const uint32_t *, size_t);
#define	prorset(sp1, sp2) \
	or_set((uint32_t *)(sp1), (uint32_t *)(sp2), \
	sizeof (*(sp1)) / sizeof (uint32_t))
106
107/* fetch or allocate thread-private data */
108private_t *
109get_private()
110{
111	void *value;
112	private_t *pri = NULL;
113
114	if (thr_getspecific(private_key, &value) == 0)
115		pri = value;
116	if (pri == NULL) {
117		pri = my_malloc(sizeof (*pri), NULL);
118		(void) memset(pri, 0, sizeof (*pri));
119		pri->sys_path = my_malloc(pri->sys_psize = 16, NULL);
120		pri->sys_string = my_malloc(pri->sys_ssize = 32, NULL);
121		if (thr_setspecific(private_key, pri) == ENOMEM)
122			abend("memory allocation failure", NULL);
123	}
124	return (pri);
125}
126
127/* destructor function for thread-private data */
128void
129free_private(void *value)
130{
131	private_t *pri = value;
132
133	if (pri->sys_path)
134		free(pri->sys_path);
135	if (pri->sys_string)
136		free(pri->sys_string);
137	if (pri->exec_string)
138		free(pri->exec_string);
139	if (pri->str_buffer)
140		free(pri->str_buffer);
141	free(pri);
142}
143
144/*
145 * This is called by the main thread (via create_thread())
146 * and is also called from other threads in worker_thread()
147 * while holding truss_lock.  No further locking is required.
148 */
149void
150insert_lwpid(lwpid_t lwpid)
151{
152	int i;
153
154	truss_nlwp++;
155	for (i = 0; i < truss_maxlwp; i++) {
156		if (truss_lwpid[i] == 0)
157			break;
158	}
159	if (i == truss_maxlwp) {
160		/* double the size of the array */
161		truss_lwpid = my_realloc(truss_lwpid,
162		    truss_maxlwp * 2 * sizeof (lwpid_t), NULL);
163		(void) memset(&truss_lwpid[truss_maxlwp], 0,
164		    truss_maxlwp * sizeof (lwpid_t));
165		truss_maxlwp *= 2;
166	}
167	truss_lwpid[i] = lwpid;
168}
169
170/*
171 * This is called from the first worker thread to encounter one of
172 * (leave_hung || interrupt || sigusr1).  It must notify all other
173 * worker threads of the same condition.  truss_lock is held.
174 */
175void
176broadcast_signals(void)
177{
178	static int int_notified = FALSE;
179	static int usr1_notified = FALSE;
180	static int usr2_notified = FALSE;
181	lwpid_t my_id = thr_self();
182	lwpid_t lwpid;
183	int i;
184
185	if (interrupt && !int_notified) {
186		int_notified = TRUE;
187		for (i = 0; i < truss_maxlwp; i++) {
188			if ((lwpid = truss_lwpid[i]) != 0 && lwpid != my_id)
189				(void) thr_kill(lwpid, interrupt);
190		}
191	}
192	if (sigusr1 && !usr1_notified) {
193		usr1_notified = TRUE;
194		for (i = 0; i < truss_maxlwp; i++) {
195			if ((lwpid = truss_lwpid[i]) != 0 && lwpid != my_id)
196				(void) thr_kill(lwpid, SIGUSR1);
197		}
198	}
199	if (leave_hung && !usr2_notified) {
200		usr2_notified = TRUE;
201		for (i = 0; i < truss_maxlwp; i++) {
202			if ((lwpid = truss_lwpid[i]) != 0 && lwpid != my_id)
203				(void) thr_kill(lwpid, SIGUSR2);
204		}
205	}
206}
207
208static struct ps_lwphandle *
209grab_lwp(lwpid_t who)
210{
211	struct ps_lwphandle *Lwp;
212	int gcode;
213
214	if ((Lwp = Lgrab(Proc, who, &gcode)) == NULL) {
215		if (gcode != G_NOPROC) {
216			(void) fprintf(stderr,
217			    "%s: cannot grab LWP %u in process %d,"
218			    " reason: %s\n",
219			    command, who, (int)Pstatus(Proc)->pr_pid,
220			    Lgrab_error(gcode));
221			interrupt = SIGTERM;	/* post an interrupt */
222		}
223	}
224	return (Lwp);
225}
226
227/*
228 * Iteration function called for each initial lwp in the controlled process.
229 */
230/* ARGSUSED */
231int
232create_thread(void *arg, const lwpstatus_t *Lsp)
233{
234	struct ps_lwphandle *new_Lwp;
235	lwpid_t lwpid;
236	int *count = arg;
237
238	if (lwptrace(Pstatus(Proc)->pr_pid, Lsp->pr_lwpid))
239		*count += 1;
240
241	if ((new_Lwp = grab_lwp(Lsp->pr_lwpid)) != NULL) {
242		if (thr_create(NULL, 0, worker_thread, new_Lwp,
243		    THR_BOUND | THR_SUSPENDED, &lwpid) != 0)
244			abend("cannot create lwp to follow child lwp", NULL);
245		insert_lwpid(lwpid);
246	}
247	return (0);
248}
249
250int
251main(int argc, char *argv[])
252{
253	private_t *pri;
254	struct tms tms;
255	struct rlimit rlim;
256	int ofd = -1;
257	int opt;
258	int i;
259	int first;
260	int errflg = FALSE;
261	int badname = FALSE;
262	proc_set_t *grab = NULL;
263	const pstatus_t *Psp;
264	const lwpstatus_t *Lsp;
265	int sharedmem;
266
267	/* a few of these need to be initialized to NULL */
268	Cp = NULL;
269	fcall_tbl = NULL;
270
271	/*
272	 * Make sure fd's 0, 1, and 2 are allocated,
273	 * just in case truss was invoked from init.
274	 */
275	while ((i = open("/dev/null", O_RDWR)) >= 0 && i < 2)
276		;
277	if (i > 2)
278		(void) close(i);
279
280	starttime = times(&tms);	/* for elapsed timing */
281
282	/* this should be per-traced-process */
283	pagesize = sysconf(_SC_PAGESIZE);
284
285	/* command name (e.g., "truss") */
286	if ((command = strrchr(argv[0], '/')) != NULL)
287		command++;
288	else
289		command = argv[0];
290
291	/* set up the initial private data */
292	(void) mutex_init(&truss_lock, USYNC_THREAD, NULL);
293	(void) mutex_init(&count_lock, USYNC_THREAD, NULL);
294	(void) cond_init(&truss_cv, USYNC_THREAD, NULL);
295	if (thr_keycreate(&private_key, free_private) == ENOMEM)
296		abend("memory allocation failure", NULL);
297	pri = get_private();
298
299	Euid = geteuid();
300	Egid = getegid();
301	Ruid = getuid();
302	Rgid = getgid();
303	ancestor = getpid();
304
305	prfillset(&trace);	/* default: trace all system calls */
306	premptyset(&verbose);	/* default: no syscall verbosity */
307	premptyset(&rawout);	/* default: no raw syscall interpretation */
308
309	prfillset(&signals);	/* default: trace all signals */
310
311	prfillset(&faults);	/* default: trace all faults */
312	prdelset(&faults, FLTPAGE);	/* except this one */
313
314	premptyset(&readfd);	/* default: dump no buffers */
315	premptyset(&writefd);
316
317	premptyset(&syshang);	/* default: hang on no system calls */
318	premptyset(&sighang);	/* default: hang on no signals */
319	premptyset(&flthang);	/* default: hang on no faults */
320
321	(void) sigemptyset(&emptyset);	/* for unblocking all signals */
322	(void) sigfillset(&fillset);	/* for blocking all signals */
323
324#define	OPTIONS	"FpfcaeildDEht:T:v:x:s:S:m:M:u:U:r:w:o:"
325	while ((opt = getopt(argc, argv, OPTIONS)) != EOF) {
326		switch (opt) {
327		case 'F':		/* force grabbing (no O_EXCL) */
328			Fflag = PGRAB_FORCE;
329			break;
330		case 'p':		/* grab processes */
331			pflag = TRUE;
332			break;
333		case 'f':		/* follow children */
334			fflag = TRUE;
335			break;
336		case 'c':		/* don't trace, just count */
337			cflag = TRUE;
338			iflag = TRUE;	/* implies no interruptable syscalls */
339			break;
340		case 'a':		/* display argument lists */
341			aflag = TRUE;
342			break;
343		case 'e':		/* display environments */
344			eflag = TRUE;
345			break;
346		case 'i':		/* don't show interruptable syscalls */
347			iflag = TRUE;
348			break;
349		case 'l':		/* show lwp id for each syscall */
350			lflag = TRUE;
351			break;
352		case 'h':		/* debugging: report hash stats */
353			hflag = TRUE;
354			break;
355		case 'd':		/* show time stamps */
356			dflag = TRUE;
357			break;
358		case 'D':		/* show time deltas */
359			Dflag = TRUE;
360			break;
361		case 'E':
362			Eflag = TRUE;	/* show syscall times */
363			break;
364		case 't':		/* system calls to trace */
365			if (syslist(optarg, &trace, &tflag))
366				badname = TRUE;
367			break;
368		case 'T':		/* system calls to hang process */
369			if (syslist(optarg, &syshang, &Tflag))
370				badname = TRUE;
371			break;
372		case 'v':		/* verbose interpretation of syscalls */
373			if (syslist(optarg, &verbose, &vflag))
374				badname = TRUE;
375			break;
376		case 'x':		/* raw interpretation of syscalls */
377			if (syslist(optarg, &rawout, &xflag))
378				badname = TRUE;
379			break;
380		case 's':		/* signals to trace */
381			if (siglist(pri, optarg, &signals, &sflag))
382				badname = TRUE;
383			break;
384		case 'S':		/* signals to hang process */
385			if (siglist(pri, optarg, &sighang, &Sflag))
386				badname = TRUE;
387			break;
388		case 'm':		/* machine faults to trace */
389			if (fltlist(optarg, &faults, &mflag))
390				badname = TRUE;
391			break;
392		case 'M':		/* machine faults to hang process */
393			if (fltlist(optarg, &flthang, &Mflag))
394				badname = TRUE;
395			break;
396		case 'u':		/* user library functions to trace */
397			if (liblist(optarg, 0))
398				badname = TRUE;
399			break;
400		case 'U':		/* user library functions to hang */
401			if (liblist(optarg, 1))
402				badname = TRUE;
403			break;
404		case 'r':		/* show contents of read(fd) */
405			if (fdlist(optarg, &readfd))
406				badname = TRUE;
407			break;
408		case 'w':		/* show contents of write(fd) */
409			if (fdlist(optarg, &writefd))
410				badname = TRUE;
411			break;
412		case 'o':		/* output file for trace */
413			oflag = TRUE;
414			if (ofd >= 0)
415				(void) close(ofd);
416			if ((ofd = xcreat(optarg)) < 0) {
417				perror(optarg);
418				badname = TRUE;
419			}
420			break;
421		default:
422			errflg = TRUE;
423			break;
424		}
425	}
426
427	if (badname)
428		exit(2);
429
430	/* if -a or -e was specified, force tracing of exec() */
431	if (aflag || eflag)
432		praddset(&trace, SYS_execve);
433
434	/*
435	 * Make sure that all system calls, signals, and machine faults
436	 * that hang the process are added to their trace sets.
437	 */
438	prorset(&trace, &syshang);
439	prorset(&signals, &sighang);
440	prorset(&faults, &flthang);
441
442	argc -= optind;
443	argv += optind;
444
445	/* collect the specified process ids */
446	if (pflag && argc > 0) {
447		grab = my_malloc(argc * sizeof (proc_set_t),
448		    "memory for process-ids");
449		while (argc-- > 0)
450			pids(*argv++, grab);
451	}
452
453	if (errflg || (argc <= 0 && ngrab <= 0)) {
454		(void) fprintf(stderr,
455	"usage:\t%s [-fcaeildDEF] [-[tTvx] [!]syscalls] [-[sS] [!]signals]\\\n",
456		    command);
457		(void) fprintf(stderr,
458	"\t[-[mM] [!]faults] [-[rw] [!]fds] [-[uU] [!]libs:[:][!]funcs]\\\n");
459		(void) fprintf(stderr,
460		    "\t[-o outfile]  command | -p pid[/lwps] ...\n");
461		exit(2);
462	}
463
464	if (argc > 0) {		/* create the controlled process */
465		int err;
466		char path[PATH_MAX];
467
468		Proc = Pcreate(argv[0], &argv[0], &err, path, sizeof (path));
469		if (Proc == NULL) {
470			switch (err) {
471			case C_PERM:
472				(void) fprintf(stderr,
473				    "%s: cannot trace set-id or "
474				    "unreadable object file: %s\n",
475				    command, path);
476				break;
477			case C_LP64:
478				(void) fprintf(stderr,
479				    "%s: cannot control _LP64 "
480				    "program: %s\n",
481				    command, path);
482				break;
483			case C_NOEXEC:
484				(void) fprintf(stderr,
485				    "%s: cannot execute program: %s\n",
486				    command, argv[0]);
487				break;
488			case C_NOENT:
489				(void) fprintf(stderr,
490				    "%s: cannot find program: %s\n",
491				    command, argv[0]);
492				break;
493			case C_STRANGE:
494				break;
495			default:
496				(void) fprintf(stderr, "%s: %s\n",
497				    command, Pcreate_error(err));
498				break;
499			}
500			exit(2);
501		}
502		if (fflag || Dynpat != NULL)
503			(void) Psetflags(Proc, PR_FORK);
504		else
505			(void) Punsetflags(Proc, PR_FORK);
506		Psp = Pstatus(Proc);
507		Lsp = &Psp->pr_lwp;
508		pri->lwpstat = Lsp;
509		data_model = Psp->pr_dmodel;
510		created = Psp->pr_pid;
511		make_pname(pri, 0);
512		(void) sysentry(pri, 1);
513		pri->length = 0;
514		if (!cflag && prismember(&trace, SYS_execve)) {
515			pri->exec_string = my_realloc(pri->exec_string,
516			    strlen(pri->sys_string) + 1, NULL);
517			(void) strcpy(pri->exec_pname, pri->pname);
518			(void) strcpy(pri->exec_string, pri->sys_string);
519			pri->length += strlen(pri->sys_string);
520			pri->exec_lwpid = pri->lwpstat->pr_lwpid;
521			pri->sys_leng = 0;
522			*pri->sys_string = '\0';
523		}
524		pri->syslast = Psp->pr_stime;
525		pri->usrlast = Psp->pr_utime;
526	}
527
528	/*
529	 * Now that we have created the victim process,
530	 * give ourself a million file descriptors.
531	 * This is enough to deal with a multithreaded
532	 * victim process that has half a million lwps.
533	 */
534	rlim.rlim_cur = 1024 * 1024;
535	rlim.rlim_max = 1024 * 1024;
536	if ((Euid != 0 || setrlimit(RLIMIT_NOFILE, &rlim) != 0) &&
537	    getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
538		/*
539		 * Failing the million, give ourself as many
540		 * file descriptors as we can get.
541		 */
542		rlim.rlim_cur = rlim.rlim_max;
543		(void) setrlimit(RLIMIT_NOFILE, &rlim);
544	}
545	(void) enable_extended_FILE_stdio(-1, -1);
546
547	setoutput(ofd);		/* establish truss output */
548	istty = isatty(1);
549
550	if (setvbuf(stdout, (char *)NULL, _IOFBF, MYBUFSIZ) != 0)
551		abend("setvbuf() failure", NULL);
552
553	/*
554	 * Set up signal dispositions.
555	 */
556	if (created && (oflag || !istty)) {	/* ignore interrupts */
557		(void) sigset(SIGHUP, SIG_IGN);
558		(void) sigset(SIGINT, SIG_IGN);
559		(void) sigset(SIGQUIT, SIG_IGN);
560	} else {				/* receive interrupts */
561		if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
562			(void) sigset(SIGHUP, intr);
563		if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
564			(void) sigset(SIGINT, intr);
565		if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
566			(void) sigset(SIGQUIT, intr);
567	}
568	(void) sigset(SIGTERM, intr);
569	(void) sigset(SIGUSR1, intr);
570	(void) sigset(SIGUSR2, intr);
571	(void) sigset(SIGPIPE, intr);
572
573	/* don't accumulate zombie children */
574	(void) sigset(SIGCLD, SIG_IGN);
575
576	/* create shared mem space for global mutexes */
577
578	sharedmem = (fflag || Dynpat != NULL || ngrab > 1);
579	gps = (void *)mmap(NULL, sizeof (struct global_psinfo),
580	    PROT_READ|PROT_WRITE,
581	    MAP_ANON | (sharedmem? MAP_SHARED : MAP_PRIVATE),
582	    -1, (off_t)0);
583	if (gps == MAP_FAILED)
584		abend("cannot allocate ", "memory for counts");
585	i = sharedmem? USYNC_PROCESS : USYNC_THREAD;
586	(void) mutex_init(&gps->ps_mutex0, i, NULL);
587	(void) mutex_init(&gps->ps_mutex1, i, NULL);
588	(void) mutex_init(&gps->fork_lock, i, NULL);
589	(void) cond_init(&gps->fork_cv, i, NULL);
590
591
592	/* config tmp file if counting and following */
593	if (fflag && cflag) {
594		char *tmps = tempnam("/var/tmp", "truss");
595		sfd = open(tmps, O_CREAT|O_APPEND|O_EXCL|O_RDWR, 0600);
596		if (sfd == -1)
597			abend("Error creating tmpfile", NULL);
598		if (unlink(tmps) == -1)
599			abend("Error unlinking tmpfile", NULL);
600		free(tmps);
601		tmps = NULL;
602	}
603
604	if (created) {
605		per_proc_init();
606		procadd(created, NULL);
607		show_cred(pri, TRUE, FALSE);
608	} else {		/* grab the specified processes */
609		int gotone = FALSE;
610
611		i = 0;
612		while (i < ngrab) {		/* grab first process */
613			if (grabit(pri, &grab[i++])) {
614				Psp = Pstatus(Proc);
615				Lsp = &Psp->pr_lwp;
616				gotone = TRUE;
617				break;
618			}
619		}
620		if (!gotone)
621			abend(NULL, NULL);
622		per_proc_init();
623		while (i < ngrab) {		/* grab the remainder */
624			proc_set_t *set = &grab[i++];
625
626			(void) mutex_lock(&truss_lock);
627			switch (fork()) {
628			case -1:
629				(void) fprintf(stderr,
630			"%s: cannot fork to control process, pid# %d\n",
631				    command, (int)set->pid);
632				/* FALLTHROUGH */
633			default:
634				(void) mutex_unlock(&truss_lock);
635				continue;	/* parent carries on */
636
637			case 0:			/* child grabs process */
638				(void) mutex_unlock(&truss_lock);
639				Pfree(Proc);
640				descendent = TRUE;
641				if (grabit(pri, set)) {
642					Psp = Pstatus(Proc);
643					Lsp = &Psp->pr_lwp;
644					per_proc_init();
645					break;
646				}
647				exit(2);
648			}
649			break;
650		}
651		free(grab);
652	}
653
654
655	/*
656	 * If running setuid-root, become root for real to avoid
657	 * affecting the per-user limitation on the maximum number
658	 * of processes (one benefit of running setuid-root).
659	 */
660	if (Rgid != Egid)
661		(void) setgid(Egid);
662	if (Ruid != Euid)
663		(void) setuid(Euid);
664
665	if (!created && aflag && prismember(&trace, SYS_execve)) {
666		psargs(pri);
667		Flush();
668	}
669
670	if (created && Pstate(Proc) != PS_STOP)	/* assertion */
671		if (!(interrupt | sigusr1))
672			abend("ASSERT error: process is not stopped", NULL);
673
674	traceeven = trace;		/* trace these system calls */
675
676	/* trace these regardless, even if we don't report results */
677	praddset(&traceeven, SYS_exit);
678	praddset(&traceeven, SYS_lwp_create);
679	praddset(&traceeven, SYS_lwp_exit);
680	praddset(&traceeven, SYS_execve);
681	praddset(&traceeven, SYS_openat);
682	praddset(&traceeven, SYS_openat64);
683	praddset(&traceeven, SYS_open);
684	praddset(&traceeven, SYS_open64);
685	praddset(&traceeven, SYS_vfork);
686	praddset(&traceeven, SYS_forksys);
687
688	/* for I/O buffer dumps, force tracing of read()s and write()s */
689	if (!isemptyset(&readfd)) {
690		praddset(&traceeven, SYS_read);
691		praddset(&traceeven, SYS_readv);
692		praddset(&traceeven, SYS_pread);
693		praddset(&traceeven, SYS_pread64);
694		praddset(&traceeven, SYS_recv);
695		praddset(&traceeven, SYS_recvfrom);
696		praddset(&traceeven, SYS_recvmsg);
697	}
698	if (!isemptyset(&writefd)) {
699		praddset(&traceeven, SYS_write);
700		praddset(&traceeven, SYS_writev);
701		praddset(&traceeven, SYS_pwrite);
702		praddset(&traceeven, SYS_pwrite64);
703		praddset(&traceeven, SYS_send);
704		praddset(&traceeven, SYS_sendto);
705		praddset(&traceeven, SYS_sendmsg);
706	}
707
708	if (cflag || Eflag) {
709		Psetsysentry(Proc, &traceeven);
710	}
711	Psetsysexit(Proc, &traceeven);
712
713	/* special case -- cannot trace sysexit because context is changed */
714	if (prismember(&trace, SYS_context)) {
715		(void) Psysentry(Proc, SYS_context, TRUE);
716		(void) Psysexit(Proc, SYS_context, FALSE);
717		prdelset(&traceeven, SYS_context);
718	}
719
720	/* special case -- trace exec() on entry to get the args */
721	(void) Psysentry(Proc, SYS_execve, TRUE);
722
723	/* special case -- sysexit never reached */
724	(void) Psysentry(Proc, SYS_exit, TRUE);
725	(void) Psysentry(Proc, SYS_lwp_exit, TRUE);
726	(void) Psysexit(Proc, SYS_exit, FALSE);
727	(void) Psysexit(Proc, SYS_lwp_exit, FALSE);
728
729	Psetsignal(Proc, &signals);	/* trace these signals */
730	Psetfault(Proc, &faults);	/* trace these faults */
731
732	/* for function call tracing */
733	if (Dynpat != NULL) {
734		/* trace these regardless, to deal with function calls */
735		(void) Pfault(Proc, FLTBPT, TRUE);
736		(void) Pfault(Proc, FLTTRACE, TRUE);
737
738		/* needed for x86 */
739		(void) Psetflags(Proc, PR_BPTADJ);
740
741		/*
742		 * Find functions and set breakpoints on grabbed process.
743		 * A process stopped on exec() gets its breakpoints set below.
744		 */
745		if ((Lsp->pr_why != PR_SYSENTRY &&
746		    Lsp->pr_why != PR_SYSEXIT) ||
747		    Lsp->pr_what != SYS_execve) {
748			establish_breakpoints();
749			establish_stacks();
750		}
751	}
752
753	/*
754	 * Use asynchronous-stop for multithreaded truss.
755	 * truss runs one lwp for each lwp in the target process.
756	 */
757	(void) Psetflags(Proc, PR_ASYNC);
758
759	/* flush out all tracing flags now. */
760	Psync(Proc);
761
762	/*
763	 * If we grabbed a running process, set it running again.
764	 * Since we are tracing lwp_create() and lwp_exit(), the
765	 * lwps will not change in the process until we create all
766	 * of the truss worker threads.
767	 * We leave a created process stopped so its exec() can be reported.
768	 */
769	first = created? FALSE : TRUE;
770	if (!created &&
771	    ((Pstate(Proc) == PS_STOP && Lsp->pr_why == PR_REQUESTED) ||
772	    (Lsp->pr_flags & PR_DSTOP)))
773		first = FALSE;
774
775	main_thread(first);
776	return (0);
777}
778
779/*
780 * Called from main() and from control() after fork().
781 */
782void
783main_thread(int first)
784{
785	private_t *pri = get_private();
786	struct tms tms;
787	int flags;
788	int retc;
789	int i;
790	int count;
791
792	/*
793	 * Block all signals in the main thread.
794	 * Some worker thread will receive signals.
795	 */
796	(void) thr_sigsetmask(SIG_SETMASK, &fillset, NULL);
797
798	/*
799	 * If we are dealing with a previously hung process,
800	 * arrange not to leave it hung on the same system call.
801	 */
802	primary_lwp = (first && Pstate(Proc) == PS_STOP)?
803	    Pstatus(Proc)->pr_lwp.pr_lwpid : 0;
804
805	/*
806	 * Create worker threads to match the lwps in the target process.
807	 */
808	truss_nlwp = 0;
809	truss_maxlwp = 1;
810	truss_lwpid = my_realloc(truss_lwpid, sizeof (lwpid_t), NULL);
811	truss_lwpid[0] = 0;
812	count = 0;
813	(void) Plwp_iter(Proc, create_thread, &count);
814
815	if (count == 0) {
816		(void) printf("(Warning: no matching active LWPs found, "
817		    "waiting)\n");
818		Flush();
819	}
820
821	/*
822	 * Set all of the truss worker threads running now.
823	 */
824	(void) mutex_lock(&truss_lock);
825	for (i = 0; i < truss_maxlwp; i++) {
826		if (truss_lwpid[i])
827			(void) thr_continue(truss_lwpid[i]);
828	}
829	(void) mutex_unlock(&truss_lock);
830
831	/*
832	 * Wait until all worker threads terminate.
833	 */
834	while (thr_join(0, NULL, NULL) == 0)
835		continue;
836
837	(void) Punsetflags(Proc, PR_ASYNC);
838	Psync(Proc);
839	if (sigusr1)
840		letgo(pri);
841	flags = PRELEASE_CLEAR;
842	if (leave_hung)
843		flags |= PRELEASE_HANG;
844	Prelease(Proc, flags);
845
846	procdel();
847	retc = (leave_hung? 0 : wait4all());
848
849	if (!descendent) {
850		interrupt = 0;	/* another interrupt kills the report */
851		if (cflag) {
852			if (fflag)
853				file_to_parent();
854			report(pri, times(&tms) - starttime);
855		}
856	} else if (cflag && fflag) {
857		child_to_file();
858	}
859
860	exit(retc);	/* exit with exit status of created process, else 0 */
861}
862
863void *
864worker_thread(void *arg)
865{
866	struct ps_lwphandle *Lwp = (struct ps_lwphandle *)arg;
867	const pstatus_t *Psp = Pstatus(Proc);
868	const lwpstatus_t *Lsp = Lstatus(Lwp);
869	struct syscount *scp;
870	lwpid_t who = Lsp->pr_lwpid;
871	int first = (who == primary_lwp);
872	private_t *pri = get_private();
873	int req_flag = 0;
874	int leave_it_hung = FALSE;
875	int reset_traps = FALSE;
876	int gcode;
877	int what;
878	int ow_in_effect = 0;
879	long ow_syscall = 0;
880	long ow_subcode = 0;
881	char *ow_string = NULL;
882	sysset_t full_set;
883	sysset_t running_set;
884	int dotrace = lwptrace(Psp->pr_pid, Lsp->pr_lwpid);
885
886	pri->Lwp = Lwp;
887	pri->lwpstat = Lsp;
888	pri->syslast = Lsp->pr_stime;
889	pri->usrlast = Lsp->pr_utime;
890	make_pname(pri, 0);
891
892	prfillset(&full_set);
893
894	/* we were created with all signals blocked; unblock them */
895	(void) thr_sigsetmask(SIG_SETMASK, &emptyset, NULL);
896
897	/*
898	 * Run this loop until the victim lwp terminates or we receive
899	 * a termination condition (leave_hung | interrupt | sigusr1).
900	 */
901	for (;;) {
902		if (interrupt | sigusr1) {
903			(void) Lstop(Lwp, MILLISEC);
904			if (Lstate(Lwp) == PS_RUN)
905				break;
906		}
907		if (Lstate(Lwp) == PS_RUN) {
908			/* millisecond timeout is for sleeping syscalls */
909			uint_t tout = (iflag || req_flag)? 0 : MILLISEC;
910
911			/*
912			 * If we are to leave this lwp stopped in sympathy
913			 * with another lwp that has been left hung, or if
914			 * we have been interrupted or instructed to release
915			 * our victim process, and this lwp is stopped but
916			 * not on an event of interest to /proc, then just
917			 * leave it in that state.
918			 */
919			if ((leave_hung | interrupt | sigusr1) &&
920			    (Lsp->pr_flags & (PR_STOPPED|PR_ISTOP))
921			    == PR_STOPPED)
922				break;
923
924			(void) Lwait(Lwp, tout);
925			if (Lstate(Lwp) == PS_RUN &&
926			    tout != 0 && !(interrupt | sigusr1)) {
927				(void) mutex_lock(&truss_lock);
928				if ((Lsp->pr_flags & PR_STOPPED) &&
929				    Lsp->pr_why == PR_JOBCONTROL)
930					req_flag = jobcontrol(pri, dotrace);
931				else
932					req_flag = requested(pri, req_flag,
933					    dotrace);
934				(void) mutex_unlock(&truss_lock);
935			}
936			continue;
937		}
938		data_model = Psp->pr_dmodel;
939		if (Lstate(Lwp) == PS_UNDEAD)
940			break;
941		if (Lstate(Lwp) == PS_LOST) {	/* we lost control */
942			/*
943			 * After exec(), only one LWP remains in the process.
944			 * /proc makes the thread following that LWP receive
945			 * EAGAIN (PS_LOST) if the program being exec()ed
946			 * is a set-id program.  Every other controlling
947			 * thread receives ENOENT (because its LWP vanished).
948			 * We are the controlling thread for the exec()ing LWP.
949			 * We must wait until all of our siblings terminate
950			 * before attempting to reopen the process.
951			 */
952			(void) mutex_lock(&truss_lock);
953			while (truss_nlwp > 1)
954				(void) cond_wait(&truss_cv, &truss_lock);
955			if (Preopen(Proc) == 0) { /* we got control back */
956				/*
957				 * We have to free and re-grab the LWP.
958				 * The process is guaranteed to be at exit
959				 * from exec() or execve() and have only
960				 * one LWP, namely this one, and the LWP
961				 * is guaranteed to have lwpid == 1.
962				 * This "cannot fail".
963				 */
964				who = 1;
965				Lfree(Lwp);
966				pri->Lwp = Lwp =
967				    Lgrab(Proc, who, &gcode);
968				if (Lwp == NULL)
969					abend("Lgrab error: ",
970					    Lgrab_error(gcode));
971				pri->lwpstat = Lsp = Lstatus(Lwp);
972				(void) mutex_unlock(&truss_lock);
973				continue;
974			}
975
976			/* we really lost it */
977			if (pri->exec_string && *pri->exec_string) {
978				if (pri->exec_pname[0] != '\0')
979					(void) fputs(pri->exec_pname, stdout);
980				timestamp(pri);
981				(void) fputs(pri->exec_string, stdout);
982				(void) fputc('\n', stdout);
983			} else if (pri->length) {
984				(void) fputc('\n', stdout);
985			}
986			if (pri->sys_valid)
987				(void) printf(
988			"%s\t*** cannot trace across exec() of %s ***\n",
989				    pri->pname, pri->sys_path);
990			else
991				(void) printf(
992				"%s\t*** lost control of process ***\n",
993				    pri->pname);
994			pri->length = 0;
995			Flush();
996			(void) mutex_unlock(&truss_lock);
997			break;
998		}
999		if (Lstate(Lwp) != PS_STOP) {
1000			(void) fprintf(stderr,
1001			    "%s: state = %d\n", command, Lstate(Lwp));
1002			abend(pri->pname, "uncaught status of subject lwp");
1003		}
1004
1005		make_pname(pri, 0);
1006
1007		(void) mutex_lock(&truss_lock);
1008
1009		what = Lsp->pr_what;
1010		req_flag = 0;
1011
1012		switch (Lsp->pr_why) {
1013		case PR_REQUESTED:
1014			break;
1015		case PR_SIGNALLED:
1016			req_flag = signalled(pri, req_flag, dotrace);
1017			if (Sflag && !first && prismember(&sighang, what))
1018				leave_it_hung = TRUE;
1019			break;
1020		case PR_FAULTED:
1021			if (what == FLTBPT) {
1022				int rval;
1023
1024				(void) Pstop(Proc, 0);
1025				rval = function_trace(pri, first, 0, dotrace);
1026				if (rval == 1)
1027					leave_it_hung = TRUE;
1028				if (rval >= 0)
1029					break;
1030			}
1031			if (faulted(pri, dotrace) &&
1032			    Mflag && !first && prismember(&flthang, what))
1033				leave_it_hung = TRUE;
1034			break;
1035		case PR_JOBCONTROL:	/* can't happen except first time */
1036			req_flag = jobcontrol(pri, dotrace);
1037			break;
1038		case PR_SYSENTRY:
1039			/* protect ourself from operating system error */
1040			if (what <= 0 || what > PRMAXSYS)
1041				what = PRMAXSYS;
1042			pri->length = 0;
			/*
			 * ow_in_effect checks to see whether or not we
			 * are attempting to quantify the time spent in
			 * a one way system call.  This is necessary as
			 * some system calls never return, yet it is desirable
			 * to determine how much time the traced process
			 * spends in these calls.  To do this, a one way
			 * flag is set on SYSENTRY when the call is received.
			 * After this, the call mask for the SYSENTRY events
			 * is filled so that the traced process will stop
			 * on the entry to the very next system call.
			 * This appears to be the best way to determine
			 * system time elapsed between a one way system call.
			 * Once the next call occurs, values that have been
			 * stashed are used to record the correct syscall
			 * and time, and the SYSENTRY event mask is restored
			 * so that the traced process may continue.
			 */
1061			if (dotrace && ow_in_effect) {
1062				if (cflag) {
1063					(void) mutex_lock(&count_lock);
1064					scp = Cp->syscount[ow_syscall];
1065					if (ow_subcode != -1)
1066						scp += ow_subcode;
1067					scp->count++;
1068					accumulate(&scp->stime,
1069					    &Lsp->pr_stime, &pri->syslast);
1070					accumulate(&Cp->usrtotal,
1071					    &Lsp->pr_utime, &pri->usrlast);
1072					pri->syslast = Lsp->pr_stime;
1073					pri->usrlast = Lsp->pr_utime;
1074					(void) mutex_unlock(&count_lock);
1075				} else if (Eflag) {
1076					putpname(pri);
1077					timestamp(pri);
1078					(void) printf("%s\n", ow_string);
1079					free(ow_string);
1080					ow_string = NULL;
1081					pri->syslast = Lsp->pr_stime;
1082				}
1083				ow_in_effect = 0;
1084				Psetsysentry(Proc, &running_set);
1085			}
1086
1087			/*
1088			 * Special cases.  Most syscalls are traced on exit.
1089			 */
1090			switch (what) {
1091			case SYS_exit:			/* exit() */
1092			case SYS_lwp_exit:		/* lwp_exit() */
1093			case SYS_context:		/* [get|set]context() */
1094				if (dotrace && cflag &&
1095				    prismember(&trace, what)) {
1096					ow_in_effect = 1;
1097					ow_syscall = what;
1098					ow_subcode = getsubcode(pri);
1099					pri->syslast = Lsp->pr_stime;
1100					running_set =
1101					    (Pstatus(Proc))->pr_sysentry;
1102					Psetsysentry(Proc, &full_set);
1103				} else if (dotrace && Eflag &&
1104				    prismember(&trace, what)) {
1105					(void) sysentry(pri, dotrace);
1106					ow_in_effect = 1;
1107					ow_string = my_malloc(
1108					    strlen(pri->sys_string) + 1, NULL);
1109					(void) strcpy(ow_string,
1110					    pri->sys_string);
1111					running_set =
1112					    (Pstatus(Proc))->pr_sysentry;
1113					Psetsysentry(Proc, &full_set);
1114					pri->syslast = Lsp->pr_stime;
1115				} else if (dotrace &&
1116				    prismember(&trace, what)) {
1117					(void) sysentry(pri, dotrace);
1118					putpname(pri);
1119					timestamp(pri);
1120					pri->length +=
1121					    printf("%s\n", pri->sys_string);
1122					Flush();
1123				}
1124				pri->sys_leng = 0;
1125				*pri->sys_string = '\0';
1126
1127				if (what == SYS_exit)
1128					exit_called = TRUE;
1129				break;
1130			case SYS_execve:
1131				show_cred(pri, FALSE, TRUE);
1132				(void) sysentry(pri, dotrace);
1133				if (dotrace && !cflag &&
1134				    prismember(&trace, what)) {
1135					pri->exec_string =
1136					    my_realloc(pri->exec_string,
1137					    strlen(pri->sys_string) + 1,
1138					    NULL);
1139					(void) strcpy(pri->exec_pname,
1140					    pri->pname);
1141					(void) strcpy(pri->exec_string,
1142					    pri->sys_string);
1143					pri->length += strlen(pri->sys_string);
1144					pri->exec_lwpid = Lsp->pr_lwpid;
1145				}
1146				pri->sys_leng = 0;
1147				*pri->sys_string = '\0';
1148				break;
1149			default:
1150				if (dotrace && (cflag || Eflag) &&
1151				    prismember(&trace, what)) {
1152					pri->syslast = Lsp->pr_stime;
1153				}
1154				break;
1155			}
1156			if (dotrace && Tflag && !first &&
1157			    (prismember(&syshang, what) ||
1158			    (exit_called && prismember(&syshang, SYS_exit))))
1159				leave_it_hung = TRUE;
1160			break;
1161		case PR_SYSEXIT:
1162			/* check for write open of a /proc file */
1163			if (what == SYS_openat || what == SYS_openat64 ||
1164			    what == SYS_open || what == SYS_open64) {
1165				int readonly;
1166
1167				(void) sysentry(pri, dotrace);
1168				pri->Errno = Lsp->pr_errno;
1169				pri->ErrPriv = Lsp->pr_errpriv;
1170				readonly =
1171				    ((what == SYS_openat ||
1172				    what == SYS_openat64) &&
1173				    pri->sys_nargs > 2 &&
1174				    (pri->sys_args[2]&0x3) == O_RDONLY) ||
1175				    ((what == SYS_open ||
1176				    what == SYS_open64) &&
1177				    pri->sys_nargs > 1 &&
1178				    (pri->sys_args[1]&0x3) == O_RDONLY);
1179				if ((pri->Errno == 0 || pri->Errno == EBUSY) &&
1180				    pri->sys_valid && !readonly) {
1181					int rv = checkproc(pri);
1182					if (rv == 1 && Fflag != PGRAB_FORCE) {
1183						/*
1184						 * The process opened itself
1185						 * and no -F flag was specified.
1186						 * Just print the open() call
1187						 * and let go of the process.
1188						 */
1189						if (dotrace && !cflag &&
1190						    prismember(&trace, what)) {
1191							putpname(pri);
1192							timestamp(pri);
1193							(void) printf("%s\n",
1194							    pri->sys_string);
1195							Flush();
1196						}
1197						sigusr1 = TRUE;
1198						(void) mutex_unlock(
1199						    &truss_lock);
1200						goto out;
1201					}
1202					if (rv == 2) {
1203						/*
1204						 * Process opened someone else.
1205						 * The open is being reissued.
1206						 * Don't report this one.
1207						 */
1208						pri->sys_leng = 0;
1209						*pri->sys_string = '\0';
1210						pri->sys_nargs = 0;
1211						break;
1212					}
1213				}
1214			}
1215			if (what == SYS_execve && pri->Errno == 0) {
1216				/*
1217				 * Refresh the data model on exec() in case it
1218				 * is different from the parent.  Lwait()
1219				 * doesn't update process-wide status, so we
1220				 * have to explicitly call Pstopstatus() to get
1221				 * the new state.
1222				 */
1223				(void) Pstopstatus(Proc, PCNULL, 0);
1224				data_model = Psp->pr_dmodel;
1225			}
1226			if (sysexit(pri, dotrace))
1227				Flush();
1228			if (what == SYS_lwp_create && pri->Rval1 != 0) {
1229				struct ps_lwphandle *new_Lwp;
1230				lwpid_t lwpid;
1231
1232				if ((new_Lwp = grab_lwp(pri->Rval1)) != NULL) {
1233					(void) thr_sigsetmask(SIG_SETMASK,
1234					    &fillset, NULL);
1235					if (thr_create(NULL, 0, worker_thread,
1236					    new_Lwp, THR_BOUND | THR_SUSPENDED,
1237					    &lwpid) != 0)
1238						abend("cannot create lwp ",
1239						    "to follow child lwp");
1240					insert_lwpid(lwpid);
1241					(void) thr_continue(lwpid);
1242					(void) thr_sigsetmask(SIG_SETMASK,
1243					    &emptyset, NULL);
1244				}
1245			}
1246			pri->sys_nargs = 0;
1247			if (dotrace && Tflag && !first &&
1248			    prismember(&syshang, what))
1249				leave_it_hung = TRUE;
1250			if (what == SYS_execve && pri->Errno == 0) {
1251				is_vfork_child = FALSE;
1252				reset_breakpoints();
1253				/*
1254				 * exec() resets the calling LWP's lwpid to 1.
1255				 * If the LWP has changed its lwpid, then
1256				 * we have to free and re-grab the LWP
1257				 * in order to keep libproc consistent.
1258				 * This "cannot fail".
1259				 */
1260				if (who != Lsp->pr_lwpid) {
1261					/*
1262					 * We must wait for all of our
1263					 * siblings to terminate.
1264					 */
1265					while (truss_nlwp > 1)
1266						(void) cond_wait(&truss_cv,
1267						    &truss_lock);
1268					who = Lsp->pr_lwpid;
1269					Lfree(Lwp);
1270					pri->Lwp = Lwp =
1271					    Lgrab(Proc, who, &gcode);
1272					if (Lwp == NULL)
1273						abend("Lgrab error: ",
1274						    Lgrab_error(gcode));
1275					pri->lwpstat = Lsp = Lstatus(Lwp);
1276				}
1277			}
1278			break;
1279		default:
1280			req_flag = 0;
1281			(void) fprintf(stderr,
1282			    "unknown reason for stopping: %d/%d\n",
1283			    Lsp->pr_why, what);
1284			abend(NULL, NULL);
1285		}
1286
1287		if (pri->child) {	/* controlled process fork()ed */
1288			if (fflag || Dynpat != NULL)  {
1289				if (Lsp->pr_why == PR_SYSEXIT &&
1290				    (Lsp->pr_what == SYS_vfork ||
1291				    (Lsp->pr_what == SYS_forksys &&
1292				    Lsp->pr_sysarg[0] == 2))) {
1293					is_vfork_child = TRUE;
1294					(void) Pstop(Proc, 0);
1295				}
1296				if (control(pri, pri->child)) {
1297					(void) mutex_unlock(&truss_lock);
1298					pri->child = 0;
1299					if (!fflag) {
1300						/*
1301						 * If this is vfork(), then
1302						 * this clears the breakpoints
1303						 * in the parent's address space
1304						 * as well as in the child's.
1305						 */
1306						clear_breakpoints();
1307						Prelease(Proc, PRELEASE_CLEAR);
1308						_exit(0);
1309					}
1310					main_thread(FALSE);
1311					/* NOTREACHED */
1312				}
1313
1314				/*
1315				 * Here, we are still the parent truss.
1316				 * If the child messes with the breakpoints and
1317				 * this is vfork(), we have to set them again.
1318				 */
1319				if (Dynpat != NULL && is_vfork_child && !fflag)
1320					reset_traps = TRUE;
1321				is_vfork_child = FALSE;
1322			}
1323			pri->child = 0;
1324		}
1325
1326		if (leave_it_hung) {
1327			(void) mutex_unlock(&truss_lock);
1328			break;
1329		}
1330
1331		if (reset_traps) {
1332			/*
1333			 * To recover from vfork, we must catch the lwp
1334			 * that issued the vfork() when it returns to user
1335			 * level, with all other lwps remaining stopped.
1336			 * For this purpose, we have directed all lwps to
1337			 * stop and we now set the vfork()ing lwp running
1338			 * with the PRSTEP flag.  We expect to capture it
1339			 * when it stops again showing PR_FAULTED/FLTTRACE.
1340			 * We are holding truss_lock, so no other threads
1341			 * in truss will set any other lwps in the victim
1342			 * process running.
1343			 */
1344			reset_traps = FALSE;
1345			(void) Lsetrun(Lwp, 0, PRSTEP);
1346			do {
1347				(void) Lwait(Lwp, 0);
1348			} while (Lstate(Lwp) == PS_RUN);
1349			if (Lstate(Lwp) == PS_STOP &&
1350			    Lsp->pr_why == PR_FAULTED &&
1351			    Lsp->pr_what == FLTTRACE) {
1352				reestablish_traps();
1353				(void) Lsetrun(Lwp, 0, PRCFAULT|PRSTOP);
1354			} else {
1355				(void) printf("%s\t*** Expected PR_FAULTED/"
1356				    "FLTTRACE stop following vfork()\n",
1357				    pri->pname);
1358			}
1359		}
1360
1361		if (Lstate(Lwp) == PS_STOP) {
1362			int flags = 0;
1363
1364			if (interrupt | sigusr1) {
1365				(void) mutex_unlock(&truss_lock);
1366				break;
1367			}
			/*
			 * If we must leave this lwp hung in sympathy with
			 * another lwp that is being left hung on purpose,
			 * then push the state onward toward PR_REQUESTED.
			 */
1373			if (leave_hung) {
1374				if (Lsp->pr_why == PR_REQUESTED) {
1375					(void) mutex_unlock(&truss_lock);
1376					break;
1377				}
1378				flags |= PRSTOP;
1379			}
1380			if (Lsetrun(Lwp, 0, flags) != 0 &&
1381			    Lstate(Lwp) != PS_LOST &&
1382			    Lstate(Lwp) != PS_UNDEAD) {
1383				(void) mutex_unlock(&truss_lock);
1384				perror("Lsetrun");
1385				abend("cannot start subject lwp", NULL);
1386				/* NOTREACHED */
1387			}
1388		}
1389		first = FALSE;
1390
1391		(void) mutex_unlock(&truss_lock);
1392	}
1393
1394out:
1395	/* block all signals in preparation for exiting */
1396	(void) thr_sigsetmask(SIG_SETMASK, &fillset, NULL);
1397
1398	if (Lstate(Lwp) == PS_UNDEAD || Lstate(Lwp) == PS_LOST)
1399		(void) mutex_lock(&truss_lock);
1400	else {
1401		(void) Lstop(Lwp, MILLISEC);
1402		(void) mutex_lock(&truss_lock);
1403		if (Lstate(Lwp) == PS_STOP &&
1404		    Lsp->pr_why == PR_FAULTED &&
1405		    Lsp->pr_what == FLTBPT)
1406			(void) function_trace(pri, 0, 1, dotrace);
1407	}
1408
1409	if (dotrace && ow_in_effect) {
1410		if (cflag) {
1411			(void) mutex_lock(&count_lock);
1412			scp = Cp->syscount[ow_syscall];
1413			if (ow_subcode != -1)
1414				scp += ow_subcode;
1415			scp->count++;
1416			accumulate(&scp->stime,
1417			    &Lsp->pr_stime, &pri->syslast);
1418			accumulate(&Cp->usrtotal,
1419			    &Lsp->pr_utime, &pri->usrlast);
1420			pri->syslast = Lsp->pr_stime;
1421			pri->usrlast = Lsp->pr_utime;
1422			(void) mutex_unlock(&count_lock);
1423		} else if (Eflag) {
1424			putpname(pri);
1425			timestamp(pri);
1426			(void) printf("%s\n", ow_string);
1427			free(ow_string);
1428			ow_string = NULL;
1429			pri->syslast = Lsp->pr_stime;
1430		}
1431		ow_in_effect = 0;
1432		Psetsysentry(Proc, &running_set);
1433	}
1434
1435	if (Lstate(Lwp) == PS_UNDEAD || Lstate(Lwp) == PS_LOST) {
1436		/*
1437		 * The victim thread has exited or we lost control of
1438		 * the process.  Remove ourself from the list of all
1439		 * truss threads and notify everyone waiting for this.
1440		 */
1441		lwpid_t my_id = thr_self();
1442		int i;
1443
1444		for (i = 0; i < truss_maxlwp; i++) {
1445			if (truss_lwpid[i] == my_id) {
1446				truss_lwpid[i] = 0;
1447				break;
1448			}
1449		}
1450		if (--truss_nlwp != 0) {
1451			(void) cond_broadcast(&truss_cv);
1452		} else {
1453			/*
1454			 * The last truss worker thread is terminating.
1455			 * The address space is gone (UNDEAD) or is
1456			 * inaccessible (LOST) so we cannot clear the
1457			 * breakpoints.  Just report the htable stats.
1458			 */
1459			report_htable_stats();
1460		}
1461	} else {
1462		/*
1463		 * The victim thread is not a zombie thread, and we have not
1464		 * lost control of the process.  We must have gotten here due
1465		 * to (leave_hung || leave_it_hung || interrupt || sigusr1).
1466		 * In these cases, we must carefully uninstrument the process
1467		 * and either set it running or leave it stopped and abandoned.
1468		 */
1469		static int nstopped = 0;
1470		static int cleared = 0;
1471
1472		if (leave_it_hung)
1473			leave_hung = TRUE;
1474		if ((leave_hung | interrupt | sigusr1) == 0)
1475			abend("(leave_hung | interrupt | sigusr1) == 0", NULL);
1476
1477		/*
1478		 * The first truss thread through here needs to instruct all
1479		 * application threads to stop -- they're not necessarily
1480		 * going to stop on their own.
1481		 */
1482		if (nstopped++ == 0)
1483			(void) Pdstop(Proc);
1484
1485		/*
1486		 * Notify all other worker threads about the reason
1487		 * for being here (leave_hung || interrupt || sigusr1).
1488		 */
1489		broadcast_signals();
1490
		/*
		 * Once the last thread has reached this point, then and
		 * only then is it safe to remove breakpoints and other
		 * instrumentation.  Since breakpoints are executed without
		 * truss_lock held, a monitor thread can't exit until all
		 * breakpoints have been removed, and we can't be sure the
		 * procedure to execute a breakpoint won't temporarily
		 * reinstall a breakpoint.  Accordingly, we need to wait
		 * until all threads are in a known state.
		 */
1501		while (nstopped != truss_nlwp)
1502			(void) cond_wait(&truss_cv, &truss_lock);
1503
1504		/*
1505		 * All truss threads have reached this point.
1506		 * One of them clears the breakpoints and
1507		 * wakes up everybody else to finish up.
1508		 */
1509		if (cleared++ == 0) {
1510			/*
1511			 * All threads should already be stopped,
1512			 * but just to be safe...
1513			 */
1514			(void) Pstop(Proc, MILLISEC);
1515			clear_breakpoints();
1516			(void) Psysexit(Proc, SYS_vfork, FALSE);
1517			(void) Psysexit(Proc, SYS_forksys, FALSE);
1518			(void) Punsetflags(Proc, PR_FORK);
1519			Psync(Proc);
1520			fflag = 0;
1521			(void) cond_broadcast(&truss_cv);
1522		}
1523
1524		if (!leave_hung && Lstate(Lwp) == PS_STOP)
1525			(void) Lsetrun(Lwp, 0, 0);
1526	}
1527
1528	(void) Lfree(Lwp);
1529	(void) mutex_unlock(&truss_lock);
1530	return (NULL);
1531}
1532
1533/*
1534 * Give a base date for time stamps, adjusted to the
1535 * stop time of the selected (first or created) process.
1536 */
1537void
1538setup_basetime(hrtime_t basehrtime, struct timeval *basedate)
1539{
1540	const pstatus_t *Psp = Pstatus(Proc);
1541	(void) mutex_lock(&count_lock);
1542	Cp->basetime = Psp->pr_lwp.pr_tstamp;
1543	(void) mutex_unlock(&count_lock);
1544
1545	if ((dflag|Dflag) && !cflag) {
1546		const struct tm *ptm;
1547		const char *ptime;
1548		const char *pdst;
1549		hrtime_t delta = basehrtime -
1550		    ((hrtime_t)Cp->basetime.tv_sec * NANOSEC +
1551		    Cp->basetime.tv_nsec);
1552
1553		if (delta > 0) {
1554			basedate->tv_sec -= (time_t)(delta / NANOSEC);
1555			basedate->tv_usec -= (delta % NANOSEC) / 1000;
1556			if (basedate->tv_usec < 0) {
1557				basedate->tv_sec--;
1558				basedate->tv_usec += MICROSEC;
1559			}
1560		}
1561		ptm = localtime(&basedate->tv_sec);
1562		ptime = asctime(ptm);
1563		if ((pdst = tzname[ptm->tm_isdst ? 1 : 0]) == NULL)
1564			pdst = "???";
1565		if (dflag) {
1566			(void) printf(
1567			    "Base time stamp:  %ld.%4.4ld  [ %.20s%s %.4s ]\n",
1568			    basedate->tv_sec, basedate->tv_usec / 100,
1569			    ptime, pdst, ptime + 20);
1570			Flush();
1571		}
1572	}
1573}
1574
1575/*
1576 * Performs per-process initializations. If truss is following a victim
1577 * process it will fork additional truss processes to follow new processes
1578 * created.  Here is where each new truss process gets its per-process data
1579 * initialized.
1580 */
1581
1582void
1583per_proc_init()
1584{
1585	void *pmem;
1586	struct timeval basedate;
1587	hrtime_t basehrtime;
1588	struct syscount *scp;
1589	int i;
1590	timestruc_t c_basetime;
1591
1592	/* Make sure we only configure the basetime for the first truss proc */
1593
1594	if (Cp == NULL) {
1595		pmem = my_malloc(sizeof (struct counts) + maxsyscalls() *
1596		    sizeof (struct syscount), NULL);
1597		Cp = (struct counts *)pmem;
1598		basehrtime = gethrtime();
1599		(void) gettimeofday(&basedate, NULL);
1600		setup_basetime(basehrtime, &basedate);
1601	}
1602
1603	c_basetime = Cp->basetime;
1604
1605	(void) memset(Cp, 0, sizeof (struct counts) + maxsyscalls() *
1606	    sizeof (struct syscount));
1607
1608	Cp->basetime = c_basetime;
1609
1610	if (fcall_tbl != NULL)
1611		destroy_hash(fcall_tbl);
1612	fcall_tbl = init_hash(4096);
1613
1614	(void) mutex_lock(&count_lock);
1615	scp = (struct syscount *)(Cp + 1);
1616	for (i = 0; i <= PRMAXSYS; i++) {
1617		Cp->syscount[i] = scp;
1618		scp += nsubcodes(i);
1619	}
1620	(void) mutex_unlock(&count_lock);
1621}
1622
1623
1624/*
1625 * Writes child state to a tempfile where it can be read and
1626 * accumulated by the parent process. The file descriptor is shared
1627 * among the processes.  Ordering of writes does not matter, it is, however,
1628 * necessary to ensure that all writes are atomic.
1629 */
1630
1631void
1632child_to_file()
1633{
1634	hiter_t *itr;
1635	hentry_t *ntry;
1636	hdntry_t fentry;
1637	char *s = NULL;
1638	char *t = NULL;
1639	unsigned char *buf = NULL;
1640	size_t bufsz = 0;
1641	size_t i = 0;
1642	size_t j = 0;
1643
1644	/* ensure that we are in fact a child process */
1645	if (!descendent)
1646		return;
1647
1648	/* enumerate fcall_tbl (tbl locked until freed) */
1649	if (Dynpat != NULL) {
1650		itr = iterate_hash(fcall_tbl);
1651
1652		ntry = iter_next(itr);
1653		while (ntry != NULL) {
1654			fentry.type = HD_hashntry;
1655			fentry.count = ntry->count;
1656			s = ntry->key;
1657			t = ntry->lib;
1658			i = strlen(s) + 1;
1659			j = strlen(t) + 1;
1660			fentry.sz_key = i;
1661			fentry.sz_lib = j;
1662			if (i + sizeof (fentry) > bufsz) {
1663				buf = my_realloc(buf, i + j + sizeof (fentry),
1664				    NULL);
1665				bufsz = i + j + sizeof (fentry);
1666			}
1667			(void) memcpy(buf, &fentry, sizeof (fentry));
1668			(void) strlcpy((char *)(buf + sizeof (fentry)), t, j);
1669			(void) strlcpy((char *)(buf + sizeof (fentry) + j),
1670			    s, i);
1671			if (write(sfd, buf, sizeof (fentry) + i + j) == -1)
1672				abend("Error writing to tmp file", NULL);
1673			ntry = iter_next(itr);
1674		}
1675		iter_free(itr);
1676	}
1677
1678	/* Now write the count/syscount structs down */
1679	bufsz = sizeof (fentry) + (sizeof (struct counts) + maxsyscalls() *
1680	    sizeof (struct syscount));
1681	buf = my_realloc(buf, bufsz, NULL);
1682	fentry.type = HD_cts_syscts;
1683	fentry.count = 0;	/* undefined, really */
1684	fentry.sz_key = bufsz - sizeof (fentry);
1685	fentry.sz_lib = 0;	/* also undefined */
1686	(void) memcpy(buf, &fentry, sizeof (fentry));
1687	(void) memcpy((char *)(buf + sizeof (fentry)), Cp,
1688	    bufsz - sizeof (fentry));
1689	if (write(sfd, buf, bufsz) == -1)
1690		abend("Error writing cts/syscts to tmpfile", NULL);
1691
1692	free(buf);
1693}
1694
1695/*
1696 * The following reads entries from the tempfile back to the parent
1697 * so that information can be collected and summed for overall statistics.
1698 * This reads records out of the tempfile.  If they are hash table entries,
1699 * the record is merged with the hash table kept by the parent process.
1700 * If the information is a struct count/struct syscount pair, they are
1701 * copied and added into the count/syscount array kept by the parent.
1702 */
1703
1704void
1705file_to_parent()
1706{
1707	hdntry_t ntry;
1708	char *s = NULL;
1709	char *t = NULL;
1710	size_t c_offset = 0;
1711	size_t filesz;
1712	size_t t_strsz = 0;
1713	size_t s_strsz = 0;
1714	struct stat fsi;
1715
1716	if (descendent)
1717		return;
1718
1719	if (fstat(sfd, &fsi) == -1)
1720		abend("Error stat-ing tempfile", NULL);
1721	filesz = fsi.st_size;
1722
1723	while (c_offset < filesz) {
1724		/* first get hdntry */
1725		if (pread(sfd, &ntry, sizeof (hdntry_t), c_offset) !=
1726		    sizeof (hdntry_t))
1727			abend("Unable to perform full read of hdntry", NULL);
1728		c_offset += sizeof (hdntry_t);
1729
1730		switch (ntry.type) {
1731		case HD_hashntry:
1732
1733			/* first get lib string */
1734			if (ntry.sz_lib > t_strsz) {
1735				t = my_realloc(t, ntry.sz_lib, NULL);
1736				t_strsz = ntry.sz_lib;
1737			}
1738
1739			(void) memset(t, 0, t_strsz);
1740
1741			/* now actually get the string */
1742			if (pread(sfd, t, ntry.sz_lib, c_offset) != ntry.sz_lib)
1743				abend("Unable to perform full read of lib str",
1744				    NULL);
1745			c_offset += ntry.sz_lib;
1746
1747			/* now get key string */
1748
1749			if (ntry.sz_key > s_strsz) {
1750				s = my_realloc(s, ntry.sz_key, NULL);
1751				s_strsz = ntry.sz_key;
1752			}
1753			(void) memset(s, 0, s_strsz);
1754			if (pread(sfd, s, ntry.sz_key, c_offset) != ntry.sz_key)
1755				abend("Unable to perform full read of key str",
1756				    NULL);
1757			c_offset += ntry.sz_key;
1758
1759			add_fcall(fcall_tbl, t, s, ntry.count);
1760			break;
1761
1762		case HD_cts_syscts:
1763		{
1764			struct counts *ncp;
1765			size_t bfsz = sizeof (struct counts) + maxsyscalls()
1766			    * sizeof (struct syscount);
1767			int i;
1768			struct syscount *sscp;
1769
1770			if (ntry.sz_key != bfsz)
1771				abend("cts/syscts size does not sanity check",
1772				    NULL);
1773			ncp = my_malloc(ntry.sz_key, NULL);
1774
1775			if (pread(sfd, ncp, ntry.sz_key, c_offset) !=
1776			    ntry.sz_key)
1777				abend("Unable to perform full read of cts",
1778				    NULL);
1779			c_offset += ntry.sz_key;
1780
1781			sscp = (struct syscount *)(ncp + 1);
1782
1783			(void) mutex_lock(&count_lock);
1784
1785			Cp->usrtotal.tv_sec += ncp->usrtotal.tv_sec;
1786			Cp->usrtotal.tv_nsec += ncp->usrtotal.tv_nsec;
1787			if (Cp->usrtotal.tv_nsec >= NANOSEC) {
1788				Cp->usrtotal.tv_nsec -= NANOSEC;
1789				Cp->usrtotal.tv_sec++;
1790			}
1791			for (i = 0; i <= PRMAXSYS; i++) {
1792				ncp->syscount[i] = sscp;
1793				sscp += nsubcodes(i);
1794			}
1795
1796			for (i = 0; i <= PRMAXFAULT; i++) {
1797				Cp->fltcount[i] += ncp->fltcount[i];
1798			}
1799
1800			for (i = 0; i <= PRMAXSIG; i++) {
1801				Cp->sigcount[i] += ncp->sigcount[i];
1802			}
1803
1804			for (i = 0; i <= PRMAXSYS; i++) {
1805				struct syscount *scp = Cp->syscount[i];
1806				struct syscount *nscp = ncp->syscount[i];
1807				int n = nsubcodes(i);
1808				int subcode;
1809
1810				for (subcode = 0; subcode < n; subcode++,
1811				    scp++, nscp++) {
1812					scp->count += nscp->count;
1813					scp->error += nscp->error;
1814					scp->stime.tv_sec += nscp->stime.tv_sec;
1815					scp->stime.tv_nsec +=
1816					    nscp->stime.tv_nsec;
1817					if (scp->stime.tv_nsec >= NANOSEC) {
1818						scp->stime.tv_nsec -= NANOSEC;
1819						scp->stime.tv_sec++;
1820					}
1821				}
1822			}
1823			(void) mutex_unlock(&count_lock);
1824			free(ncp);
1825			break;
1826		}
1827		default:
1828
1829			abend("Unknown file entry type encountered", NULL);
1830			break;
1831
1832		}
1833
1834		if (fstat(sfd, &fsi) == -1)
1835			abend("Error stat-ing tempfile", NULL);
1836		filesz = fsi.st_size;
1837	}
1838	if (s != NULL)
1839		free(s);
1840	if (t != NULL)
1841		free(t);
1842}
1843
1844void
1845make_pname(private_t *pri, id_t tid)
1846{
1847	if (!cflag) {
1848		int ff = (fflag || ngrab > 1);
1849		int lf = (lflag | tid | (Thr_agent != NULL) | (truss_nlwp > 1));
1850		pid_t pid = Pstatus(Proc)->pr_pid;
1851		id_t lwpid = pri->lwpstat->pr_lwpid;
1852
1853		if (ff != pri->pparam.ff ||
1854		    lf != pri->pparam.lf ||
1855		    pid != pri->pparam.pid ||
1856		    lwpid != pri->pparam.lwpid ||
1857		    tid != pri->pparam.tid) {
1858			char *s = pri->pname;
1859
1860			if (ff)
1861				s += sprintf(s, "%d", (int)pid);
1862			if (lf)
1863				s += sprintf(s, "/%d", (int)lwpid);
1864			if (tid)
1865				s += sprintf(s, "@%d", (int)tid);
1866			if (ff || lf)
1867				*s++ = ':', *s++ = '\t';
1868			if (ff && lf && s < pri->pname + 9)
1869				*s++ = '\t';
1870			*s = '\0';
1871			pri->pparam.ff = ff;
1872			pri->pparam.lf = lf;
1873			pri->pparam.pid = pid;
1874			pri->pparam.lwpid = lwpid;
1875			pri->pparam.tid = tid;
1876		}
1877	}
1878}
1879
1880/*
1881 * Print the pri->pname[] string, if any.
1882 */
1883void
1884putpname(private_t *pri)
1885{
1886	if (pri->pname[0])
1887		(void) fputs(pri->pname, stdout);
1888}
1889
1890/*
1891 * Print the timestamp, if requested (-d, -D, or -E).
1892 */
1893void
1894timestamp(private_t *pri)
1895{
1896	const lwpstatus_t *Lsp = pri->lwpstat;
1897	int seconds;
1898	int fraction;
1899
1900	if (!(dflag|Dflag|Eflag) || !(Lsp->pr_flags & PR_STOPPED))
1901		return;
1902
1903	seconds = Lsp->pr_tstamp.tv_sec - Cp->basetime.tv_sec;
1904	fraction = Lsp->pr_tstamp.tv_nsec - Cp->basetime.tv_nsec;
1905	if (fraction < 0) {
1906		seconds--;
1907		fraction += NANOSEC;
1908	}
1909	/* fraction in 1/10 milliseconds, rounded up */
1910	fraction = (fraction + 50000) / 100000;
1911	if (fraction >= (MILLISEC * 10)) {
1912		seconds++;
1913		fraction -= (MILLISEC * 10);
1914	}
1915
1916	if (dflag)		/* time stamp */
1917		(void) printf("%2d.%4.4d\t", seconds, fraction);
1918
1919	if (Dflag) {		/* time delta */
1920		int oseconds = pri->seconds;
1921		int ofraction = pri->fraction;
1922
1923		pri->seconds = seconds;
1924		pri->fraction = fraction;
1925		seconds -= oseconds;
1926		fraction -= ofraction;
1927		if (fraction < 0) {
1928			seconds--;
1929			fraction += (MILLISEC * 10);
1930		}
1931		(void) printf("%2d.%4.4d\t", seconds, fraction);
1932	}
1933
1934	if (Eflag) {
1935		seconds = Lsp->pr_stime.tv_sec - pri->syslast.tv_sec;
1936		fraction = Lsp->pr_stime.tv_nsec - pri->syslast.tv_nsec;
1937
1938		if (fraction < 0) {
1939			seconds--;
1940			fraction += NANOSEC;
1941		}
1942		/* fraction in 1/10 milliseconds, rounded up */
1943		fraction = (fraction + 50000) / 100000;
1944		if (fraction >= (MILLISEC * 10)) {
1945			seconds++;
1946			fraction -= (MILLISEC * 10);
1947		}
1948		(void) printf("%2d.%4.4d\t", seconds, fraction);
1949	}
1950}
1951
1952/*
1953 * Create output file, being careful about
1954 * suid/sgid and file descriptor 0, 1, 2 issues.
1955 */
1956int
1957xcreat(char *path)
1958{
1959	int fd;
1960	int mode = 0666;
1961
1962	if (Euid == Ruid && Egid == Rgid)	/* not set-id */
1963		fd = creat(path, mode);
1964	else if (access(path, F_OK) != 0) {	/* file doesn't exist */
1965		/* if directory permissions OK, create file & set ownership */
1966
1967		char *dir;
1968		char *p;
1969		char dot[4];
1970
1971		/* generate path for directory containing file */
1972		if ((p = strrchr(path, '/')) == NULL) {	/* no '/' */
1973			p = dir = dot;
1974			*p++ = '.';		/* current directory */
1975			*p = '\0';
1976		} else if (p == path) {			/* leading '/' */
1977			p = dir = dot;
1978			*p++ = '/';		/* root directory */
1979			*p = '\0';
1980		} else {				/* embedded '/' */
1981			dir = path;		/* directory path */
1982			*p = '\0';
1983		}
1984
1985		if (access(dir, W_OK|X_OK) != 0) {
1986			/* not writeable/searchable */
1987			*p = '/';
1988			fd = -1;
1989		} else {	/* create file and set ownership correctly */
1990			*p = '/';
1991			if ((fd = creat(path, mode)) >= 0)
1992				(void) chown(path, (int)Ruid, (int)Rgid);
1993		}
1994	} else if (access(path, W_OK) != 0)	/* file not writeable */
1995		fd = -1;
1996	else
1997		fd = creat(path, mode);
1998
1999	/*
2000	 * Make sure it's not one of 0, 1, or 2.
2001	 * This allows truss to work when spawned by init(1m).
2002	 */
2003	if (0 <= fd && fd <= 2) {
2004		int dfd = fcntl(fd, F_DUPFD, 3);
2005		(void) close(fd);
2006		fd = dfd;
2007	}
2008
2009	/*
2010	 * Mark it close-on-exec so created processes don't inherit it.
2011	 */
2012	if (fd >= 0)
2013		(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
2014
2015	return (fd);
2016}
2017
/*
 * Arrange for file descriptor 1 to refer to the trace output.
 *
 *   ofd < 0	descriptor 1 is closed and re-created as a duplicate
 *		of descriptor 2
 *   ofd == 1	nothing to do
 *   otherwise	descriptor 1 is closed and re-created as a duplicate
 *		of ofd, ofd itself is closed, and if descriptor 2 is
 *		not open it is made a duplicate of descriptor 1
 */
void
setoutput(int ofd)
{
	int probe;

	if (ofd == 1)
		return;

	/* free up descriptor 1 so that F_DUPFD can land on it */
	(void) close(1);

	if (ofd < 0) {
		(void) fcntl(2, F_DUPFD, 1);
		return;
	}

	(void) fcntl(ofd, F_DUPFD, 1);
	(void) close(ofd);

	/* if no stderr, make it the same file */
	probe = dup(2);
	if (probe < 0)
		(void) fcntl(1, F_DUPFD, 2);
	else
		(void) close(probe);
}
2035
/*
 * Accumulate time differences:  a += e - s;
 *
 * The sum is written back to *ap after at most one carry or borrow
 * between tv_nsec and tv_sec.
 */
void
accumulate(timestruc_t *ap, const timestruc_t *ep, const timestruc_t *sp)
{
	time_t sec = ap->tv_sec + (ep->tv_sec - sp->tv_sec);
	long nsec = ap->tv_nsec + (ep->tv_nsec - sp->tv_nsec);

	if (nsec < 0) {
		/* borrow a second */
		nsec += NANOSEC;
		sec--;
	} else if (nsec >= NANOSEC) {
		/* carry a second */
		nsec -= NANOSEC;
		sec++;
	}

	ap->tv_sec = sec;
	ap->tv_nsec = nsec;
}
2052
2053int
2054lib_sort(const void *p1, const void *p2)
2055{
2056	int cmpr = 0;
2057	long i;
2058	long j;
2059
2060	hentry_t *t1 = (hentry_t *)p1;
2061	hentry_t *t2 = (hentry_t *)p2;
2062
2063	char *p = t1->lib;
2064	char *q = t2->lib;
2065
2066	if ((cmpr = strcmp(p, q)) == 0) {
2067		i = t1->count;
2068		j = t2->count;
2069		if (i > j)
2070			return (-1);
2071		else if (i < j)
2072			return (1);
2073		else {
2074			p = t1->key;
2075			q = t2->key;
2076			return (strcmp(p, q));
2077		}
2078	} else
2079		return (cmpr);
2080}
2081
2082void
2083report(private_t *pri, time_t lapse)	/* elapsed time, clock ticks */
2084{
2085	int i;
2086	long count;
2087	const char *name;
2088	long error;
2089	long total;
2090	long errtot;
2091	timestruc_t tickzero;
2092	timestruc_t ticks;
2093	timestruc_t ticktot;
2094
2095	if (descendent)
2096		return;
2097
2098	for (i = 0, total = 0; i <= PRMAXFAULT && !interrupt; i++) {
2099		if ((count = Cp->fltcount[i]) != 0) {
2100			if (total == 0)		/* produce header */
2101				(void) printf("faults -------------\n");
2102
2103			name = proc_fltname(i, pri->flt_name,
2104			    sizeof (pri->flt_name));
2105
2106			(void) printf("%s%s\t%4ld\n", name,
2107			    (((int)strlen(name) < 8)?
2108			    (const char *)"\t" : (const char *)""),
2109			    count);
2110			total += count;
2111		}
2112	}
2113	if (total && !interrupt)
2114		(void) printf("total:\t\t%4ld\n\n", total);
2115
2116	for (i = 0, total = 0; i <= PRMAXSIG && !interrupt; i++) {
2117		if ((count = Cp->sigcount[i]) != 0) {
2118			if (total == 0)		/* produce header */
2119				(void) printf("signals ------------\n");
2120			name = signame(pri, i);
2121			(void) printf("%s%s\t%4ld\n", name,
2122			    (((int)strlen(name) < 8)?
2123			    (const char *)"\t" : (const char *)""),
2124			    count);
2125			total += count;
2126		}
2127	}
2128	if (total && !interrupt)
2129		(void) printf("total:\t\t%4ld\n\n", total);
2130
2131	if ((Dynpat != NULL) && !interrupt) {
2132		size_t elem = elements_in_table(fcall_tbl);
2133		hiter_t *itr = iterate_hash(fcall_tbl);
2134		hentry_t *tmp = iter_next(itr);
2135		hentry_t *stbl = my_malloc(elem * sizeof (hentry_t), NULL);
2136		i = 0;
2137		while ((tmp != NULL) && (i < elem)) {
2138			stbl[i].prev = tmp->prev;
2139			stbl[i].next = tmp->next;
2140			stbl[i].lib = tmp->lib;
2141			stbl[i].key = tmp->key;
2142			stbl[i].count = tmp->count;
2143			tmp = iter_next(itr);
2144			i++;
2145		}
2146		qsort((void *)stbl, elem, sizeof (hentry_t),
2147		    lib_sort);
2148		(void) printf(
2149		    "\n%-20s %-40s %s\n", "Library:", "Function", "calls");
2150		for (i = 0; i < elem; i++) {
2151			(void) printf("%-20s %-40s %ld\n", stbl[i].lib,
2152			    stbl[i].key, stbl[i].count);
2153		}
2154		iter_free(itr);
2155		free(stbl);
2156		itr = NULL;
2157	}
2158
2159	if (!interrupt)
2160		(void) printf(
2161		"\nsyscall               seconds   calls  errors\n");
2162
2163	total = errtot = 0;
2164	tickzero.tv_sec = ticks.tv_sec = ticktot.tv_sec = 0;
2165	tickzero.tv_nsec = ticks.tv_nsec = ticktot.tv_nsec = 0;
2166	for (i = 0; i <= PRMAXSYS && !interrupt; i++) {
2167		struct syscount *scp = Cp->syscount[i];
2168		int n = nsubcodes(i);
2169		int subcode;
2170
2171		for (subcode = 0; subcode < n; subcode++, scp++) {
2172			if ((count = scp->count) != 0 || scp->error) {
2173				(void) printf("%-19.19s ",
2174				    sysname(pri, i, subcode));
2175
2176				ticks = scp->stime;
2177				accumulate(&ticktot, &ticks, &tickzero);
2178				prtim(&ticks);
2179
2180				(void) printf(" %7ld", count);
2181				if ((error = scp->error) != 0)
2182					(void) printf(" %7ld", error);
2183				(void) fputc('\n', stdout);
2184				total += count;
2185				errtot += error;
2186			}
2187		}
2188	}
2189
2190	if (!interrupt) {
2191		(void) printf(
2192		"                     --------  ------   ----\n");
2193		(void) printf("sys totals:         ");
2194		prtim(&ticktot);
2195		(void) printf(" %7ld %6ld\n", total, errtot);
2196	}
2197
2198	if (!interrupt) {
2199		(void) printf("usr time:           ");
2200		prtim(&Cp->usrtotal);
2201		(void) fputc('\n', stdout);
2202	}
2203
2204	if (!interrupt) {
2205		int hz = (int)sysconf(_SC_CLK_TCK);
2206
2207		ticks.tv_sec = lapse / hz;
2208		ticks.tv_nsec = (lapse % hz) * (1000000000 / hz);
2209		(void) printf("elapsed:            ");
2210		prtim(&ticks);
2211		(void) fputc('\n', stdout);
2212	}
2213}
2214
2215void
2216prtim(timestruc_t *tp)
2217{
2218	time_t sec;
2219
2220	if ((sec = tp->tv_sec) != 0)			/* whole seconds */
2221		(void) printf("%5lu", sec);
2222	else
2223		(void) printf("     ");
2224
2225	(void) printf(".%3.3ld", tp->tv_nsec/1000000);	/* fraction */
2226}
2227
2228/*
2229 * Gather process id's.
2230 * Return 0 on success, != 0 on failure.
2231 */
2232void
2233pids(char *arg, proc_set_t *grab)
2234{
2235	pid_t pid = -1;
2236	int i;
2237	const char *lwps = NULL;
2238
2239	if ((pid = proc_arg_xpsinfo(arg, PR_ARG_PIDS, NULL, &i, &lwps)) < 0) {
2240		(void) fprintf(stderr, "%s: cannot trace '%s': %s\n",
2241		    command, arg, Pgrab_error(i));
2242		return;
2243	}
2244
2245	for (i = 0; i < ngrab; i++)
2246		if (grab[i].pid == pid)	/* duplicate */
2247			break;
2248
2249	if (i == ngrab) {
2250		grab[ngrab].pid = pid;
2251		grab[ngrab].lwps = lwps;
2252		ngrab++;
2253	} else {
2254		(void) fprintf(stderr, "%s: duplicate process-id ignored: %d\n",
2255		    command, (int)pid);
2256	}
2257}
2258
2259/*
2260 * Report psargs string.
2261 */
2262void
2263psargs(private_t *pri)
2264{
2265	pid_t pid = Pstatus(Proc)->pr_pid;
2266	psinfo_t psinfo;
2267
2268	if (proc_get_psinfo(pid, &psinfo) == 0)
2269		(void) printf("%spsargs: %.64s\n",
2270		    pri->pname, psinfo.pr_psargs);
2271	else {
2272		perror("psargs()");
2273		(void) printf("%s\t*** Cannot read psinfo file for pid %d\n",
2274		    pri->pname, (int)pid);
2275	}
2276}
2277
2278char *
2279fetchstring(private_t *pri, long addr, int maxleng)
2280{
2281	int nbyte;
2282	int leng = 0;
2283	char string[41];
2284
2285	string[40] = '\0';
2286	if (pri->str_bsize == 0)  /* initial allocation of string buffer */
2287		pri->str_buffer =
2288		    my_malloc(pri->str_bsize = 16, "string buffer");
2289	*pri->str_buffer = '\0';
2290
2291	for (nbyte = 40; nbyte == 40 && leng < maxleng; addr += 40) {
2292		if ((nbyte = Pread(Proc, string, 40, addr)) <= 0)
2293			return (leng? pri->str_buffer : NULL);
2294		if (nbyte > 0 &&
2295		    (nbyte = strlen(string)) > 0) {
2296			while (leng + nbyte >= pri->str_bsize)
2297				pri->str_buffer =
2298				    my_realloc(pri->str_buffer,
2299				    pri->str_bsize *= 2, "string buffer");
2300			(void) strcpy(pri->str_buffer+leng, string);
2301			leng += nbyte;
2302		}
2303	}
2304
2305	if (leng > maxleng)
2306		leng = maxleng;
2307	pri->str_buffer[leng] = '\0';
2308
2309	return (pri->str_buffer);
2310}
2311
2312static priv_set_t *
2313getset(prpriv_t *p, priv_ptype_t set)
2314{
2315	return ((priv_set_t *)
2316	    &p->pr_sets[priv_getsetbyname(set) * p->pr_setsize]);
2317}
2318
2319void
2320show_cred(private_t *pri, int new, int loadonly)
2321{
2322	prcred_t cred;
2323	prpriv_t *privs;
2324
2325	if (proc_get_cred(Pstatus(Proc)->pr_pid, &cred, 0) < 0) {
2326		perror("show_cred() - credential");
2327		(void) printf("%s\t*** Cannot get credentials\n", pri->pname);
2328		return;
2329	}
2330	if ((privs = proc_get_priv(Pstatus(Proc)->pr_pid)) == NULL) {
2331		perror("show_cred() - privileges");
2332		(void) printf("%s\t*** Cannot get privileges\n", pri->pname);
2333		return;
2334	}
2335
2336	if (!loadonly && !cflag && prismember(&trace, SYS_execve)) {
2337		if (new)
2338			credentials = cred;
2339		if ((new && cred.pr_ruid != cred.pr_suid) ||
2340		    cred.pr_ruid != credentials.pr_ruid ||
2341		    cred.pr_suid != credentials.pr_suid)
2342			(void) printf(
2343		"%s    *** SUID: ruid/euid/suid = %d / %d / %d  ***\n",
2344			    pri->pname,
2345			    (int)cred.pr_ruid,
2346			    (int)cred.pr_euid,
2347			    (int)cred.pr_suid);
2348		if ((new && cred.pr_rgid != cred.pr_sgid) ||
2349		    cred.pr_rgid != credentials.pr_rgid ||
2350		    cred.pr_sgid != credentials.pr_sgid)
2351			(void) printf(
2352		"%s    *** SGID: rgid/egid/sgid = %d / %d / %d  ***\n",
2353			    pri->pname,
2354			    (int)cred.pr_rgid,
2355			    (int)cred.pr_egid,
2356			    (int)cred.pr_sgid);
2357		if (privdata != NULL && cred.pr_euid != 0) {
2358			priv_set_t *npset = getset(privs, PRIV_PERMITTED);
2359			priv_set_t *opset = getset(privdata, PRIV_PERMITTED);
2360			char *s, *t;
2361			if (!priv_issubset(npset, opset)) {
2362				/* Use the to be freed privdata as scratch */
2363				priv_inverse(opset);
2364				priv_intersect(npset, opset);
2365				s = priv_set_to_str(opset, ',', PRIV_STR_SHORT);
2366				t = priv_set_to_str(npset, ',', PRIV_STR_SHORT);
2367				(void) printf("%s    *** FPRIV: P/E: %s ***\n",
2368				    pri->pname,
2369				    strlen(s) > strlen(t) ? t : s);
2370				free(s);
2371				free(t);
2372			}
2373		}
2374	}
2375
2376	if (privdata != NULL)
2377		proc_free_priv(privdata);
2378	credentials = cred;
2379	privdata = privs;
2380}
2381
2382/*
2383 * Take control of a child process.
2384 * We come here with truss_lock held.
2385 */
2386int
2387control(private_t *pri, pid_t pid)
2388{
2389	const pstatus_t *Psp;
2390	const lwpstatus_t *Lsp;
2391	pid_t childpid = 0;
2392	long flags;
2393	int rc;
2394
2395	(void) mutex_lock(&gps->fork_lock);
2396	while (gps->fork_pid != 0)
2397		(void) cond_wait(&gps->fork_cv, &gps->fork_lock);
2398	gps->fork_pid = getpid();	/* parent pid */
2399	if ((childpid = fork()) == -1) {
2400		(void) printf("%s\t*** Cannot fork() to control process #%d\n",
2401		    pri->pname, (int)pid);
2402		Flush();
2403		gps->fork_pid = 0;
2404		(void) cond_broadcast(&gps->fork_cv);
2405		(void) mutex_unlock(&gps->fork_lock);
2406		release(pri, pid);
2407		return (FALSE);
2408	}
2409
2410	if (childpid != 0) {
2411		/*
2412		 * The parent carries on, after a brief pause.
2413		 * The parent must wait until the child executes procadd(pid).
2414		 */
2415		while (gps->fork_pid != childpid)
2416			(void) cond_wait(&gps->fork_cv, &gps->fork_lock);
2417		gps->fork_pid = 0;
2418		(void) cond_broadcast(&gps->fork_cv);
2419		(void) mutex_unlock(&gps->fork_lock);
2420		return (FALSE);
2421	}
2422
2423	childpid = getpid();
2424	descendent = TRUE;
2425	exit_called = FALSE;
2426	Pfree(Proc);	/* forget old process */
2427
2428	/*
2429	 * The parent process owns the shared gps->fork_lock.
2430	 * The child must grab it again.
2431	 */
2432	(void) mutex_lock(&gps->fork_lock);
2433
2434	/*
2435	 * Child grabs the process and retains the tracing flags.
2436	 */
2437	if ((Proc = Pgrab(pid, PGRAB_RETAIN, &rc)) == NULL) {
2438		(void) fprintf(stderr,
2439		    "%s: cannot control child process, pid# %d: %s\n",
2440		    command, (int)pid, Pgrab_error(rc));
2441		gps->fork_pid = childpid;
2442		(void) cond_broadcast(&gps->fork_cv);
2443		(void) mutex_unlock(&gps->fork_lock);
2444		exit(2);
2445	}
2446
2447	per_proc_init();
2448	/*
2449	 * Add ourself to the set of truss processes
2450	 * and notify the parent to carry on.
2451	 */
2452	procadd(pid, NULL);
2453	gps->fork_pid = childpid;
2454	(void) cond_broadcast(&gps->fork_cv);
2455	(void) mutex_unlock(&gps->fork_lock);
2456
2457	/*
2458	 * We may have grabbed the child before it is fully stopped on exit
2459	 * from fork.  Wait one second (at most) for it to settle down.
2460	 */
2461	(void) Pwait(Proc, MILLISEC);
2462	if (Rdb_agent != NULL)
2463		Rdb_agent = Prd_agent(Proc);
2464
2465	Psp = Pstatus(Proc);
2466	Lsp = &Psp->pr_lwp;
2467	pri->lwpstat = Lsp;
2468	data_model = Psp->pr_dmodel;
2469
2470	make_pname(pri, 0);
2471
2472	pri->syslast = Psp->pr_stime;
2473	pri->usrlast = Psp->pr_utime;
2474
2475	flags = PR_FORK | PR_ASYNC;
2476	if (Dynpat != NULL)
2477		flags |= PR_BPTADJ;	/* needed for x86 */
2478	(void) Psetflags(Proc, flags);
2479
2480	return (TRUE);
2481}
2482
2483/*
2484 * Take control of an existing process.
2485 */
2486int
2487grabit(private_t *pri, proc_set_t *set)
2488{
2489	const pstatus_t *Psp;
2490	const lwpstatus_t *Lsp;
2491	int gcode;
2492
2493	/*
2494	 * Don't force the takeover unless the -F option was specified.
2495	 */
2496	if ((Proc = Pgrab(set->pid, Fflag, &gcode)) == NULL) {
2497		(void) fprintf(stderr, "%s: %s: %d\n",
2498		    command, Pgrab_error(gcode), (int)set->pid);
2499		pri->lwpstat = NULL;
2500		return (FALSE);
2501	}
2502	Psp = Pstatus(Proc);
2503	Lsp = &Psp->pr_lwp;
2504	pri->lwpstat = Lsp;
2505
2506	make_pname(pri, 0);
2507
2508	data_model = Psp->pr_dmodel;
2509	pri->syslast = Psp->pr_stime;
2510	pri->usrlast = Psp->pr_utime;
2511
2512	if (fflag || Dynpat != NULL)
2513		(void) Psetflags(Proc, PR_FORK);
2514	else
2515		(void) Punsetflags(Proc, PR_FORK);
2516	procadd(set->pid, set->lwps);
2517	show_cred(pri, TRUE, FALSE);
2518	return (TRUE);
2519}
2520
2521/*
2522 * Release process from control.
2523 */
2524void
2525release(private_t *pri, pid_t pid)
2526{
2527	/*
2528	 * The process in question is the child of a traced process.
2529	 * We are here to turn off the inherited tracing flags.
2530	 */
2531	int fd;
2532	char ctlname[100];
2533	long ctl[2];
2534
2535	ctl[0] = PCSET;
2536	ctl[1] = PR_RLC;
2537
2538	/* process is freshly forked, no need for exclusive open */
2539	(void) sprintf(ctlname, "/proc/%d/ctl", (int)pid);
2540	if ((fd = open(ctlname, O_WRONLY)) < 0 ||
2541	    write(fd, (char *)ctl, sizeof (ctl)) < 0) {
2542		perror("release()");
2543		(void) printf(
2544		    "%s\t*** Cannot release child process, pid# %d\n",
2545		    pri->pname, (int)pid);
2546		Flush();
2547	}
2548	if (fd >= 0)	/* run-on-last-close sets the process running */
2549		(void) close(fd);
2550}
2551
2552void
2553intr(int sig)
2554{
2555	/*
2556	 * SIGUSR1 is special.  It is used by one truss process to tell
2557	 * another truss process to release its controlled process.
2558	 * SIGUSR2 is also special.  It is used to wake up threads waiting
2559	 * for a victim lwp to stop after an event that will leave the
2560	 * process hung (stopped and abandoned) has occurred.
2561	 */
2562	if (sig == SIGUSR1) {
2563		sigusr1 = TRUE;
2564	} else if (sig == SIGUSR2) {
2565		void *value;
2566		private_t *pri;
2567		struct ps_lwphandle *Lwp;
2568
2569		if (thr_getspecific(private_key, &value) == 0 &&
2570		    (pri = value) != NULL &&
2571		    (Lwp = pri->Lwp) != NULL)
2572			(void) Lstop(Lwp, MILLISEC / 10);
2573	} else {
2574		interrupt = sig;
2575	}
2576}
2577
2578void
2579errmsg(const char *s, const char *q)
2580{
2581	char msg[512];
2582
2583	if (s || q) {
2584		msg[0] = '\0';
2585		if (command) {
2586			(void) strcpy(msg, command);
2587			(void) strcat(msg, ": ");
2588		}
2589		if (s)
2590			(void) strcat(msg, s);
2591		if (q)
2592			(void) strcat(msg, q);
2593		(void) strcat(msg, "\n");
2594		(void) write(2, msg, (size_t)strlen(msg));
2595	}
2596}
2597
2598void
2599abend(const char *s, const char *q)
2600{
2601	(void) thr_sigsetmask(SIG_SETMASK, &fillset, NULL);
2602	if (Proc) {
2603		Flush();
2604		errmsg(s, q);
2605		clear_breakpoints();
2606		(void) Punsetflags(Proc, PR_ASYNC);
2607		Prelease(Proc, created? PRELEASE_KILL : PRELEASE_CLEAR);
2608		procdel();
2609		(void) wait4all();
2610	} else {
2611		errmsg(s, q);
2612	}
2613	exit(2);
2614}
2615
2616/*
2617 * Allocate memory.
2618 * If allocation fails then print a message and abort.
2619 */
2620void *
2621my_realloc(void *buf, size_t size, const char *msg)
2622{
2623	if ((buf = realloc(buf, size)) == NULL) {
2624		if (msg != NULL)
2625			abend("cannot allocate ", msg);
2626		else
2627			abend("memory allocation failure", NULL);
2628	}
2629
2630	return (buf);
2631}
2632
/*
 * Allocate a zero-filled array of 'nelem' elements of 'elsize' bytes
 * each with calloc().  On failure abend() is called, naming 'msg' as
 * the thing that could not be allocated, or with a generic message
 * when 'msg' is NULL.
 */
void *
my_calloc(size_t nelem, size_t elsize, const char *msg)
{
	void *fresh = calloc(nelem, elsize);

	if (fresh != NULL)
		return (fresh);

	if (msg == NULL)
		abend("memory allocation failure", NULL);
	else
		abend("cannot allocate ", msg);

	return (fresh);
}
2647
/*
 * Allocate 'size' bytes.  This is my_realloc() applied to a NULL
 * buffer, so failure is handled the same way, using 'msg'.
 */
void *
my_malloc(size_t size, const char *msg)
{
	void *buf = my_realloc(NULL, size, msg);

	return (buf);
}
2653
2654int
2655wait4all()
2656{
2657	int i;
2658	pid_t pid;
2659	int rc = 0;
2660	int status;
2661
2662	for (i = 0; i < 10; i++) {
2663		while ((pid = wait(&status)) != -1) {
2664			/* return exit() code of the created process */
2665			if (pid == created) {
2666				if (WIFEXITED(status))
2667					rc = WEXITSTATUS(status);
2668				else
2669					rc |= 0x80; /* +128 to indicate sig */
2670			}
2671		}
2672		if (errno != EINTR && errno != ERESTART)
2673			break;
2674	}
2675
2676	if (i >= 10)	/* repeated interrupts */
2677		rc = 2;
2678
2679	return (rc);
2680}
2681
2682void
2683letgo(private_t *pri)
2684{
2685	(void) printf("%s\t*** process otherwise traced, releasing ...\n",
2686	    pri->pname);
2687}
2688
2689/*
2690 * Test for empty set.
2691 * support routine used by isemptyset() macro.
2692 */
2693int
2694is_empty(const uint32_t *sp,	/* pointer to set (array of int32's) */
2695	size_t n)		/* number of int32's in set */
2696{
2697	if (n) {
2698		do {
2699			if (*sp++)
2700				return (FALSE);
2701		} while (--n);
2702	}
2703
2704	return (TRUE);
2705}
2706
2707/*
2708 * OR the second set into the first.
2709 * The sets must be the same size.
2710 */
2711void
2712or_set(uint32_t *sp1, const uint32_t *sp2, size_t n)
2713{
2714	if (n) {
2715		do {
2716			*sp1++ |= *sp2++;
2717		} while (--n);
2718	}
2719}
2720