1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include	<sysexits.h>
29#include	<stdlib.h>
30#include	<stdio.h>
31#include	<unistd.h>
32#include	"gprof.h"
33#include	"profile.h"
34
/* program name, printed as the prefix of this file's diagnostic messages */
char		*whoami = "gprof";
static pctype	lowpc, highpc;		/* range profiled, in UNIT's */
37
/*
 *	things which get -E excluded by default.
 *	The list is terminated by a NULL entry.
 */
static char *defaultEs[] = {
	"mcount",
	"__mcleanup",
	NULL
};
46
#ifdef DEBUG

/*
 * Printable names for the profile object types, compiled in for DEBUG
 * builds only.  Slot 0 is the placeholder for an unrecognized type and
 * the table is NULL-terminated.
 * NOTE(review): presumably indexed by an object's type tag, with
 * MAX_OBJTYPES being the highest valid index -- confirm against the
 * code that reads the objects.
 */
static char *objname[] = {
	"<invalid object>",
	"PROF_BUFFER_T",
	"PROF_CALLGRAPH_T",
	"PROF_MODULES_T",
	NULL
};
#define	MAX_OBJTYPES	3

#endif /* DEBUG */
59
/*
 * Terminate the program with the EX_OK exit status.
 */
void
done(void)
{
	exit(EX_OK);
}
66
67static pctype
68max(pctype a, pctype b)
69{
70	if (a > b)
71		return (a);
72	return (b);
73}
74
75static pctype
76min(pctype a, pctype b)
77{
78	if (a < b)
79		return (a);
80	return (b);
81}
82
83/*
84 *	calculate scaled entry point addresses (to save time in asgnsamples),
85 *	and possibly push the scaled entry points over the entry mask,
86 *	if it turns out that the entry point is in one bucket and the code
87 *	for a routine is in the next bucket.
88 *
89 */
90static void
91alignentries(void)
92{
93	struct nl *nlp;
94#ifdef DEBUG
95	pctype			bucket_of_entry;
96	pctype			bucket_of_code;
97#endif /* DEBUG */
98
99	/* for old-style gmon.out, nameslist is only in modules.nl */
100
101	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
102		nlp->svalue = nlp->value / sizeof (UNIT);
103#ifdef DEBUG
104		bucket_of_entry = (nlp->svalue - lowpc) / scale;
105		bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
106		if (bucket_of_entry < bucket_of_code) {
107			if (debug & SAMPLEDEBUG) {
108				(void) printf(
109				    "[alignentries] pushing svalue 0x%llx "
110				    "to 0x%llx\n", nlp->svalue,
111				    nlp->svalue + UNITS_TO_CODE);
112			}
113		}
114#endif /* DEBUG */
115	}
116}
117
118/*
119 *	old-style gmon.out
120 *	------------------
121 *
122 *	Assign samples to the procedures to which they belong.
123 *
124 *	There are three cases as to where pcl and pch can be
125 *	with respect to the routine entry addresses svalue0 and svalue1
126 *	as shown in the following diagram.  overlap computes the
127 *	distance between the arrows, the fraction of the sample
128 *	that is to be credited to the routine which starts at svalue0.
129 *
130 *	    svalue0                                         svalue1
131 *	       |                                               |
132 *	       v                                               v
133 *
134 *	       +-----------------------------------------------+
135 *	       |					       |
136 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
137 *	  |         |		  |         |		  |         |
138 *	  +---------+		  +---------+		  +---------+
139 *
140 *	  ^         ^		  ^         ^		  ^         ^
141 *	  |         |		  |         |		  |         |
142 *	 pcl       pch		 pcl       pch		 pcl       pch
143 *
144 *	For the vax we assert that samples will never fall in the first
145 *	two bytes of any routine, since that is the entry mask,
 *	thus we call alignentries() to adjust the entry points if
147 *	the entry mask falls in one bucket but the code for the routine
148 *	doesn't start until the next bucket.  In conjunction with the
149 *	alignment of routine addresses, this should allow us to have
150 *	only one sample for every four bytes of text space and never
151 *	have any overlap (the two end cases, above).
152 */
/*
 * Old-style gmon.out: walk the histogram in samples[] and credit the
 * count of every non-empty bucket to the symbols of modules.nl whose
 * address range the bucket (pcl..pch) overlaps.  Each non-empty
 * bucket's count is also added to the global totime.
 */
static void
asgnsamples(void)
{
	sztype		i, j;
	unsigned_UNIT	ccnt;
	double		time;
	pctype		pcl, pch;
	pctype		overlap;
	pctype		svalue0, svalue1;

	extern mod_info_t	modules;
	nltype		*nl = modules.nl;
	sztype		nname = modules.nname;

	/* read samples and assign to namelist symbols */
	/* scale is the width of one histogram bucket: the range / nsamples */
	scale = highpc - lowpc;
	scale /= nsamples;
	alignentries();
	for (i = 0, j = 1; i < nsamples; i++) {
		ccnt = samples[i];
		if (ccnt == 0)
			continue;
		/* pcl and pch are the low and high ends of bucket i */
		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
		pcl = lowpc + scale * i;
		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
		pch = lowpc + scale * (i + 1);
		time = ccnt;
#ifdef DEBUG
		if (debug & SAMPLEDEBUG) {
			(void) printf(
			    "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
			    pcl, pch, ccnt);
		}
#endif /* DEBUG */
		totime += time;
		/*
		 * Resume one symbol before the point where the previous
		 * bucket stopped; presumably because that symbol may
		 * overlap this bucket as well.
		 */
		for (j = (j ? j - 1 : 0); j < nname; j++) {
			svalue0 = nl[j].svalue;
			/*
			 * NOTE(review): for j == nname - 1 this reads
			 * nl[nname]; presumably the namelist carries a
			 * trailing sentinel entry -- confirm.
			 */
			svalue1 = nl[j+1].svalue;
			/*
			 *	if high end of tick is below entry address,
			 *	go for next tick.
			 */
			if (pch < svalue0)
				break;
			/*
			 *	if low end of tick into next routine,
			 *	go for next routine.
			 */
			if (pcl >= svalue1)
				continue;
			/* part of the bucket lying inside [svalue0, svalue1) */
			overlap = min(pch, svalue1) - max(pcl, svalue0);
			if (overlap != 0) {
#ifdef DEBUG
				if (debug & SAMPLEDEBUG) {
					(void) printf("[asgnsamples] "
					    "(0x%llx->0x%llx-0x%llx) %s gets "
					    "%f ticks %lld overlap\n",
					    nl[j].value/sizeof (UNIT), svalue0,
					    svalue1, nl[j].name,
					    overlap * time / scale, overlap);
				}
#endif /* DEBUG */
				/* credit the overlapped fraction of the count */
				nl[j].time += overlap * time / scale;
			}
		}
	}
#ifdef DEBUG
	if (debug & SAMPLEDEBUG) {
		(void) printf("[asgnsamples] totime %f\n", totime);
	}
#endif /* DEBUG */
}
225
226
/*
 * Write the PROF_CALLGRAPH_T object to fp: a ProfCallGraph header
 * followed by one ProfFunction record per arc, grouped by callee.
 *
 *	fp		output stream, positioned where the object starts
 *	filename	name passed to perror() when a write fails
 *	tarcs		total number of arcs; sizes the object
 *	ncallees	number of callees having at least one caller;
 *			used to recognise the last callee
 *
 * Reads nlp->ncallers for every symbol, so those counts must already
 * have been filled in (dump_pcsamples() does so).  A failed fwrite()
 * is reported with perror() and ends the program with EX_IOERR.
 */
static void
dump_callgraph(FILE *fp, char *filename, unsigned long tarcs,
    unsigned long ncallees)
{
	ProfCallGraph		prof_cgraph;
	ProfFunction		prof_func;
	arctype	*arcp;
	mod_info_t		*mi;
	nltype			*nlp;
	size_t			cur_offset;
	unsigned long		caller_id = 0, callee_id = 0;

	/*
	 * Write the callgraph header
	 */
	prof_cgraph.type = PROF_CALLGRAPH_T;
	prof_cgraph.version = PROF_CALLGRAPH_VER;
	prof_cgraph.functions = PROFCGRAPH_SZ;
	prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ;
	if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) {
		perror(filename);
		exit(EX_IOERR);
	}
	/* skip over the filler, if any, that follows the header */
	/* CONSTCOND */
	if (CGRAPH_FILLER)
		(void) fseek(fp, CGRAPH_FILLER, SEEK_CUR);

	/* Current offset inside the callgraph object */
	cur_offset = prof_cgraph.functions;

	for (mi = &modules; mi; mi = mi->next) {
		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
			/* symbols nobody called contribute no records */
			if (nlp->ncallers == 0)
				continue;

			/* If this is the last callee, set next_to to 0 */
			callee_id++;
			if (callee_id == ncallees)
				prof_func.next_to = 0;
			else {
				/* next callee starts after this one's records */
				prof_func.next_to = cur_offset +
				    nlp->ncallers * PROFFUNC_SZ;
			}

			/*
			 * Dump this callee's raw arc information with all
			 * its callers
			 */
			caller_id = 1;
			for (arcp = nlp->parents; arcp;
			    arcp = arcp->arc_parentlist) {
				/*
				 * If no more callers for this callee, set
				 * next_from to 0
				 */
				if (caller_id == nlp->ncallers)
					prof_func.next_from = 0;
				else {
					prof_func.next_from = cur_offset +
					    PROFFUNC_SZ;
				}

				/*
				 * Both pcs are written relative to the load
				 * base of the module that owns the symbol.
				 */
				prof_func.frompc =
				    arcp->arc_parentp->module->load_base +
				    (arcp->arc_parentp->value -
				    arcp->arc_parentp->module->txt_origin);
				prof_func.topc = mi->load_base +
				    (nlp->value - mi->txt_origin);
				prof_func.count = arcp->arc_count;


				if (fwrite(&prof_func, sizeof (ProfFunction),
				    1, fp) != 1) {
					perror(filename);
					exit(EX_IOERR);
				}
				/* CONSTCOND */
				if (FUNC_FILLER)
					(void) fseek(fp, FUNC_FILLER, SEEK_CUR);

				cur_offset += PROFFUNC_SZ;
				caller_id++;
			}
		} /* for nlp... */
	} /* for mi... */
}
313
314/*
315 * To save all pc-hits in all the gmon.out's is infeasible, as this
316 * may become quite huge even with a small number of files to sum.
317 * Instead, we'll dump *fictitious hits* to correct functions
318 * by scanning module namelists. Again, since this is summing
319 * pc-hits, we may have to dump the pcsamples out in chunks if the
320 * number of pc-hits is high.
321 */
322static void
323dump_hits(FILE *fp, char *filename, nltype *nlp)
324{
325	Address		*p, hitpc;
326	size_t		i, nelem, ntowrite;
327
328	if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE)
329		nelem = PROF_BUFFER_SIZE;
330
331	if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) {
332		(void) fprintf(stderr, "%s: no room for %d pcsamples\n",
333		    whoami, nelem);
334		exit(EX_OSERR);
335	}
336
337	/*
338	 * Set up *fictitious* hits (to function entry) buffer
339	 */
340	hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin);
341	for (i = 0; i < nelem; i++)
342		p[i] = hitpc;
343
344	for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) {
345		if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) {
346			perror(filename);
347			exit(EX_IOERR);
348		}
349	}
350
351	if (ntowrite) {
352		if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) {
353			perror(filename);
354			exit(EX_IOERR);
355		}
356	}
357
358	free(p);
359}
360
361static void
362dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs,
363    unsigned long *ncallees)
364{
365	ProfBuffer		prof_buffer;
366	arctype	*arcp;
367	mod_info_t		*mi;
368	nltype			*nlp;
369
370	prof_buffer.type = PROF_BUFFER_T;
371	prof_buffer.version = PROF_BUFFER_VER;
372	prof_buffer.buffer = PROFBUF_SZ;
373	prof_buffer.bufsize = n_pcsamples;
374	prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address);
375	if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) {
376		perror(filename);
377		exit(EX_IOERR);
378	}
379	/* CONSTCOND */
380	if (BUF_FILLER)
381		(void) fseek(fp, BUF_FILLER, SEEK_CUR);
382
383	*tarcs = 0;
384	*ncallees = 0;
385	for (mi = &modules; mi; mi = mi->next) {
386		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
387			if (nlp->nticks)
388				dump_hits(fp, filename, nlp);
389
390			nlp->ncallers = 0;
391			for (arcp = nlp->parents; arcp;
392			    arcp = arcp->arc_parentlist) {
393				(nlp->ncallers)++;
394			}
395
396			if (nlp->ncallers) {
397				(*tarcs) += nlp->ncallers;
398				(*ncallees)++;
399			}
400		}
401	}
402}
403
404static void
405dump_modules(FILE *fp, char *filename, size_t pbuf_sz)
406{
407	char		*pbuf, *p;
408	size_t		namelen;
409	Index		off_nxt, off_path;
410	mod_info_t	*mi;
411
412	ProfModuleList	prof_modlist;
413	ProfModule	prof_mod;
414
415	/* Allocate for path strings buffer */
416	pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN);
417	if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) {
418		(void) fprintf(stderr, "%s: no room for %d bytes\n",
419		    whoami, pbuf_sz * sizeof (char));
420		exit(EX_OSERR);
421	}
422
423	/* Dump out PROF_MODULE_T info for all non-aout modules */
424	prof_modlist.type = PROF_MODULES_T;
425	prof_modlist.version = PROF_MODULES_VER;
426	prof_modlist.modules = PROFMODLIST_SZ;
427	prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ +
428	    pbuf_sz;
429	if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) {
430		perror(filename);
431		exit(EX_IOERR);
432	}
433	/* CONSTCOND */
434	if (MODLIST_FILLER)
435		(void) fseek(fp, MODLIST_FILLER, SEEK_CUR);
436
437	/*
438	 * Initialize offsets for ProfModule elements.
439	 */
440	off_nxt = PROFMODLIST_SZ + PROFMOD_SZ;
441	off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ;
442
443	for (mi = modules.next; mi; mi = mi->next) {
444		if (mi->next)
445			prof_mod.next = off_nxt;
446		else
447			prof_mod.next = 0;
448		prof_mod.path = off_path;
449		prof_mod.startaddr = mi->load_base;
450		prof_mod.endaddr = mi->load_end;
451
452		if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) {
453			perror(filename);
454			exit(EX_IOERR);
455		}
456
457		/* CONSTCOND */
458		if (MOD_FILLER)
459			(void) fseek(fp, MOD_FILLER, SEEK_CUR);
460
461		(void) strcpy(p, mi->name);
462		namelen = strlen(mi->name);
463		p += namelen + 1;
464
465		/* Note that offset to every path str need not be aligned */
466		off_nxt += PROFMOD_SZ;
467		off_path += namelen + 1;
468	}
469
470	/* Write out the module path strings */
471	if (pbuf_sz) {
472		if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) {
473			perror(filename);
474			exit(EX_IOERR);
475		}
476
477		free(pbuf);
478	}
479}
480
/*
 * If we have inactive modules, their current load addresses may overlap with
 * active ones, and so we've to assign fictitious, non-overlapping addresses
 * to all modules before we dump them.
 *
 *	pathsz	(out) total length of the names of all modules other than
 *		the first one, counting one terminating byte per name
 *
 * When no module is inactive, only *pathsz is computed and the load
 * addresses are left as they are.
 */
static void
fixup_maps(size_t *pathsz)
{
	unsigned int	n_inactive = 0;
	Address		lbase = 0, lend;
	mod_info_t	*mi;

	/* Pick the lowest load address among modules */
	*pathsz = 0;
	for (mi = &modules; mi; mi = mi->next) {

		if (mi->active == FALSE)
			n_inactive++;

		/* the first module seeds lbase, later ones can only lower it */
		if (mi == &modules || mi->load_base < lbase)
			lbase = mi->load_base;

		/*
		 * Return total path size of non-aout modules only
		 */
		if (mi != &modules)
			*pathsz = (*pathsz) + strlen(mi->name) + 1;
	}

	/*
	 * All module info is in fine shape already if there are no
	 * inactive modules
	 */
	if (n_inactive == 0)
		return;

	/*
	 * Assign fictitious load addresses to all (non-aout) modules so
	 * that sum info can be dumped out.
	 */
	for (mi = modules.next; mi; mi = mi->next) {
		/* candidate range: lbase plus this module's extent */
		lend = lbase + (mi->data_end - mi->txt_origin);
		/* usable only if both ends fall on the same side of the aout */
		if ((lbase < modules.load_base && lend < modules.load_base) ||
		    (lbase > modules.load_end && lend > modules.load_end)) {

			mi->load_base = lbase;
			mi->load_end = lend;

			/* just to give an appearance of reality */
			lbase = CEIL(lend + PGSZ, PGSZ);
		} else {
			/*
			 * can't use this lbase & lend pair, as it
			 * overlaps with aout's addresses
			 */
			/* place the module past the end of the aout instead */
			mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ);
			mi->load_end = mi->load_base + (lend - lbase);

			lbase = CEIL(mi->load_end + PGSZ, PGSZ);
		}
	}
}
543
544static void
545dump_gprofhdr(FILE *fp, char *filename)
546{
547	ProfHeader	prof_hdr;
548
549	prof_hdr.h_magic = PROF_MAGIC;
550	prof_hdr.h_major_ver = PROF_MAJOR_VERSION;
551	prof_hdr.h_minor_ver = PROF_MINOR_VERSION;
552	prof_hdr.size = PROFHDR_SZ;
553	if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) {
554		perror(filename);
555		exit(EX_IOERR);
556	}
557
558	/* CONSTCOND */
559	if (HDR_FILLER)
560		(void) fseek(fp, HDR_FILLER, SEEK_CUR);
561}
562
563static void
564dumpsum_ostyle(char *sumfile)
565{
566	nltype *nlp;
567	arctype *arcp;
568	struct rawarc arc;
569	struct rawarc32 arc32;
570	FILE *sfile;
571
572	if ((sfile = fopen(sumfile, "w")) == NULL) {
573		perror(sumfile);
574		exit(EX_IOERR);
575	}
576	/*
577	 * dump the header; use the last header read in
578	 */
579	if (Bflag) {
580		if (fwrite(&h, sizeof (h), 1, sfile) != 1) {
581			perror(sumfile);
582			exit(EX_IOERR);
583		}
584	} else {
585		struct hdr32 hdr;
586		hdr.lowpc  = (pctype32)h.lowpc;
587		hdr.highpc = (pctype32)h.highpc;
588		hdr.ncnt   = (pctype32)h.ncnt;
589		if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) {
590			perror(sumfile);
591			exit(EX_IOERR);
592		}
593	}
594	/*
595	 * dump the samples
596	 */
597	if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) !=
598	    nsamples) {
599		perror(sumfile);
600		exit(EX_IOERR);
601	}
602	/*
603	 * dump the normalized raw arc information. For old-style dumping,
604	 * the only namelist is in modules.nl
605	 */
606	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
607		for (arcp = nlp->children; arcp;
608		    arcp = arcp->arc_childlist) {
609			if (Bflag) {
610				arc.raw_frompc = arcp->arc_parentp->value;
611				arc.raw_selfpc = arcp->arc_childp->value;
612				arc.raw_count = arcp->arc_count;
613				if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) {
614					perror(sumfile);
615					exit(EX_IOERR);
616				}
617			} else {
618				arc32.raw_frompc =
619				    (pctype32)arcp->arc_parentp->value;
620				arc32.raw_selfpc =
621				    (pctype32)arcp->arc_childp->value;
622				arc32.raw_count = (actype32)arcp->arc_count;
623				if (fwrite(&arc32, sizeof (arc32), 1, sfile) !=
624				    1) {
625					perror(sumfile);
626					exit(EX_IOERR);
627				}
628			}
629#ifdef DEBUG
630			if (debug & SAMPLEDEBUG) {
631				(void) printf(
632				    "[dumpsum_ostyle] frompc 0x%llx selfpc "
633				    "0x%llx count %lld\n", arc.raw_frompc,
634				    arc.raw_selfpc, arc.raw_count);
635			}
636#endif /* DEBUG */
637		}
638	}
639	(void) fclose(sfile);
640}
641
642/*
643 * dump out the gmon.sum file
644 */
645static void
646dumpsum(char *sumfile)
647{
648	FILE		*sfile;
649	size_t		pathbuf_sz;
650	unsigned long	total_arcs;	/* total number of arcs in all */
651	unsigned long	ncallees;	/* no. of callees with parents */
652
653	if (old_style) {
654		dumpsum_ostyle(sumfile);
655		return;
656	}
657
658	if ((sfile = fopen(sumfile, "w")) == NULL) {
659		perror(sumfile);
660		exit(EX_IOERR);
661	}
662
663	/*
664	 * Dump the new-style gprof header. Even if one of the original
665	 * profiled-files was of a older version, the summed file is of
666	 * current version only.
667	 */
668	dump_gprofhdr(sfile, sumfile);
669
670	/*
671	 * Fix up load-maps and dump out modules info
672	 *
673	 * Fix up module load maps so inactive modules get *some* address
674	 * (and btw, could you get the total size of non-aout module path
675	 * strings please ?)
676	 */
677	fixup_maps(&pathbuf_sz);
678	dump_modules(sfile, sumfile, pathbuf_sz);
679
680
681	/*
682	 * Dump out the summ'd pcsamples
683	 *
684	 * For dumping call graph information later, we need certain
685	 * statistics (like total arcs, number of callers for each node);
686	 * collect these also while we are at it.
687	 */
688	dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees);
689
690	/*
691	 * Dump out the summ'd call graph information
692	 */
693	dump_callgraph(sfile, sumfile, total_arcs, ncallees);
694
695
696	(void) fclose(sfile);
697}
698
699static void
700tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp)
701{
702	nltype		*parentp;
703	nltype		*childp;
704
705	/*
706	 * if count == 0 this is a null arc and
707	 * we don't need to tally it.
708	 */
709	if (rawp->raw_count == 0)
710		return;
711
712	/*
713	 * Lookup the caller and callee pcs in namelists of
714	 * appropriate modules
715	 */
716	parentp = nllookup(caller_mod, rawp->raw_frompc, NULL);
717	childp = nllookup(callee_mod, rawp->raw_selfpc, NULL);
718	if (childp && parentp) {
719		if (!Dflag)
720			childp->ncall += rawp->raw_count;
721		else {
722			if (first_file)
723				childp->ncall += rawp->raw_count;
724			else {
725				childp->ncall -= rawp->raw_count;
726				if (childp->ncall < 0)
727					childp->ncall = 0;
728			}
729		}
730
731#ifdef DEBUG
732		if (debug & TALLYDEBUG) {
733			(void) printf("[tally] arc from %s to %s traversed "
734			    "%lld times\n", parentp->name,
735			    childp->name, rawp->raw_count);
736		}
737#endif /* DEBUG */
738		addarc(parentp, childp, rawp->raw_count);
739	}
740}
741
742/*
743 * Look up a module's base address in a sorted list of pc-hits. Unlike
744 * nllookup(), this deals with misses by mapping them to the next *higher*
745 * pc-hit. This is so that we get into the module's first pc-hit rightaway,
746 * even if the module's entry-point (load_base) itself is not a hit.
747 */
748static Address *
749locate(Address	*pclist, size_t nelem, Address keypc)
750{
751	size_t	low = 0, middle, high = nelem - 1;
752
753	if (keypc <= pclist[low])
754		return (pclist);
755
756	if (keypc > pclist[high])
757		return (NULL);
758
759	while (low != high) {
760		middle = (high + low) >> 1;
761
762		if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc))
763			return (&pclist[middle + 1]);
764
765		if (pclist[middle] >= keypc)
766			high = middle;
767		else
768			low = middle + 1;
769	}
770
771	/* must never reach here! */
772	return (NULL);
773}
774
/*
 * Credit the pc-hits of one sorted sample chunk that belong to `module'
 * to that module's functions.
 *
 *	module		module whose load_base/load_end bound the hits
 *	pcsmpl		pc-hits, sorted in ascending order
 *	n_samples	number of entries in pcsmpl
 *
 * Every hit attributed to a function adds one to that function's
 * nticks and time, and to the global totime.  Hits for which
 * nllookup() finds no function are skipped.
 */
static void
assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples)
{
	Address		*pcptr, *pcse = pcsmpl + n_samples;
	pctype		nxt_func;
	nltype		*fnl;
	size_t		func_nticks;
#ifdef DEBUG
	size_t		n_hits_in_module = 0;
#endif /* DEBUG */

	/* Locate the first pc-hit for this module */
	if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) {
#ifdef DEBUG
		if (debug & PCSMPLDEBUG) {
			(void) printf("[assign_pcsamples] no pc-hits in\n");
			(void) printf(
			    "                   `%s'\n", module->name);
		}
#endif /* DEBUG */
		return;			/* no pc-hits in this module */
	}

	/* Assign all pc-hits in this module to appropriate functions */
	while ((pcptr < pcse) && (*pcptr < module->load_end)) {

		/*
		 * NOTE(review): nxt_func is presumably set by nllookup()
		 * to the start address of the function following the one
		 * found -- confirm against nllookup().
		 */
		/* Update the corresponding function's time */
		if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) {
			/*
			 * Collect all pc-hits in this function. Each
			 * pc-hit counts as 1 tick.
			 */
			func_nticks = 0;
			while ((pcptr < pcse) && (*pcptr < nxt_func)) {
				func_nticks++;
				pcptr++;
			}

			/*
			 * No hit was consumed (the current hit is not below
			 * nxt_func): step past it so the outer loop always
			 * makes progress.
			 */
			if (func_nticks == 0)
				pcptr++;
			else {
				fnl->nticks += func_nticks;
				fnl->time += func_nticks;
				totime += func_nticks;
			}

#ifdef DEBUG
			n_hits_in_module += func_nticks;
#endif /* DEBUG */
		} else {
			/*
			 * pc sample could not be assigned to function;
			 * probably in a PLT
			 */
			pcptr++;
		}
	}

#ifdef DEBUG
	if (debug & PCSMPLDEBUG) {
		(void) printf(
		    "[assign_pcsamples] %ld hits in\n", n_hits_in_module);
		(void) printf("                   `%s'\n", module->name);
	}
#endif /* DEBUG */
}
841
842int
843pc_cmp(const void *arg1, const void *arg2)
844{
845	Address *pc1 = (Address *)arg1;
846	Address *pc2 = (Address *)arg2;
847
848	if (*pc1 > *pc2)
849		return (1);
850
851	if (*pc1 < *pc2)
852		return (-1);
853
854	return (0);
855}
856
857static void
858process_pcsamples(ProfBuffer *bufp)
859{
860	Address		*pc_samples;
861	mod_info_t	*mi;
862	caddr_t		p;
863	size_t		chunk_size, nelem_read, nelem_to_read;
864
865#ifdef DEBUG
866	if (debug & PCSMPLDEBUG) {
867		(void) printf(
868		    "[process_pcsamples] number of pcsamples = %lld\n",
869		    bufp->bufsize);
870	}
871#endif /* DEBUG */
872
873	/* buffer with no pc samples ? */
874	if (bufp->bufsize == 0)
875		return;
876
877	/*
878	 * If we're processing pcsamples of a profile sum, we could have
879	 * more than PROF_BUFFER_SIZE number of samples. In such a case,
880	 * we must read the pcsamples in chunks.
881	 */
882	if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE)
883		chunk_size = PROF_BUFFER_SIZE;
884
885	/* Allocate for the pcsample chunk */
886	pc_samples = (Address *) calloc(chunk_size, sizeof (Address));
887	if (pc_samples == NULL) {
888		(void) fprintf(stderr, "%s: no room for %d sample pc's\n",
889		    whoami, chunk_size);
890		exit(EX_OSERR);
891	}
892
893	/* Copy the current set of pcsamples */
894	nelem_read = 0;
895	nelem_to_read = bufp->bufsize;
896	p = (char *)bufp + bufp->buffer;
897
898	while (nelem_read < nelem_to_read) {
899		(void) memcpy((void *) pc_samples, p,
900		    chunk_size * sizeof (Address));
901
902		/* Sort the pc samples */
903		qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp);
904
905		/*
906		 * Assign pcsamples to functions in the currently active
907		 * module list
908		 */
909		for (mi = &modules; mi; mi = mi->next) {
910			if (mi->active == FALSE)
911				continue;
912			assign_pcsamples(mi, pc_samples, chunk_size);
913		}
914
915		p += (chunk_size * sizeof (Address));
916		nelem_read += chunk_size;
917
918		if ((nelem_to_read - nelem_read) < chunk_size)
919			chunk_size = nelem_to_read - nelem_read;
920	}
921
922	free(pc_samples);
923
924	/* Update total number of pcsamples read so far */
925	n_pcsamples += bufp->bufsize;
926}
927
928static mod_info_t *
929find_module(Address addr)
930{
931	mod_info_t	*mi;
932
933	for (mi = &modules; mi; mi = mi->next) {
934		if (mi->active == FALSE)
935			continue;
936
937		if (addr >= mi->load_base && addr < mi->load_end)
938			return (mi);
939	}
940
941	return (NULL);
942}
943
/*
 * Process one PROF_CALLGRAPH_T object: walk the chain of callee
 * records (linked through next_to) and, for each callee, the chain of
 * its caller records (linked through next_from), and hand every arc
 * whose caller and callee both fall inside an active module to tally().
 *
 *	cgp	the call graph object; every offset stored in it is
 *		taken relative to cgp itself, 0 ending a chain
 */
static void
process_cgraph(ProfCallGraph *cgp)
{
	struct rawarc	arc;
	mod_info_t	*callee_mi, *caller_mi;
	ProfFunction	*calleep, *callerp;
	Index		caller_off, callee_off;

	/*
	 * Note that *callee_off* increment in the for loop below
	 * uses *calleep* and *calleep* doesn't get set until the for loop
	 * is entered. We don't expect the increment to be executed before
	 * the loop body is executed atleast once, so this should be ok.
	 */
	for (callee_off = cgp->functions; callee_off;
	    callee_off = calleep->next_to) {

		/* LINTED: pointer cast */
		calleep = (ProfFunction *)((char *)cgp + callee_off);

		/*
		 * We could choose either to sort the {caller, callee}
		 * list twice and assign callee/caller to modules or inspect
		 * each callee/caller in the active modules list. Since
		 * the modules list is usually very small, we'l choose the
		 * latter.
		 */

		/*
		 * If we cannot identify a callee with a module, there's
		 * no use worrying about who called it.
		 */
		if ((callee_mi = find_module(calleep->topc)) == NULL) {
#ifdef DEBUG
			if (debug & CGRAPHDEBUG) {
				(void) printf(
				    "[process_cgraph] callee %#llx missed\n",
				    calleep->topc);
			}
#endif /* DEBUG */
			continue;
		} else
			arc.raw_selfpc = calleep->topc;

		/*
		 * The callee's own record is also the first record of
		 * its caller chain, hence the start at callee_off.
		 */
		for (caller_off = callee_off; caller_off;
		    caller_off = callerp->next_from)  {

			/* LINTED: pointer cast */
			callerp = (ProfFunction *)((char *)cgp + caller_off);
			/* callers outside every active module are dropped */
			if ((caller_mi = find_module(callerp->frompc)) ==
			    NULL) {
#ifdef DEBUG
				if (debug & CGRAPHDEBUG) {
					(void) printf(
					    "[process_cgraph] caller %#llx "
					    "missed\n", callerp->frompc);
				}
#endif /* DEBUG */
				continue;
			}

			arc.raw_frompc = callerp->frompc;
			arc.raw_count = callerp->count;

#ifdef DEBUG
			if (debug & CGRAPHDEBUG) {
				(void) printf(
				    "[process_cgraph] arc <%#llx, %#llx, "
				    "%lld>\n", arc.raw_frompc, arc.raw_selfpc,
				    arc.raw_count);
			}
#endif /* DEBUG */
			tally(caller_mi, callee_mi, &arc);
		}
	}

#ifdef DEBUG
	puts("\n");
#endif /* DEBUG */
}
1024
1025/*
1026 * Two modules overlap each other if they don't lie completely *outside*
1027 * each other.
1028 */
1029static bool
1030does_overlap(ProfModule *new, mod_info_t *old)
1031{
1032	/* case 1: new module lies completely *before* the old one */
1033	if (new->startaddr < old->load_base && new->endaddr <= old->load_base)
1034		return (FALSE);
1035
1036	/* case 2: new module lies completely *after* the old one */
1037	if (new->startaddr >= old->load_end && new->endaddr >= old->load_end)
1038		return (FALSE);
1039
1040	/* probably a dlopen: the modules overlap each other */
1041	return (TRUE);
1042}
1043
1044static bool
1045is_same_as_aout(char *modpath, struct stat *buf)
1046{
1047	if (stat(modpath, buf) == -1) {
1048		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
1049		    whoami, modpath);
1050		exit(EX_NOINPUT);
1051	}
1052
1053	if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino))
1054		return (TRUE);
1055	else
1056		return (FALSE);
1057}
1058
1059static void
1060process_modules(ProfModuleList *modlp)
1061{
1062	ProfModule	*newmodp;
1063	mod_info_t	*mi, *last, *new_module;
1064	char		*so_path;
1065	bool		more_modules = TRUE;
1066	struct stat	so_statbuf;
1067
1068#ifdef DEBUG
1069	if (debug & MODULEDEBUG) {
1070		(void) printf("[process_modules] module obj version %u\n",
1071		    modlp->version);
1072	}
1073#endif /* DEBUG */
1074
1075	/* Check version of module type object */
1076	if (modlp->version > PROF_MODULES_VER) {
1077		(void) fprintf(stderr, "%s: version %d for module type objects"
1078		    "is not supported\n", whoami, modlp->version);
1079		exit(EX_SOFTWARE);
1080	}
1081
1082
1083	/*
1084	 * Scan the PROF_MODULES_T list and add modules to current list
1085	 * of modules, if they're not present already
1086	 */
1087	/* LINTED: pointer cast */
1088	newmodp = (ProfModule *)((char *)modlp + modlp->modules);
1089	do {
1090		/*
1091		 * Since the prog could've been renamed after its run, we
1092		 * should see if this overlaps a.out. If it does, it is
1093		 * probably the renamed aout. We should also skip any other
1094		 * non-sharedobj's that we see (or should we report an error ?)
1095		 */
1096		so_path = (caddr_t)modlp + newmodp->path;
1097		if (does_overlap(newmodp, &modules) ||
1098		    is_same_as_aout(so_path, &so_statbuf) ||
1099		    (!is_shared_obj(so_path))) {
1100
1101			if (!newmodp->next)
1102				more_modules = FALSE;
1103
1104			/* LINTED: pointer cast */
1105			newmodp = (ProfModule *)
1106			    ((caddr_t)modlp + newmodp->next);
1107#ifdef DEBUG
1108			if (debug & MODULEDEBUG) {
1109				(void) printf(
1110				    "[process_modules] `%s'\n", so_path);
1111				(void) printf("                  skipped\n");
1112			}
1113#endif /* DEBUG */
1114			continue;
1115		}
1116#ifdef DEBUG
1117		if (debug & MODULEDEBUG)
1118			(void) printf("[process_modules] `%s'...\n", so_path);
1119#endif /* DEBUG */
1120
1121		/*
1122		 * Check all modules (leave the first one, 'cos that
1123		 * is the program executable info). If this module is already
1124		 * there in the list, update the load addresses and proceed.
1125		 */
1126		last = &modules;
1127		while ((mi = last->next) != NULL) {
1128			/*
1129			 * We expect the full pathname for all shared objects
1130			 * needed by the program executable. In this case, we
1131			 * simply need to compare the paths to see if they are
1132			 * the same file.
1133			 */
1134			if (strcmp(mi->name, so_path) == 0)
1135				break;
1136
1137			/*
1138			 * Check if this new shared object will overlap
1139			 * any existing module. If yes, remove the old one
1140			 * from the linked list (but don't free it, 'cos
1141			 * there may be symbols referring to this module
1142			 * still)
1143			 */
1144			if (does_overlap(newmodp, mi)) {
1145#ifdef DEBUG
1146				if (debug & MODULEDEBUG) {
1147					(void) printf(
1148					    "[process_modules] `%s'\n",
1149					    so_path);
1150					(void) printf(
1151					    "                  overlaps\n");
1152					(void) printf(
1153					    "                  `%s'\n",
1154					    mi->name);
1155				}
1156#endif /* DEBUG */
1157				mi->active = FALSE;
1158			}
1159
1160			last = mi;
1161		}
1162
1163		/* Module already there, skip it */
1164		if (mi != NULL) {
1165			mi->load_base = newmodp->startaddr;
1166			mi->load_end = newmodp->endaddr;
1167			mi->active = TRUE;
1168			if (!newmodp->next)
1169				more_modules = FALSE;
1170
1171			/* LINTED: pointer cast */
1172			newmodp = (ProfModule *)
1173			    ((caddr_t)modlp + newmodp->next);
1174
1175#ifdef DEBUG
1176			if (debug & MODULEDEBUG) {
1177				(void) printf("[process_modules] base=%#llx, "
1178				    "end=%#llx\n", mi->load_base, mi->load_end);
1179			}
1180#endif /* DEBUG */
1181			continue;
1182		}
1183
1184		/*
1185		 * Check if gmon.out is outdated with respect to the new
1186		 * module we want to add
1187		 */
1188		if (gmonout_info.mtime < so_statbuf.st_mtime) {
1189			(void) fprintf(stderr,
1190			    "%s: shared obj outdates prof info\n", whoami);
1191			(void) fprintf(stderr, "\t(newer %s)\n", so_path);
1192			exit(EX_NOINPUT);
1193		}
1194
1195		/* Create a new module element */
1196		new_module = malloc(sizeof (mod_info_t));
1197		if (new_module == NULL) {
1198			(void) fprintf(stderr, "%s: no room for %d bytes\n",
1199			    whoami, sizeof (mod_info_t));
1200			exit(EX_OSERR);
1201		}
1202
1203		/* and fill in info... */
1204		new_module->id = n_modules + 1;
1205		new_module->load_base = newmodp->startaddr;
1206		new_module->load_end = newmodp->endaddr;
1207		new_module->name = malloc(strlen(so_path) + 1);
1208		if (new_module->name == NULL) {
1209			(void) fprintf(stderr, "%s: no room for %d bytes\n",
1210			    whoami, strlen(so_path) + 1);
1211			exit(EX_OSERR);
1212		}
1213		(void) strcpy(new_module->name, so_path);
1214#ifdef DEBUG
1215		if (debug & MODULEDEBUG) {
1216			(void) printf(
1217			    "[process_modules] base=%#llx, end=%#llx\n",
1218			    new_module->load_base, new_module->load_end);
1219		}
1220#endif /* DEBUG */
1221
1222		/* Create this module's nameslist */
1223		process_namelist(new_module);
1224
1225		/* Add it to the tail of active module list */
1226		last->next = new_module;
1227		n_modules++;
1228
1229#ifdef DEBUG
1230		if (debug & MODULEDEBUG) {
1231			(void) printf(
1232			    "[process_modules] total shared objects = %ld\n",
1233			    n_modules - 1);
1234		}
1235#endif /* DEBUG */
1236		/*
1237		 * Move to the next module in the PROF_MODULES_T list
1238		 * (if present)
1239		 */
1240		if (!newmodp->next)
1241			more_modules = FALSE;
1242
1243		/* LINTED: pointer cast */
1244		newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next);
1245
1246	} while (more_modules);
1247}
1248
1249static void
1250reset_active_modules(void)
1251{
1252	mod_info_t	*mi;
1253
1254	/* Except the executable, no other module should remain active */
1255	for (mi = modules.next; mi; mi = mi->next)
1256		mi->active = FALSE;
1257}
1258
1259static void
1260getpfiledata(caddr_t memp, size_t fsz)
1261{
1262	ProfObject	*objp;
1263	caddr_t		file_end;
1264	bool		found_pcsamples = FALSE, found_cgraph = FALSE;
1265
1266	/*
1267	 * Before processing a new gmon.out, all modules except the
1268	 * program executable must be made inactive, so that symbols
1269	 * are searched only in the program executable, if we don't
1270	 * find a MODULES_T object. Don't do it *after* we read a gmon.out,
1271	 * because we need the active module data after we're done with
1272	 * the last gmon.out, if we're doing summing.
1273	 */
1274	reset_active_modules();
1275
1276	file_end = memp + fsz;
1277	/* LINTED: pointer cast */
1278	objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size);
1279	while ((caddr_t)objp < file_end) {
1280#ifdef DEBUG
1281		{
1282			unsigned int	type = 0;
1283
1284			if (debug & MONOUTDEBUG) {
1285				if (objp->type <= MAX_OBJTYPES)
1286					type = objp->type;
1287
1288				(void) printf(
1289				    "\n[getpfiledata] object %s [%#lx]\n",
1290				    objname[type], objp->type);
1291			}
1292		}
1293#endif /* DEBUG */
1294		switch (objp->type) {
1295			case PROF_MODULES_T :
1296				process_modules((ProfModuleList *) objp);
1297				break;
1298
1299			case PROF_CALLGRAPH_T :
1300				process_cgraph((ProfCallGraph *) objp);
1301				found_cgraph = TRUE;
1302				break;
1303
1304			case PROF_BUFFER_T :
1305				process_pcsamples((ProfBuffer *) objp);
1306				found_pcsamples = TRUE;
1307				break;
1308
1309			default :
1310				(void) fprintf(stderr,
1311				    "%s: unknown prof object type=%d\n",
1312				    whoami, objp->type);
1313				exit(EX_SOFTWARE);
1314		}
1315		/* LINTED: pointer cast */
1316		objp = (ProfObject *)((caddr_t)objp + objp->size);
1317	}
1318
1319	if (!found_cgraph || !found_pcsamples) {
1320		(void) fprintf(stderr,
1321		    "%s: missing callgraph/pcsamples object\n", whoami);
1322		exit(EX_SOFTWARE);
1323	}
1324
1325	if ((caddr_t)objp > file_end) {
1326		(void) fprintf(stderr, "%s: malformed profile file.\n", whoami);
1327		exit(EX_SOFTWARE);
1328	}
1329
1330	if (first_file)
1331		first_file = FALSE;
1332}
1333
1334static void
1335readarcs(FILE *pfile)
1336{
1337	/*
1338	 *	the rest of the file consists of
1339	 *	a bunch of <from,self,count> tuples.
1340	 */
1341	/* CONSTCOND */
1342	while (1) {
1343		struct rawarc	arc;
1344
1345		if (rflag) {
1346			if (Bflag) {
1347				L_cgarc64		rtld_arc64;
1348
1349				/*
1350				 * If rflag is set then this is an profiled
1351				 * image generated by rtld.  It needs to be
1352				 * 'converted' to the standard data format.
1353				 */
1354				if (fread(&rtld_arc64,
1355				    sizeof (L_cgarc64), 1, pfile) != 1)
1356					break;
1357
1358				if (rtld_arc64.cg_from == PRF_OUTADDR64)
1359					arc.raw_frompc = s_highpc + 0x10;
1360				else
1361					arc.raw_frompc =
1362					    (pctype)rtld_arc64.cg_from;
1363				arc.raw_selfpc = (pctype)rtld_arc64.cg_to;
1364				arc.raw_count = (actype)rtld_arc64.cg_count;
1365			} else {
1366				L_cgarc		rtld_arc;
1367
1368				/*
1369				 * If rflag is set then this is an profiled
1370				 * image generated by rtld.  It needs to be
1371				 * 'converted' to the standard data format.
1372				 */
1373				if (fread(&rtld_arc,
1374				    sizeof (L_cgarc), 1, pfile) != 1)
1375					break;
1376
1377				if (rtld_arc.cg_from == PRF_OUTADDR)
1378					arc.raw_frompc = s_highpc + 0x10;
1379				else
1380					arc.raw_frompc = (pctype)
1381					    (uintptr_t)rtld_arc.cg_from;
1382				arc.raw_selfpc = (pctype)
1383				    (uintptr_t)rtld_arc.cg_to;
1384				arc.raw_count = (actype)rtld_arc.cg_count;
1385			}
1386		} else {
1387			if (Bflag) {
1388				if (fread(&arc, sizeof (struct rawarc), 1,
1389				    pfile) != 1) {
1390					break;
1391				}
1392			} else {
1393				/*
1394				 * If these aren't big %pc's, we need to read
1395				 * into the 32-bit raw arc structure, and
1396				 * assign the members into the actual arc.
1397				 */
1398				struct rawarc32 arc32;
1399				if (fread(&arc32, sizeof (struct rawarc32),
1400				    1, pfile) != 1)
1401					break;
1402				arc.raw_frompc = (pctype)arc32.raw_frompc;
1403				arc.raw_selfpc = (pctype)arc32.raw_selfpc;
1404				arc.raw_count  = (actype)arc32.raw_count;
1405			}
1406		}
1407
1408#ifdef DEBUG
1409		if (debug & SAMPLEDEBUG) {
1410			(void) printf("[getpfile] frompc 0x%llx selfpc "
1411			    "0x%llx count %lld\n", arc.raw_frompc,
1412			    arc.raw_selfpc, arc.raw_count);
1413		}
1414#endif /* DEBUG */
1415		/*
1416		 *	add this arc
1417		 */
1418		tally(&modules, &modules, &arc);
1419	}
1420	if (first_file)
1421		first_file = FALSE;
1422}
1423
1424static void
1425readsamples(FILE *pfile)
1426{
1427	sztype		i;
1428	unsigned_UNIT	sample;
1429
1430	if (samples == 0) {
1431		samples = (unsigned_UNIT *) calloc(nsamples,
1432		    sizeof (unsigned_UNIT));
1433		if (samples == 0) {
1434			(void) fprintf(stderr,
1435			    "%s: No room for %d sample pc's\n",
1436			    whoami, sampbytes / sizeof (unsigned_UNIT));
1437			exit(EX_OSERR);
1438		}
1439	}
1440
1441	for (i = 0; i < nsamples; i++) {
1442		(void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile);
1443		if (feof(pfile))
1444			break;
1445		samples[i] += sample;
1446	}
1447	if (i != nsamples) {
1448		(void) fprintf(stderr,
1449		    "%s: unexpected EOF after reading %d/%d samples\n",
1450		    whoami, --i, nsamples);
1451		exit(EX_IOERR);
1452	}
1453}
1454
1455static void *
1456handle_versioned(FILE *pfile, char *filename, size_t *fsz)
1457{
1458	int		fd;
1459	bool		invalid_version;
1460	caddr_t		fmem;
1461	struct stat	buf;
1462	ProfHeader	prof_hdr;
1463	off_t		lret;
1464
1465	/*
1466	 * Check versioning info. For now, let's say we provide
1467	 * backward compatibility, so we accept all older versions.
1468	 */
1469	if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) {
1470		perror("fread()");
1471		exit(EX_IOERR);
1472	}
1473
1474	invalid_version = FALSE;
1475	if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION)
1476		invalid_version = TRUE;
1477	else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) {
1478		if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION)
1479			invalid_version = FALSE;
1480	}
1481
1482	if (invalid_version) {
1483		(void) fprintf(stderr, "%s: version %d.%d not supported\n",
1484		    whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver);
1485		exit(EX_SOFTWARE);
1486	}
1487
1488	/*
1489	 * Map gmon.out onto memory.
1490	 */
1491	(void) fclose(pfile);
1492	if ((fd = open(filename, O_RDONLY)) == -1) {
1493		perror(filename);
1494		exit(EX_IOERR);
1495	}
1496
1497	if ((lret = lseek(fd, 0, SEEK_END)) == -1) {
1498		perror(filename);
1499		exit(EX_IOERR);
1500	}
1501	*fsz = lret;
1502
1503	fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0);
1504	if (fmem == MAP_FAILED) {
1505		(void) fprintf(stderr, "%s: can't map %s\n", whoami, filename);
1506		exit(EX_IOERR);
1507	}
1508
1509	/*
1510	 * Before we close this fd, save this gmon.out's info to later verify
1511	 * if the shared objects it references have changed since the time
1512	 * they were used to generate this gmon.out
1513	 */
1514	if (fstat(fd, &buf) == -1) {
1515		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
1516		    whoami, filename);
1517		exit(EX_NOINPUT);
1518	}
1519	gmonout_info.dev = buf.st_dev;
1520	gmonout_info.ino = buf.st_ino;
1521	gmonout_info.mtime = buf.st_mtime;
1522	gmonout_info.size = buf.st_size;
1523
1524	(void) close(fd);
1525
1526	return ((void *) fmem);
1527}
1528
1529static void *
1530openpfile(char *filename, size_t *fsz)
1531{
1532	struct hdr	tmp;
1533	FILE		*pfile;
1534	unsigned long	magic_num;
1535	size_t		hdrsize;
1536	static bool	first_time = TRUE;
1537	extern bool	old_style;
1538
1539	if ((pfile = fopen(filename, "r")) == NULL) {
1540		perror(filename);
1541		exit(EX_IOERR);
1542	}
1543
1544	/*
1545	 * Read in the magic. Note that we changed the cast "unsigned long"
1546	 * to "unsigned int" because that's how h_magic is defined in the
1547	 * new format ProfHeader.
1548	 */
1549	if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) {
1550		perror("fread()");
1551		exit(EX_IOERR);
1552	}
1553
1554	rewind(pfile);
1555
1556	/*
1557	 * First check if this is versioned or *old-style* gmon.out
1558	 */
1559	if (magic_num == (unsigned int)PROF_MAGIC) {
1560		if ((!first_time) && (old_style == TRUE)) {
1561			(void) fprintf(stderr, "%s: can't mix old & new format "
1562			    "profiled files\n", whoami);
1563			exit(EX_SOFTWARE);
1564		}
1565		first_time = FALSE;
1566		old_style = FALSE;
1567		return (handle_versioned(pfile, filename, fsz));
1568	}
1569
1570	if ((!first_time) && (old_style == FALSE)) {
1571		(void) fprintf(stderr, "%s: can't mix old & new format "
1572		    "profiled files\n", whoami);
1573		exit(EX_SOFTWARE);
1574	}
1575
1576	first_time = FALSE;
1577	old_style = TRUE;
1578	fsz = 0;
1579
1580	/*
1581	 * Now, we need to determine if this is a run-time linker
1582	 * profiled file or if it is a standard gmon.out.
1583	 *
1584	 * We do this by checking if magic matches PRF_MAGIC. If it
1585	 * does, then this is a run-time linker profiled file, if it
1586	 * doesn't, it must be a gmon.out file.
1587	 */
1588	if (magic_num == (unsigned long)PRF_MAGIC)
1589		rflag = TRUE;
1590	else
1591		rflag = FALSE;
1592
1593	hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32);
1594
1595	if (rflag) {
1596		if (Bflag) {
1597			L_hdr64		l_hdr64;
1598
1599			/*
1600			 * If the rflag is set then the input file is
1601			 * rtld profiled data, we'll read it in and convert
1602			 * it to the standard format (ie: make it look like
1603			 * a gmon.out file).
1604			 */
1605			if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) {
1606				perror("fread()");
1607				exit(EX_IOERR);
1608			}
1609			if (l_hdr64.hd_version != PRF_VERSION_64) {
1610				(void) fprintf(stderr,
1611				    "%s: expected version %d, "
1612				    "got version %d when processing 64-bit "
1613				    "run-time linker profiled file.\n",
1614				    whoami, PRF_VERSION_64, l_hdr64.hd_version);
1615				exit(EX_SOFTWARE);
1616			}
1617			tmp.lowpc = 0;
1618			tmp.highpc = (pctype)l_hdr64.hd_hpc;
1619			tmp.ncnt = hdrsize + l_hdr64.hd_psize;
1620		} else {
1621			L_hdr		l_hdr;
1622
1623			/*
1624			 * If the rflag is set then the input file is
1625			 * rtld profiled data, we'll read it in and convert
1626			 * it to the standard format (ie: make it look like
1627			 * a gmon.out file).
1628			 */
1629			if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) {
1630				perror("fread()");
1631				exit(EX_IOERR);
1632			}
1633			if (l_hdr.hd_version != PRF_VERSION) {
1634				(void) fprintf(stderr,
1635				    "%s: expected version %d, "
1636				    "got version %d when processing "
1637				    "run-time linker profiled file.\n",
1638				    whoami, PRF_VERSION, l_hdr.hd_version);
1639				exit(EX_SOFTWARE);
1640			}
1641			tmp.lowpc = 0;
1642			tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc;
1643			tmp.ncnt = hdrsize + l_hdr.hd_psize;
1644		}
1645	} else {
1646		if (Bflag) {
1647			if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) {
1648				perror("fread()");
1649				exit(EX_IOERR);
1650			}
1651		} else {
1652			/*
1653			 * If we're not reading big %pc's, we need to read
1654			 * the 32-bit header, and assign the members to
1655			 * the actual header.
1656			 */
1657			struct hdr32 hdr32;
1658			if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) {
1659				perror("fread()");
1660				exit(EX_IOERR);
1661			}
1662			tmp.lowpc = hdr32.lowpc;
1663			tmp.highpc = hdr32.highpc;
1664			tmp.ncnt = hdr32.ncnt;
1665		}
1666	}
1667
1668	/*
1669	 * perform sanity check on profiled file we've opened.
1670	 */
1671	if (tmp.lowpc >= tmp.highpc) {
1672		if (rflag)
1673			(void) fprintf(stderr,
1674			    "%s: badly formed profiled data.\n",
1675			    filename);
1676		else
1677			(void) fprintf(stderr,
1678			    "%s: badly formed gmon.out file.\n",
1679			    filename);
1680		exit(EX_SOFTWARE);
1681	}
1682
1683	if (s_highpc != 0 && (tmp.lowpc != h.lowpc ||
1684	    tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) {
1685		(void) fprintf(stderr,
1686		    "%s: incompatible with first gmon file\n",
1687		    filename);
1688		exit(EX_IOERR);
1689	}
1690	h = tmp;
1691	s_lowpc = h.lowpc;
1692	s_highpc = h.highpc;
1693	lowpc = h.lowpc / sizeof (UNIT);
1694	highpc = h.highpc / sizeof (UNIT);
1695	sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0;
1696	nsamples = sampbytes / sizeof (unsigned_UNIT);
1697
1698#ifdef DEBUG
1699	if (debug & SAMPLEDEBUG) {
1700		(void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc "
1701		    "0x%llx hdr.ncnt %lld\n",
1702		    h.lowpc, h.highpc, h.ncnt);
1703		(void) printf(
1704		    "[openpfile]   s_lowpc 0x%llx   s_highpc 0x%llx\n",
1705		    s_lowpc, s_highpc);
1706		(void) printf(
1707		    "[openpfile]     lowpc 0x%llx     highpc 0x%llx\n",
1708		    lowpc, highpc);
1709		(void) printf("[openpfile] sampbytes %d nsamples %d\n",
1710		    sampbytes, nsamples);
1711	}
1712#endif /* DEBUG */
1713
1714	return ((void *) pfile);
1715}
1716
1717/*
1718 * Information from a gmon.out file depends on whether it's versioned
1719 * or non-versioned, *old style* gmon.out. If old-style, it is in two
1720 * parts : an array of sampling hits within pc ranges, and the arcs. If
1721 * versioned, it contains a header, followed by any number of
1722 * modules/callgraph/pcsample_buffer objects.
1723 */
1724static void
1725getpfile(char *filename)
1726{
1727	void		*handle;
1728	size_t		fsz;
1729
1730	handle = openpfile(filename, &fsz);
1731
1732	if (old_style) {
1733		readsamples((FILE *)handle);
1734		readarcs((FILE *)handle);
1735		(void) fclose((FILE *)handle);
1736		return;
1737	}
1738
1739	getpfiledata((caddr_t)handle, fsz);
1740	(void) munmap(handle, fsz);
1741}
1742
1743int
1744main(int argc, char **argv)
1745{
1746	char	**sp;
1747	nltype	**timesortnlp;
1748	int		c;
1749	int		errflg;
1750
1751	prog_name = *argv;  /* preserve program name */
1752	debug = 0;
1753	nflag = FALSE;
1754	bflag = TRUE;
1755	lflag = FALSE;
1756	Cflag = FALSE;
1757	first_file = TRUE;
1758	rflag = FALSE;
1759	Bflag = FALSE;
1760	errflg = FALSE;
1761
1762	while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF)
1763		switch (c) {
1764		case 'a':
1765			aflag = TRUE;
1766			break;
1767		case 'b':
1768			bflag = FALSE;
1769			break;
1770		case 'c':
1771			cflag = TRUE;
1772			break;
1773		case 'C':
1774			Cflag = TRUE;
1775			break;
1776		case 'd':
1777			dflag = TRUE;
1778			debug |= atoi(optarg);
1779			(void) printf("[main] debug = 0x%x\n", debug);
1780			break;
1781		case 'D':
1782			Dflag = TRUE;
1783			break;
1784		case 'E':
1785			addlist(Elist, optarg);
1786			Eflag = TRUE;
1787			addlist(elist, optarg);
1788			eflag = TRUE;
1789			break;
1790		case 'e':
1791			addlist(elist, optarg);
1792			eflag = TRUE;
1793			break;
1794		case 'F':
1795			addlist(Flist, optarg);
1796			Fflag = TRUE;
1797			addlist(flist, optarg);
1798			fflag = TRUE;
1799			break;
1800		case 'f':
1801			addlist(flist, optarg);
1802			fflag = TRUE;
1803			break;
1804		case 'l':
1805			lflag = TRUE;
1806			break;
1807		case 'n':
1808			nflag = TRUE;
1809			number_funcs_toprint = atoi(optarg);
1810			break;
1811		case 's':
1812			sflag = TRUE;
1813			break;
1814		case 'z':
1815			zflag = TRUE;
1816			break;
1817		case '?':
1818			errflg++;
1819
1820		}
1821
1822	if (errflg) {
1823		(void) fprintf(stderr,
1824		    "usage: gprof [ -abcCDlsz ] [ -e function-name ] "
1825		    "[ -E function-name ]\n\t[ -f function-name ] "
1826		    "[ -F function-name  ]\n\t[  image-file  "
1827		    "[ profile-file ... ] ]\n");
1828		exit(EX_USAGE);
1829	}
1830
1831	if (optind < argc) {
1832		a_outname  = argv[optind++];
1833	} else {
1834		a_outname  = A_OUTNAME;
1835	}
1836	if (optind < argc) {
1837		gmonname = argv[optind++];
1838	} else {
1839		gmonname = GMONNAME;
1840	}
1841	/*
1842	 *	turn off default functions
1843	 */
1844	for (sp = &defaultEs[0]; *sp; sp++) {
1845		Eflag = TRUE;
1846		addlist(Elist, *sp);
1847		eflag = TRUE;
1848		addlist(elist, *sp);
1849	}
1850	/*
1851	 *	how many ticks per second?
1852	 *	if we can't tell, report time in ticks.
1853	 */
1854	hz = sysconf(_SC_CLK_TCK);
1855	if (hz == -1) {
1856		hz = 1;
1857		(void) fprintf(stderr, "time is in ticks, not seconds\n");
1858	}
1859
1860	getnfile(a_outname);
1861
1862	/*
1863	 *	get information about mon.out file(s).
1864	 */
1865	do {
1866		getpfile(gmonname);
1867		if (optind < argc)
1868			gmonname = argv[optind++];
1869		else
1870			optind++;
1871	} while (optind <= argc);
1872	/*
1873	 *	dump out a gmon.sum file if requested
1874	 */
1875	if (sflag || Dflag)
1876		dumpsum(GMONSUM);
1877
1878	if (old_style) {
1879		/*
1880		 *	assign samples to procedures
1881		 */
1882		asgnsamples();
1883	}
1884
1885	/*
1886	 *	assemble the dynamic profile
1887	 */
1888	timesortnlp = doarcs();
1889
1890	/*
1891	 *	print the dynamic profile
1892	 */
1893#ifdef DEBUG
1894	if (debug & ANYDEBUG) {
1895		/* raw output of all symbols in all their glory */
1896		int i;
1897		(void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
1898		    "#calls, selfcalls, index \n");
1899		for (i = 0; i < modules.nname; i++) { 	/* Print each symbol */
1900			if (timesortnlp[i]->name)
1901				(void) printf(" %s ", timesortnlp[i]->name);
1902			else
1903				(void) printf(" <cycle> ");
1904			(void) printf(" %lld ", timesortnlp[i]->value);
1905			(void) printf(" %lld ", timesortnlp[i]->svalue);
1906			(void) printf(" %f ", timesortnlp[i]->time);
1907			(void) printf(" %lld ", timesortnlp[i]->ncall);
1908			(void) printf(" %lld ", timesortnlp[i]->selfcalls);
1909			(void) printf(" %d ", timesortnlp[i]->index);
1910			(void) printf(" \n");
1911		}
1912	}
1913#endif /* DEBUG */
1914
1915	printgprof(timesortnlp);
1916	/*
1917	 *	print the flat profile
1918	 */
1919	printprof();
1920	/*
1921	 *	print the index
1922	 */
1923	printindex();
1924
1925	/*
1926	 * print the modules
1927	 */
1928	printmodules();
1929
1930	done();
1931	/* NOTREACHED */
1932	return (0);
1933}
1934