xref: /illumos-gate/usr/src/cmd/sgs/gprof/common/readelf.c (revision 326c1baf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include	"gprof.h"
28 #include	<stdlib.h>
29 #include	<sys/file.h>
30 #include	<fcntl.h>
31 #include	<unistd.h>
32 #include	<string.h>
33 #include	<sysexits.h>
34 #include	<libelf.h>
35 #include	"gelf.h"
36 
#ifdef DEBUG
static void	debug_dup_del(nltype *, nltype *);

/*
 * Debug-only tracing helpers.  Each one does its work only when the
 * ELFDEBUG bit is set in the global `debug' mask.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation
 * followed by a semicolon is exactly one statement; a bare
 * "if (...) stmt;" expansion would silently capture an `else' that
 * follows the macro call.
 */
#define	DPRINTF(msg, file)	do { \
					if (debug & ELFDEBUG) \
						(void) printf(msg, file); \
				} while (0)

#define	PRINTF(msg)		do { \
					if (debug & ELFDEBUG) \
						(void) printf(msg); \
				} while (0)

#define	DEBUG_DUP_DEL(keeper, louser)	do { \
					if (debug & ELFDEBUG) \
						debug_dup_del(keeper, louser); \
				} while (0)

#else
/* Without DEBUG the tracing helpers expand to nothing. */
#define	DPRINTF(msg, file)
#define	PRINTF(msg)
#define	DEBUG_DUP_DEL(keeper, louser)
#endif
54 
55 size_t	textbegin, textsize;
56 
57 /* Prototype definitions first */
58 
59 static void	process(char *filename, int fd);
60 static void	get_symtab(Elf *elf, mod_info_t *module);
61 static void	get_textseg(Elf *elf, int fd);
62 static void	save_aout_info(char *);
63 
/*
 * Print `error' on stderr together with the message libelf reports
 * for elf_errmsg(-1), then terminate the program with EX_SOFTWARE.
 * This routine does not return.
 */
static void
fatal_error(char *error)
{
	const char *elf_msg = elf_errmsg(-1);

	(void) fprintf(stderr, "Fatal ELF error: %s (%s)\n", error, elf_msg);
	exit(EX_SOFTWARE);
}
71 
72 bool
is_shared_obj(char * name)73 is_shared_obj(char *name)
74 {
75 	int		fd;
76 	Elf		*elf;
77 	GElf_Ehdr	ehdr;
78 
79 	if ((fd = open(name, O_RDONLY)) == -1) {
80 		(void) fprintf(stderr, "%s: can't open `%s'\n", whoami, name);
81 		exit(EX_NOINPUT);
82 	}
83 
84 	if (elf_version(EV_CURRENT) == EV_NONE)
85 		fatal_error("libelf is out of date");
86 
87 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
88 		fatal_error("can't read as ELF file");
89 
90 	if (gelf_getehdr(elf, &ehdr) == NULL)
91 		fatal_error("can't read ehdr");
92 
93 	(void) elf_end(elf);
94 	(void) close(fd);
95 
96 	if (ehdr.e_type == ET_DYN)
97 		return (TRUE);
98 	else
99 		return (FALSE);
100 }
101 
102 static void
save_aout_info(char * aoutname)103 save_aout_info(char *aoutname)
104 {
105 	struct stat		buf;
106 	extern fl_info_t	aout_info;
107 
108 	if (stat(aoutname, &buf) == -1) {
109 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
110 		    whoami, aoutname);
111 		exit(EX_NOINPUT);
112 	}
113 
114 	aout_info.dev = buf.st_dev;
115 	aout_info.ino = buf.st_ino;
116 	aout_info.mtime = buf.st_mtime;
117 	aout_info.size = buf.st_size;
118 }
119 
120 void
getnfile(char * aoutname)121 getnfile(char *aoutname)
122 {
123 	int	fd;
124 
125 	DPRINTF(" Attempting to open %s  \n", aoutname);
126 	if ((fd = open((aoutname), O_RDONLY)) == -1) {
127 		(void) fprintf(stderr, "%s: can't open `%s'\n",
128 		    whoami, aoutname);
129 		exit(EX_NOINPUT);
130 	}
131 	process(aoutname, fd);
132 	save_aout_info(aoutname);
133 
134 	(void) close(fd);
135 }
136 
137 static GElf_Addr
get_txtorigin(Elf * elf)138 get_txtorigin(Elf *elf)
139 {
140 	GElf_Ehdr	ehdr;
141 	GElf_Phdr	phdr;
142 	GElf_Half	ndx;
143 	GElf_Addr	txt_origin = 0;
144 	bool		first_load_seg = TRUE;
145 
146 	if (gelf_getehdr(elf, &ehdr) == NULL)
147 		fatal_error("can't read ehdr");
148 
149 	for (ndx = 0; ndx < ehdr.e_phnum; ndx++) {
150 		if (gelf_getphdr(elf, ndx, &phdr) == NULL)
151 			continue;
152 
153 		if ((phdr.p_type == PT_LOAD) && !(phdr.p_flags & PF_W)) {
154 			if (first_load_seg || phdr.p_vaddr < txt_origin)
155 				txt_origin = phdr.p_vaddr;
156 
157 			if (first_load_seg)
158 				first_load_seg = FALSE;
159 		}
160 	}
161 
162 	return (txt_origin);
163 }
164 
165 void
process_namelist(mod_info_t * module)166 process_namelist(mod_info_t *module)
167 {
168 	int		fd;
169 	Elf		*elf;
170 
171 	if ((fd = open(module->name, O_RDONLY)) == -1) {
172 		(void) fprintf(stderr, "%s: can't read %s\n",
173 		    whoami, module->name);
174 		(void) fprintf(stderr, "Exiting due to error(s)...\n");
175 		exit(EX_NOINPUT);
176 	}
177 
178 	/*
179 	 * libelf's version already verified in processing a.out,
180 	 * so directly do elf_begin()
181 	 */
182 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
183 		fatal_error("can't read as ELF file");
184 
185 	module->next = NULL;
186 	module->txt_origin = get_txtorigin(elf);
187 	get_symtab(elf, module);
188 	module->active = TRUE;
189 }
190 
191 /*
192  * Get the ELF header and,  if it exists, call get_symtab()
193  * to begin processing of the file; otherwise, return from
194  * processing the file with a warning.
195  */
196 static void
process(char * filename,int fd)197 process(char *filename, int fd)
198 {
199 	Elf			*elf;
200 	extern bool		cflag;
201 	extern bool		Bflag;
202 
203 	if (elf_version(EV_CURRENT) == EV_NONE)
204 		fatal_error("libelf is out of date");
205 
206 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
207 		fatal_error("can't read as ELF file");
208 
209 	if (gelf_getclass(elf) == ELFCLASS64)
210 		Bflag = TRUE;
211 
212 	/*
213 	 * Initialize active modules list. Note that we set the end
214 	 * address while reading the symbol table, in get_symtab
215 	 */
216 	modules.id = 1;
217 	modules.next = NULL;
218 	modules.txt_origin = get_txtorigin(elf);
219 	modules.load_base = modules.txt_origin;
220 	if ((modules.name = malloc(strlen(filename) + 1)) == NULL) {
221 		(void) fprintf(stderr, "%s: can't malloc %d bytes",
222 		    whoami, strlen(filename) + 1);
223 		exit(EX_UNAVAILABLE);
224 	}
225 	(void) strcpy(modules.name, filename);
226 
227 	get_symtab(elf, &modules);
228 
229 	modules.load_end = modules.data_end;
230 	modules.active = TRUE;
231 	n_modules = 1;
232 
233 	if (cflag)
234 		get_textseg(elf, fd);
235 }
236 
237 static void
get_textseg(Elf * elf,int fd)238 get_textseg(Elf *elf, int fd)
239 {
240 	GElf_Ehdr ehdr;
241 	GElf_Phdr phdr;
242 	GElf_Half i;
243 
244 	if (gelf_getehdr(elf, &ehdr) == NULL)
245 		fatal_error("can't read ehdr");
246 
247 	for (i = 0; i < ehdr.e_phnum; i++) {
248 
249 		if (gelf_getphdr(elf, i, &phdr) == NULL)
250 			continue;
251 
252 		if (!(phdr.p_flags & PF_W) && (phdr.p_filesz > textsize)) {
253 			size_t chk;
254 
255 			/*
256 			 * We could have multiple loadable text segments;
257 			 * keep the largest we find.
258 			 */
259 			if (textspace)
260 				free(textspace);
261 
262 			/*
263 			 * gprof is a 32-bit program;  if this text segment
264 			 * has a > 32-bit offset or length, it's too big.
265 			 */
266 			chk = (size_t)phdr.p_vaddr + (size_t)phdr.p_filesz;
267 			if (phdr.p_vaddr + phdr.p_filesz != (GElf_Xword)chk)
268 				fatal_error("text segment too large for -c");
269 
270 			textbegin = (size_t)phdr.p_vaddr;
271 			textsize = (size_t)phdr.p_filesz;
272 
273 			textspace = malloc(textsize);
274 
275 			if (lseek(fd, (off_t)phdr.p_offset, SEEK_SET) !=
276 			    (off_t)phdr.p_offset)
277 				fatal_error("cannot seek to text section");
278 
279 			if (read(fd, textspace, textsize) != textsize)
280 				fatal_error("cannot read text");
281 		}
282 	}
283 
284 	if (textsize == 0)
285 		fatal_error("can't find text segment");
286 }
287 
#ifdef DEBUG
/*
 * Debug trace used by remove_dup_syms(): print the name of the symbol
 * being dropped (`louser') and the one retained (`keeper').
 */
static void
debug_dup_del(nltype *keeper, nltype *louser)
{
	const char *dropped = louser->name;
	const char *kept = keeper->name;

	(void) printf("remove_dup_syms: discarding sym %s over sym %s\n",
	    dropped, kept);
}
#endif /* DEBUG */
296 
297 static void
remove_dup_syms(nltype * nl,sztype * sym_count)298 remove_dup_syms(nltype *nl, sztype *sym_count)
299 {
300 	int	i;
301 	int	index;
302 	int	nextsym;
303 	nltype *orig_list;
304 
305 	if ((orig_list = malloc(sizeof (nltype) * *sym_count)) == NULL) {
306 		(void) fprintf(stderr,
307 		    "gprof: remove_dup_syms: malloc failed\n");
308 		(void) fprintf(stderr, "Exiting due to error(s)...\n");
309 		exit(EX_UNAVAILABLE);
310 	}
311 	(void) memcpy(orig_list, nl, sizeof (nltype) * *sym_count);
312 
313 	for (i = 0, index = 0, nextsym = 1; nextsym < *sym_count; nextsym++) {
314 		int	i_type;
315 		int	n_bind;
316 		int	n_type;
317 
318 		/*
319 		 * If orig_list[nextsym] points to a new symvalue, then we
320 		 * will copy our keeper and move on to the next symbol.
321 		 */
322 		if ((orig_list + i)->value < (orig_list + nextsym)->value) {
323 			*(nl + index++) = *(orig_list +i);
324 			i = nextsym;
325 			continue;
326 		}
327 
328 		/*
329 		 * If these two symbols have the same info, then we
330 		 * keep the first and keep checking for dups.
331 		 */
332 		if ((orig_list + i)->syminfo ==
333 		    (orig_list + nextsym)->syminfo) {
334 			DEBUG_DUP_DEL(orig_list + i, orig_list + nextsym);
335 			continue;
336 		}
337 		n_bind = ELF32_ST_BIND((orig_list + nextsym)->syminfo);
338 		i_type = ELF32_ST_TYPE((orig_list + i)->syminfo);
339 		n_type = ELF32_ST_TYPE((orig_list + nextsym)->syminfo);
340 
341 		/*
342 		 * If they have the same type we take the stronger
343 		 * bound function.
344 		 */
345 		if (i_type == n_type) {
346 			if (n_bind == STB_WEAK) {
347 				DEBUG_DUP_DEL((orig_list + i),
348 				    (orig_list + nextsym));
349 				continue;
350 			}
351 			DEBUG_DUP_DEL((orig_list + nextsym),
352 			    (orig_list + i));
353 			i = nextsym;
354 			continue;
355 		}
356 
357 		/*
358 		 * If the first symbol isn't of type NOTYPE then it must
359 		 * be the keeper.
360 		 */
361 		if (i_type != STT_NOTYPE) {
362 			DEBUG_DUP_DEL((orig_list + i),
363 			    (orig_list + nextsym));
364 			continue;
365 		}
366 
367 		/*
368 		 * Throw away the first one and take the new
369 		 * symbol
370 		 */
371 		DEBUG_DUP_DEL((orig_list + nextsym), (orig_list + i));
372 		i = nextsym;
373 	}
374 
375 	if (i < *sym_count) {
376 		if ((orig_list + i)->value > (nl + index - 1)->value)
377 			*(nl + index++) = *(orig_list + i);
378 	}
379 
380 	*sym_count = index;
381 }
382 
383 /*
384  * compare either by name or by value for sorting.
385  * This is the comparison function called by qsort to
386  * sort the symbols either by name or value when requested.
387  */
388 static int
compare(const void * arg1,const void * arg2)389 compare(const void *arg1, const void *arg2)
390 {
391 	nltype *a = (nltype *)arg1;
392 	nltype *b = (nltype *)arg2;
393 
394 	if (a->value > b->value)
395 		return (1);
396 	else
397 		return ((a->value == b->value) - 1);
398 }
399 
400 static int
is_function(Elf * elf,GElf_Sym * sym)401 is_function(Elf *elf, GElf_Sym *sym)
402 {
403 	Elf_Scn *scn;
404 	GElf_Shdr shdr;
405 
406 	/*
407 	 * With shared objects, it is possible we come across a function
408 	 * that's global, but is undefined. The definition is probably
409 	 * elsewhere, so we'll have to skip it as far as this object is
410 	 * concerned.
411 	 */
412 	if (sym->st_shndx == SHN_UNDEF)
413 		return (0);
414 
415 	if (GELF_ST_TYPE(sym->st_info) == STT_FUNC) {
416 		if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
417 			return (1);
418 
419 		if (GELF_ST_BIND(sym->st_info) == STB_WEAK)
420 			return (1);
421 
422 		if (!aflag && GELF_ST_BIND(sym->st_info) == STB_LOCAL)
423 			return (1);
424 	}
425 
426 	/*
427 	 * It's not a function; determine if it's in an executable section.
428 	 */
429 	if (GELF_ST_TYPE(sym->st_info) != STT_NOTYPE)
430 		return (0);
431 
432 	/*
433 	 * If it isn't global, and it isn't weak, and it either isn't
434 	 * local or the "all flag" isn't set, then get out.
435 	 */
436 	if (GELF_ST_BIND(sym->st_info) != STB_GLOBAL &&
437 	    GELF_ST_BIND(sym->st_info) != STB_WEAK &&
438 	    (GELF_ST_BIND(sym->st_info) != STB_LOCAL || aflag))
439 		return (0);
440 
441 	if (sym->st_shndx >= SHN_LORESERVE)
442 		return (0);
443 
444 	scn = elf_getscn(elf, sym->st_shndx);
445 	(void) gelf_getshdr(scn, &shdr);
446 
447 	if (!(shdr.sh_flags & SHF_EXECINSTR))
448 		return (0);
449 
450 	return (1);
451 }
452 
453 static void
get_symtab(Elf * elf,mod_info_t * module)454 get_symtab(Elf *elf, mod_info_t *module)
455 {
456 	Elf_Scn		*scn = NULL, *sym_pri = NULL, *sym_aux = NULL;
457 	GElf_Word	strndx = 0;
458 	sztype		nsyms, i;
459 	Elf_Data	*symdata_pri;
460 	Elf_Data	*symdata_aux = NULL;
461 	GElf_Xword	nsyms_pri = 0, nsyms_aux = 0;
462 	nltype		*etext = NULL;
463 	nltype		*l_nl, *l_npe;
464 	sztype		l_nname;
465 	extern sztype	total_names;
466 	int		symtab_found = 0;
467 
468 
469 	/*
470 	 * Scan the section headers looking for a symbol table. Our
471 	 * preference is to use .symtab, because it contains the full
472 	 * set of symbols. If we find it, we stop looking immediately
473 	 * and use it. In the absence of a .symtab section, we are
474 	 * willing to use the dynamic symbol table (.dynsym), possibly
475 	 * augmented by the .SUNW_ldynsym, which contains local symbols.
476 	 */
477 	while ((symtab_found == 0) && ((scn = elf_nextscn(elf, scn)) != NULL)) {
478 		GElf_Shdr shdr;
479 
480 		if (gelf_getshdr(scn, &shdr) == NULL)
481 			continue;
482 
483 		switch (shdr.sh_type) {
484 		case SHT_SYMTAB:
485 			nsyms_pri = shdr.sh_size / shdr.sh_entsize;
486 			strndx = shdr.sh_link;
487 			sym_pri = scn;
488 			/* Throw away .SUNW_ldynsym. It is for .dynsym only */
489 			nsyms_aux = 0;
490 			sym_aux = NULL;
491 			/* We have found the best symbol table. Stop looking */
492 			symtab_found = 1;
493 			break;
494 
495 		case SHT_DYNSYM:
496 			/* We will use .dynsym if no .symtab is found */
497 			nsyms_pri = shdr.sh_size / shdr.sh_entsize;
498 			strndx = shdr.sh_link;
499 			sym_pri = scn;
500 			break;
501 
502 		case SHT_SUNW_LDYNSYM:
503 			/* Auxiliary table, used with .dynsym */
504 			nsyms_aux = shdr.sh_size / shdr.sh_entsize;
505 			sym_aux = scn;
506 			break;
507 		}
508 	}
509 
510 	if (sym_pri == NULL || strndx == 0)
511 		fatal_error("can't find symbol table.\n");
512 
513 	nsyms = (sztype)(nsyms_pri + nsyms_aux);
514 	if ((nsyms_pri + nsyms_aux) != (GElf_Xword)nsyms)
515 		fatal_error(
516 		    "32-bit gprof cannot handle more than 2^32 symbols");
517 
518 	if ((symdata_pri = elf_getdata(sym_pri, NULL)) == NULL)
519 		fatal_error("can't read symbol data.\n");
520 
521 	if ((sym_aux != NULL) &&
522 	    ((symdata_aux = elf_getdata(sym_aux, NULL)) == NULL))
523 		fatal_error("can't read .SUNW_ldynsym symbol data.\n");
524 
525 	if ((l_nl = l_npe = (nltype *)calloc(nsyms + PRF_SYMCNT,
526 	    sizeof (nltype))) == NULL)
527 		fatal_error("cannot allocate symbol data.\n");
528 
529 	/*
530 	 * Now we need to cruise through the symbol table eliminating
531 	 * all non-functions from consideration, and making strings
532 	 * real.
533 	 */
534 	l_nname = 0;
535 
536 	for (i = 1; i < nsyms; i++) {
537 		GElf_Sym gsym;
538 		char *name;
539 
540 		/*
541 		 * Look up the symbol. In the case where we have a
542 		 * .SUNW_ldynsym/.dynsym pair, we treat them as a single
543 		 * logical table, with the data from .SUNW_ldynsym coming
544 		 * before the data in .dynsym.
545 		 */
546 		if (i >= nsyms_aux)
547 			(void) gelf_getsym(symdata_pri, i - nsyms_aux, &gsym);
548 		else
549 			(void) gelf_getsym(symdata_aux, i, &gsym);
550 
551 		name = elf_strptr(elf, strndx, gsym.st_name);
552 
553 		/*
554 		 * We're interested in this symbol if it's a function or
555 		 * if it's the symbol "_etext"
556 		 */
557 		if (is_function(elf, &gsym) || strcmp(name, PRF_ETEXT) == 0) {
558 
559 			l_npe->name = name;
560 			l_npe->value = gsym.st_value;
561 			l_npe->sz = gsym.st_size;
562 			l_npe->syminfo = gsym.st_info;
563 			l_npe->module = module;
564 
565 			if (strcmp(name, PRF_ETEXT) == 0)
566 				etext = l_npe;
567 
568 			if (lflag == TRUE &&
569 			    GELF_ST_BIND(gsym.st_info) == STB_LOCAL) {
570 				/*
571 				 * If the "locals only" flag is on, then
572 				 * we add the local symbols to the
573 				 * exclusion lists.
574 				 */
575 				addlist(Elist, name);
576 				addlist(elist, name);
577 			}
578 			DPRINTF("Index %lld:", l_nname);
579 			DPRINTF("\tValue: 0x%llx\t", l_npe->value);
580 			DPRINTF("Name: %s \n", l_npe->name);
581 			l_npe++;
582 			l_nname++;
583 		}
584 
585 		if (strcmp(name, PRF_END) == 0)
586 			module->data_end = gsym.st_value;
587 	}
588 
589 	if (l_npe == l_nl)
590 		fatal_error("no valid functions found");
591 
592 	/*
593 	 * Finally, we need to construct some dummy entries.
594 	 */
595 	if (etext) {
596 		l_npe->name = PRF_EXTSYM;
597 		l_npe->value = etext->value + 1;
598 		l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
599 		l_npe->module = module;
600 		l_npe++;
601 		l_nname++;
602 	}
603 
604 	l_npe->name = PRF_MEMTERM;
605 	l_npe->value = (pctype)-1;
606 	l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
607 	l_npe->module = module;
608 	l_npe++;
609 	l_nname++;
610 
611 	/*
612 	 * We're almost done;  all we need to do is sort the symbols
613 	 * and then remove the duplicates.
614 	 */
615 	qsort(l_nl, (size_t)l_nname, sizeof (nltype), compare);
616 	remove_dup_syms(l_nl, &l_nname);
617 
618 	module->nl = l_nl;
619 	module->npe = l_npe;
620 	module->nname = l_nname;
621 
622 	total_names += l_nname;
623 }
624