dis_target.c revision 23a1ccea6aac035f084a7a4cdc968687d1b02daf
1dc0093feschrock/*
2dc0093feschrock * CDDL HEADER START
3dc0093feschrock *
4dc0093feschrock * The contents of this file are subject to the terms of the
5dc0093feschrock * Common Development and Distribution License (the "License").
6dc0093feschrock * You may not use this file except in compliance with the License.
7dc0093feschrock *
8dc0093feschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9dc0093feschrock * or http://www.opensolaris.org/os/licensing.
10dc0093feschrock * See the License for the specific language governing permissions
11dc0093feschrock * and limitations under the License.
12dc0093feschrock *
13dc0093feschrock * When distributing Covered Code, include this CDDL HEADER in each
14dc0093feschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15dc0093feschrock * If applicable, add the following below this CDDL HEADER, with the
16dc0093feschrock * fields enclosed by brackets "[]" replaced with your own identifying
17dc0093feschrock * information: Portions Copyright [yyyy] [name of copyright owner]
18dc0093feschrock *
19dc0093feschrock * CDDL HEADER END
20dc0093feschrock */
21dc0093feschrock
22dc0093feschrock/*
2323a1cceRoger A. Faulkner * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24dc0093feschrock */
25dc0093feschrock
26dc0093feschrock#include <assert.h>
27dc0093feschrock#include <errno.h>
28dc0093feschrock#include <fcntl.h>
29dc0093feschrock#include <gelf.h>
30dc0093feschrock#include <libelf.h>
31dc0093feschrock#include <stdlib.h>
32dc0093feschrock#include <string.h>
33dc0093feschrock#include <unistd.h>
34dc0093feschrock
35dc0093feschrock#include <sys/fcntl.h>
36dc0093feschrock#include <sys/stat.h>
37dc0093feschrock
38dc0093feschrock#include "dis_target.h"
39dc0093feschrock#include "dis_util.h"
40dc0093feschrock
41dc0093feschrock/*
42dc0093feschrock * Standard ELF disassembler target.
43dc0093feschrock *
44dc0093feschrock * We only support disassembly of ELF files, though this target interface could
45dc0093feschrock * be extended in the future.  Each basic type (target, func, section) contains
46dc0093feschrock * enough information to uniquely identify the location within the file.  The
47dc0093feschrock * interfaces use libelf(3LIB) to do the actual processing of the file.
48dc0093feschrock */
49dc0093feschrock
/*
 * Symbol table entry type.  We maintain our own symbol table sorted by address,
 * with the symbol name already resolved against the ELF symbol table.
 *
 * Entries are filled in by construct_symtab() and ordered by sym_compare().
 */
typedef struct sym_entry {
	/* copy of the ELF symbol, as filled in by gelf_getsym() */
	GElf_Sym	se_sym;		/* value of symbol */
	/* pointer returned by elf_strptr(); no copy is made when it is set */
	char		*se_name;	/* name of symbol */
	/*
	 * Normally st_shndx.  When st_shndx is SHN_XINDEX and an
	 * SHT_SYMTAB_SHNDX section exists, the value comes from that section,
	 * or is -1 if the symbol's index is out of range.
	 */
	int		se_shndx;	/* section where symbol is located */
} sym_entry_t;
59dc0093feschrock
/*
 * Target data structure.  This structure keeps track of the ELF file
 * information, a few bits of pre-processed section index information, and
 * sorted versions of the symbol table.  We also keep track of the last symbol
 * looked up, as the majority of lookups remain within the same symbol.
 *
 * For an archive, dis_tgt_create() builds one of these per usable member and
 * links them through dt_next; the first target in the chain is the one that
 * holds the open file descriptor.
 */
struct dis_tgt {
	/* handle for this object (the archive member, for archives) */
	Elf		*dt_elf;	/* libelf handle */
	/* handle for the whole file; chained targets share the same pointer */
	Elf		*dt_elf_root;	/* main libelf handle (for archives) */
	/* caller's string, stored by pointer and not copied */
	const char	*dt_filename;	/* name of file */
	/* set to -1 in chained (non-head) archive member targets */
	int		dt_fd;		/* underlying file descriptor */
	size_t		dt_shstrndx;	/* section index of .shstrtab */
	/* 0 means no SHT_SYMTAB/SHT_DYNSYM section was found */
	size_t		dt_symidx;	/* section index of symbol table */
	sym_entry_t	*dt_symcache;	/* last symbol looked up */
	sym_entry_t	*dt_symtab;	/* sorted symbol table */
	/* number of entries in dt_symtab */
	int		dt_symcount;	/* # of symbol table entries */
	struct dis_tgt	*dt_next;	/* next target (for archives) */
	/* NULL when the file is not an archive */
	Elf_Arhdr	*dt_arhdr;	/* archive header (for archives) */
};
79dc0093feschrock
/*
 * Function data structure.  We resolve the symbol and look up the associated
 * ELF data when building this structure.  The offset is calculated based on
 * the section's starting address.
 *
 * NOTE(review): the code that fills in and consumes this structure is not in
 * this part of the file; the field descriptions below restate the original
 * intent and should be confirmed against that code.
 */
struct dis_func {
	sym_entry_t	*df_sym;	/* symbol table reference */
	Elf_Data	*df_data;	/* associated ELF data */
	size_t		df_offset;	/* offset within data */
};
90dc0093feschrock
/*
 * Section data structure.  We store the entire section header so that we can
 * determine some properties (such as whether or not it contains text) after
 * building the structure.
 */
struct dis_scn {
	/* full section header; get_symtab() inspects its sh_type */
	GElf_Shdr	ds_shdr;
	/* section name -- presumably resolved via .shstrtab; confirm at fill site */
	const char	*ds_name;
	/* libelf data descriptor for the section contents (filled in elsewhere) */
	Elf_Data	*ds_data;
};
101dc0093feschrock
/*
 * Lifted from Psymtab.c, omitting STT_TLS.
 *
 * DATA_TYPES is a bitmask with one bit set for each symbol type we keep
 * (STT_OBJECT, STT_FUNC and STT_COMMON).  IS_DATA_TYPE(tp) is non-zero when
 * the symbol type 'tp' is one of those.  'tp' is used as a shift count, so it
 * must be smaller than the width of int; construct_symtab() passes the result
 * of GELF_ST_TYPE().
 */
#define	DATA_TYPES      \
	((1 << STT_OBJECT) | (1 << STT_FUNC) | (1 << STT_COMMON))
#define	IS_DATA_TYPE(tp)	(((1 << (tp)) & DATA_TYPES) != 0)
106dc0093feschrock
107dc0093feschrock/*
108dc0093feschrock * Pick out the best symbol to used based on the sections available in the
109dc0093feschrock * target.  We prefer SHT_SYMTAB over SHT_DYNSYM.
110dc0093feschrock */
111dc0093feschrock/* ARGSUSED */
112dc0093feschrockstatic void
113dc0093feschrockget_symtab(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
114dc0093feschrock{
115dc0093feschrock	int *index = data;
116dc0093feschrock
117dc0093feschrock	*index += 1;
118dc0093feschrock
119dc0093feschrock	/*
120dc0093feschrock	 * Prefer SHT_SYMTAB over SHT_DYNSYM
121dc0093feschrock	 */
122dc0093feschrock	if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
123dc0093feschrock		tgt->dt_symidx = *index;
124dc0093feschrock	else if (scn->ds_shdr.sh_type == SHT_SYMTAB)
125dc0093feschrock		tgt->dt_symidx = *index;
126dc0093feschrock}
127dc0093feschrock
128dc0093feschrockstatic int
129dc0093feschrocksym_compare(const void *a, const void *b)
130dc0093feschrock{
131dc0093feschrock	const sym_entry_t *syma = a;
132dc0093feschrock	const sym_entry_t *symb = b;
133dc0093feschrock	const char *aname = syma->se_name;
134dc0093feschrock	const char *bname = symb->se_name;
135dc0093feschrock
136dc0093feschrock	if (syma->se_sym.st_value < symb->se_sym.st_value)
137dc0093feschrock		return (-1);
138dc0093feschrock
139dc0093feschrock	if (syma->se_sym.st_value > symb->se_sym.st_value)
140dc0093feschrock		return (1);
141dc0093feschrock
142dc0093feschrock	/*
143dc0093feschrock	 * Prefer functions over non-functions
144dc0093feschrock	 */
145dc0093feschrock	if (GELF_ST_TYPE(syma->se_sym.st_info) !=
146dc0093feschrock	    GELF_ST_TYPE(symb->se_sym.st_info)) {
147dc0093feschrock		if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
148dc0093feschrock			return (-1);
149dc0093feschrock		if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
150dc0093feschrock			return (1);
151dc0093feschrock	}
152dc0093feschrock
153dc0093feschrock	/*
154dc0093feschrock	 * For symbols with the same address and type, we sort them according to
155dc0093feschrock	 * a hierarchy:
156dc0093feschrock	 *
157dc0093feschrock	 * 	1. weak symbols (common name)
158dc0093feschrock	 * 	2. global symbols (external name)
159dc0093feschrock	 * 	3. local symbols
160dc0093feschrock	 */
161dc0093feschrock	if (GELF_ST_BIND(syma->se_sym.st_info) !=
162dc0093feschrock	    GELF_ST_BIND(symb->se_sym.st_info)) {
163dc0093feschrock		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
164dc0093feschrock			return (-1);
165dc0093feschrock		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
166dc0093feschrock			return (1);
167dc0093feschrock
168dc0093feschrock		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
169dc0093feschrock			return (-1);
170dc0093feschrock		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
171dc0093feschrock			return (1);
172dc0093feschrock	}
173dc0093feschrock
174dc0093feschrock	/*
175dc0093feschrock	 * As a last resort, if we have multiple symbols of the same type at the
176dc0093feschrock	 * same address, prefer the version with the fewest leading underscores.
177dc0093feschrock	 */
178dc0093feschrock	if (aname == NULL)
179dc0093feschrock		return (-1);
180dc0093feschrock	if (bname == NULL)
181dc0093feschrock		return (1);
182dc0093feschrock
183dc0093feschrock	while (*aname == '_' && *bname == '_') {
184dc0093feschrock		aname++;
185dc0093feschrock		bname++;
186dc0093feschrock	}
187dc0093feschrock
188dc0093feschrock	if (*bname == '_')
189dc0093feschrock		return (-1);
190dc0093feschrock	if (*aname == '_')
191dc0093feschrock		return (1);
192dc0093feschrock
193dc0093feschrock	/*
194dc0093feschrock	 * Prefer the symbol with the smaller size.
195dc0093feschrock	 */
196dc0093feschrock	if (syma->se_sym.st_size < symb->se_sym.st_size)
197dc0093feschrock		return (-1);
198dc0093feschrock	if (syma->se_sym.st_size > symb->se_sym.st_size)
199dc0093feschrock		return (1);
200dc0093feschrock
201dc0093feschrock	/*
202dc0093feschrock	 * We really do have two identical symbols for some reason.  Just report
203dc0093feschrock	 * them as equal, and to the lucky one go the spoils.
204dc0093feschrock	 */
205dc0093feschrock	return (0);
206dc0093feschrock}
207dc0093feschrock
208dc0093feschrock/*
209dc0093feschrock * Construct an optimized symbol table sorted by starting address.
210dc0093feschrock */
211dc0093feschrockstatic void
212dc0093feschrockconstruct_symtab(dis_tgt_t *tgt)
213dc0093feschrock{
214dc0093feschrock	Elf_Scn *scn;
215dc0093feschrock	GElf_Shdr shdr;
216dc0093feschrock	Elf_Data *symdata;
217dc0093feschrock	int i;
218dc0093feschrock	GElf_Word *symshndx = NULL;
219dc0093feschrock	int symshndx_size;
220dc0093feschrock	sym_entry_t *sym;
221dc0093feschrock	sym_entry_t *p_symtab = NULL;
222dc0093feschrock	int nsym = 0; /* count of symbols we're not interested in */
223dc0093feschrock
224dc0093feschrock	/*
225dc0093feschrock	 * Find the symshndx section, if any
226dc0093feschrock	 */
227dc0093feschrock	for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
228dc0093feschrock	    scn = elf_nextscn(tgt->dt_elf, scn)) {
229dc0093feschrock		if (gelf_getshdr(scn, &shdr) == NULL)
230dc0093feschrock			break;
231dc0093feschrock		if (shdr.sh_type == SHT_SYMTAB_SHNDX &&
232dc0093feschrock		    shdr.sh_link == tgt->dt_symidx) {
233dc0093feschrock			Elf_Data	*data;
234dc0093feschrock
235dc0093feschrock			if ((data = elf_getdata(scn, NULL)) != NULL) {
236dc0093feschrock				symshndx = (GElf_Word *)data->d_buf;
237dc0093feschrock				symshndx_size = data->d_size /
238dc0093feschrock				    sizeof (GElf_Word);
239dc0093feschrock				break;
240dc0093feschrock			}
241dc0093feschrock		}
242dc0093feschrock	}
243dc0093feschrock
244dc0093feschrock	if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
245dc0093feschrock		die("%s: failed to get section information", tgt->dt_filename);
246dc0093feschrock	if (gelf_getshdr(scn, &shdr) == NULL)
247dc0093feschrock		die("%s: failed to get section header", tgt->dt_filename);
248dc0093feschrock	if (shdr.sh_entsize == 0)
249dc0093feschrock		die("%s: symbol table has zero size", tgt->dt_filename);
250dc0093feschrock
251dc0093feschrock	if ((symdata = elf_getdata(scn, NULL)) == NULL)
252dc0093feschrock		die("%s: failed to get symbol table", tgt->dt_filename);
253dc0093feschrock
254dc0093feschrock	tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
2557a65609jmcp	    1, EV_CURRENT);
256dc0093feschrock
257dc0093feschrock	p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
258dc0093feschrock
259dc0093feschrock	for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
260dc0093feschrock		if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
261dc0093feschrock			warn("%s: gelf_getsym returned NULL for %d",
2627a65609jmcp			    tgt->dt_filename, i);
263dc0093feschrock			nsym++;
264dc0093feschrock			continue;
265dc0093feschrock		}
266dc0093feschrock
267dc0093feschrock		/*
268dc0093feschrock		 * We're only interested in data symbols.
269dc0093feschrock		 */
270dc0093feschrock		if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
271dc0093feschrock			nsym++;
272dc0093feschrock			continue;
273dc0093feschrock		}
274dc0093feschrock
275dc0093feschrock		if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
276dc0093feschrock			if (i > symshndx_size) {
277dc0093feschrock				warn("%s: bad SHNX_XINDEX %d",
2787a65609jmcp				    tgt->dt_filename, i);
279dc0093feschrock				sym->se_shndx = -1;
280dc0093feschrock			} else {
281dc0093feschrock				sym->se_shndx = symshndx[i];
282dc0093feschrock			}
283dc0093feschrock		} else {
284dc0093feschrock			sym->se_shndx = sym->se_sym.st_shndx;
285dc0093feschrock		}
286dc0093feschrock
287dc0093feschrock		if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
288dc0093feschrock		    (size_t)sym->se_sym.st_name)) == NULL) {
289dc0093feschrock			warn("%s: failed to lookup symbol %d name",
2907a65609jmcp			    tgt->dt_filename, i);
291dc0093feschrock			nsym++;
292dc0093feschrock			continue;
293dc0093feschrock		}
294dc0093feschrock
295dc0093feschrock		sym++;
296dc0093feschrock	}
297dc0093feschrock
298dc0093feschrock	tgt->dt_symcount -= nsym;
2997a65609jmcp	tgt->dt_symtab = realloc(p_symtab, tgt->dt_symcount *
3007a65609jmcp	    sizeof (sym_entry_t));
301dc0093feschrock
302dc0093feschrock	qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
303dc0093feschrock	    sym_compare);
304dc0093feschrock}
305dc0093feschrock
306dc0093feschrock/*
307dc0093feschrock * Create a target backed by an ELF file.
308dc0093feschrock */
309dc0093feschrockdis_tgt_t *
310dc0093feschrockdis_tgt_create(const char *file)
311dc0093feschrock{
312dc0093feschrock	dis_tgt_t *tgt, *current;
313dc0093feschrock	int idx;
314dc0093feschrock	Elf *elf;
315dc0093feschrock	GElf_Ehdr ehdr;
316dc0093feschrock	Elf_Arhdr *arhdr = NULL;
317dc0093feschrock	int cmd;
318dc0093feschrock
319dc0093feschrock	if (elf_version(EV_CURRENT) == EV_NONE)
320dc0093feschrock		die("libelf(3ELF) out of date");
321dc0093feschrock
322dc0093feschrock	tgt = safe_malloc(sizeof (dis_tgt_t));
323dc0093feschrock
324dc0093feschrock	if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) {
325dc0093feschrock		warn("%s: failed opening file, reason: %s", file,
3267a65609jmcp		    strerror(errno));
327dc0093feschrock		free(tgt);
328dc0093feschrock		return (NULL);
329dc0093feschrock	}
330dc0093feschrock
331dc0093feschrock	if ((tgt->dt_elf_root =
332dc0093feschrock	    elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
333dc0093feschrock		warn("%s: invalid or corrupt ELF file", file);
334dc0093feschrock		dis_tgt_destroy(tgt);
335dc0093feschrock		return (NULL);
336dc0093feschrock	}
337dc0093feschrock
338dc0093feschrock	current = tgt;
339dc0093feschrock	cmd = ELF_C_READ;
340dc0093feschrock	while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
341dc0093feschrock
342dc0093feschrock		if (elf_kind(tgt->dt_elf_root) == ELF_K_AR &&
343dc0093feschrock		    (arhdr = elf_getarhdr(elf)) == NULL) {
344dc0093feschrock			warn("%s: malformed archive", file);
345dc0093feschrock			dis_tgt_destroy(tgt);
346dc0093feschrock			return (NULL);
347dc0093feschrock		}
348dc0093feschrock
349dc0093feschrock		/*
350dc0093feschrock		 * Make sure that this Elf file is sane
351dc0093feschrock		 */
352dc0093feschrock		if (gelf_getehdr(elf, &ehdr) == NULL) {
353dc0093feschrock			if (arhdr != NULL) {
354dc0093feschrock				/*
355dc0093feschrock				 * For archives, we drive on in the face of bad
356dc0093feschrock				 * members.  The "/" and "//" members are
357dc0093feschrock				 * special, and should be silently ignored.
358dc0093feschrock				 */
359dc0093feschrock				if (strcmp(arhdr->ar_name, "/") != 0 &&
360dc0093feschrock				    strcmp(arhdr->ar_name, "//") != 0)
361dc0093feschrock					warn("%s[%s]: invalid file type",
362dc0093feschrock					    file, arhdr->ar_name);
363dc0093feschrock				cmd = elf_next(elf);
364dc0093feschrock				(void) elf_end(elf);
365dc0093feschrock				continue;
366dc0093feschrock			}
367dc0093feschrock
368dc0093feschrock			warn("%s: invalid file type", file);
369dc0093feschrock			dis_tgt_destroy(tgt);
370dc0093feschrock			return (NULL);
371dc0093feschrock		}
372dc0093feschrock
373dc0093feschrock		/*
374dc0093feschrock		 * If we're seeing a new Elf object, then we have an
375dc0093feschrock		 * archive. In this case, we create a new target, and chain it
376dc0093feschrock		 * off the master target.  We can later iterate over these
377dc0093feschrock		 * targets using dis_tgt_next().
378dc0093feschrock		 */
379dc0093feschrock		if (current->dt_elf != NULL) {
380dc0093feschrock			dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t));
381dc0093feschrock			next->dt_elf_root = tgt->dt_elf_root;
382dc0093feschrock			next->dt_fd = -1;
383dc0093feschrock			current->dt_next = next;
384dc0093feschrock			current = next;
385dc0093feschrock		}
386dc0093feschrock		current->dt_elf = elf;
387dc0093feschrock		current->dt_arhdr = arhdr;
388dc0093feschrock
38962b628aAli Bahrami		if (elf_getshdrstrndx(elf, &current->dt_shstrndx) == -1) {
390dc0093feschrock			warn("%s: failed to get section string table for "
391dc0093feschrock			    "file", file);
392dc0093feschrock			dis_tgt_destroy(tgt);
393dc0093feschrock			return (NULL);
394dc0093feschrock		}
395dc0093feschrock
396dc0093feschrock		idx = 0;
397dc0093feschrock		dis_tgt_section_iter(current, get_symtab, &idx);
398dc0093feschrock
399dc0093feschrock		if (current->dt_symidx != 0)
400dc0093feschrock			construct_symtab(current);
401dc0093feschrock
402dc0093feschrock		current->dt_filename = file;
403dc0093feschrock
404dc0093feschrock		cmd = elf_next(elf);
405dc0093feschrock	}
406dc0093feschrock
407dc0093feschrock	/*
408dc0093feschrock	 * Final sanity check.  If we had an archive with no members, then bail
409dc0093feschrock	 * out with a nice message.
410dc0093feschrock	 */
411dc0093feschrock	if (tgt->dt_elf == NULL) {
412dc0093feschrock		warn("%s: empty archive\n", file);
413dc0093feschrock		dis_tgt_destroy(tgt);
414dc0093feschrock		return (NULL);
415dc0093feschrock	}
416dc0093feschrock
417dc0093feschrock	return (tgt);
418dc0093feschrock}
419dc0093feschrock
420dc0093feschrock/*
421dc0093feschrock * Return the filename associated with the target.
422dc0093feschrock */
423