xref: /illumos-gate/usr/src/cmd/dis/dis_main.c (revision dc0093f44ee4fac928e006850f8ed53f68277af5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <ctype.h>
30 #include <getopt.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <sys/sysmacros.h>
35 #include <sys/elf_SPARC.h>
36 
37 #include <libdisasm.h>
38 
39 #include "dis_target.h"
40 #include "dis_util.h"
41 #include "dis_list.h"
42 
/*
 * Global option state.  Everything here is filled in by main() while parsing
 * the command line, except that dis_file() also ORs architecture-specific
 * bits into g_flags.
 */
int g_demangle;		/* Demangle C++ names (-C) */
int g_quiet;		/* Quiet mode (-q): suppress headers and banners */
int g_numeric;		/* Numeric mode (-n): print addresses, not symbols */
int g_flags;		/* libdisasm language flags (DIS_OCTAL from -o, ...) */
int g_doall;		/* true if no functions or sections were given */

dis_namelist_t *g_funclist;	/* list of functions to disassemble, if any */
dis_namelist_t *g_seclist;	/* list of sections to disassemble, if any */
51 
/*
 * Section options for -d, -D, and -t.  The value is stored alongside the
 * section name in g_seclist and handed back to dis_named_section() as its
 * 'type' argument.
 */
#define	DIS_DATA_RELATIVE	1	/* -d: dump data, addresses from 0 */
#define	DIS_DATA_ABSOLUTE	2	/* -D: dump data, section addresses */
#define	DIS_TEXT		3	/* -t: disassemble section as text */
58 
/*
 * libdisasm callback data.  Keeps track of current data (function or section)
 * and offset within that data.  dis_data() fills one of these in and registers
 * it with dis_set_data(); do_read() and do_lookup() receive it back as their
 * opaque 'data' argument.
 */
typedef struct dis_buffer {
	dis_tgt_t	*db_tgt;	/* current dis target */
	void		*db_data;	/* function or section data */
	uint64_t	db_addr;	/* address of function start */
	size_t		db_size;	/* size of data */
	/*
	 * Updated by do_read() after every read, and by dis_data() when it
	 * skips over an invalid opcode.
	 */
	uint64_t	db_nextaddr;	/* next address to be read */
} dis_buffer_t;
70 
/* Lower bound used by dis_data() when padding the "symbol:" column. */
#define	MINSYMWIDTH	22	/* Minimum width of symbol portion of line */
72 
73 /*
74  * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
75  * formatted symbol, based on the offset and current setttings.
76  */
77 void
78 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
79     size_t buflen)
80 {
81 	if (symbol == NULL || g_numeric)
82 		(void) snprintf(buf, buflen, "%llx", addr);
83 	else {
84 		if (g_demangle)
85 			symbol = dis_demangle(symbol);
86 
87 		if (offset == 0)
88 			(void) snprintf(buf, buflen, "%s", symbol);
89 		else if (g_flags & DIS_OCTAL)
90 			(void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
91 		else
92 			(void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
93 	}
94 }
95 
96 /*
97  * The main disassembly routine.  Given a fixed-sized buffer and starting
98  * address, disassemble the data using the supplied target and libdisasm handle.
99  */
100 void
101 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
102     size_t datalen)
103 {
104 	dis_buffer_t db = { 0 };
105 	char buf[BUFSIZE];
106 	char symbuf[BUFSIZE];
107 	const char *symbol;
108 	off_t symoffset;
109 	int i;
110 	int bytesperline;
111 	size_t symsize;
112 	int isfunc;
113 	size_t symwidth = 0;
114 
115 	db.db_tgt = tgt;
116 	db.db_data = data;
117 	db.db_addr = addr;
118 	db.db_size = datalen;
119 
120 	dis_set_data(dhp, &db);
121 
122 	if ((bytesperline = dis_max_instrlen(dhp)) > 6)
123 		bytesperline = 6;
124 
125 	while (addr < db.db_addr + db.db_size) {
126 
127 		if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) {
128 			/*
129 			 * If we encounter an invalid opcode, we just
130 			 * print "*** invalid opcode ***" at that first bad
131 			 * instruction and continue with printing the rest
132 			 * of the instruction stream as hex data,
133 			 * We then find the next valid symbol in the section,
134 			 * and disassemble from there.
135 			 */
136 			off_t next;
137 
138 			(void) snprintf(buf, sizeof (buf),
139 			    "*** invalid opcode ***");
140 
141 			if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
142 				db.db_nextaddr = db.db_addr + db.db_size;
143 			} else {
144 				if (next > db.db_size)
145 					db.db_nextaddr = db.db_addr +
146 					    db.db_size;
147 				else
148 					db.db_nextaddr = addr + next;
149 			}
150 		}
151 
152 		/*
153 		 * Print out the line as:
154 		 *
155 		 * 	address:	bytes	text
156 		 *
157 		 * If there are more than 6 bytes in any given instruction,
158 		 * spread the bytes across two lines.  We try to get symbolic
159 		 * information for the address, but if that fails we print out
160 		 * the numeric address instead.
161 		 *
162 		 * We try to keep the address portion of the text aligned at
163 		 * MINSYMWIDTH characters.  If we are disassembling a function
164 		 * with a long name, this can be annoying.  So we pick a width
165 		 * based on the maximum width that the current symbol can be.
166 		 * This at least produces text aligned within each function.
167 		 */
168 		symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
169 		    &isfunc);
170 		/* Get the maximum length for this symbol */
171 		getsymname(addr, symbol, symsize, symbuf, sizeof (symbuf));
172 		symwidth = MAX(strlen(symbuf), MINSYMWIDTH);
173 
174 		getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
175 
176 		/*
177 		 * If we've crossed a new function boundary, print out the
178 		 * function name on a blank line.
179 		 */
180 		if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
181 			(void) printf("%s()\n", symbol);
182 
183 		(void) printf("    %s:%*s ", symbuf,
184 		    symwidth - strlen(symbuf), "");
185 
186 		/* print bytes */
187 		for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
188 		    i++) {
189 			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
190 			if (g_flags & DIS_OCTAL)
191 				(void) printf("%03o ", byte);
192 			else
193 				(void) printf("%02x ", byte);
194 		}
195 
196 		/* trailing spaces for missing bytes */
197 		for (; i < bytesperline; i++) {
198 			if (g_flags & DIS_OCTAL)
199 				(void) printf("    ");
200 			else
201 				(void) printf("   ");
202 		}
203 
204 		/* contents of disassembly */
205 		(void) printf(" %s", buf);
206 
207 		/* excess bytes that spill over onto subsequent lines */
208 		for (; i < db.db_nextaddr - addr; i++) {
209 			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
210 			if (i % bytesperline == 0)
211 				(void) printf("\n    %*s  ", symwidth, "");
212 			if (g_flags & DIS_OCTAL)
213 				(void) printf("%03o ", byte);
214 			else
215 				(void) printf("%02x ", byte);
216 		}
217 
218 		(void) printf("\n");
219 
220 		addr = db.db_nextaddr;
221 	}
222 }
223 
224 /*
225  * libdisasm wrapper around symbol lookup.  Invoke the target-specific lookup
226  * function, and convert the result using getsymname().
227  */
228 int
229 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
230     size_t *symlen)
231 {
232 	dis_buffer_t *db = data;
233 	const char *symbol;
234 	off_t offset;
235 	size_t size;
236 
237 	/*
238 	 * If NULL symbol is returned, getsymname takes care of
239 	 * printing appropriate address in buf instead of symbol.
240 	 */
241 	symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
242 
243 	if (buf != NULL)
244 		getsymname(addr, symbol, offset, buf, buflen);
245 
246 	if (start != NULL)
247 		*start = addr - offset;
248 	if (symlen != NULL)
249 		*symlen = size;
250 
251 	return (0);
252 }
253 
254 /*
255  * libdisasm wrapper around target reading.  libdisasm will always read data
256  * in order, so update our current offset within the buffer appropriately.
257  * We only support reading from within the current object; libdisasm should
258  * never ask us to do otherwise.
259  */
260 int
261 do_read(void *data, uint64_t addr, void *buf, size_t len)
262 {
263 	dis_buffer_t *db = data;
264 	size_t offset;
265 
266 	if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
267 		return (-1);
268 
269 	offset = addr - db->db_addr;
270 	len = MIN(len, db->db_size - offset);
271 
272 	(void) memcpy(buf, (char *)db->db_data + offset, len);
273 
274 	db->db_nextaddr = addr + len;
275 
276 	return (len);
277 }
278 
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of 'data'
 *	data	bytes to dump
 *	datalen	number of bytes in 'data'
 *
 * Output is written to stdout in rows of 16 bytes, each row starting at a
 * 16-byte aligned address.  Positions in the first and last row that fall
 * outside [addr, addr + datalen) are shown as blanks.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	const uint8_t *bytes = data;
	uint64_t end = addr + datalen;
	/*
	 * Keep the running address in a full 64-bit type: uintptr_t would
	 * truncate 64-bit addresses when this is compiled as a 32-bit
	 * program.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	int i;
	int width;

	/*
	 * Determine if the address given to us fits in 32-bit range, in which
	 * case use a 4-byte width (8 hex digits); otherwise use 16 digits.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address.  The conversion must be 64 bits
		 * wide, or the upper half of the address is lost.
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print spaces.
		 */
		for (i = 0; i < 16; i++) {
			uint64_t pos = curaddr + i;

			if (pos < addr || pos >= end)
				(void) printf("  ");
			else
				(void) printf("%02x", bytes[pos - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			uint64_t pos = curaddr + i;

			if (pos < addr || pos >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[pos - addr];
				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
346 
347 /*
348  * Disassemble a section implicitly specified as part of a file.  This function
349  * is called for all sections when no other flags are specified.  We ignore any
350  * data sections, and print out only those sections containing text.
351  */
352 void
353 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
354 {
355 	dis_handle_t *dhp = data;
356 
357 	/* ignore data sections */
358 	if (!dis_section_istext(scn))
359 		return;
360 
361 	if (!g_quiet)
362 		(void) printf("\nsection %s\n", dis_section_name(scn));
363 
364 	dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
365 	    dis_section_size(scn));
366 }
367 
/*
 * Structure passed to dis_named_{section,function} which keeps track of both
 * the target and the libdisasm handle.  dis_file() builds one per target and
 * passes its address as the iterator's opaque argument.
 */
typedef struct callback_arg {
	dis_tgt_t	*ca_tgt;	/* target currently being processed */
	dis_handle_t	*ca_handle;	/* libdisasm handle for that target */
} callback_arg_t;
376 
377 /*
378  * Disassemble a section explicitly named with -s, -d, or -D.  The 'type'
379  * argument contains the type of argument given.  Pass the data onto the
380  * appropriate helper routine.
381  */
382 void
383 dis_named_section(dis_scn_t *scn, int type, void *data)
384 {
385 	callback_arg_t *ca = data;
386 
387 	if (!g_quiet)
388 		(void) printf("\nsection %s\n", dis_section_name(scn));
389 
390 	switch (type) {
391 	case DIS_DATA_RELATIVE:
392 		dump_data(0, dis_section_data(scn), dis_section_size(scn));
393 		break;
394 	case DIS_DATA_ABSOLUTE:
395 		dump_data(dis_section_addr(scn), dis_section_data(scn),
396 		    dis_section_size(scn));
397 		break;
398 	case DIS_TEXT:
399 		dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
400 		    dis_section_data(scn), dis_section_size(scn));
401 		break;
402 	}
403 }
404 
405 /*
406  * Disassemble a function explicitly specified with '-F'.  The 'type' argument
407  * is unused.
408  */
409 /* ARGSUSED */
410 void
411 dis_named_function(dis_func_t *func, int type, void *data)
412 {
413 	callback_arg_t *ca = data;
414 
415 	dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
416 	    dis_function_data(func), dis_function_size(func));
417 }
418 
419 /*
420  * Disassemble a complete file.  First, we determine the type of the file based
421  * on the ELF machine type, and instantiate a version of the disassembler
422  * appropriate for the file.  We then resolve any named sections or functions
423  * against the file, and iterate over the results (or all sections if no flags
424  * were specified).
425  */
426 void
427 dis_file(const char *filename)
428 {
429 	dis_tgt_t *tgt, *current;
430 	dis_scnlist_t *sections;
431 	dis_funclist_t *functions;
432 	dis_handle_t *dhp;
433 	GElf_Ehdr ehdr;
434 
435 	/*
436 	 * First, initialize the target
437 	 */
438 	if ((tgt = dis_tgt_create(filename)) == NULL)
439 		return;
440 
441 	if (!g_quiet)
442 		(void) printf("disassembly for %s\n\n",  filename);
443 
444 	/*
445 	 * A given file may contain multiple targets (if it is an archive, for
446 	 * example).  We iterate over all possible targets if this is the case.
447 	 */
448 	for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
449 		dis_tgt_ehdr(current, &ehdr);
450 
451 		/*
452 		 * Eventually, this should probably live within libdisasm, and
453 		 * we should be able to disassemble targets from different
454 		 * architectures.  For now, we only support objects as the
455 		 * native machine type.
456 		 */
457 		switch (ehdr.e_machine) {
458 #ifdef __sparc
459 		case EM_SPARC:
460 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
461 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
462 				warn("invalid E_IDENT field for SPARC object");
463 				return;
464 			}
465 			g_flags |= DIS_SPARC_V8;
466 			break;
467 
468 		case EM_SPARC32PLUS:
469 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
470 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
471 				warn("invalid E_IDENT field for SPARC object");
472 				return;
473 			}
474 
475 			switch (ehdr.e_flags & EF_SPARC_32PLUS_MASK) {
476 			case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
477 			    EF_SPARC_SUN_US3):
478 			case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1):
479 				g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
480 			default:
481 				g_flags |= DIS_SPARC_V9;
482 			}
483 			break;
484 
485 		case EM_SPARCV9:
486 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
487 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
488 				warn("invalid E_IDENT field for SPARC object");
489 				return;
490 			}
491 
492 			g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
493 			break;
494 #endif /* __sparc */
495 
496 #if defined(__i386) || defined(__amd64)
497 		case EM_386:
498 			g_flags |= DIS_X86_SIZE32;
499 			break;
500 
501 		case EM_AMD64:
502 			g_flags |= DIS_X86_SIZE64;
503 			break;
504 #endif /* __i386 || __amd64 */
505 
506 		default:
507 			die("%s: unsupported ELF machine 0x%x", filename,
508 			    ehdr.e_machine);
509 		}
510 
511 		if (!g_quiet && dis_tgt_member(current) != NULL)
512 			(void) printf("\narchive member %s\n",
513 			    dis_tgt_member(current));
514 
515 		/*
516 		 * Instantiate a libdisasm handle based on the file type.
517 		 */
518 		if ((dhp = dis_handle_create(g_flags, current, do_lookup,
519 		    do_read)) == NULL)
520 			die("%s: failed to initialize disassembler: %s",
521 			    filename, dis_strerror(dis_errno()));
522 
523 		if (g_doall) {
524 			/*
525 			 * With no arguments, iterate over all sections and
526 			 * disassemble only those that contain text.
527 			 */
528 			dis_tgt_section_iter(current, dis_text_section, dhp);
529 		} else {
530 			callback_arg_t ca;
531 
532 			ca.ca_tgt = current;
533 			ca.ca_handle = dhp;
534 
535 			/*
536 			 * If sections or functions were explicitly specified,
537 			 * resolve those names against the object, and iterate
538 			 * over just the resulting data.
539 			 */
540 			sections = dis_namelist_resolve_sections(g_seclist,
541 			    current);
542 			functions = dis_namelist_resolve_functions(g_funclist,
543 			    current);
544 
545 			dis_scnlist_iter(sections, dis_named_section, &ca);
546 			dis_funclist_iter(functions, dis_named_function, &ca);
547 
548 			dis_scnlist_destroy(sections);
549 			dis_funclist_destroy(functions);
550 		}
551 
552 		dis_handle_destroy(dhp);
553 	}
554 
555 	dis_tgt_destroy(tgt);
556 }
557 
/*
 * Write the two-line usage summary to stderr and exit with status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n", stderr);
	(void) fputs("\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}
565 
/*
 * Entry in the singly linked list of archives requested with '-l'.  main()
 * allocates both the node and its path string, and frees them once the
 * archive has been processed.
 */
typedef struct lib_node {
	char *path;			/* full path of the archive */
	struct lib_node *next;		/* next entry, or NULL */
} lib_node_t;
570 
571 int
572 main(int argc, char **argv)
573 {
574 	int optchar;
575 	int i;
576 	lib_node_t *libs = NULL;
577 
578 	g_funclist = dis_namelist_create();
579 	g_seclist = dis_namelist_create();
580 
581 	while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
582 		switch (optchar) {
583 		case 'C':
584 			g_demangle = 1;
585 			break;
586 		case 'd':
587 			dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
588 			break;
589 		case 'D':
590 			dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
591 			break;
592 		case 'F':
593 			dis_namelist_add(g_funclist, optarg, 0);
594 			break;
595 		case 'l': {
596 			/*
597 			 * The '-l foo' option historically would attempt to
598 			 * disassemble '$LIBDIR/libfoo.a'.  The $LIBDIR
599 			 * environment variable has never been supported or
600 			 * documented for our linker.  However, until this
601 			 * option is formally EOLed, we have to support it.
602 			 */
603 			char *dir;
604 			lib_node_t *node;
605 			size_t len;
606 
607 			if ((dir = getenv("LIBDIR")) == NULL ||
608 			    dir[0] == '\0')
609 				dir = "/usr/lib";
610 			node = safe_malloc(sizeof (lib_node_t));
611 			len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
612 			node->path = safe_malloc(len);
613 
614 			(void) snprintf(node->path, len, "%s/lib%s.a", dir,
615 			    optarg);
616 			node->next = libs;
617 			libs = node;
618 			break;
619 		}
620 		case 'L':
621 			/*
622 			 * The '-L' option historically would attempt to read
623 			 * the .debug section of the target to determine source
624 			 * line information in order to annotate the output.
625 			 * No compiler has emitted these sections in many years,
626 			 * and the option has never done what it purported to
627 			 * do.  We silently consume the option for
628 			 * compatibility.
629 			 */
630 			break;
631 		case 'n':
632 			g_numeric = 1;
633 			break;
634 		case 'o':
635 			g_flags |= DIS_OCTAL;
636 			break;
637 		case 'q':
638 			g_quiet = 1;
639 			break;
640 		case 't':
641 			dis_namelist_add(g_seclist, optarg, DIS_TEXT);
642 			break;
643 		case 'V':
644 			(void) printf("Solaris disassembler version 1.0\n");
645 			return (0);
646 		default:
647 			usage();
648 			break;
649 		}
650 	}
651 
652 	argc -= optind;
653 	argv += optind;
654 
655 	if (argc == 0 && libs == NULL) {
656 		warn("no objects specified");
657 		usage();
658 	}
659 
660 	if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
661 		g_doall = 1;
662 
663 	/*
664 	 * See comment for 'l' option, above.
665 	 */
666 	while (libs != NULL) {
667 		lib_node_t *node = libs->next;
668 
669 		dis_file(libs->path);
670 		free(libs->path);
671 		free(libs);
672 		libs = node;
673 	}
674 
675 	for (i = 0; i < argc; i++)
676 		dis_file(argv[i]);
677 
678 	dis_namelist_destroy(g_funclist);
679 	dis_namelist_destroy(g_seclist);
680 
681 	return (g_error);
682 }
683