/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2011 Jason King.  All rights reserved.
 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
 * Copyright 2018, Joyent, Inc.
 */

#include <ctype.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/sysmacros.h>
#include <sys/elf_SPARC.h>

#include <libdisasm.h>

#include "dis_target.h"
#include "dis_util.h"
#include "dis_list.h"

/*
 * Global settings.  The option flags are established by main() while it
 * parses the command line; g_flags is additionally extended by dis_file()
 * with the architecture-specific libdisasm flags for each object.
 */
int g_demangle;		/* Demangle C++ names (-C) */
int g_quiet;		/* Quiet mode (-q) */
int g_numeric;		/* Numeric mode (-n) */
int g_flags;		/* libdisasm language flags (-o adds DIS_OCTAL) */
int g_doall;		/* true if no functions or sections were given */

dis_namelist_t *g_funclist;	/* functions to disassemble, if any (-F) */
dis_namelist_t *g_seclist;	/* sections to process, if any (-d, -D, -t) */

/*
 * Section options for -d, -D, and -t.  main() records one of these values
 * alongside each section name, and dis_named_section() switches on it to
 * decide how the section is printed.
 */
#define	DIS_DATA_RELATIVE	1
#define	DIS_DATA_ABSOLUTE	2
#define	DIS_TEXT		3

/*
 * libdisasm callback data.  Keeps track of current data (function or section)
 * and offset within that data.  dis_data() fills one of these in and hands it
 * to libdisasm with dis_set_data(); do_lookup() and do_read() receive it back
 * as their 'data' argument.
 */
typedef struct dis_buffer {
	dis_tgt_t	*db_tgt;	/* current dis target */
	void		*db_data;	/* function or section data */
	uint64_t	db_addr;	/* address of first byte of db_data */
	size_t		db_size;	/* size of data */
	uint64_t	db_nextaddr;	/* next address to be read */
} dis_buffer_t;

/* NOTE(review): no use of MINSYMWIDTH is visible in this file -- confirm. */
#define	MINSYMWIDTH	22	/* Minimum width of symbol portion of line */

/*
 * Given a symbol+offset as returned by dis_tgt_lookup(), write an
 * appropriately formatted location into 'buf', based on the offset and the
 * current settings.
 *
 *	addr	address being described
 *	symbol	symbol name, or NULL if none is known
 *	offset	offset of 'addr' from the start of 'symbol'
 *	buf	output buffer
 *	buflen	size of 'buf' in bytes; output is truncated to fit
 *
 * If there is no symbol, or numeric mode (g_numeric) is enabled, the raw
 * address is written instead.  Numbers are octal when DIS_OCTAL is set in
 * g_flags and hexadecimal otherwise.
 */
void
getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
    size_t buflen)
{
	if (symbol == NULL || g_numeric) {
		/*
		 * The casts make the arguments match the 'll' length
		 * modifier regardless of how uint64_t is defined.
		 */
		if (g_flags & DIS_OCTAL)
			(void) snprintf(buf, buflen, "0%llo",
			    (unsigned long long)addr);
		else
			(void) snprintf(buf, buflen, "0x%llx",
			    (unsigned long long)addr);
		return;
	}

	if (g_demangle)
		symbol = dis_demangle(symbol);

	/*
	 * off_t may be wider than int, so it must not be handed to a plain
	 * %o/%x conversion; widen it explicitly and use 'll'.
	 */
	if (offset == 0)
		(void) snprintf(buf, buflen, "%s", symbol);
	else if (g_flags & DIS_OCTAL)
		(void) snprintf(buf, buflen, "%s+0%llo", symbol,
		    (unsigned long long)offset);
	else
		(void) snprintf(buf, buflen, "%s+0x%llx", symbol,
		    (unsigned long long)offset);
}

/*
 * Report the instruction size for architectures whose instructions are all
 * the same length.  Returns that length when libdisasm's minimum and maximum
 * instruction lengths agree, and 0 when they differ (variable-length).
 */
static int
insn_size(dis_handle_t *dhp)
{
	int shortest = dis_min_instrlen(dhp);
	int longest = dis_max_instrlen(dhp);

	return ((shortest == longest) ? shortest : 0);
}

/*
 * The main disassembly routine.  Given a fixed-sized buffer and starting
 * address, disassemble the data using the supplied target and libdisasm
 * handle, printing one line per instruction to stdout.
 *
 *	tgt	target used for symbol and section lookups
 *	dhp	libdisasm handle; its callback data is set to a local
 *		dis_buffer_t describing 'data'
 *	addr	address corresponding to the first byte of 'data'
 *	data	the bytes to disassemble
 *	datalen	number of bytes in 'data'
 */
void
dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
    size_t datalen)
{
	dis_buffer_t db = { 0 };
	char buf[BUFSIZE];
	char symbuf[BUFSIZE];
	const char *symbol;
	const char *last_symbol;
	off_t symoffset;
	int i;
	int bytesperline;
	size_t symsize;
	int isfunc;
	size_t symwidth = 0;
	size_t symlen;
	int pad;
	uint64_t endaddr;
	uint64_t nbytes;
	int ret;
	int insz = insn_size(dhp);

	db.db_tgt = tgt;
	db.db_data = data;
	db.db_addr = addr;
	db.db_size = datalen;
	endaddr = db.db_addr + db.db_size;

	dis_set_data(dhp, &db);

	if ((bytesperline = dis_max_instrlen(dhp)) > 6)
		bytesperline = 6;

	symbol = NULL;

	/*
	 * symbuf is measured with strlen() below before it is necessarily
	 * written (when the very first lookup yields no symbol), so it must
	 * start out as a valid empty string.
	 */
	symbuf[0] = '\0';

	while (addr < endaddr) {

		ret = dis_disassemble(dhp, addr, buf, BUFSIZE);
		if (ret != 0) {
			(void) snprintf(buf, sizeof (buf),
			    "*** invalid opcode ***");

			if (insz > 0) {
				/*
				 * Since we know instructions are fixed size,
				 * we always know the address of the next
				 * instruction.
				 */
				db.db_nextaddr = addr + insz;
			} else {
				/*
				 * On architectures with variable sized
				 * instructions we have no way to figure out
				 * where the next instruction starts if we
				 * encounter an invalid instruction.  Instead
				 * we print the rest of the instruction stream
				 * as hex until we reach the next valid symbol
				 * in the section.
				 *
				 * The value from dis_tgt_next_symbol() is
				 * used as a distance from 'addr', so it is
				 * checked against the bytes remaining after
				 * 'addr' rather than the whole buffer size.
				 */
				off_t next = dis_tgt_next_symbol(tgt, addr);

				if (next <= 0 ||
				    (uint64_t)next > endaddr - addr)
					db.db_nextaddr = endaddr;
				else
					db.db_nextaddr = addr + next;
			}

			/*
			 * Never step past the end of the buffer; the byte
			 * dump below reads everything up to db_nextaddr.
			 */
			if (db.db_nextaddr > endaddr)
				db.db_nextaddr = endaddr;
		}

		/*
		 * Print out the line as:
		 *
		 * 	address:	bytes	text
		 *
		 * If there are more than 6 bytes in any given instruction,
		 * spread the bytes across two lines.  We try to get symbolic
		 * information for the address, but if that fails we print out
		 * the numeric address instead.
		 *
		 * We try to keep the address portion of the text aligned.  If
		 * we are disassembling a function with a long name, a fixed
		 * width can be annoying.  So we pick a width based on the
		 * maximum width that the current symbol can be.  This at
		 * least produces text aligned within each function.
		 */
		last_symbol = symbol;

		/*
		 * Give the lookup outputs defined values first.
		 * NOTE(review): assumes dis_tgt_lookup() may leave them
		 * unwritten when it returns NULL -- confirm.  Otherwise a
		 * stale 'isfunc' could label a section name as a function.
		 */
		symoffset = 0;
		symsize = 0;
		isfunc = 0;
		symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
		    &isfunc);
		if (symbol == NULL) {
			symbol = dis_find_section(tgt, addr, &symoffset);
			symsize = symoffset;
		}

		/*
		 * On a change of symbol, format the widest form it can take
		 * (symbol+size) purely to measure it.
		 */
		if (symbol != last_symbol)
			getsymname(addr, symbol, symsize, symbuf,
			    sizeof (symbuf));

		symwidth = MAX(symwidth, strlen(symbuf));
		getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));

		/*
		 * If we've crossed a new function boundary, print out the
		 * function name on a blank line.
		 */
		if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
			(void) printf("%s()\n", symbol);

		/*
		 * The '*' field width must be an int, and the difference
		 * must not be allowed to wrap if this label turns out wider
		 * than the width measured above.
		 */
		symlen = strlen(symbuf);
		pad = (symwidth > symlen) ? (int)(symwidth - symlen) : 0;
		(void) printf("    %s:%*s ", symbuf, pad, "");

		nbytes = db.db_nextaddr - addr;

		/* print bytes */
		for (i = 0; i < bytesperline && (uint64_t)i < nbytes; i++) {
			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
			if (g_flags & DIS_OCTAL)
				(void) printf("%03o ", byte);
			else
				(void) printf("%02x ", byte);
		}

		/* trailing spaces for missing bytes */
		for (; i < bytesperline; i++) {
			if (g_flags & DIS_OCTAL)
				(void) printf("    ");
			else
				(void) printf("   ");
		}

		/* contents of disassembly */
		(void) printf(" %s", buf);

		/* excess bytes that spill over onto subsequent lines */
		for (; (uint64_t)i < nbytes; i++) {
			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
			if (i % bytesperline == 0)
				(void) printf("\n    %*s  ", (int)symwidth,
				    "");
			if (g_flags & DIS_OCTAL)
				(void) printf("%03o ", byte);
			else
				(void) printf("%02x ", byte);
		}

		(void) printf("\n");

		addr = db.db_nextaddr;
	}
}

/*
 * libdisasm wrapper around symbol lookup.  Invoke the target-specific lookup
 * function, and convert the result using getsymname().
 *
 *	data	the dis_buffer_t registered with dis_set_data()
 *	addr	address to resolve
 *	buf	if non-NULL, receives the formatted name (or numeric address)
 *	buflen	size of 'buf'
 *	start	if non-NULL, receives addr minus the offset into the symbol
 *	symlen	if non-NULL, receives the symbol size
 *
 * Returns 0 if a symbol was found and -1 otherwise.
 */
int
do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
    size_t *symlen)
{
	dis_buffer_t *db = data;
	const char *symbol;
	/*
	 * Both values are consumed below even when no symbol is found, so
	 * give them defined contents.
	 * NOTE(review): assumes dis_tgt_lookup() may leave them unwritten on
	 * failure -- confirm.
	 */
	off_t offset = 0;
	size_t size = 0;

	/*
	 * If NULL symbol is returned, getsymname takes care of
	 * printing appropriate address in buf instead of symbol.
	 */
	symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);

	if (buf != NULL)
		getsymname(addr, symbol, offset, buf, buflen);

	if (start != NULL)
		*start = addr - offset;
	if (symlen != NULL)
		*symlen = size;

	if (symbol == NULL)
		return (-1);

	return (0);
}

/*
 * libdisasm wrapper around target reading.  Copies up to 'len' bytes that
 * start at 'addr' out of the current buffer into 'buf', and records where the
 * read stopped in db_nextaddr so that dis_data() knows where the next
 * instruction begins.  Only addresses inside the current buffer are served;
 * anything else yields -1.  A request that runs off the end of the buffer is
 * shortened, and the number of bytes actually copied is returned.
 */
int
do_read(void *data, uint64_t addr, void *buf, size_t len)
{
	dis_buffer_t *db = data;
	size_t skip;
	size_t avail;

	if (addr < db->db_addr)
		return (-1);
	if (addr >= db->db_addr + db->db_size)
		return (-1);

	skip = addr - db->db_addr;
	avail = db->db_size - skip;

	/* clip the request to what is left in the buffer */
	if (avail <= len)
		len = avail;

	(void) memcpy(buf, (char *)db->db_data + skip, len);
	db->db_nextaddr = addr + len;

	return (len);
}

/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of 'data'
 *	data	bytes to dump
 *	datalen	number of bytes in 'data'
 *
 * Each output line covers a 16-byte aligned window.  Positions in the window
 * that fall before 'addr' or at/after 'addr + datalen' are left blank.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	/*
	 * Keep the running address in a 64-bit type.  A uintptr_t would lose
	 * the upper half of a 64-bit address when built as a 32-bit program.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	uint64_t endaddr = addr + datalen;
	const uint8_t *bytes = data;
	int i;
	int width;

	/*
	 * Determine if the address range given to us fits in 32 bits, in
	 * which case use 8 hex digits for the address; otherwise use 16.
	 */
	if ((endaddr & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < endaddr) {
		/*
		 * Display leading address.  The conversion must be 64 bits
		 * wide or addresses above 4GB lose their upper half.
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print spaces.
		 */
		for (i = 0; i < 16; i++) {
			if (curaddr + i < addr || curaddr + i >= endaddr)
				(void) printf("  ");
			else
				(void) printf("%02x",
				    bytes[curaddr + i - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation, substituting '.' for
		 * anything that is not printable.
		 */
		for (i = 0; i < 16; i++) {
			if (curaddr + i < addr || curaddr + i >= endaddr) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[curaddr + i - addr];
				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}

/*
 * Section iterator callback used when no sections or functions were named on
 * the command line.  'data' is the libdisasm handle.  Sections that are not
 * text are skipped; each text section is announced (unless in quiet mode) and
 * then disassembled in full.
 */
void
dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
{
	dis_handle_t *handle = data;

	if (dis_section_istext(scn)) {
		if (!g_quiet) {
			(void) printf("\nsection %s\n",
			    dis_section_name(scn));
		}

		dis_data(tgt, handle, dis_section_addr(scn),
		    dis_section_data(scn), dis_section_size(scn));
	}
}

/*
 * Structure passed to dis_named_{section,function} which keeps track of both
 * the target and the libdisasm handle.  dis_file() fills one in for each
 * target and passes its address as the iterator callback argument.
 */
typedef struct callback_arg {
	dis_tgt_t	*ca_tgt;	/* target being disassembled */
	dis_handle_t	*ca_handle;	/* libdisasm handle for that target */
} callback_arg_t;

/*
 * Handle a section explicitly named with -t, -d, or -D.  'type' is the
 * DIS_TEXT / DIS_DATA_RELATIVE / DIS_DATA_ABSOLUTE value recorded for the
 * option, and 'data' is the callback_arg_t for the current target.  Relative
 * data dumps are addressed from zero, absolute data dumps from the section's
 * own address, and text sections are disassembled.
 */
void
dis_named_section(dis_scn_t *scn, int type, void *data)
{
	callback_arg_t *arg = data;

	if (!g_quiet) {
		(void) printf("\nsection %s\n", dis_section_name(scn));
	}

	if (type == DIS_DATA_RELATIVE) {
		dump_data(0, dis_section_data(scn), dis_section_size(scn));
	} else if (type == DIS_DATA_ABSOLUTE) {
		dump_data(dis_section_addr(scn), dis_section_data(scn),
		    dis_section_size(scn));
	} else if (type == DIS_TEXT) {
		dis_data(arg->ca_tgt, arg->ca_handle, dis_section_addr(scn),
		    dis_section_data(scn), dis_section_size(scn));
	}
}

/*
 * Function iterator callback for functions named with '-F'.  'data' is the
 * callback_arg_t for the current target; 'type' is ignored.  The function's
 * bytes are disassembled starting at the function's address.
 */
/* ARGSUSED */
void
dis_named_function(dis_func_t *func, int type, void *data)
{
	callback_arg_t *arg = data;
	dis_tgt_t *tgt = arg->ca_tgt;
	dis_handle_t *handle = arg->ca_handle;

	dis_data(tgt, handle, dis_function_addr(func),
	    dis_function_data(func), dis_function_size(func));
}

/*
 * Disassemble a complete file.  First, we determine the type of the file based
 * on the ELF machine type, and instantiate a version of the disassembler
 * appropriate for the file.  We then resolve any named sections or functions
 * against the file, and iterate over the results (or all sections if no flags
 * were specified).
 *
 * If the file cannot be opened as a target this returns without output.  An
 * object whose ELF identification is inconsistent with its machine type
 * produces a warning and abandons the rest of the file; an unsupported
 * machine type, or a failure to create the libdisasm handle, is fatal.  The
 * target is destroyed on every path that returns.
 */
void
dis_file(const char *filename)
{
	dis_tgt_t *tgt, *current;
	dis_scnlist_t *sections;
	dis_funclist_t *functions;
	dis_handle_t *dhp;
	GElf_Ehdr ehdr;

	/*
	 * First, initialize the target
	 */
	if ((tgt = dis_tgt_create(filename)) == NULL)
		return;

	if (!g_quiet)
		(void) printf("disassembly for %s\n\n", filename);

	/*
	 * A given file may contain multiple targets (if it is an archive, for
	 * example).  We iterate over all possible targets if this is the case.
	 */
	for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
		dis_tgt_ehdr(current, &ehdr);

		/*
		 * Eventually, this should probably live within libdisasm.
		 * For now, map the ELF machine type (and, where it matters,
		 * the class and data encoding) onto libdisasm flags here.
		 *
		 * NOTE(review): g_flags is global and is only ever OR-ed
		 * into, so architecture flags chosen for an earlier target
		 * remain set for later ones -- confirm this is acceptable
		 * when objects of different architectures are mixed.
		 */
		switch (ehdr.e_machine) {
		case EM_SPARC:
			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
				warn("invalid E_IDENT field for SPARC object");
				goto out;
			}
			g_flags |= DIS_SPARC_V8;
			break;

		case EM_SPARC32PLUS:
		{
			uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;

			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
				warn("invalid E_IDENT field for SPARC object");
				goto out;
			}

			if (flags != 0 &&
			    (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
			    EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
				g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
			else
				g_flags |= DIS_SPARC_V9;
			break;
		}

		case EM_SPARCV9:
			if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
				warn("invalid E_IDENT field for SPARC object");
				goto out;
			}

			g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
			break;

		case EM_386:
			g_flags |= DIS_X86_SIZE32;
			break;

		case EM_AMD64:
			g_flags |= DIS_X86_SIZE64;
			break;

		case EM_S370:
			g_flags |= DIS_S370;

			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
				warn("invalid E_IDENT field for S370 object");
				goto out;
			}
			break;

		case EM_S390:
			/*
			 * Both 390 and z/Architecture use EM_S390, the only
			 * difference is the class: ELFCLASS32 for plain
			 * old s390 and ELFCLASS64 for z/Architecture (aka.
			 * s390x).
			 */
			if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
				g_flags |= DIS_S390_31;
			} else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
				g_flags |= DIS_S390_64;
			} else {
				warn("invalid E_IDENT field for S390 object");
				goto out;
			}

			if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
				warn("invalid E_IDENT field for S390 object");
				goto out;
			}
			break;

		case EM_RISCV:
			/*
			 * RISC-V is defined to be little endian. The current
			 * ISA makes it clear that the 64-bit instructions can
			 * co-exist with the 32-bit ones and therefore we don't
			 * need a separate elf class at this time.
			 */
			if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
				warn("invalid EI_DATA field for RISC-V object");
				goto out;
			}

			if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
				g_flags |= DIS_RISCV_32;
			} else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
				g_flags |= DIS_RISCV_64;
			} else {
				warn("invalid EI_CLASS field for RISC-V "
				    "object");
				goto out;
			}
			break;

		default:
			die("%s: unsupported ELF machine 0x%x", filename,
			    ehdr.e_machine);
		}

		/*
		 * If ET_REL (.o), printing immediate symbols is likely to
		 * result in garbage, as symbol lookups on unrelocated
		 * immediates find false and useless matches.
		 */

		if (ehdr.e_type == ET_REL)
			g_flags |= DIS_NOIMMSYM;

		if (!g_quiet && dis_tgt_member(current) != NULL)
			(void) printf("\narchive member %s\n",
			    dis_tgt_member(current));

		/*
		 * Instantiate a libdisasm handle based on the file type.
		 */
		if ((dhp = dis_handle_create(g_flags, current, do_lookup,
		    do_read)) == NULL)
			die("%s: failed to initialize disassembler: %s",
			    filename, dis_strerror(dis_errno()));

		if (g_doall) {
			/*
			 * With no arguments, iterate over all sections and
			 * disassemble only those that contain text.
			 */
			dis_tgt_section_iter(current, dis_text_section, dhp);
		} else {
			callback_arg_t ca;

			ca.ca_tgt = current;
			ca.ca_handle = dhp;

			/*
			 * If sections or functions were explicitly specified,
			 * resolve those names against the object, and iterate
			 * over just the resulting data.
			 */
			sections = dis_namelist_resolve_sections(g_seclist,
			    current);
			functions = dis_namelist_resolve_functions(g_funclist,
			    current);

			dis_scnlist_iter(sections, dis_named_section, &ca);
			dis_funclist_iter(functions, dis_named_function, &ca);

			dis_scnlist_destroy(sections);
			dis_funclist_destroy(functions);
		}

		dis_handle_destroy(dhp);
	}

out:
	/*
	 * Common exit: the validation failures above are detected before a
	 * libdisasm handle exists for the current target, so the target
	 * itself is the only thing left to release.
	 */
	dis_tgt_destroy(tgt);
}

/*
 * Print the command synopsis to stderr and exit with status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n", stderr);
	(void) fputs("\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}

typedef struct lib_node {
	char *path;
	struct lib_node *next;
} lib_node_t;

/*
 * Entry point.  Parses the command line into the global settings and the
 * function/section name lists, disassembles every archive requested with -l
 * followed by every file operand, and returns g_error as the exit status.
 */
int
main(int argc, char **argv)
{
	lib_node_t *libs = NULL;
	lib_node_t *lib;
	int opt;

	g_funclist = dis_namelist_create();
	g_seclist = dis_namelist_create();

	while ((opt = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
		switch (opt) {
		case 'C':
			g_demangle = 1;
			break;

		case 'n':
			g_numeric = 1;
			break;

		case 'o':
			g_flags |= DIS_OCTAL;
			break;

		case 'q':
			g_quiet = 1;
			break;

		case 'd':
			dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
			break;

		case 'D':
			dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
			break;

		case 't':
			dis_namelist_add(g_seclist, optarg, DIS_TEXT);
			break;

		case 'F':
			dis_namelist_add(g_funclist, optarg, 0);
			break;

		case 'l': {
			/*
			 * Historically '-l foo' meant "disassemble
			 * $LIBDIR/libfoo.a".  $LIBDIR has never been
			 * supported or documented for our linker, but the
			 * option has to keep working until it is formally
			 * EOLed.  Each path is pushed onto the front of a
			 * list that is walked once parsing is finished.
			 */
			lib_node_t *entry;
			size_t pathlen;
			char *libdir = getenv("LIBDIR");

			if (libdir == NULL || *libdir == '\0')
				libdir = "/usr/lib";

			entry = safe_malloc(sizeof (lib_node_t));
			/* sizeof ("/lib.a") also accounts for the NUL */
			pathlen = strlen(optarg) + strlen(libdir) +
			    sizeof ("/lib.a");
			entry->path = safe_malloc(pathlen);
			(void) snprintf(entry->path, pathlen, "%s/lib%s.a",
			    libdir, optarg);

			entry->next = libs;
			libs = entry;
			break;
		}

		case 'L':
			/*
			 * '-L' once claimed to annotate the output with
			 * source line information read from the target's
			 * .debug section.  No compiler has emitted those
			 * sections in many years and the option never did
			 * what it purported to do, so it is accepted and
			 * ignored for compatibility.
			 */
			break;

		case 'V':
			(void) printf("Solaris disassembler version 1.0\n");
			return (0);

		default:
			usage();
			break;
		}
	}

	argv += optind;
	argc -= optind;

	if (argc == 0 && libs == NULL) {
		warn("no objects specified");
		usage();
	}

	/* With nothing named explicitly, disassemble all text sections. */
	if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
		g_doall = 1;

	/*
	 * Archives requested with 'l' (see above) are handled first, each
	 * node being released as soon as its file has been processed.
	 */
	while ((lib = libs) != NULL) {
		libs = lib->next;

		dis_file(lib->path);
		free(lib->path);
		free(lib);
	}

	for (; argc > 0; argc--, argv++)
		dis_file(*argv);

	dis_namelist_destroy(g_funclist);
	dis_namelist_destroy(g_seclist);

	return (g_error);
}