summaryrefslogtreecommitdiff
path: root/usr/src/cmd/dis/dis_main.c
diff options
context:
space:
mode:
authoreschrock <none@none>2006-03-03 22:38:03 -0800
committereschrock <none@none>2006-03-03 22:38:03 -0800
commitdc0093f44ee4fac928e006850f8ed53f68277af5 (patch)
treec7d95b9a418cc1e478687acce9dc72bbae269231 /usr/src/cmd/dis/dis_main.c
parentea8dc4b6d2251b437950c0056bc626b311c73c27 (diff)
downloadillumos-joyent-dc0093f44ee4fac928e006850f8ed53f68277af5.tar.gz
PSARC 2005/673 dis(1) options and libdisasm.so.1
5034117 disassembler needs work 6237338 dis picks symbols inconsistently 6241243 disassembler options need to be updated post-1990 6241251 'dis -[Dd]' needs some surgery 6241264 need to move disassembler into a library 6294758 cafe sgs demangler should be torched --HG-- rename : usr/src/cmd/mdb/intel/mdb/bits.c => deleted_files/usr/src/cmd/mdb/intel/mdb/bits.c rename : usr/src/cmd/mdb/intel/mdb/dis.h => deleted_files/usr/src/cmd/mdb/intel/mdb/dis.h rename : usr/src/cmd/mdb/intel/mdb/inteldis.c => deleted_files/usr/src/cmd/mdb/intel/mdb/inteldis.c rename : usr/src/cmd/sgs/sgsdemangler/common/cafe_dem.h => deleted_files/usr/src/cmd/sgs/sgsdemangler/common/cafe_dem.h rename : usr/src/cmd/sgs/sgsdemangler/common/cafe_dem.y => deleted_files/usr/src/cmd/sgs/sgsdemangler/common/cafe_dem.y rename : usr/src/cmd/sgs/sgsdemangler/common/dem.c => deleted_files/usr/src/cmd/sgs/sgsdemangler/common/dem.c rename : usr/src/cmd/sgs/sgsdemangler/common/dem.h => deleted_files/usr/src/cmd/sgs/sgsdemangler/common/dem.h
Diffstat (limited to 'usr/src/cmd/dis/dis_main.c')
-rw-r--r--usr/src/cmd/dis/dis_main.c682
1 files changed, 682 insertions, 0 deletions
diff --git a/usr/src/cmd/dis/dis_main.c b/usr/src/cmd/dis/dis_main.c
new file mode 100644
index 0000000000..217109c77e
--- /dev/null
+++ b/usr/src/cmd/dis/dis_main.c
@@ -0,0 +1,682 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <ctype.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/sysmacros.h>
+#include <sys/elf_SPARC.h>
+
+#include <libdisasm.h>
+
+#include "dis_target.h"
+#include "dis_util.h"
+#include "dis_list.h"
+
+int g_demangle; /* Demangle C++ names */
+int g_quiet; /* Quiet mode */
+int g_numeric; /* Numeric mode */
+int g_flags; /* libdisasm language flags */
+int g_doall; /* true if no functions or sections were given */
+
+dis_namelist_t *g_funclist; /* list of functions to disassemble, if any */
+dis_namelist_t *g_seclist; /* list of sections to disassemble, if any */
+
+/*
+ * Section options for -d, -D, and -s
+ */
+#define DIS_DATA_RELATIVE 1
+#define DIS_DATA_ABSOLUTE 2
+#define DIS_TEXT 3
+
+/*
+ * libdisasm callback data. Keeps track of current data (function or section)
+ * and offset within that data.
+ */
+typedef struct dis_buffer {
+ dis_tgt_t *db_tgt; /* current dis target */
+ void *db_data; /* function or section data */
+ uint64_t db_addr; /* address of function start */
+ size_t db_size; /* size of data */
+ uint64_t db_nextaddr; /* next address to be read */
+} dis_buffer_t;
+
+#define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */
+
+/*
+ * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
+ * formatted symbol, based on the offset and current setttings.
+ */
+void
+getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
+ size_t buflen)
+{
+ if (symbol == NULL || g_numeric)
+ (void) snprintf(buf, buflen, "%llx", addr);
+ else {
+ if (g_demangle)
+ symbol = dis_demangle(symbol);
+
+ if (offset == 0)
+ (void) snprintf(buf, buflen, "%s", symbol);
+ else if (g_flags & DIS_OCTAL)
+ (void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
+ else
+ (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
+ }
+}
+
+/*
+ * The main disassembly routine. Given a fixed-sized buffer and starting
+ * address, disassemble the data using the supplied target and libdisasm handle.
+ */
+void
+dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
+ size_t datalen)
+{
+ dis_buffer_t db = { 0 };
+ char buf[BUFSIZE];
+ char symbuf[BUFSIZE];
+ const char *symbol;
+ off_t symoffset;
+ int i;
+ int bytesperline;
+ size_t symsize;
+ int isfunc;
+ size_t symwidth = 0;
+
+ db.db_tgt = tgt;
+ db.db_data = data;
+ db.db_addr = addr;
+ db.db_size = datalen;
+
+ dis_set_data(dhp, &db);
+
+ if ((bytesperline = dis_max_instrlen(dhp)) > 6)
+ bytesperline = 6;
+
+ while (addr < db.db_addr + db.db_size) {
+
+ if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) {
+ /*
+ * If we encounter an invalid opcode, we just
+ * print "*** invalid opcode ***" at that first bad
+ * instruction and continue with printing the rest
+ * of the instruction stream as hex data,
+ * We then find the next valid symbol in the section,
+ * and disassemble from there.
+ */
+ off_t next;
+
+ (void) snprintf(buf, sizeof (buf),
+ "*** invalid opcode ***");
+
+ if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
+ db.db_nextaddr = db.db_addr + db.db_size;
+ } else {
+ if (next > db.db_size)
+ db.db_nextaddr = db.db_addr +
+ db.db_size;
+ else
+ db.db_nextaddr = addr + next;
+ }
+ }
+
+ /*
+ * Print out the line as:
+ *
+ * address: bytes text
+ *
+ * If there are more than 6 bytes in any given instruction,
+ * spread the bytes across two lines. We try to get symbolic
+ * information for the address, but if that fails we print out
+ * the numeric address instead.
+ *
+ * We try to keep the address portion of the text aligned at
+ * MINSYMWIDTH characters. If we are disassembling a function
+ * with a long name, this can be annoying. So we pick a width
+ * based on the maximum width that the current symbol can be.
+ * This at least produces text aligned within each function.
+ */
+ symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
+ &isfunc);
+ /* Get the maximum length for this symbol */
+ getsymname(addr, symbol, symsize, symbuf, sizeof (symbuf));
+ symwidth = MAX(strlen(symbuf), MINSYMWIDTH);
+
+ getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
+
+ /*
+ * If we've crossed a new function boundary, print out the
+ * function name on a blank line.
+ */
+ if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
+ (void) printf("%s()\n", symbol);
+
+ (void) printf(" %s:%*s ", symbuf,
+ symwidth - strlen(symbuf), "");
+
+ /* print bytes */
+ for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
+ i++) {
+ int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
+ if (g_flags & DIS_OCTAL)
+ (void) printf("%03o ", byte);
+ else
+ (void) printf("%02x ", byte);
+ }
+
+ /* trailing spaces for missing bytes */
+ for (; i < bytesperline; i++) {
+ if (g_flags & DIS_OCTAL)
+ (void) printf(" ");
+ else
+ (void) printf(" ");
+ }
+
+ /* contents of disassembly */
+ (void) printf(" %s", buf);
+
+ /* excess bytes that spill over onto subsequent lines */
+ for (; i < db.db_nextaddr - addr; i++) {
+ int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
+ if (i % bytesperline == 0)
+ (void) printf("\n %*s ", symwidth, "");
+ if (g_flags & DIS_OCTAL)
+ (void) printf("%03o ", byte);
+ else
+ (void) printf("%02x ", byte);
+ }
+
+ (void) printf("\n");
+
+ addr = db.db_nextaddr;
+ }
+}
+
+/*
+ * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
+ * function, and convert the result using getsymname().
+ */
+int
+do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
+ size_t *symlen)
+{
+ dis_buffer_t *db = data;
+ const char *symbol;
+ off_t offset;
+ size_t size;
+
+ /*
+ * If NULL symbol is returned, getsymname takes care of
+ * printing appropriate address in buf instead of symbol.
+ */
+ symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
+
+ if (buf != NULL)
+ getsymname(addr, symbol, offset, buf, buflen);
+
+ if (start != NULL)
+ *start = addr - offset;
+ if (symlen != NULL)
+ *symlen = size;
+
+ return (0);
+}
+
+/*
+ * libdisasm wrapper around target reading. libdisasm will always read data
+ * in order, so update our current offset within the buffer appropriately.
+ * We only support reading from within the current object; libdisasm should
+ * never ask us to do otherwise.
+ */
+int
+do_read(void *data, uint64_t addr, void *buf, size_t len)
+{
+ dis_buffer_t *db = data;
+ size_t offset;
+
+ if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
+ return (-1);
+
+ offset = addr - db->db_addr;
+ len = MIN(len, db->db_size - offset);
+
+ (void) memcpy(buf, (char *)db->db_data + offset, len);
+
+ db->db_nextaddr = addr + len;
+
+ return (len);
+}
+
+/*
+ * Routine to dump raw data in a human-readable format. Used by the -d and -D
+ * options. We model our output after the xxd(1) program, which gives nicely
+ * formatted output, along with an ASCII translation of the result.
+ */
+void
+dump_data(uint64_t addr, void *data, size_t datalen)
+{
+ uintptr_t curaddr = addr & (~0xf);
+ uint8_t *bytes = data;
+ int i;
+ int width;
+
+ /*
+ * Determine if the address given to us fits in 32-bit range, in which
+ * case use a 4-byte width.
+ */
+ if (((addr + datalen) & 0xffffffff00000000ULL) == 0ULL)
+ width = 8;
+ else
+ width = 16;
+
+ while (curaddr < addr + datalen) {
+ /*
+ * Display leading address
+ */
+ (void) printf("%0*x: ", width, curaddr);
+
+ /*
+ * Print out data in two-byte chunks. If the current address
+ * is before the starting address or after the end of the
+ * section, print spaces.
+ */
+ for (i = 0; i < 16; i++) {
+ if (curaddr + i < addr ||curaddr + i >= addr + datalen)
+ (void) printf(" ");
+ else
+ (void) printf("%02x",
+ bytes[curaddr + i - addr]);
+
+ if (i & 1)
+ (void) printf(" ");
+ }
+
+ (void) printf(" ");
+
+ /*
+ * Print out the ASCII representation
+ */
+ for (i = 0; i < 16; i++) {
+ if (curaddr + i < addr ||
+ curaddr + i >= addr + datalen) {
+ (void) printf(" ");
+ } else {
+ uint8_t byte = bytes[curaddr + i - addr];
+ if (isprint(byte))
+ (void) printf("%c", byte);
+ else
+ (void) printf(".");
+ }
+ }
+
+ (void) printf("\n");
+
+ curaddr += 16;
+ }
+}
+
+/*
+ * Disassemble a section implicitly specified as part of a file. This function
+ * is called for all sections when no other flags are specified. We ignore any
+ * data sections, and print out only those sections containing text.
+ */
+void
+dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
+{
+ dis_handle_t *dhp = data;
+
+ /* ignore data sections */
+ if (!dis_section_istext(scn))
+ return;
+
+ if (!g_quiet)
+ (void) printf("\nsection %s\n", dis_section_name(scn));
+
+ dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
+ dis_section_size(scn));
+}
+
+/*
+ * Structure passed to dis_named_{section,function} which keeps track of both
+ * the target and the libdisasm handle.
+ */
+typedef struct callback_arg {
+ dis_tgt_t *ca_tgt;
+ dis_handle_t *ca_handle;
+} callback_arg_t;
+
+/*
+ * Disassemble a section explicitly named with -s, -d, or -D. The 'type'
+ * argument contains the type of argument given. Pass the data onto the
+ * appropriate helper routine.
+ */
+void
+dis_named_section(dis_scn_t *scn, int type, void *data)
+{
+ callback_arg_t *ca = data;
+
+ if (!g_quiet)
+ (void) printf("\nsection %s\n", dis_section_name(scn));
+
+ switch (type) {
+ case DIS_DATA_RELATIVE:
+ dump_data(0, dis_section_data(scn), dis_section_size(scn));
+ break;
+ case DIS_DATA_ABSOLUTE:
+ dump_data(dis_section_addr(scn), dis_section_data(scn),
+ dis_section_size(scn));
+ break;
+ case DIS_TEXT:
+ dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
+ dis_section_data(scn), dis_section_size(scn));
+ break;
+ }
+}
+
+/*
+ * Disassemble a function explicitly specified with '-F'. The 'type' argument
+ * is unused.
+ */
+/* ARGSUSED */
+void
+dis_named_function(dis_func_t *func, int type, void *data)
+{
+ callback_arg_t *ca = data;
+
+ dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
+ dis_function_data(func), dis_function_size(func));
+}
+
+/*
+ * Disassemble a complete file. First, we determine the type of the file based
+ * on the ELF machine type, and instantiate a version of the disassembler
+ * appropriate for the file. We then resolve any named sections or functions
+ * against the file, and iterate over the results (or all sections if no flags
+ * were specified).
+ */
+void
+dis_file(const char *filename)
+{
+ dis_tgt_t *tgt, *current;
+ dis_scnlist_t *sections;
+ dis_funclist_t *functions;
+ dis_handle_t *dhp;
+ GElf_Ehdr ehdr;
+
+ /*
+ * First, initialize the target
+ */
+ if ((tgt = dis_tgt_create(filename)) == NULL)
+ return;
+
+ if (!g_quiet)
+ (void) printf("disassembly for %s\n\n", filename);
+
+ /*
+ * A given file may contain multiple targets (if it is an archive, for
+ * example). We iterate over all possible targets if this is the case.
+ */
+ for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
+ dis_tgt_ehdr(current, &ehdr);
+
+ /*
+ * Eventually, this should probably live within libdisasm, and
+ * we should be able to disassemble targets from different
+ * architectures. For now, we only support objects as the
+ * native machine type.
+ */
+ switch (ehdr.e_machine) {
+#ifdef __sparc
+ case EM_SPARC:
+ if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
+ ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
+ warn("invalid E_IDENT field for SPARC object");
+ return;
+ }
+ g_flags |= DIS_SPARC_V8;
+ break;
+
+ case EM_SPARC32PLUS:
+ if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
+ ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
+ warn("invalid E_IDENT field for SPARC object");
+ return;
+ }
+
+ switch (ehdr.e_flags & EF_SPARC_32PLUS_MASK) {
+ case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
+ EF_SPARC_SUN_US3):
+ case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1):
+ g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
+ default:
+ g_flags |= DIS_SPARC_V9;
+ }
+ break;
+
+ case EM_SPARCV9:
+ if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
+ ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
+ warn("invalid E_IDENT field for SPARC object");
+ return;
+ }
+
+ g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
+ break;
+#endif /* __sparc */
+
+#if defined(__i386) || defined(__amd64)
+ case EM_386:
+ g_flags |= DIS_X86_SIZE32;
+ break;
+
+ case EM_AMD64:
+ g_flags |= DIS_X86_SIZE64;
+ break;
+#endif /* __i386 || __amd64 */
+
+ default:
+ die("%s: unsupported ELF machine 0x%x", filename,
+ ehdr.e_machine);
+ }
+
+ if (!g_quiet && dis_tgt_member(current) != NULL)
+ (void) printf("\narchive member %s\n",
+ dis_tgt_member(current));
+
+ /*
+ * Instantiate a libdisasm handle based on the file type.
+ */
+ if ((dhp = dis_handle_create(g_flags, current, do_lookup,
+ do_read)) == NULL)
+ die("%s: failed to initialize disassembler: %s",
+ filename, dis_strerror(dis_errno()));
+
+ if (g_doall) {
+ /*
+ * With no arguments, iterate over all sections and
+ * disassemble only those that contain text.
+ */
+ dis_tgt_section_iter(current, dis_text_section, dhp);
+ } else {
+ callback_arg_t ca;
+
+ ca.ca_tgt = current;
+ ca.ca_handle = dhp;
+
+ /*
+ * If sections or functions were explicitly specified,
+ * resolve those names against the object, and iterate
+ * over just the resulting data.
+ */
+ sections = dis_namelist_resolve_sections(g_seclist,
+ current);
+ functions = dis_namelist_resolve_functions(g_funclist,
+ current);
+
+ dis_scnlist_iter(sections, dis_named_section, &ca);
+ dis_funclist_iter(functions, dis_named_function, &ca);
+
+ dis_scnlist_destroy(sections);
+ dis_funclist_destroy(functions);
+ }
+
+ dis_handle_destroy(dhp);
+ }
+
+ dis_tgt_destroy(tgt);
+}
+
+void
+usage(void)
+{
+ (void) fprintf(stderr, "usage: dis [-CVoqn] [-d sec] \n");
+ (void) fprintf(stderr, "\t[-D sec] [-F function] [-t sec] file ..\n");
+ exit(2);
+}
+
+typedef struct lib_node {
+ char *path;
+ struct lib_node *next;
+} lib_node_t;
+
+int
+main(int argc, char **argv)
+{
+ int optchar;
+ int i;
+ lib_node_t *libs = NULL;
+
+ g_funclist = dis_namelist_create();
+ g_seclist = dis_namelist_create();
+
+ while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
+ switch (optchar) {
+ case 'C':
+ g_demangle = 1;
+ break;
+ case 'd':
+ dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
+ break;
+ case 'D':
+ dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
+ break;
+ case 'F':
+ dis_namelist_add(g_funclist, optarg, 0);
+ break;
+ case 'l': {
+ /*
+ * The '-l foo' option historically would attempt to
+ * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR
+ * environment variable has never been supported or
+ * documented for our linker. However, until this
+ * option is formally EOLed, we have to support it.
+ */
+ char *dir;
+ lib_node_t *node;
+ size_t len;
+
+ if ((dir = getenv("LIBDIR")) == NULL ||
+ dir[0] == '\0')
+ dir = "/usr/lib";
+ node = safe_malloc(sizeof (lib_node_t));
+ len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
+ node->path = safe_malloc(len);
+
+ (void) snprintf(node->path, len, "%s/lib%s.a", dir,
+ optarg);
+ node->next = libs;
+ libs = node;
+ break;
+ }
+ case 'L':
+ /*
+ * The '-L' option historically would attempt to read
+ * the .debug section of the target to determine source
+ * line information in order to annotate the output.
+ * No compiler has emitted these sections in many years,
+ * and the option has never done what it purported to
+ * do. We silently consume the option for
+ * compatibility.
+ */
+ break;
+ case 'n':
+ g_numeric = 1;
+ break;
+ case 'o':
+ g_flags |= DIS_OCTAL;
+ break;
+ case 'q':
+ g_quiet = 1;
+ break;
+ case 't':
+ dis_namelist_add(g_seclist, optarg, DIS_TEXT);
+ break;
+ case 'V':
+ (void) printf("Solaris disassembler version 1.0\n");
+ return (0);
+ default:
+ usage();
+ break;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc == 0 && libs == NULL) {
+ warn("no objects specified");
+ usage();
+ }
+
+ if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
+ g_doall = 1;
+
+ /*
+ * See comment for 'l' option, above.
+ */
+ while (libs != NULL) {
+ lib_node_t *node = libs->next;
+
+ dis_file(libs->path);
+ free(libs->path);
+ free(libs);
+ libs = node;
+ }
+
+ for (i = 0; i < argc; i++)
+ dis_file(argv[i]);
+
+ dis_namelist_destroy(g_funclist);
+ dis_namelist_destroy(g_seclist);
+
+ return (g_error);
+}