diff options
| author | Robert Mustacchi <rm@joyent.com> | 2019-01-17 17:50:46 +0000 |
|---|---|---|
| committer | Robert Mustacchi <rm@joyent.com> | 2019-02-11 17:40:04 +0000 |
| commit | bc1f688b4872ace323eaddbb1a6365d054e7bf56 (patch) | |
| tree | 3b6f2f4caaa4bafcfb4f757be7ea4de2858201ce /usr/src/lib/libctf/common | |
| parent | 2b987d42b0ad07d74e39b18a2498709e5195d7e3 (diff) | |
| download | illumos-joyent-bc1f688b4872ace323eaddbb1a6365d054e7bf56.tar.gz | |
6885 CTF Everywhere Part 1
6886 Want ctfdiff
6887 ctfdump should be written in terms of libctf
6888 ctfmerge should be implemented in terms of libctf
6889 ctfconvert should be implemented in terms of libctf
6890 Want general workq
6891 Want general mergeq
6892 ctf_add_encoded assigns() incorrect byte size to types
6893 ctf_add_{struct,union,enum} can reuse forwards
6894 ctf_add_{struct,union,enum} occasionally forget to dirty the ctf_file_t
6895 ctf_add_member could better handle bitfields
6896 ctf_type_size() reports wrong size for forwards
6897 Want libctf ctf_kind_name() function
6898 Want libctf function to set struct/union size
Portions contributed by: John Levon <john.levon@joyent.com>
Portions contributed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Andy Fiddaman <andy@omniosce.org>
Reviewed by: Gergő Doma <domag02@gmail.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/lib/libctf/common')
| -rw-r--r-- | usr/src/lib/libctf/common/ctf_convert.c | 210 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/ctf_diff.c | 1360 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/ctf_dwarf.c | 2995 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/ctf_elfwrite.c | 422 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/ctf_lib.c | 296 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/ctf_merge.c | 1551 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/ctf_subr.c | 28 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/libctf.h | 45 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/libctf_impl.h | 59 | ||||
| -rw-r--r-- | usr/src/lib/libctf/common/mapfile-vers | 38 |
10 files changed, 6981 insertions, 23 deletions
diff --git a/usr/src/lib/libctf/common/ctf_convert.c b/usr/src/lib/libctf/common/ctf_convert.c new file mode 100644 index 0000000000..cbb4d48c76 --- /dev/null +++ b/usr/src/lib/libctf/common/ctf_convert.c @@ -0,0 +1,210 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Main conversion entry points. This has been designed such that there can be + * any number of different conversion backends. Currently we only have one that + * understands DWARFv2 (and bits of DWARFv4). Each backend should be placed in + * the ctf_converters list and each will be tried in turn. + */ + +#include <libctf_impl.h> +#include <gelf.h> + +ctf_convert_f ctf_converters[] = { + ctf_dwarf_convert +}; + +#define NCONVERTS (sizeof (ctf_converters) / sizeof (ctf_convert_f)) + +typedef enum ctf_convert_source { + CTFCONV_SOURCE_NONE = 0x0, + CTFCONV_SOURCE_UNKNOWN = 0x01, + CTFCONV_SOURCE_C = 0x02, + CTFCONV_SOURCE_S = 0x04 +} ctf_convert_source_t; + +static void +ctf_convert_ftypes(Elf *elf, ctf_convert_source_t *types) +{ + int i; + Elf_Scn *scn = NULL, *strscn; + *types = CTFCONV_SOURCE_NONE; + GElf_Shdr shdr; + Elf_Data *data, *strdata; + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + + if (gelf_getshdr(scn, &shdr) == NULL) + return; + + if (shdr.sh_type == SHT_SYMTAB) + break; + } + + if (scn == NULL) + return; + + if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL) + return; + + if ((data = elf_getdata(scn, NULL)) == NULL) + return; + + if ((strdata = elf_getdata(strscn, NULL)) == NULL) + return; + + for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) { + GElf_Sym sym; + const char *file; + size_t len; + + if (gelf_getsym(data, i, &sym) == NULL) + return; + + if (GELF_ST_TYPE(sym.st_info) != STT_FILE) + continue; + + file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name); + len = strlen(file); + if (len < 2 || file[len - 2] != '.') { + *types |= CTFCONV_SOURCE_UNKNOWN; + continue; + } + + switch (file[len - 1]) { + case 'c': + *types |= CTFCONV_SOURCE_C; + break; + case 'h': + /* We traditionally ignore header files... */ + break; + case 's': + *types |= CTFCONV_SOURCE_S; + break; + default: + *types |= CTFCONV_SOURCE_UNKNOWN; + break; + } + } +} + +static ctf_file_t * +ctf_elfconvert(int fd, Elf *elf, const char *label, uint_t nthrs, uint_t flags, + int *errp, char *errbuf, size_t errlen) +{ + int err, i; + ctf_file_t *fp = NULL; + boolean_t notsup = B_TRUE; + ctf_convert_source_t type; + + if (errp == NULL) + errp = &err; + + if (elf == NULL) { + *errp = EINVAL; + return (NULL); + } + + if (flags & ~CTF_CONVERT_F_IGNNONC) { + *errp = EINVAL; + return (NULL); + } + + if (elf_kind(elf) != ELF_K_ELF) { + *errp = ECTF_FMT; + return (NULL); + } + + ctf_convert_ftypes(elf, &type); + ctf_dprintf("got types: %d\n", type); + if (flags & CTF_CONVERT_F_IGNNONC) { + if (type == CTFCONV_SOURCE_NONE || + (type & CTFCONV_SOURCE_UNKNOWN)) { + *errp = ECTF_CONVNOCSRC; + return (NULL); + } + } + + for (i = 0; i < NCONVERTS; i++) { + ctf_conv_status_t cs; + + fp = NULL; + cs = ctf_converters[i](fd, elf, nthrs, errp, &fp, errbuf, + errlen); + if (cs == CTF_CONV_SUCCESS) { + notsup = B_FALSE; + break; + } + if (cs == CTF_CONV_ERROR) { + fp = NULL; + notsup = B_FALSE; + break; + } + } + + if (notsup == B_TRUE) { + if ((flags & CTF_CONVERT_F_IGNNONC) != 0 && + (type & CTFCONV_SOURCE_C) == 0) { + *errp = ECTF_CONVNOCSRC; + return (NULL); + } + *errp = ECTF_NOCONVBKEND; + return (NULL); + } + + /* + * Succsesful conversion. + */ + if (fp != NULL) { + if (label == NULL) + label = ""; + if (ctf_add_label(fp, label, fp->ctf_typemax, 0) == CTF_ERR) { + *errp = ctf_errno(fp); + ctf_close(fp); + return (NULL); + } + if (ctf_update(fp) == CTF_ERR) { + *errp = ctf_errno(fp); + ctf_close(fp); + return (NULL); + } + } + + return (fp); +} + +ctf_file_t * +ctf_fdconvert(int fd, const char *label, uint_t nthrs, uint_t flags, int *errp, + char *errbuf, size_t errlen) +{ + int err; + Elf *elf; + ctf_file_t *fp; + + if (errp == NULL) + errp = &err; + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (elf == NULL) { + *errp = ECTF_FMT; + return (NULL); + } + + fp = ctf_elfconvert(fd, elf, label, nthrs, flags, errp, errbuf, errlen); + + (void) elf_end(elf); + return (fp); +} diff --git a/usr/src/lib/libctf/common/ctf_diff.c b/usr/src/lib/libctf/common/ctf_diff.c new file mode 100644 index 0000000000..c8facbc728 --- /dev/null +++ b/usr/src/lib/libctf/common/ctf_diff.c @@ -0,0 +1,1360 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. All rights reserved. + */ + +/* + * The following is a basic overview of how we diff types in containers (the + * generally interesting part of diff, and what's used by merge). We maintain + * two mapping tables, a table of forward mappings (src->dest), and a reverse + * mapping (dest->src). Both are initialized to contain no mapping, and can also + * be updated to contain a negative mapping. + * + * What we do first is iterate over each type in the src container, and compare + * it with a type in the destination container. This may involve doing recursive + * comparisons -- which can involve cycles. To deal with this, whenever we + * encounter something which may be cyclic, we insert a guess. In other words, + * we assume that it may be true. This is necessary for the classic case of the + * following structure: + * + * struct foo { + * struct foo *foo_next; + * }; + * + * If it turns out that we were wrong, we discard our guesses. + * + * If we find that a given type in src has no corresponding entry in dst, we + * then mark its map as CTF_ERR (-1) to indicate that it has *no* match, as + * opposed to the default value of 0, which indicates an unknown match. + * Once we've done the first iteration through src, we know at that point in + * time whether everything in dst is similar or not and can simply walk over it + * and don't have to do any additional checks. + */ + +#include <libctf.h> +#include <ctf_impl.h> +#include <sys/debug.h> + +typedef struct ctf_diff_func { + const char *cdf_name; + ulong_t cdf_symidx; + ulong_t cdf_matchidx; +} ctf_diff_func_t; + +typedef struct ctf_diff_obj { + const char *cdo_name; + ulong_t cdo_symidx; + ctf_id_t cdo_id; + ulong_t cdo_matchidx; +} ctf_diff_obj_t; + +typedef struct ctf_diff_guess { + struct ctf_diff_guess *cdg_next; + ctf_id_t cdg_iid; + ctf_id_t cdg_oid; +} ctf_diff_guess_t; + +/* typedef in libctf.h */ +struct ctf_diff { + uint_t cds_flags; + boolean_t cds_tvalid; /* types valid */ + ctf_file_t *cds_ifp; + ctf_file_t *cds_ofp; + ctf_id_t *cds_forward; + ctf_id_t *cds_reverse; + size_t cds_fsize; + size_t cds_rsize; + ctf_diff_type_f cds_func; + ctf_diff_guess_t *cds_guess; + void *cds_arg; + uint_t cds_nifuncs; + uint_t cds_nofuncs; + uint_t cds_nextifunc; + uint_t cds_nextofunc; + ctf_diff_func_t *cds_ifuncs; + ctf_diff_func_t *cds_ofuncs; + boolean_t cds_ffillip; + boolean_t cds_fvalid; + uint_t cds_niobj; + uint_t cds_noobj; + uint_t cds_nextiobj; + uint_t cds_nextoobj; + ctf_diff_obj_t *cds_iobj; + ctf_diff_obj_t *cds_oobj; + boolean_t cds_ofillip; + boolean_t cds_ovalid; +}; + +#define TINDEX(tid) (tid - 1) + +/* + * Team Diff + */ +static int ctf_diff_type(ctf_diff_t *, ctf_file_t *, ctf_id_t, ctf_file_t *, + ctf_id_t); + +static int +ctf_diff_name(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid) +{ + const char *iname, *oname; + const ctf_type_t *itp, *otp; + + if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL) + return (CTF_ERR); + + if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL) + return (ctf_set_errno(ifp, iid)); + + iname = ctf_strptr(ifp, itp->ctt_name); + oname = ctf_strptr(ofp, otp->ctt_name); + + if ((iname == NULL || oname == NULL) && (iname != oname)) + return (B_TRUE); + + /* Two anonymous names are the same */ + if (iname == NULL && oname == NULL) + return (B_FALSE); + + return (strcmp(iname, oname) == 0 ? B_FALSE: B_TRUE); +} + +/* + * For floats and ints + */ +static int +ctf_diff_number(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid) +{ + ctf_encoding_t ien, den; + + if (ctf_type_encoding(ifp, iid, &ien) != 0) + return (CTF_ERR); + + if (ctf_type_encoding(ofp, oid, &den) != 0) + return (ctf_set_errno(ifp, iid)); + + if (bcmp(&ien, &den, sizeof (ctf_encoding_t)) != 0) + return (B_TRUE); + + return (B_FALSE); +} + +/* + * Two typedefs are equivalent, if after we resolve a chain of typedefs, they + * point to equivalent types. This means that if a size_t is defined as follows: + * + * size_t -> ulong_t -> unsigned long + * size_t -> unsigned long + * + * That we'll ultimately end up treating them the same. + */ +static int +ctf_diff_typedef(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, + ctf_file_t *ofp, ctf_id_t oid) +{ + ctf_id_t iref = CTF_ERR, oref = CTF_ERR; + + while (ctf_type_kind(ifp, iid) == CTF_K_TYPEDEF) { + iref = ctf_type_reference(ifp, iid); + if (iref == CTF_ERR) + return (CTF_ERR); + iid = iref; + } + + while (ctf_type_kind(ofp, oid) == CTF_K_TYPEDEF) { + oref = ctf_type_reference(ofp, oid); + if (oref == CTF_ERR) + return (CTF_ERR); + oid = oref; + } + + VERIFY(iref != CTF_ERR && oref != CTF_ERR); + return (ctf_diff_type(cds, ifp, iref, ofp, oref)); +} + +/* + * Two qualifiers are equivalent iff they point to two equivalent types. + */ +static int +ctf_diff_qualifier(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, + ctf_file_t *ofp, ctf_id_t oid) +{ + ctf_id_t iref, oref; + + iref = ctf_type_reference(ifp, iid); + if (iref == CTF_ERR) + return (CTF_ERR); + + oref = ctf_type_reference(ofp, oid); + if (oref == CTF_ERR) + return (ctf_set_errno(ifp, ctf_errno(ofp))); + + return (ctf_diff_type(cds, ifp, iref, ofp, oref)); +} + +/* + * Two arrays are the same iff they have the same type for contents, the same + * type for the index, and the same number of elements. + */ +static int +ctf_diff_array(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, + ctf_id_t oid) +{ + int ret; + ctf_arinfo_t iar, oar; + + if (ctf_array_info(ifp, iid, &iar) == CTF_ERR) + return (CTF_ERR); + + if (ctf_array_info(ofp, oid, &oar) == CTF_ERR) + return (ctf_set_errno(ifp, ctf_errno(ofp))); + + ret = ctf_diff_type(cds, ifp, iar.ctr_contents, ofp, oar.ctr_contents); + if (ret != B_FALSE) + return (ret); + + if (iar.ctr_nelems != oar.ctr_nelems) + return (B_TRUE); + + /* + * If we're ignoring integer types names, then we're trying to do a bit + * of a logical diff and we don't really care about the fact that the + * index element might not be the same here, what we care about are the + * number of elements and that they're the same type. + */ + if ((cds->cds_flags & CTF_DIFF_F_IGNORE_INTNAMES) == 0) { + ret = ctf_diff_type(cds, ifp, iar.ctr_index, ofp, + oar.ctr_index); + if (ret != B_FALSE) + return (ret); + } + + return (B_FALSE); +} + +/* + * Two function pointers are the same if the following is all true: + * + * o They have the same return type + * o They have the same number of arguments + * o The arguments are of the same type + * o They have the same flags + */ +static int +ctf_diff_fptr(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, + ctf_id_t oid) +{ + int ret, i; + ctf_funcinfo_t ifunc, ofunc; + ctf_id_t *iids, *oids; + + if (ctf_func_info_by_id(ifp, iid, &ifunc) == CTF_ERR) + return (CTF_ERR); + + if (ctf_func_info_by_id(ofp, oid, &ofunc) == CTF_ERR) + return (ctf_set_errno(ifp, ctf_errno(ofp))); + + if (ifunc.ctc_argc != ofunc.ctc_argc) + return (B_TRUE); + + if (ifunc.ctc_flags != ofunc.ctc_flags) + return (B_TRUE); + + ret = ctf_diff_type(cds, ifp, ifunc.ctc_return, ofp, ofunc.ctc_return); + if (ret != B_FALSE) + return (ret); + + iids = ctf_alloc(sizeof (ctf_id_t) * ifunc.ctc_argc); + if (iids == NULL) + return (ctf_set_errno(ifp, ENOMEM)); + + oids = ctf_alloc(sizeof (ctf_id_t) * ifunc.ctc_argc); + if (oids == NULL) { + ctf_free(iids, sizeof (ctf_id_t) * ifunc.ctc_argc); + return (ctf_set_errno(ifp, ENOMEM)); + } + + if (ctf_func_args_by_id(ifp, iid, ifunc.ctc_argc, iids) == CTF_ERR) { + ret = CTF_ERR; + goto out; + } + + if (ctf_func_args_by_id(ofp, oid, ofunc.ctc_argc, oids) == CTF_ERR) { + ret = ctf_set_errno(ifp, ctf_errno(ofp)); + goto out; + } + + ret = B_TRUE; + for (i = 0; i < ifunc.ctc_argc; i++) { + ret = ctf_diff_type(cds, ifp, iids[i], ofp, oids[i]); + if (ret != B_FALSE) + goto out; + } + ret = B_FALSE; + +out: + ctf_free(iids, sizeof (ctf_id_t) * ifunc.ctc_argc); + ctf_free(oids, sizeof (ctf_id_t) * ofunc.ctc_argc); + return (ret); +} + +/* + * Two structures are the same if every member is identical to its corresponding + * type, at the same offset, and has the same name, as well as them having the + * same overall size. + */ +static int +ctf_diff_struct(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, + ctf_id_t oid) +{ + ctf_file_t *oifp; + const ctf_type_t *itp, *otp; + ssize_t isize, iincr, osize, oincr; + const ctf_member_t *imp, *omp; + const ctf_lmember_t *ilmp, *olmp; + int n; + ctf_diff_guess_t *cdg; + + oifp = ifp; + + if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL) + return (CTF_ERR); + + if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL) + return (ctf_set_errno(oifp, ctf_errno(ofp))); + + if (ctf_type_size(ifp, iid) != ctf_type_size(ofp, oid)) + return (B_TRUE); + + if (LCTF_INFO_VLEN(ifp, itp->ctt_info) != + LCTF_INFO_VLEN(ofp, otp->ctt_info)) + return (B_TRUE); + + (void) ctf_get_ctt_size(ifp, itp, &isize, &iincr); + (void) ctf_get_ctt_size(ofp, otp, &osize, &oincr); + + if (ifp->ctf_version == CTF_VERSION_1 || isize < CTF_LSTRUCT_THRESH) { + imp = (const ctf_member_t *)((uintptr_t)itp + iincr); + ilmp = NULL; + } else { + imp = NULL; + ilmp = (const ctf_lmember_t *)((uintptr_t)itp + iincr); + } + + if (ofp->ctf_version == CTF_VERSION_1 || osize < CTF_LSTRUCT_THRESH) { + omp = (const ctf_member_t *)((uintptr_t)otp + oincr); + olmp = NULL; + } else { + omp = NULL; + olmp = (const ctf_lmember_t *)((uintptr_t)otp + oincr); + } + + /* + * Insert our assumption that they're equal for the moment. + */ + cdg = ctf_alloc(sizeof (ctf_diff_guess_t)); + if (cdg == NULL) + return (ctf_set_errno(ifp, ENOMEM)); + cdg->cdg_iid = iid; + cdg->cdg_oid = oid; + cdg->cdg_next = cds->cds_guess; + cds->cds_guess = cdg; + cds->cds_forward[TINDEX(iid)] = oid; + cds->cds_reverse[TINDEX(oid)] = iid; + + for (n = LCTF_INFO_VLEN(ifp, itp->ctt_info); n != 0; n--) { + const char *iname, *oname; + ulong_t ioff, ooff; + ctf_id_t itype, otype; + int ret; + + if (imp != NULL) { + iname = ctf_strptr(ifp, imp->ctm_name); + ioff = imp->ctm_offset; + itype = imp->ctm_type; + } else { + iname = ctf_strptr(ifp, ilmp->ctlm_name); + ioff = CTF_LMEM_OFFSET(ilmp); + itype = ilmp->ctlm_type; + } + + if (omp != NULL) { + oname = ctf_strptr(ofp, omp->ctm_name); + ooff = omp->ctm_offset; + otype = omp->ctm_type; + } else { + oname = ctf_strptr(ofp, olmp->ctlm_name); + ooff = CTF_LMEM_OFFSET(olmp); + otype = olmp->ctlm_type; + } + + if (ioff != ooff) { + return (B_TRUE); + } + if (strcmp(iname, oname) != 0) { + return (B_TRUE); + } + ret = ctf_diff_type(cds, ifp, itype, ofp, otype); + if (ret != B_FALSE) { + return (ret); + } + + /* Advance our pointers */ + if (imp != NULL) + imp++; + if (ilmp != NULL) + ilmp++; + if (omp != NULL) + omp++; + if (olmp != NULL) + olmp++; + } + + return (B_FALSE); +} + +/* + * Two unions are the same if they have the same set of members. This is similar + * to, but slightly different from a struct. The offsets of members don't + * matter. However, there is no guarantee of ordering so we have to fall back to + * doing an O(N^2) scan. + */ +typedef struct ctf_diff_union_member { + ctf_diff_t *cdum_cds; + ctf_file_t *cdum_fp; + ctf_file_t *cdum_iterfp; + const char *cdum_name; + ctf_id_t cdum_type; + int cdum_ret; +} ctf_diff_union_member_t; + +typedef struct ctf_diff_union_fp { + ctf_diff_t *cduf_cds; + ctf_file_t *cduf_curfp; + ctf_file_t *cduf_altfp; + ctf_id_t cduf_type; + int cduf_ret; +} ctf_diff_union_fp_t; + +/* ARGSUSED */ +static int +ctf_diff_union_check_member(const char *name, ctf_id_t id, ulong_t off, + void *arg) +{ + int ret; + ctf_diff_union_member_t *cdump = arg; + + if (strcmp(name, cdump->cdum_name) != 0) + return (0); + + ret = ctf_diff_type(cdump->cdum_cds, cdump->cdum_fp, cdump->cdum_type, + cdump->cdum_iterfp, id); + if (ret == CTF_ERR) { + cdump->cdum_ret = CTF_ERR; + return (1); + } + + if (ret == B_FALSE) { + cdump->cdum_ret = B_FALSE; + /* Return non-zero to stop iteration as we have a match */ + return (1); + } + + return (0); +} + +/* ARGSUSED */ +static int +ctf_diff_union_check_fp(const char *name, ctf_id_t id, ulong_t off, void *arg) +{ + int ret; + ctf_diff_union_member_t cdum; + ctf_diff_union_fp_t *cdufp = arg; + + cdum.cdum_cds = cdufp->cduf_cds; + cdum.cdum_fp = cdufp->cduf_curfp; + cdum.cdum_iterfp = cdufp->cduf_altfp; + cdum.cdum_name = name; + cdum.cdum_type = id; + cdum.cdum_ret = B_TRUE; + + ret = ctf_member_iter(cdum.cdum_iterfp, cdufp->cduf_type, + ctf_diff_union_check_member, &cdum); + if (ret == 0 || cdum.cdum_ret == CTF_ERR) { + /* No match found or error, terminate now */ + cdufp->cduf_ret = cdum.cdum_ret; + return (1); + } else if (ret == CTF_ERR) { + (void) ctf_set_errno(cdum.cdum_fp, ctf_errno(cdum.cdum_iterfp)); + cdufp->cduf_ret = CTF_ERR; + return (1); + } else { + ASSERT(cdum.cdum_ret == B_FALSE); + cdufp->cduf_ret = cdum.cdum_ret; + return (0); + } +} + +static int +ctf_diff_union(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, + ctf_id_t oid) +{ + ctf_file_t *oifp; + const ctf_type_t *itp, *otp; + ctf_diff_union_fp_t cduf; + ctf_diff_guess_t *cdg; + int ret; + + oifp = ifp; + if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL) + return (CTF_ERR); + if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL) + return (ctf_set_errno(oifp, ctf_errno(ofp))); + + if (LCTF_INFO_VLEN(ifp, itp->ctt_info) != + LCTF_INFO_VLEN(ofp, otp->ctt_info)) + return (B_TRUE); + + cdg = ctf_alloc(sizeof (ctf_diff_guess_t)); + if (cdg == NULL) + return (ctf_set_errno(ifp, ENOMEM)); + cdg->cdg_iid = iid; + cdg->cdg_oid = oid; + cdg->cdg_next = cds->cds_guess; + cds->cds_guess = cdg; + cds->cds_forward[TINDEX(iid)] = oid; + cds->cds_reverse[TINDEX(oid)] = iid; + + cduf.cduf_cds = cds; + cduf.cduf_curfp = ifp; + cduf.cduf_altfp = ofp; + cduf.cduf_type = oid; + cduf.cduf_ret = B_TRUE; + ret = ctf_member_iter(ifp, iid, ctf_diff_union_check_fp, &cduf); + if (ret != CTF_ERR) + ret = cduf.cduf_ret; + + return (ret); +} + +/* + * Two enums are equivalent if they share the same underlying type and they have + * the same set of members. + */ +static int +ctf_diff_enum(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid) +{ + ctf_file_t *oifp; + const ctf_type_t *itp, *otp; + ssize_t iincr, oincr; + const ctf_enum_t *iep, *oep; + int n; + + oifp = ifp; + if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL) + return (CTF_ERR); + if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL) + return (ctf_set_errno(oifp, ctf_errno(ofp))); + + if (LCTF_INFO_VLEN(ifp, itp->ctt_info) != + LCTF_INFO_VLEN(ofp, otp->ctt_info)) + return (B_TRUE); + + (void) ctf_get_ctt_size(ifp, itp, NULL, &iincr); + (void) ctf_get_ctt_size(ofp, otp, NULL, &oincr); + iep = (const ctf_enum_t *)((uintptr_t)itp + iincr); + oep = (const ctf_enum_t *)((uintptr_t)otp + oincr); + + for (n = LCTF_INFO_VLEN(ifp, itp->ctt_info); n != 0; + n--, iep++, oep++) { + if (strcmp(ctf_strptr(ifp, iep->cte_name), + ctf_strptr(ofp, oep->cte_name)) != 0) + return (B_TRUE); + + if (iep->cte_value != oep->cte_value) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Two forwards are equivalent in one of two cases. If both are forwards, than + * they are the same. Otherwise, they're equivalent if one is a struct or union + * and the other is a forward. + */ +static int +ctf_diff_forward(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid) +{ + int ikind, okind; + + ikind = ctf_type_kind(ifp, iid); + okind = ctf_type_kind(ofp, oid); + + if (ikind == okind) { + ASSERT(ikind == CTF_K_FORWARD); + return (B_FALSE); + } else if (ikind == CTF_K_FORWARD) { + return (okind != CTF_K_UNION && okind != CTF_K_STRUCT); + } else { + return (ikind != CTF_K_UNION && ikind != CTF_K_STRUCT); + } +} + +/* + * Are two types equivalent? + */ +int +ctf_diff_type(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, + ctf_id_t oid) +{ + int ret, ikind, okind; + + /* Do a quick short circuit */ + if (ifp == ofp && iid == oid) + return (B_FALSE); + + /* + * Check if it's something we've already encountered in a forward + * reference or forward negative table. Also double check the reverse + * table. + */ + if (cds->cds_forward[TINDEX(iid)] == oid) + return (B_FALSE); + if (cds->cds_forward[TINDEX(iid)] != 0) + return (B_TRUE); + if (cds->cds_reverse[TINDEX(oid)] == iid) + return (B_FALSE); + if ((cds->cds_flags & CTF_DIFF_F_IGNORE_INTNAMES) == 0 && + cds->cds_reverse[TINDEX(oid)] != 0) + return (B_TRUE); + + ikind = ctf_type_kind(ifp, iid); + okind = ctf_type_kind(ofp, oid); + + if (ikind != okind && + ikind != CTF_K_FORWARD && okind != CTF_K_FORWARD) + return (B_TRUE); + + /* Check names */ + if ((ret = ctf_diff_name(ifp, iid, ofp, oid)) != B_FALSE) { + if (ikind != okind || ikind != CTF_K_INTEGER || + (cds->cds_flags & CTF_DIFF_F_IGNORE_INTNAMES) == 0) + return (ret); + } + + if (ikind == CTF_K_FORWARD || okind == CTF_K_FORWARD) + return (ctf_diff_forward(ifp, iid, ofp, oid)); + + switch (ikind) { + case CTF_K_INTEGER: + case CTF_K_FLOAT: + ret = ctf_diff_number(ifp, iid, ofp, oid); + break; + case CTF_K_ARRAY: + ret = ctf_diff_array(cds, ifp, iid, ofp, oid); + break; + case CTF_K_FUNCTION: + ret = ctf_diff_fptr(cds, ifp, iid, ofp, oid); + break; + case CTF_K_STRUCT: + ret = ctf_diff_struct(cds, ifp, iid, ofp, oid); + break; + case CTF_K_UNION: + ret = ctf_diff_union(cds, ifp, iid, ofp, oid); + break; + case CTF_K_ENUM: + ret = ctf_diff_enum(ifp, iid, ofp, oid); + break; + case CTF_K_FORWARD: + ret = ctf_diff_forward(ifp, iid, ofp, oid); + break; + case CTF_K_TYPEDEF: + ret = ctf_diff_typedef(cds, ifp, iid, ofp, oid); + break; + case CTF_K_POINTER: + case CTF_K_VOLATILE: + case CTF_K_CONST: + case CTF_K_RESTRICT: + ret = ctf_diff_qualifier(cds, ifp, iid, ofp, oid); + break; + case CTF_K_UNKNOWN: + /* + * The current CTF tools use CTF_K_UNKNOWN as a padding type. We + * always declare two instances of CTF_K_UNKNOWN as different, + * even though this leads to additional diff noise. + */ + ret = B_TRUE; + break; + default: + abort(); + } + + return (ret); +} + +/* + * Walk every type in the first container and try to find a match in the second. + * If there is a match, then update both the forward and reverse mapping tables. + * + * The self variable tells us whether or not we should be comparing the input + * ctf container with itself or not. + */ +static int +ctf_diff_pass1(ctf_diff_t *cds, boolean_t self) +{ + int i, j, diff; + int istart, iend, jstart, jend; + + istart = 1; + iend = cds->cds_ifp->ctf_typemax; + if (cds->cds_ifp->ctf_flags & LCTF_CHILD) { + istart += CTF_CHILD_START; + iend += CTF_CHILD_START; + } + + jstart = 1; + jend = cds->cds_ofp->ctf_typemax; + if (cds->cds_ofp->ctf_flags & LCTF_CHILD) { + jstart += CTF_CHILD_START; + jend += CTF_CHILD_START; + } + + for (i = istart; i <= iend; i++) { + diff = B_TRUE; + + /* + * If we're doing a self diff for dedup purposes, then we want + * to ensure that we compare a type i with every type in the + * range, [ 1, i ). Yes, this does mean that when i equals 1, + * we won't compare anything. + */ + if (self == B_TRUE) { + jstart = istart; + jend = i - 1; + } + for (j = jstart; j <= jend; j++) { + ctf_diff_guess_t *cdg, *tofree; + + ASSERT(cds->cds_guess == NULL); + diff = ctf_diff_type(cds, cds->cds_ifp, i, + cds->cds_ofp, j); + if (diff == CTF_ERR) + return (CTF_ERR); + + /* Clean up our guesses */ + cdg = cds->cds_guess; + cds->cds_guess = NULL; + while (cdg != NULL) { + if (diff == B_TRUE) { + cds->cds_forward[TINDEX(cdg->cdg_iid)] = + 0; + cds->cds_reverse[TINDEX(cdg->cdg_oid)] = + 0; + } + tofree = cdg; + cdg = cdg->cdg_next; + ctf_free(tofree, sizeof (ctf_diff_guess_t)); + } + + /* Found a hit, update the tables */ + if (diff == B_FALSE) { + cds->cds_forward[TINDEX(i)] = j; + if (cds->cds_reverse[TINDEX(j)] == 0) + cds->cds_reverse[TINDEX(j)] = i; + break; + } + } + + /* Call the callback at this point */ + if (diff == B_TRUE) { + cds->cds_forward[TINDEX(i)] = CTF_ERR; + cds->cds_func(cds->cds_ifp, i, B_FALSE, NULL, CTF_ERR, + cds->cds_arg); + } else { + cds->cds_func(cds->cds_ifp, i, B_TRUE, cds->cds_ofp, j, + cds->cds_arg); + } + } + + return (0); +} + +/* + * Now we need to walk the second container and emit anything that we didn't + * find as common in the first pass. + */ +static int +ctf_diff_pass2(ctf_diff_t *cds) +{ + int i, start, end; + + start = 0x1; + end = cds->cds_ofp->ctf_typemax; + if (cds->cds_ofp->ctf_flags & LCTF_CHILD) { + start += CTF_CHILD_START; + end += CTF_CHILD_START; + } + + for (i = start; i <= end; i++) { + if (cds->cds_reverse[TINDEX(i)] != 0) + continue; + cds->cds_func(cds->cds_ofp, i, B_FALSE, NULL, CTF_ERR, + cds->cds_arg); + } + + return (0); +} + +int +ctf_diff_init(ctf_file_t *ifp, ctf_file_t *ofp, ctf_diff_t **cdsp) +{ + ctf_diff_t *cds; + size_t fsize, rsize; + + cds = ctf_alloc(sizeof (ctf_diff_t)); + if (cds == NULL) + return (ctf_set_errno(ifp, ENOMEM)); + + bzero(cds, sizeof (ctf_diff_t)); + cds->cds_ifp = ifp; + cds->cds_ofp = ofp; + + fsize = sizeof (ctf_id_t) * ifp->ctf_typemax; + rsize = sizeof (ctf_id_t) * ofp->ctf_typemax; + if (ifp->ctf_flags & LCTF_CHILD) + fsize += CTF_CHILD_START * sizeof (ctf_id_t); + if (ofp->ctf_flags & LCTF_CHILD) + rsize += CTF_CHILD_START * sizeof (ctf_id_t); + + cds->cds_forward = ctf_alloc(fsize); + if (cds->cds_forward == NULL) { + ctf_free(cds, sizeof (ctf_diff_t)); + return (ctf_set_errno(ifp, ENOMEM)); + } + cds->cds_fsize = fsize; + cds->cds_reverse = ctf_alloc(rsize); + if (cds->cds_reverse == NULL) { + ctf_free(cds->cds_forward, fsize); + ctf_free(cds, sizeof (ctf_diff_t)); + return (ctf_set_errno(ifp, ENOMEM)); + } + cds->cds_rsize = rsize; + bzero(cds->cds_forward, fsize); + bzero(cds->cds_reverse, rsize); + + cds->cds_ifp->ctf_refcnt++; + cds->cds_ofp->ctf_refcnt++; + *cdsp = cds; + return (0); +} + +int +ctf_diff_types(ctf_diff_t *cds, ctf_diff_type_f cb, void *arg) +{ + int ret; + + cds->cds_func = cb; + cds->cds_arg = arg; + + ret = ctf_diff_pass1(cds, B_FALSE); + if (ret == 0) + ret = ctf_diff_pass2(cds); + + cds->cds_func = NULL; + cds->cds_arg = NULL; + cds->cds_tvalid = B_TRUE; + return (ret); +} + +/* + * Do a diff where we're comparing a container with itself. In other words we'd + * like to know what types are actually duplicates of existing types in the + * container. + * + * Note this should remain private to libctf and not be exported in the public + * mapfile for the time being. + */ +int +ctf_diff_self(ctf_diff_t *cds, ctf_diff_type_f cb, void *arg) +{ + if (cds->cds_ifp != cds->cds_ofp) + return (EINVAL); + + cds->cds_func = cb; + cds->cds_arg = arg; + + return (ctf_diff_pass1(cds, B_TRUE)); +} + + +void +ctf_diff_fini(ctf_diff_t *cds) +{ + ctf_diff_guess_t *cdg; + size_t fsize, rsize; + + if (cds == NULL) + return; + + cds->cds_ifp->ctf_refcnt--; + cds->cds_ofp->ctf_refcnt--; + + fsize = sizeof (ctf_id_t) * cds->cds_ifp->ctf_typemax; + rsize = sizeof (ctf_id_t) * cds->cds_ofp->ctf_typemax; + if (cds->cds_ifp->ctf_flags & LCTF_CHILD) + fsize += CTF_CHILD_START * sizeof (ctf_id_t); + if (cds->cds_ofp->ctf_flags & LCTF_CHILD) + rsize += CTF_CHILD_START * sizeof (ctf_id_t); + + if (cds->cds_ifuncs != NULL) + ctf_free(cds->cds_ifuncs, + sizeof (ctf_diff_func_t) * cds->cds_nifuncs); + if (cds->cds_ofuncs != NULL) + ctf_free(cds->cds_ofuncs, + sizeof (ctf_diff_func_t) * cds->cds_nofuncs); + if (cds->cds_iobj != NULL) + ctf_free(cds->cds_iobj, + sizeof (ctf_diff_obj_t) * cds->cds_niobj); + if (cds->cds_oobj != NULL) + ctf_free(cds->cds_oobj, + sizeof (ctf_diff_obj_t) * cds->cds_noobj); + cdg = cds->cds_guess; + while (cdg != NULL) { + ctf_diff_guess_t *tofree = cdg; + cdg = cdg->cdg_next; + ctf_free(tofree, sizeof (ctf_diff_guess_t)); + } + if (cds->cds_forward != NULL) + ctf_free(cds->cds_forward, cds->cds_fsize); + if (cds->cds_reverse != NULL) + ctf_free(cds->cds_reverse, cds->cds_rsize); + ctf_free(cds, sizeof (ctf_diff_t)); +} + +uint_t +ctf_diff_getflags(ctf_diff_t *cds) +{ + return (cds->cds_flags); +} + +int +ctf_diff_setflags(ctf_diff_t *cds, uint_t flags) +{ + if ((flags & ~CTF_DIFF_F_IGNORE_INTNAMES) != 0) + return (ctf_set_errno(cds->cds_ifp, EINVAL)); + + cds->cds_flags = flags; + return (0); +} + +static boolean_t +ctf_diff_symid(ctf_diff_t *cds, ctf_id_t iid, ctf_id_t oid) +{ + ctf_file_t *ifp, *ofp; + + ifp = cds->cds_ifp; + ofp = cds->cds_ofp; + + /* + * If we have parent containers on the scene here, we need to go through + * and do a full diff check because a diff for types will not actually + * go through and check types in the parent container. + */ + if (iid == 0 || oid == 0) + return (iid == oid ? B_FALSE: B_TRUE); + + if (!(ifp->ctf_flags & LCTF_CHILD) && !(ofp->ctf_flags & LCTF_CHILD)) { + if (cds->cds_forward[TINDEX(iid)] != oid) + return (B_TRUE); + return (B_FALSE); + } + + return (ctf_diff_type(cds, ifp, iid, ofp, oid)); +} + +/* ARGSUSED */ +static void +ctf_diff_void_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp, + ctf_id_t oid, void *arg) +{ +} + +/* ARGSUSED */ +static int +ctf_diff_func_count(const char *name, ulong_t symidx, ctf_funcinfo_t *fip, + void *arg) +{ + uint32_t *ip = arg; + + *ip = *ip + 1; + return (0); +} + +/* ARGSUSED */ +static int +ctf_diff_func_fill_cb(const char *name, ulong_t symidx, ctf_funcinfo_t *fip, + void *arg) +{ + uint_t *next, max; + ctf_diff_func_t *funcptr; + ctf_diff_t *cds = arg; + + if (cds->cds_ffillip == B_TRUE) { + max = cds->cds_nifuncs; + next = &cds->cds_nextifunc; + funcptr = cds->cds_ifuncs + *next; + } else { + max = cds->cds_nofuncs; + next = &cds->cds_nextofunc; + funcptr = cds->cds_ofuncs + *next; + + } + + VERIFY(*next < max); + funcptr->cdf_name = name; + funcptr->cdf_symidx = symidx; + funcptr->cdf_matchidx = ULONG_MAX; + *next = *next + 1; + + return (0); +} + +int +ctf_diff_func_fill(ctf_diff_t *cds) +{ + int ret; + uint32_t ifcount, ofcount, idcnt, cti; + ulong_t i, j; + ctf_id_t *iids, *oids; + + ifcount = 0; + ofcount = 0; + idcnt = 0; + iids = NULL; + oids = NULL; + + ret = ctf_function_iter(cds->cds_ifp, ctf_diff_func_count, &ifcount); + if (ret != 0) + return (ret); + ret = ctf_function_iter(cds->cds_ofp, ctf_diff_func_count, &ofcount); + if (ret != 0) + return (ret); + + cds->cds_ifuncs = ctf_alloc(sizeof (ctf_diff_func_t) * ifcount); + if (cds->cds_ifuncs == NULL) + return (ctf_set_errno(cds->cds_ifp, ENOMEM)); + + cds->cds_nifuncs = ifcount; + cds->cds_nextifunc = 0; + + cds->cds_ofuncs = ctf_alloc(sizeof (ctf_diff_func_t) * ofcount); + if (cds->cds_ofuncs == NULL) + return (ctf_set_errno(cds->cds_ifp, ENOMEM)); + + cds->cds_nofuncs = ofcount; + cds->cds_nextofunc = 0; + + cds->cds_ffillip = B_TRUE; + if ((ret = ctf_function_iter(cds->cds_ifp, ctf_diff_func_fill_cb, + cds)) != 0) + return (ret); + + cds->cds_ffillip = B_FALSE; + if ((ret = ctf_function_iter(cds->cds_ofp, ctf_diff_func_fill_cb, + cds)) != 0) + return (ret); + + /* + * Everything is initialized to not match. This could probably be faster + * with something that used a hash. But this part of the diff isn't used + * by merge. + */ + for (i = 0; i < cds->cds_nifuncs; i++) { + for (j = 0; j < cds->cds_nofuncs; j++) { + ctf_diff_func_t *ifd, *ofd; + ctf_funcinfo_t ifip, ofip; + boolean_t match; + + ifd = &cds->cds_ifuncs[i]; + ofd = &cds->cds_ofuncs[j]; + if (strcmp(ifd->cdf_name, ofd->cdf_name) != 0) + continue; + + ret = ctf_func_info(cds->cds_ifp, ifd->cdf_symidx, + &ifip); + if (ret != 0) + goto out; + ret = ctf_func_info(cds->cds_ofp, ofd->cdf_symidx, + &ofip); + if (ret != 0) { + ret = ctf_set_errno(cds->cds_ifp, + ctf_errno(cds->cds_ofp)); + goto out; + } + + if (ifip.ctc_argc != ofip.ctc_argc && + ifip.ctc_flags != ofip.ctc_flags) + continue; + + /* Validate return type and arguments are the same */ + if (ctf_diff_symid(cds, ifip.ctc_return, + ofip.ctc_return)) + continue; + + if (ifip.ctc_argc > idcnt) { + if (iids != NULL) + ctf_free(iids, + sizeof (ctf_id_t) * idcnt); + if (oids != NULL) + ctf_free(oids, + sizeof (ctf_id_t) * idcnt); + iids = oids = NULL; + idcnt = ifip.ctc_argc; + iids = ctf_alloc(sizeof (ctf_id_t) * idcnt); + if (iids == NULL) { + ret = ctf_set_errno(cds->cds_ifp, + ENOMEM); + goto out; + } + oids = ctf_alloc(sizeof (ctf_id_t) * idcnt); + if (iids == NULL) { + ret = ctf_set_errno(cds->cds_ifp, + ENOMEM); + goto out; + } + } + + if ((ret = ctf_func_args(cds->cds_ifp, ifd->cdf_symidx, + ifip.ctc_argc, iids)) != 0) + goto out; + if ((ret = ctf_func_args(cds->cds_ofp, ofd->cdf_symidx, + ofip.ctc_argc, oids)) != 0) + goto out; + + match = B_TRUE; + for (cti = 0; cti < ifip.ctc_argc; cti++) { + if (ctf_diff_symid(cds, iids[cti], oids[cti])) { + match = B_FALSE; + break; + } + } + + if (match == B_FALSE) + continue; + + ifd->cdf_matchidx = j; + ofd->cdf_matchidx = i; + break; + } + } + + ret = 0; + +out: + if (iids != NULL) + ctf_free(iids, sizeof (ctf_id_t) * idcnt); + if (oids != NULL) + ctf_free(oids, sizeof (ctf_id_t) * idcnt); + + return (ret); +} + +/* + * In general, two functions are the same, if they have the same name and their + * arguments have the same types, including the return type. Like types, we + * basically have to do this in two passes. In the first phase we walk every + * type in the first container and try to find a match in the second. + */ +int +ctf_diff_functions(ctf_diff_t *cds, ctf_diff_func_f cb, void *arg) +{ + int ret; + ulong_t i; + + if (cds->cds_tvalid == B_FALSE) { + if ((ret = ctf_diff_types(cds, ctf_diff_void_cb, NULL)) != 0) + return (ret); + } + + if (cds->cds_fvalid == B_FALSE) { + if ((ret = ctf_diff_func_fill(cds)) != 0) + return (ret); + cds->cds_fvalid = B_TRUE; + } + + for (i = 0; i < cds->cds_nifuncs; i++) { + if (cds->cds_ifuncs[i].cdf_matchidx == ULONG_MAX) { + cb(cds->cds_ifp, cds->cds_ifuncs[i].cdf_symidx, + B_FALSE, NULL, ULONG_MAX, arg); + } else { + ulong_t idx = cds->cds_ifuncs[i].cdf_matchidx; + cb(cds->cds_ifp, cds->cds_ifuncs[i].cdf_symidx, B_TRUE, + cds->cds_ofp, cds->cds_ofuncs[idx].cdf_symidx, arg); + } + } + + for (i = 0; i < cds->cds_nofuncs; i++) { + if (cds->cds_ofuncs[i].cdf_matchidx != ULONG_MAX) + continue; + cb(cds->cds_ofp, cds->cds_ofuncs[i].cdf_symidx, B_FALSE, + NULL, ULONG_MAX, arg); + } + + return (0); +} + +static int +ctf_diff_obj_fill_cb(const char *name, ctf_id_t id, ulong_t symidx, void *arg) +{ + uint_t *next, max; + ctf_diff_obj_t *objptr; + ctf_diff_t *cds = arg; + + if (cds->cds_ofillip == B_TRUE) { + max = cds->cds_niobj; + next = &cds->cds_nextiobj; + objptr = cds->cds_iobj + *next; + } else { + max = cds->cds_noobj; + next = &cds->cds_nextoobj; + objptr = cds->cds_oobj+ *next; + + } + + VERIFY(*next < max); + objptr->cdo_name = name; + objptr->cdo_symidx = symidx; + objptr->cdo_id = id; + objptr->cdo_matchidx = ULONG_MAX; + *next = *next + 1; + + return (0); +} + +/* ARGSUSED */ +static int +ctf_diff_obj_count(const char *name, ctf_id_t id, ulong_t symidx, void *arg) +{ + uint32_t *count = arg; + + *count = *count + 1; + + return (0); +} + + +static int +ctf_diff_obj_fill(ctf_diff_t *cds) +{ + int ret; + uint32_t iocount, oocount; + ulong_t i, j; + + iocount = 0; + oocount = 0; + + ret = ctf_object_iter(cds->cds_ifp, ctf_diff_obj_count, &iocount); + if (ret != 0) + return (ret); + + ret = ctf_object_iter(cds->cds_ofp, ctf_diff_obj_count, &oocount); + if (ret != 0) + return (ret); + + cds->cds_iobj = ctf_alloc(sizeof (ctf_diff_obj_t) * iocount); + if (cds->cds_iobj == NULL) + return (ctf_set_errno(cds->cds_ifp, ENOMEM)); + cds->cds_niobj = iocount; + cds->cds_nextiobj = 0; + + cds->cds_oobj = ctf_alloc(sizeof (ctf_diff_obj_t) * oocount); + if (cds->cds_oobj == NULL) + return (ctf_set_errno(cds->cds_ifp, ENOMEM)); + cds->cds_noobj = oocount; + cds->cds_nextoobj = 0; + + cds->cds_ofillip = B_TRUE; + if ((ret = ctf_object_iter(cds->cds_ifp, ctf_diff_obj_fill_cb, + cds)) != 0) + return (ret); + + cds->cds_ofillip = B_FALSE; + if ((ret = ctf_object_iter(cds->cds_ofp, ctf_diff_obj_fill_cb, + cds)) != 0) + return (ret); + + for (i = 0; i < cds->cds_niobj; i++) { + for (j = 0; j < cds->cds_noobj; j++) { + ctf_diff_obj_t *id, *od; + + id = &cds->cds_iobj[i]; + od = &cds->cds_oobj[j]; + + if (id->cdo_name == NULL || od->cdo_name == NULL) + continue; + if (strcmp(id->cdo_name, od->cdo_name) != 0) + continue; + + if (ctf_diff_symid(cds, id->cdo_id, od->cdo_id)) { + continue; + } + + id->cdo_matchidx = j; + od->cdo_matchidx = i; + break; + } + } + + return (0); +} + +int +ctf_diff_objects(ctf_diff_t *cds, ctf_diff_obj_f cb, void *arg) +{ + int ret; + ulong_t i; + + if (cds->cds_tvalid == B_FALSE) { + if ((ret = ctf_diff_types(cds, ctf_diff_void_cb, NULL)) != 0) + return (ret); + } + + if (cds->cds_ovalid == B_FALSE) { + if ((ret = ctf_diff_obj_fill(cds)) != 0) + return (ret); + cds->cds_ovalid = B_TRUE; + } + + for (i = 0; i < cds->cds_niobj; i++) { + ctf_diff_obj_t *o = &cds->cds_iobj[i]; + + if (cds->cds_iobj[i].cdo_matchidx == ULONG_MAX) { + cb(cds->cds_ifp, o->cdo_symidx, o->cdo_id, B_FALSE, + NULL, ULONG_MAX, CTF_ERR, arg); + } else { + ctf_diff_obj_t *alt = &cds->cds_oobj[o->cdo_matchidx]; + cb(cds->cds_ifp, o->cdo_symidx, o->cdo_id, B_TRUE, + cds->cds_ofp, alt->cdo_symidx, alt->cdo_id, arg); + } + } + + for (i = 0; i < cds->cds_noobj; i++) { + ctf_diff_obj_t *o = &cds->cds_oobj[i]; + if (o->cdo_matchidx != ULONG_MAX) + continue; + cb(cds->cds_ofp, o->cdo_symidx, o->cdo_id, B_FALSE, NULL, + ULONG_MAX, CTF_ERR, arg); + } + + return (0); +} diff --git a/usr/src/lib/libctf/common/ctf_dwarf.c b/usr/src/lib/libctf/common/ctf_dwarf.c new file mode 100644 index 0000000000..18be598ed9 --- /dev/null +++ b/usr/src/lib/libctf/common/ctf_dwarf.c @@ -0,0 +1,2995 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright 2012 Jason King. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2018 Joyent, Inc. + */ + +/* + * CTF DWARF conversion theory. + * + * DWARF data contains a series of compilation units. Each compilation unit + * generally refers to an object file or what once was, in the case of linked + * binaries and shared objects. Each compilation unit has a series of what DWARF + * calls a DIE (Debugging Information Entry). The set of entries that we care + * about have type information stored in a series of attributes. Each DIE also + * has a tag that identifies the kind of attributes that it has. + * + * A given DIE may itself have children. For example, a DIE that represents a + * structure has children which represent members. Whenever we encounter a DIE + * that has children or other values or types associated with it, we recursively + * process those children first so that way we can then refer to the generated + * CTF type id while processing its parent. This reduces the amount of unknowns + * and fixups that we need. It also ensures that we don't accidentally add types + * that an overzealous compiler might add to the DWARF data but aren't used by + * anything in the system. + * + * Once we do a conversion, we store a mapping in an AVL tree that goes from the + * DWARF's die offset, which is relative to the given compilation unit, to a + * ctf_id_t. + * + * Unfortunately, some compilers actually will emit duplicate entries for a + * given type that look similar, but aren't quite. To that end, we go through + * and do a variant on a merge once we're done processing a single compilation + * unit which deduplicates all of the types that are in the unit. + * + * Finally, if we encounter an object that has multiple compilation units, then + * we'll convert all of the compilation units separately and then do a merge, so + * that way we can result in one single ctf_file_t that represents everything + * for the object. + * + * Conversion Steps + * ---------------- + * + * Because a given object we've been given to convert may have multiple + * compilation units, we break the work into two halves. The first half + * processes each compilation unit (potentially in parallel) and then the second + * half optionally merges all of the dies in the first half. First, we'll cover + * what's involved in converting a single ctf_cu_t's dwarf to CTF. This covers + * the work done in ctf_dwarf_convert_one(). + * + * An individual ctf_cu_t, which represents a compilation unit, is converted to + * CTF in a series of multiple passes. + * + * Pass 1: During the first pass we walk all of the top-level dies and if we + * find a function, variable, struct, union, enum or typedef, we recursively + * transform all of its types. We don't recurse or process everything, because + * we don't want to add some of the types that compilers may add which are + * effectively unused. + * + * During pass 1, if we encounter any structures or unions we mark them for + * fixing up later. This is necessary because we may not be able to determine + * the full size of a structure at the beginning of time. This will happen if + * the DWARF attribute DW_AT_byte_size is not present for a member. Because of + * this possibility we defer adding members to structures or even converting + * them during pass 1 and save that for pass 2. Adding all of the base + * structures without any of their members helps deal with any circular + * dependencies that we might encounter. + * + * Pass 2: This pass is used to do the first half of fixing up structures and + * unions. Rather than walk the entire type space again, we actually walk the + * list of structures and unions that we marked for later fixing up. Here, we + * iterate over every structure and add members to the underlying ctf_file_t, + * but not to the structs themselves. One might wonder why we don't, and the + * main reason is that libctf requires a ctf_update() be done before adding the + * members to structures or unions. + * + * Pass 3: This pass is used to do the second half of fixing up structures and + * unions. During this part we always go through and add members to structures + * and unions that we added to the container in the previous pass. In addition, + * we set the structure and union's actual size, which may have additional + * padding added by the compiler, it isn't simply the last offset. DWARF always + * guarantees an attribute exists for this. Importantly no ctf_id_t's change + * during pass 2. + * + * Pass 4: The next phase is to add CTF entries for all of the symbols and + * variables that are present in this die. During pass 1 we added entries to a + * map for each variable and function. During this pass, we iterate over the + * symbol table and when we encounter a symbol that we have in our lists of + * translated information which matches, we then add it to the ctf_file_t. + * + * Pass 5: Here we go and look for any weak symbols and functions and see if + * they match anything that we recognize. If so, then we add type information + * for them at this point based on the matching type. + * + * Pass 6: This pass is actually a variant on a merge. The traditional merge + * process expects there to be no duplicate types. As such, at the end of + * conversion, we do a dedup on all of the types in the system. The + * deduplication process is described in lib/libctf/common/ctf_merge.c. + * + * Once pass 6 is done, we've finished processing the individual compilation + * unit. + * + * The following steps reflect the general process of doing a conversion. + * + * 1) Walk the dwarf section and determine the number of compilation units + * 2) Create a ctf_cu_t for each compilation unit + * 3) Add all ctf_cu_t's to a workq + * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself + * is comprised of several steps, which were already enumerated. + * 5) If we have multiple cu's, we do a ctf merge of all the dies. The mechanics + * of the merge are discussed in lib/libctf/common/ctf_merge.c. + * 6) Free everything up and return a ctf_file_t to the user. If we only had a + * single compilation unit, then we give that to the user. Otherwise, we + * return the merged ctf_file_t. + * + * Threading + * --------- + * + * The process has been designed to be amenable to threading. Each compilation + * unit has its own type stream, therefore the logical place to divide and + * conquer is at the compilation unit. Each ctf_cu_t has been built to be able + * to be processed independently of the others. It has its own libdwarf handle, + * as a given libdwarf handle may only be used by a single thread at a time. + * This allows the various ctf_cu_t's to be processed in parallel by different + * threads. + * + * All of the ctf_cu_t's are loaded into a workq which allows for a number of + * threads to be specified and used as a thread pool to process all of the + * queued work. We set the number of threads to use in the workq equal to the + * number of threads that the user has specified. + * + * After all of the compilation units have been drained, we use the same number + * of threads when performing a merge of multiple compilation units, if they + * exist. + * + * While all of these different parts do support and allow for multiple threads, + * it's important that when only a single thread is specified, that it be the + * calling thread. This allows the conversion routines to be used in a context + * that doesn't allow additional threads, such as rtld. + * + * Common DWARF Mechanics and Notes + * -------------------------------- + * + * At this time, we really only support DWARFv2, though support for DWARFv4 is + * mostly there. There is no intent to support DWARFv3. + * + * Generally types for something are stored in the DW_AT_type attribute. For + * example, a function's return type will be stored in the local DW_AT_type + * attribute while the arguments will be in child DIEs. There are also various + * times when we don't have any DW_AT_type. In that case, the lack of a type + * implies, at least for C, that its C type is void. Because DWARF doesn't emit + * one, we have a synthetic void type that we create and manipulate instead and + * pass it off to consumers on an as-needed basis. If nothing has a void type, + * it will not be emitted. + * + * Architecture Specific Parts + * --------------------------- + * + * The CTF tooling encodes various information about the various architectures + * in the system. Importantly, the tool assumes that every architecture has a + * data model where long and pointer are the same size. This is currently the + * case, as the two data models illumos supports are ILP32 and LP64. + * + * In addition, we encode the mapping of various floating point sizes to various + * types for each architecture. If a new architecture is being added, it should + * be added to the list. The general design of the ctf conversion tools is to be + * architecture independent. eg. any of the tools here should be able to convert + * any architecture's DWARF into ctf; however, this has not been rigorously + * tested and more importantly, the ctf routines don't currently write out the + * data in an endian-aware form, they only use that of the currently running + * library. + */ + +#include <libctf_impl.h> +#include <sys/avl.h> +#include <sys/debug.h> +#include <gelf.h> +#include <libdwarf.h> +#include <dwarf.h> +#include <libgen.h> +#include <workq.h> +#include <errno.h> + +#define DWARF_VERSION_TWO 2 +#define DWARF_VARARGS_NAME "..." + +/* + * Dwarf may refer recursively to other types that we've already processed. To + * see if we've already converted them, we look them up in an AVL tree that's + * sorted by the DWARF id. + */ +typedef struct ctf_dwmap { + avl_node_t cdm_avl; + Dwarf_Off cdm_off; + Dwarf_Die cdm_die; + ctf_id_t cdm_id; + boolean_t cdm_fix; +} ctf_dwmap_t; + +typedef struct ctf_dwvar { + ctf_list_t cdv_list; + char *cdv_name; + ctf_id_t cdv_type; + boolean_t cdv_global; +} ctf_dwvar_t; + +typedef struct ctf_dwfunc { + ctf_list_t cdf_list; + char *cdf_name; + ctf_funcinfo_t cdf_fip; + ctf_id_t *cdf_argv; + boolean_t cdf_global; +} ctf_dwfunc_t; + +typedef struct ctf_dwbitf { + ctf_list_t cdb_list; + ctf_id_t cdb_base; + uint_t cdb_nbits; + ctf_id_t cdb_id; +} ctf_dwbitf_t; + +/* + * The ctf_cu_t represents a single top-level DWARF die unit. While generally, + * the typical object file has only a single die, if we're asked to convert + * something that's been linked from multiple sources, multiple dies will exist. + */ +typedef struct ctf_die { + Elf *cu_elf; /* shared libelf handle */ + char *cu_name; /* basename of the DIE */ + ctf_merge_t *cu_cmh; /* merge handle */ + ctf_list_t cu_vars; /* List of variables */ + ctf_list_t cu_funcs; /* List of functions */ + ctf_list_t cu_bitfields; /* Bit field members */ + Dwarf_Debug cu_dwarf; /* libdwarf handle */ + Dwarf_Die cu_cu; /* libdwarf compilation unit */ + Dwarf_Off cu_cuoff; /* cu's offset */ + Dwarf_Off cu_maxoff; /* maximum offset */ + ctf_file_t *cu_ctfp; /* output CTF file */ + avl_tree_t cu_map; /* map die offsets to CTF types */ + char *cu_errbuf; /* error message buffer */ + size_t cu_errlen; /* error message buffer length */ + size_t cu_ptrsz; /* object's pointer size */ + boolean_t cu_bigend; /* is it big endian */ + boolean_t cu_doweaks; /* should we convert weak symbols? */ + uint_t cu_mach; /* machine type */ + ctf_id_t cu_voidtid; /* void pointer */ + ctf_id_t cu_longtid; /* id for a 'long' */ +} ctf_cu_t; + +static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *); +static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die); +static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int); + +static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *, + boolean_t); +static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *, + ctf_id_t *); + +typedef int (ctf_dwarf_symtab_f)(ctf_cu_t *, const GElf_Sym *, ulong_t, + const char *, const char *, void *); + +/* + * This is a generic way to set a CTF Conversion backend error depending on what + * we were doing. Unless it was one of a specific set of errors that don't + * indicate a programming / translation bug, eg. ENOMEM, then we transform it + * into a CTF backend error and fill in the error buffer. + */ +static int +ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...) +{ + va_list ap; + int ret; + size_t off = 0; + ssize_t rem = cup->cu_errlen; + if (cfp != NULL) + err = ctf_errno(cfp); + + if (err == ENOMEM) + return (err); + + ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name); + if (ret < 0) + goto err; + off += ret; + rem = MAX(rem - ret, 0); + + va_start(ap, fmt); + ret = vsnprintf(cup->cu_errbuf + off, rem, fmt, ap); + va_end(ap); + if (ret < 0) + goto err; + + off += ret; + rem = MAX(rem - ret, 0); + if (fmt[strlen(fmt) - 1] != '\n') { + (void) snprintf(cup->cu_errbuf + off, rem, + ": %s\n", ctf_errmsg(err)); + } + va_end(ap); + return (ECTF_CONVBKERR); + +err: + cup->cu_errbuf[0] = '\0'; + return (ECTF_CONVBKERR); +} + +/* + * DWARF often opts to put no explicit type to describe a void type. eg. if we + * have a reference type whose DW_AT_type member doesn't exist, then we should + * instead assume it points to void. Because this isn't represented, we + * instead cause it to come into existence. + */ +static ctf_id_t +ctf_dwarf_void(ctf_cu_t *cup) +{ + if (cup->cu_voidtid == CTF_ERR) { + ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 }; + cup->cu_voidtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_ROOT, + "void", &enc); + if (cup->cu_voidtid == CTF_ERR) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to create void type: %s\n", + ctf_errmsg(ctf_errno(cup->cu_ctfp))); + } + } + + return (cup->cu_voidtid); +} + +/* + * There are many different forms that an array index may take. However, we just + * always force it to be of a type long no matter what. Therefore we use this to + * have a single instance of long across everything. + */ +static ctf_id_t +ctf_dwarf_long(ctf_cu_t *cup) +{ + if (cup->cu_longtid == CTF_ERR) { + ctf_encoding_t enc; + + enc.cte_format = CTF_INT_SIGNED; + enc.cte_offset = 0; + /* All illumos systems are LP */ + enc.cte_bits = cup->cu_ptrsz * 8; + cup->cu_longtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT, + "long", &enc); + if (cup->cu_longtid == CTF_ERR) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to create long type: %s\n", + ctf_errmsg(ctf_errno(cup->cu_ctfp))); + } + + } + + return (cup->cu_longtid); +} + +static int +ctf_dwmap_comp(const void *a, const void *b) +{ + const ctf_dwmap_t *ca = a; + const ctf_dwmap_t *cb = b; + + if (ca->cdm_off > cb->cdm_off) + return (1); + if (ca->cdm_off < cb->cdm_off) + return (-1); + return (0); +} + +static int +ctf_dwmap_add(ctf_cu_t *cup, ctf_id_t id, Dwarf_Die die, boolean_t fix) +{ + int ret; + avl_index_t index; + ctf_dwmap_t *dwmap; + Dwarf_Off off; + + VERIFY(id > 0 && id < CTF_MAX_TYPE); + + if ((ret = ctf_dwarf_offset(cup, die, &off)) != 0) + return (ret); + + if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL) + return (ENOMEM); + + dwmap->cdm_die = die; + dwmap->cdm_off = off; + dwmap->cdm_id = id; + dwmap->cdm_fix = fix; + + ctf_dprintf("dwmap: %p %" DW_PR_DUx "->%d\n", dwmap, off, id); + VERIFY(avl_find(&cup->cu_map, dwmap, &index) == NULL); + avl_insert(&cup->cu_map, dwmap, index); + return (0); +} + +static int +ctf_dwarf_attribute(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, + Dwarf_Attribute *attrp) +{ + int ret; + Dwarf_Error derr; + + if ((ret = dwarf_attr(die, name, attrp, &derr)) == DW_DLV_OK) + return (0); + if (ret == DW_DLV_NO_ENTRY) { + *attrp = NULL; + return (ENOENT); + } + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get attribute for type: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +static int +ctf_dwarf_ref(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp) +{ + int ret; + Dwarf_Attribute attr; + Dwarf_Error derr; + + if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) + return (ret); + + if (dwarf_formref(attr, refp, &derr) == DW_DLV_OK) { + dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR); + return (0); + } + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get unsigned attribute for type: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +static int +ctf_dwarf_refdie(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, + Dwarf_Die *diep) +{ + int ret; + Dwarf_Off off; + Dwarf_Error derr; + + if ((ret = ctf_dwarf_ref(cup, die, name, &off)) != 0) + return (ret); + + off += cup->cu_cuoff; + if ((ret = dwarf_offdie(cup->cu_dwarf, off, diep, &derr)) != + DW_DLV_OK) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get die from offset %" DW_PR_DUu ": %s\n", + off, dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); + } + + return (0); +} + +static int +ctf_dwarf_signed(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, + Dwarf_Signed *valp) +{ + int ret; + Dwarf_Attribute attr; + Dwarf_Error derr; + + if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) + return (ret); + + if (dwarf_formsdata(attr, valp, &derr) == DW_DLV_OK) { + dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR); + return (0); + } + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get unsigned attribute for type: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +static int +ctf_dwarf_unsigned(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, + Dwarf_Unsigned *valp) +{ + int ret; + Dwarf_Attribute attr; + Dwarf_Error derr; + + if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) + return (ret); + + if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) { + dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR); + return (0); + } + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get unsigned attribute for type: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +static int +ctf_dwarf_boolean(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, + Dwarf_Bool *val) +{ + int ret; + Dwarf_Attribute attr; + Dwarf_Error derr; + + if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) + return (ret); + + if (dwarf_formflag(attr, val, &derr) == DW_DLV_OK) { + dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR); + return (0); + } + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get boolean attribute for type: %s\n", + dwarf_errmsg(derr)); + + return (ECTF_CONVBKERR); +} + +static int +ctf_dwarf_string(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, char **strp) +{ + int ret; + char *s; + Dwarf_Attribute attr; + Dwarf_Error derr; + + *strp = NULL; + if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) + return (ret); + + if (dwarf_formstring(attr, &s, &derr) == DW_DLV_OK) { + if ((*strp = ctf_strdup(s)) == NULL) + ret = ENOMEM; + else + ret = 0; + dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR); + return (ret); + } + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get string attribute for type: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +static int +ctf_dwarf_member_location(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Unsigned *valp) +{ + int ret; + Dwarf_Error derr; + Dwarf_Attribute attr; + Dwarf_Locdesc *loc; + Dwarf_Signed locnum; + + if ((ret = ctf_dwarf_attribute(cup, die, DW_AT_data_member_location, + &attr)) != 0) + return (ret); + + if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to obtain location list for member offset: %s", + dwarf_errmsg(derr)); + dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR); + return (ECTF_CONVBKERR); + } + dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR); + + if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to parse location structure for member"); + dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK); + dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC); + return (ECTF_CONVBKERR); + } + + *valp = loc->ld_s->lr_number; + + dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK); + dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC); + return (0); +} + + +static int +ctf_dwarf_offset(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Off *offsetp) +{ + Dwarf_Error derr; + + if (dwarf_dieoffset(die, offsetp, &derr) == DW_DLV_OK) + return (0); + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get die offset: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +/* simpler variant for debugging output */ +static Dwarf_Off +ctf_die_offset(Dwarf_Die die) +{ + Dwarf_Off off = -1; + Dwarf_Error derr; + + (void) dwarf_dieoffset(die, &off, &derr); + return (off); +} + +static int +ctf_dwarf_tag(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half *tagp) +{ + Dwarf_Error derr; + + if (dwarf_tag(die, tagp, &derr) == DW_DLV_OK) + return (0); + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get tag type: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +static int +ctf_dwarf_sib(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *sibp) +{ + Dwarf_Error derr; + int ret; + + *sibp = NULL; + ret = dwarf_siblingof(cup->cu_dwarf, base, sibp, &derr); + if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY) + return (0); + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to sibling from die: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +static int +ctf_dwarf_child(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *childp) +{ + Dwarf_Error derr; + int ret; + + *childp = NULL; + ret = dwarf_child(base, childp, &derr); + if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY) + return (0); + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to child from die: %s\n", + dwarf_errmsg(derr)); + return (ECTF_CONVBKERR); +} + +/* + * Compilers disagree on what to do to determine if something has global + * visiblity. Traditionally gcc has used DW_AT_external to indicate this while + * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then + * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not. + */ +static int +ctf_dwarf_isglobal(ctf_cu_t *cup, Dwarf_Die die, boolean_t *igp) +{ + int ret; + Dwarf_Signed vis; + Dwarf_Bool ext; + + if ((ret = ctf_dwarf_signed(cup, die, DW_AT_visibility, &vis)) == 0) { + *igp = vis == DW_VIS_exported; + return (0); + } else if (ret != ENOENT) { + return (ret); + } + + if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_external, &ext)) != 0) { + if (ret == ENOENT) { + *igp = B_FALSE; + return (0); + } + return (ret); + } + *igp = ext != 0 ? B_TRUE : B_FALSE; + return (0); +} + +static int +ctf_dwarf_die_elfenc(Elf *elf, ctf_cu_t *cup, char *errbuf, size_t errlen) +{ + GElf_Ehdr ehdr; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + (void) snprintf(errbuf, errlen, + "failed to get ELF header: %s\n", + elf_errmsg(elf_errno())); + return (ECTF_CONVBKERR); + } + + cup->cu_mach = ehdr.e_machine; + + if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { + cup->cu_ptrsz = 4; + VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_ILP32) == 0); + } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { + cup->cu_ptrsz = 8; + VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_LP64) == 0); + } else { + (void) snprintf(errbuf, errlen, + "unknown ELF class %d", ehdr.e_ident[EI_CLASS]); + return (ECTF_CONVBKERR); + } + + if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) { + cup->cu_bigend = B_FALSE; + } else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) { + cup->cu_bigend = B_TRUE; + } else { + (void) snprintf(errbuf, errlen, + "unknown ELF data encoding: %hhu", ehdr.e_ident[EI_DATA]); + return (ECTF_CONVBKERR); + } + + return (0); +} + +typedef struct ctf_dwarf_fpent { + size_t cdfe_size; + uint_t cdfe_enc[3]; +} ctf_dwarf_fpent_t; + +typedef struct ctf_dwarf_fpmap { + uint_t cdf_mach; + ctf_dwarf_fpent_t cdf_ents[4]; +} ctf_dwarf_fpmap_t; + +static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = { + { EM_SPARC, { + { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, + { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, + { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, + { 0, { 0 } } + } }, + { EM_SPARC32PLUS, { + { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, + { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, + { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, + { 0, { 0 } } + } }, + { EM_SPARCV9, { + { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, + { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, + { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, + { 0, { 0 } } + } }, + { EM_386, { + { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, + { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, + { 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, + { 0, { 0 } } + } }, + { EM_X86_64, { + { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, + { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, + { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, + { 0, { 0 } } + } }, + { EM_NONE } +}; + +static int +ctf_dwarf_float_base(ctf_cu_t *cup, Dwarf_Signed type, ctf_encoding_t *enc) +{ + const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0]; + const ctf_dwarf_fpent_t *ent; + uint_t col = 0, mult = 1; + + for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) { + if (map->cdf_mach == cup->cu_mach) + break; + } + + if (map->cdf_mach == EM_NONE) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "Unsupported machine type: %d\n", cup->cu_mach); + return (ENOTSUP); + } + + if (type == DW_ATE_complex_float) { + mult = 2; + col = 1; + } else if (type == DW_ATE_imaginary_float || + type == DW_ATE_SUN_imaginary_float) { + col = 2; + } + + ent = &map->cdf_ents[0]; + for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) { + if (ent->cdfe_size * mult * 8 == enc->cte_bits) { + enc->cte_format = ent->cdfe_enc[col]; + return (0); + } + } + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to find valid fp mapping for encoding %d, size %d bits\n", + type, enc->cte_bits); + return (EINVAL); +} + +static int +ctf_dwarf_dwarf_base(ctf_cu_t *cup, Dwarf_Die die, int *kindp, + ctf_encoding_t *enc) +{ + int ret; + Dwarf_Signed type; + + if ((ret = ctf_dwarf_signed(cup, die, DW_AT_encoding, &type)) != 0) + return (ret); + + switch (type) { + case DW_ATE_unsigned: + case DW_ATE_address: + *kindp = CTF_K_INTEGER; + enc->cte_format = 0; + break; + case DW_ATE_unsigned_char: + *kindp = CTF_K_INTEGER; + enc->cte_format = CTF_INT_CHAR; + break; + case DW_ATE_signed: + *kindp = CTF_K_INTEGER; + enc->cte_format = CTF_INT_SIGNED; + break; + case DW_ATE_signed_char: + *kindp = CTF_K_INTEGER; + enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR; + break; + case DW_ATE_boolean: + *kindp = CTF_K_INTEGER; + enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL; + break; + case DW_ATE_float: + case DW_ATE_complex_float: + case DW_ATE_imaginary_float: + case DW_ATE_SUN_imaginary_float: + case DW_ATE_SUN_interval_float: + *kindp = CTF_K_FLOAT; + if ((ret = ctf_dwarf_float_base(cup, type, enc)) != 0) + return (ret); + break; + default: + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "encountered unkown DWARF encoding: %d", type); + return (ECTF_CONVBKERR); + } + + return (0); +} + +/* + * Different compilers (at least GCC and Studio) use different names for types. + * This parses the types and attempts to unify them. If this fails, we just fall + * back to using the DWARF itself. + */ +static int +ctf_dwarf_parse_base(const char *name, int *kindp, ctf_encoding_t *enc, + char **newnamep) +{ + char buf[256]; + char *base, *c, *last; + int nlong = 0, nshort = 0, nchar = 0, nint = 0; + int sign = 1; + + if (strlen(name) + 1 > sizeof (buf)) + return (EINVAL); + + (void) strlcpy(buf, name, sizeof (buf)); + for (c = strtok_r(buf, " ", &last); c != NULL; + c = strtok_r(NULL, " ", &last)) { + if (strcmp(c, "signed") == 0) { + sign = 1; + } else if (strcmp(c, "unsigned") == 0) { + sign = 0; + } else if (strcmp(c, "long") == 0) { + nlong++; + } else if (strcmp(c, "char") == 0) { + nchar++; + } else if (strcmp(c, "short") == 0) { + nshort++; + } else if (strcmp(c, "int") == 0) { + nint++; + } else { + /* + * If we don't recognize any of the tokens, we'll tell + * the caller to fall back to the dwarf-provided + * encoding information. + */ + return (EINVAL); + } + } + + if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2) + return (EINVAL); + + if (nchar > 0) { + if (nlong > 0 || nshort > 0 || nint > 0) + return (EINVAL); + base = "char"; + } else if (nshort > 0) { + if (nlong > 0) + return (EINVAL); + base = "short"; + } else if (nlong > 0) { + base = "long"; + } else { + base = "int"; + } + + if (nchar > 0) + enc->cte_format = CTF_INT_CHAR; + else + enc->cte_format = 0; + + if (sign > 0) + enc->cte_format |= CTF_INT_SIGNED; + + (void) snprintf(buf, sizeof (buf), "%s%s%s", + (sign ? "" : "unsigned "), + (nlong > 1 ? "long " : ""), + base); + + *newnamep = ctf_strdup(buf); + if (*newnamep == NULL) + return (ENOMEM); + *kindp = CTF_K_INTEGER; + return (0); +} + +static int +ctf_dwarf_create_base(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot, + Dwarf_Off off) +{ + int ret; + char *name, *nname; + Dwarf_Unsigned sz; + int kind; + ctf_encoding_t enc; + ctf_id_t id; + + if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) + return (ret); + if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &sz)) != 0) { + goto out; + } + ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name, + off, sz); + + bzero(&enc, sizeof (ctf_encoding_t)); + enc.cte_bits = sz * 8; + if ((ret = ctf_dwarf_parse_base(name, &kind, &enc, &nname)) == 0) { + ctf_free(name, strlen(name) + 1); + name = nname; + } else { + if (ret != EINVAL) + return (ret); + ctf_dprintf("falling back to dwarf for base type %s\n", name); + if ((ret = ctf_dwarf_dwarf_base(cup, die, &kind, &enc)) != 0) + return (ret); + } + + id = ctf_add_encoded(cup->cu_ctfp, isroot, name, &enc, kind); + if (id == CTF_ERR) { + ret = ctf_errno(cup->cu_ctfp); + } else { + *idp = id; + ret = ctf_dwmap_add(cup, id, die, B_FALSE); + } +out: + ctf_free(name, strlen(name) + 1); + return (ret); +} + +/* + * Getting a member's offset is a surprisingly intricate dance. It works as + * follows: + * + * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we + * have a DW_AT_data_member_location. We won't have both. Thus we check first + * for DW_AT_data_bit_offset, and if it exists, we're set. + * + * Next, if we have a bitfield and we don't have a DW_AT_data_bit_offset, then + * we have to grab the data location and use the following dance: + * + * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size. + * Of course, the DW_AT_byte_size may be omitted, even though it isn't always. + * When it's been omitted, we then have to say that the size is that of the + * underlying type, which forces that to be after a ctf_update(). Here, we have + * to do different things based on whether or not we're using big endian or + * little endian to obtain the proper offset. + */ +static int +ctf_dwarf_member_offset(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t mid, + ulong_t *offp) +{ + int ret; + Dwarf_Unsigned loc, bitsz, bytesz; + Dwarf_Signed bitoff; + size_t off; + ssize_t tsz; + + if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_data_bit_offset, + &loc)) == 0) { + *offp = loc; + return (0); + } else if (ret != ENOENT) { + return (ret); + } + + if ((ret = ctf_dwarf_member_location(cup, die, &loc)) != 0) + return (ret); + off = loc * 8; + + if ((ret = ctf_dwarf_signed(cup, die, DW_AT_bit_offset, + &bitoff)) != 0) { + if (ret != ENOENT) + return (ret); + *offp = off; + return (0); + } + + /* At this point we have to have DW_AT_bit_size */ + if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) + return (ret); + + if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, + &bytesz)) != 0) { + if (ret != ENOENT) + return (ret); + if ((tsz = ctf_type_size(cup->cu_ctfp, mid)) == CTF_ERR) { + int e = ctf_errno(cup->cu_ctfp); + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get type size: %s", ctf_errmsg(e)); + return (ECTF_CONVBKERR); + } + } else { + tsz = bytesz; + } + tsz *= 8; + if (cup->cu_bigend == B_TRUE) { + *offp = off + bitoff; + } else { + *offp = off + tsz - bitoff - bitsz; + } + + return (0); +} + +/* + * We need to determine if the member in question is a bitfield. If it is, then + * we need to go through and create a new type that's based on the actual base + * type, but has a different size. We also rename the type as a result to help + * deal with future collisions. + * + * Here we need to look and see if we have a DW_AT_bit_size value. If we have a + * bit size member and it does not equal the byte size member, then we need to + * create a bitfield type based on this. + * + * Note: When we support DWARFv4, there may be a chance that we need to also + * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member. + */ +static int +ctf_dwarf_member_bitfield(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp) +{ + int ret; + Dwarf_Unsigned bitsz; + ctf_encoding_t e; + ctf_dwbitf_t *cdb; + ctf_dtdef_t *dtd; + ctf_id_t base = *idp; + int kind; + + if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) { + if (ret == ENOENT) + return (0); + return (ret); + } + + ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz); + /* + * Given that we now have a bitsize, time to go do something about it. + * We're going to create a new type based on the current one, but first + * we need to find the base type. This means we need to traverse any + * typedef's, consts, and volatiles until we get to what should be + * something of type integer or enumeration. + */ + VERIFY(bitsz < UINT32_MAX); + dtd = ctf_dtd_lookup(cup->cu_ctfp, base); + VERIFY(dtd != NULL); + kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info); + while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST || + kind == CTF_K_VOLATILE) { + dtd = ctf_dtd_lookup(cup->cu_ctfp, dtd->dtd_data.ctt_type); + VERIFY(dtd != NULL); + kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info); + } + ctf_dprintf("got kind %d\n", kind); + VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM); + + /* + * As surprising as it may be, it is strictly possible to create a + * bitfield that is based on an enum. Of course, the C standard leaves + * enums sizing as an ABI concern more or less. To that effect, today on + * all illumos platforms the size of an enum is generally that of an + * int as our supported data models and ABIs all agree on that. So what + * we'll do is fake up a CTF encoding here to use. In this case, we'll + * treat it as an unsigned value of whatever size the underlying enum + * currently has (which is in the ctt_size member of its dynamic type + * data). + */ + if (kind == CTF_K_INTEGER) { + e = dtd->dtd_u.dtu_enc; + } else { + bzero(&e, sizeof (ctf_encoding_t)); + e.cte_bits = dtd->dtd_data.ctt_size * NBBY; + } + + for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; + cdb = ctf_list_next(cdb)) { + if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz) + break; + } + + /* + * Create a new type if none exists. We name all types in a way that is + * guaranteed not to conflict with the corresponding C type. We do this + * by using the ':' operator. + */ + if (cdb == NULL) { + size_t namesz; + char *name; + + e.cte_bits = bitsz; + namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name, + (uint32_t)bitsz); + name = ctf_alloc(namesz + 1); + if (name == NULL) + return (ENOMEM); + cdb = ctf_alloc(sizeof (ctf_dwbitf_t)); + if (cdb == NULL) { + ctf_free(name, namesz + 1); + return (ENOMEM); + } + (void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name, + (uint32_t)bitsz); + + cdb->cdb_base = base; + cdb->cdb_nbits = bitsz; + cdb->cdb_id = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT, + name, &e); + if (cdb->cdb_id == CTF_ERR) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to get add bitfield type %s: %s", name, + ctf_errmsg(ctf_errno(cup->cu_ctfp))); + ctf_free(name, namesz + 1); + ctf_free(cdb, sizeof (ctf_dwbitf_t)); + return (ECTF_CONVBKERR); + } + ctf_free(name, namesz + 1); + ctf_list_append(&cup->cu_bitfields, cdb); + } + + *idp = cdb->cdb_id; + + return (0); +} + +static int +ctf_dwarf_fixup_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t base, boolean_t add) +{ + int ret, kind; + Dwarf_Die child, memb; + Dwarf_Unsigned size; + ulong_t nsz; + + kind = ctf_type_kind(cup->cu_ctfp, base); + VERIFY(kind != CTF_ERR); + VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION); + + /* + * Members are in children. However, gcc also allows empty ones. + */ + if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) + return (ret); + if (child == NULL) + return (0); + + memb = child; + while (memb != NULL) { + Dwarf_Die sib, tdie; + Dwarf_Half tag; + ctf_id_t mid; + char *mname; + ulong_t memboff = 0; + + if ((ret = ctf_dwarf_tag(cup, memb, &tag)) != 0) + return (ret); + + if (tag != DW_TAG_member) + continue; + + if ((ret = ctf_dwarf_refdie(cup, memb, DW_AT_type, &tdie)) != 0) + return (ret); + + if ((ret = ctf_dwarf_convert_type(cup, tdie, &mid, + CTF_ADD_NONROOT)) != 0) + return (ret); + ctf_dprintf("Got back type id: %d\n", mid); + + /* + * If we're not adding a member, just go ahead and return. + */ + if (add == B_FALSE) { + if ((ret = ctf_dwarf_member_bitfield(cup, memb, + &mid)) != 0) + return (ret); + goto next; + } + + if ((ret = ctf_dwarf_string(cup, memb, DW_AT_name, + &mname)) != 0 && ret != ENOENT) + return (ret); + if (ret == ENOENT) + mname = NULL; + + if (kind == CTF_K_UNION) { + memboff = 0; + } else if ((ret = ctf_dwarf_member_offset(cup, memb, mid, + &memboff)) != 0) { + if (mname != NULL) + ctf_free(mname, strlen(mname) + 1); + return (ret); + } + + if ((ret = ctf_dwarf_member_bitfield(cup, memb, &mid)) != 0) + return (ret); + + ret = ctf_add_member(cup->cu_ctfp, base, mname, mid, memboff); + if (ret == CTF_ERR) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to add member %s: %s", + mname, ctf_errmsg(ctf_errno(cup->cu_ctfp))); + if (mname != NULL) + ctf_free(mname, strlen(mname) + 1); + return (ECTF_CONVBKERR); + } + + if (mname != NULL) + ctf_free(mname, strlen(mname) + 1); + +next: + if ((ret = ctf_dwarf_sib(cup, memb, &sib)) != 0) + return (ret); + memb = sib; + } + + /* + * If we're not adding members, then we don't know the final size of the + * structure, so end here. + */ + if (add == B_FALSE) + return (0); + + /* Finally set the size of the structure to the actual byte size */ + if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &size)) != 0) + return (ret); + nsz = size; + if ((ctf_set_size(cup->cu_ctfp, base, nsz)) == CTF_ERR) { + int e = ctf_errno(cup->cu_ctfp); + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to set type size for %d to 0x%x: %s", base, + (uint32_t)size, ctf_errmsg(e)); + return (ECTF_CONVBKERR); + } + + return (0); +} + +static int +ctf_dwarf_create_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, + int kind, int isroot) +{ + int ret; + char *name; + ctf_id_t base; + Dwarf_Die child; + Dwarf_Bool decl; + + /* + * Deal with the terribly annoying case of anonymous structs and unions. + * If they don't have a name, set the name to the empty string. + */ + if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 && + ret != ENOENT) + return (ret); + if (ret == ENOENT) + name = NULL; + + /* + * We need to check if we just have a declaration here. If we do, then + * instead of creating an actual structure or union, we're just going to + * go ahead and create a forward. During a dedup or merge, the forward + * will be replaced with the real thing. + */ + if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, + &decl)) != 0) { + if (ret != ENOENT) + return (ret); + decl = 0; + } + + if (decl != 0) { + base = ctf_add_forward(cup->cu_ctfp, isroot, name, kind); + } else if (kind == CTF_K_STRUCT) { + base = ctf_add_struct(cup->cu_ctfp, isroot, name); + } else { + base = ctf_add_union(cup->cu_ctfp, isroot, name); + } + ctf_dprintf("added sou %s (%d) (%d)\n", name, kind, base); + if (name != NULL) + ctf_free(name, strlen(name) + 1); + if (base == CTF_ERR) + return (ctf_errno(cup->cu_ctfp)); + *idp = base; + + /* + * If it's just a declaration, we're not going to mark it for fix up or + * do anything else. + */ + if (decl == B_TRUE) + return (ctf_dwmap_add(cup, base, die, B_FALSE)); + if ((ret = ctf_dwmap_add(cup, base, die, B_TRUE)) != 0) + return (ret); + + /* + * Members are in children. However, gcc also allows empty ones. + */ + if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) + return (ret); + if (child == NULL) + return (0); + + return (0); +} + +static int +ctf_dwarf_create_array_range(ctf_cu_t *cup, Dwarf_Die range, ctf_id_t *idp, + ctf_id_t base, int isroot) +{ + int ret; + Dwarf_Die sib; + Dwarf_Unsigned val; + Dwarf_Signed sval; + ctf_arinfo_t ar; + + ctf_dprintf("creating array range\n"); + + if ((ret = ctf_dwarf_sib(cup, range, &sib)) != 0) + return (ret); + if (sib != NULL) { + ctf_id_t id; + if ((ret = ctf_dwarf_create_array_range(cup, sib, &id, + base, CTF_ADD_NONROOT)) != 0) + return (ret); + ar.ctr_contents = id; + } else { + ar.ctr_contents = base; + } + + if ((ar.ctr_index = ctf_dwarf_long(cup)) == CTF_ERR) + return (ctf_errno(cup->cu_ctfp)); + + /* + * Array bounds can be signed or unsigned, but there are several kinds + * of signless forms (data1, data2, etc) that take their sign from the + * routine that is trying to interpret them. That is, data1 can be + * either signed or unsigned, depending on whether you use the signed or + * unsigned accessor function. GCC will use the signless forms to store + * unsigned values which have their high bit set, so we need to try to + * read them first as unsigned to get positive values. We could also + * try signed first, falling back to unsigned if we got a negative + * value. + */ + if ((ret = ctf_dwarf_unsigned(cup, range, DW_AT_upper_bound, + &val)) == 0) { + ar.ctr_nelems = val + 1; + } else if (ret != ENOENT) { + return (ret); + } else if ((ret = ctf_dwarf_signed(cup, range, DW_AT_upper_bound, + &sval)) == 0) { + ar.ctr_nelems = sval + 1; + } else if (ret != ENOENT) { + return (ret); + } else { + ar.ctr_nelems = 0; + } + + if ((*idp = ctf_add_array(cup->cu_ctfp, isroot, &ar)) == CTF_ERR) + return (ctf_errno(cup->cu_ctfp)); + + return (0); +} + +/* + * Try and create an array type. First, the kind of the array is specified in + * the DW_AT_type entry. Next, the number of entries is stored in a more + * complicated form, we should have a child that has the DW_TAG_subrange type. + */ +static int +ctf_dwarf_create_array(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot) +{ + int ret; + Dwarf_Die tdie, rdie; + ctf_id_t tid; + Dwarf_Half rtag; + + if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) + return (ret); + if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid, + CTF_ADD_NONROOT)) != 0) + return (ret); + + if ((ret = ctf_dwarf_child(cup, die, &rdie)) != 0) + return (ret); + if ((ret = ctf_dwarf_tag(cup, rdie, &rtag)) != 0) + return (ret); + if (rtag != DW_TAG_subrange_type) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "encountered array without DW_TAG_subrange_type child\n"); + return (ECTF_CONVBKERR); + } + + /* + * The compiler may opt to describe a multi-dimensional array as one + * giant array or it may opt to instead encode it as a series of + * subranges. If it's the latter, then for each subrange we introduce a + * type. We can always use the base type. + */ + if ((ret = ctf_dwarf_create_array_range(cup, rdie, idp, tid, + isroot)) != 0) + return (ret); + ctf_dprintf("Got back id %d\n", *idp); + return (ctf_dwmap_add(cup, *idp, die, B_FALSE)); +} + +static int +ctf_dwarf_create_reference(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, + int kind, int isroot) +{ + int ret; + ctf_id_t id; + Dwarf_Die tdie; + char *name; + size_t namelen; + + if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 && + ret != ENOENT) + return (ret); + if (ret == ENOENT) { + name = NULL; + namelen = 0; + } else { + namelen = strlen(name); + } + + ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>"); + + if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) { + if (ret != ENOENT) { + ctf_free(name, namelen); + return (ret); + } + if ((id = ctf_dwarf_void(cup)) == CTF_ERR) { + ctf_free(name, namelen); + return (ctf_errno(cup->cu_ctfp)); + } + } else { + if ((ret = ctf_dwarf_convert_type(cup, tdie, &id, + CTF_ADD_NONROOT)) != 0) { + ctf_free(name, namelen); + return (ret); + } + } + + if ((*idp = ctf_add_reftype(cup->cu_ctfp, isroot, name, id, kind)) == + CTF_ERR) { + ctf_free(name, namelen); + return (ctf_errno(cup->cu_ctfp)); + } + + ctf_free(name, namelen); + return (ctf_dwmap_add(cup, *idp, die, B_FALSE)); +} + +static int +ctf_dwarf_create_enum(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot) +{ + int ret; + ctf_id_t id; + Dwarf_Die child; + char *name; + + if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 && + ret != ENOENT) + return (ret); + if (ret == ENOENT) + name = NULL; + id = ctf_add_enum(cup->cu_ctfp, isroot, name); + ctf_dprintf("added enum %s (%d)\n", name, id); + if (name != NULL) + ctf_free(name, strlen(name) + 1); + if (id == CTF_ERR) + return (ctf_errno(cup->cu_ctfp)); + *idp = id; + if ((ret = ctf_dwmap_add(cup, id, die, B_FALSE)) != 0) + return (ret); + + if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) { + if (ret == ENOENT) + ret = 0; + return (ret); + } + + while (child != NULL) { + Dwarf_Half tag; + Dwarf_Signed sval; + Dwarf_Unsigned uval; + Dwarf_Die arg = child; + int eval; + + if ((ret = ctf_dwarf_sib(cup, arg, &child)) != 0) + return (ret); + + if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0) + return (ret); + + if (tag != DW_TAG_enumerator) { + if ((ret = ctf_dwarf_convert_type(cup, arg, NULL, + CTF_ADD_NONROOT)) != 0) + return (ret); + continue; + } + + /* + * DWARF v4 section 5.7 tells us we'll always have names. + */ + if ((ret = ctf_dwarf_string(cup, arg, DW_AT_name, &name)) != 0) + return (ret); + + /* + * We have to be careful here: newer GCCs generate DWARF where + * an unsigned value will happily pass ctf_dwarf_signed(). + * Since negative values will fail ctf_dwarf_unsigned(), we try + * that first to make sure we get the right value. + */ + if ((ret = ctf_dwarf_unsigned(cup, arg, DW_AT_const_value, + &uval)) == 0) { + eval = (int)uval; + } else if ((ret = ctf_dwarf_signed(cup, arg, DW_AT_const_value, + &sval)) == 0) { + eval = sval; + } + + if (ret != 0) { + if (ret != ENOENT) + return (ret); + + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "encountered enumeration without constant value\n"); + return (ECTF_CONVBKERR); + } + + ret = ctf_add_enumerator(cup->cu_ctfp, id, name, eval); + if (ret == CTF_ERR) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "failed to add enumarator %s (%d) to %d\n", + name, eval, id); + ctf_free(name, strlen(name) + 1); + return (ctf_errno(cup->cu_ctfp)); + } + ctf_free(name, strlen(name) + 1); + } + + return (0); +} + +/* + * For a function pointer, walk over and process all of its children, unless we + * encounter one that's just a declaration. In which case, we error on it. + */ +static int +ctf_dwarf_create_fptr(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot) +{ + int ret; + Dwarf_Bool b; + ctf_funcinfo_t fi; + Dwarf_Die retdie; + ctf_id_t *argv = NULL; + + bzero(&fi, sizeof (ctf_funcinfo_t)); + + if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) { + if (ret != ENOENT) + return (ret); + } else { + if (b != 0) + return (EPROTOTYPE); + } + + /* + * Return type is in DW_AT_type, if none, it returns void. + */ + if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &retdie)) != 0) { + if (ret != ENOENT) + return (ret); + if ((fi.ctc_return = ctf_dwarf_void(cup)) == CTF_ERR) + return (ctf_errno(cup->cu_ctfp)); + } else { + if ((ret = ctf_dwarf_convert_type(cup, retdie, &fi.ctc_return, + CTF_ADD_NONROOT)) != 0) + return (ret); + } + + if ((ret = ctf_dwarf_function_count(cup, die, &fi, B_TRUE)) != 0) { + return (ret); + } + + if (fi.ctc_argc != 0) { + argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc); + if (argv == NULL) + return (ENOMEM); + + if ((ret = ctf_dwarf_convert_fargs(cup, die, &fi, argv)) != 0) { + ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc); + return (ret); + } + } + + if ((*idp = ctf_add_funcptr(cup->cu_ctfp, isroot, &fi, argv)) == + CTF_ERR) { + ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc); + return (ctf_errno(cup->cu_ctfp)); + } + + ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc); + return (ctf_dwmap_add(cup, *idp, die, B_FALSE)); +} + +static int +ctf_dwarf_convert_type(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, + int isroot) +{ + int ret; + Dwarf_Off offset; + Dwarf_Half tag; + ctf_dwmap_t lookup, *map; + ctf_id_t id; + + if (idp == NULL) + idp = &id; + + if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0) + return (ret); + + if (offset > cup->cu_maxoff) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "die offset %llu beyond maximum for header %llu\n", + offset, cup->cu_maxoff); + return (ECTF_CONVBKERR); + } + + /* + * If we've already added an entry for this offset, then we're done. + */ + lookup.cdm_off = offset; + if ((map = avl_find(&cup->cu_map, &lookup, NULL)) != NULL) { + *idp = map->cdm_id; + return (0); + } + + if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0) + return (ret); + + ret = ENOTSUP; + switch (tag) { + case DW_TAG_base_type: + ctf_dprintf("base\n"); + ret = ctf_dwarf_create_base(cup, die, idp, isroot, offset); + break; + case DW_TAG_array_type: + ctf_dprintf("array\n"); + ret = ctf_dwarf_create_array(cup, die, idp, isroot); + break; + case DW_TAG_enumeration_type: + ctf_dprintf("enum\n"); + ret = ctf_dwarf_create_enum(cup, die, idp, isroot); + break; + case DW_TAG_pointer_type: + ctf_dprintf("pointer\n"); + ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_POINTER, + isroot); + break; + case DW_TAG_structure_type: + ctf_dprintf("struct\n"); + ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_STRUCT, + isroot); + break; + case DW_TAG_subroutine_type: + ctf_dprintf("fptr\n"); + ret = ctf_dwarf_create_fptr(cup, die, idp, isroot); + break; + case DW_TAG_typedef: + ctf_dprintf("typedef\n"); + ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_TYPEDEF, + isroot); + break; + case DW_TAG_union_type: + ctf_dprintf("union\n"); + ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_UNION, + isroot); + break; + case DW_TAG_const_type: + ctf_dprintf("const\n"); + ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_CONST, + isroot); + break; + case DW_TAG_volatile_type: + ctf_dprintf("volatile\n"); + ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_VOLATILE, + isroot); + break; + case DW_TAG_restrict_type: + ctf_dprintf("restrict\n"); + ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_RESTRICT, + isroot); + break; + default: + ctf_dprintf("ignoring tag type %x\n", tag); + ret = 0; + break; + } + ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n", + ret); + + return (ret); +} + +static int +ctf_dwarf_walk_lexical(ctf_cu_t *cup, Dwarf_Die die) +{ + int ret; + Dwarf_Die child; + + if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) + return (ret); + + if (child == NULL) + return (0); + + return (ctf_dwarf_convert_die(cup, die)); +} + +static int +ctf_dwarf_function_count(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip, + boolean_t fptr) +{ + int ret; + Dwarf_Die child, sib, arg; + + if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) + return (ret); + + arg = child; + while (arg != NULL) { + Dwarf_Half tag; + + if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0) + return (ret); + + /* + * We have to check for a varargs type decleration. This will + * happen in one of two ways. If we have a function pointer + * type, then it'll be done with a tag of type + * DW_TAG_unspecified_parameters. However, it only means we have + * a variable number of arguments, if we have more than one + * argument found so far. Otherwise, when we have a function + * type, it instead uses a formal parameter whose name is '...' + * to indicate a variable arguments member. + * + * Also, if we have a function pointer, then we have to expect + * that we might not get a name at all. + */ + if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) { + char *name; + if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, + &name)) != 0) + return (ret); + if (strcmp(name, DWARF_VARARGS_NAME) == 0) + fip->ctc_flags |= CTF_FUNC_VARARG; + else + fip->ctc_argc++; + ctf_free(name, strlen(name) + 1); + } else if (tag == DW_TAG_formal_parameter) { + fip->ctc_argc++; + } else if (tag == DW_TAG_unspecified_parameters && + fip->ctc_argc > 0) { + fip->ctc_flags |= CTF_FUNC_VARARG; + } + if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0) + return (ret); + arg = sib; + } + + return (0); +} + +static int +ctf_dwarf_convert_fargs(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip, + ctf_id_t *argv) +{ + int ret; + int i = 0; + Dwarf_Die child, sib, arg; + + if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) + return (ret); + + arg = child; + while (arg != NULL) { + Dwarf_Half tag; + + if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0) + return (ret); + if (tag == DW_TAG_formal_parameter) { + Dwarf_Die tdie; + + if ((ret = ctf_dwarf_refdie(cup, arg, DW_AT_type, + &tdie)) != 0) + return (ret); + + if ((ret = ctf_dwarf_convert_type(cup, tdie, &argv[i], + CTF_ADD_ROOT)) != 0) + return (ret); + i++; + + /* + * Once we hit argc entries, we're done. This ensures we + * don't accidentally hit a varargs which should be the + * last entry. + */ + if (i == fip->ctc_argc) + break; + } + + if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0) + return (ret); + arg = sib; + } + + return (0); +} + +static int +ctf_dwarf_convert_function(ctf_cu_t *cup, Dwarf_Die die) +{ + int ret; + char *name; + ctf_dwfunc_t *cdf; + Dwarf_Die tdie; + + /* + * Functions that don't have a name are generally functions that have + * been inlined and thus most information about them has been lost. If + * we can't get a name, then instead of returning ENOENT, we silently + * swallow the error. + */ + if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) { + if (ret == ENOENT) + return (0); + return (ret); + } + + ctf_dprintf("beginning work on function %s\n", name); + if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) { + ctf_free(name, strlen(name) + 1); + return (ENOMEM); + } + bzero(cdf, sizeof (ctf_dwfunc_t)); + cdf->cdf_name = name; + + if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) == 0) { + if ((ret = ctf_dwarf_convert_type(cup, tdie, + &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) { + ctf_free(name, strlen(name) + 1); + ctf_free(cdf, sizeof (ctf_dwfunc_t)); + return (ret); + } + } else if (ret != ENOENT) { + ctf_free(name, strlen(name) + 1); + ctf_free(cdf, sizeof (ctf_dwfunc_t)); + return (ret); + } else { + if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cup)) == + CTF_ERR) { + ctf_free(name, strlen(name) + 1); + ctf_free(cdf, sizeof (ctf_dwfunc_t)); + return (ctf_errno(cup->cu_ctfp)); + } + } + + /* + * A function has a number of children, some of which may not be ones we + * care about. Children that we care about have a type of + * DW_TAG_formal_parameter. We're going to do two passes, the first to + * count the arguments, the second to process them. Afterwards, we + * should be good to go ahead and add this function. + * + * Note, we already got the return type by going in and grabbing it out + * of the DW_AT_type. + */ + if ((ret = ctf_dwarf_function_count(cup, die, &cdf->cdf_fip, + B_FALSE)) != 0) { + ctf_free(name, strlen(name) + 1); + ctf_free(cdf, sizeof (ctf_dwfunc_t)); + return (ret); + } + + ctf_dprintf("beginning to convert function arguments %s\n", name); + if (cdf->cdf_fip.ctc_argc != 0) { + uint_t argc = cdf->cdf_fip.ctc_argc; + cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc); + if (cdf->cdf_argv == NULL) { + ctf_free(name, strlen(name) + 1); + ctf_free(cdf, sizeof (ctf_dwfunc_t)); + return (ENOMEM); + } + if ((ret = ctf_dwarf_convert_fargs(cup, die, + &cdf->cdf_fip, cdf->cdf_argv)) != 0) { + ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc); + ctf_free(name, strlen(name) + 1); + ctf_free(cdf, sizeof (ctf_dwfunc_t)); + return (ret); + } + } else { + cdf->cdf_argv = NULL; + } + + if ((ret = ctf_dwarf_isglobal(cup, die, &cdf->cdf_global)) != 0) { + ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * + cdf->cdf_fip.ctc_argc); + ctf_free(name, strlen(name) + 1); + ctf_free(cdf, sizeof (ctf_dwfunc_t)); + return (ret); + } + + ctf_list_append(&cup->cu_funcs, cdf); + return (ret); +} + +/* + * Convert variables, but only if they're not prototypes and have names. + */ +static int +ctf_dwarf_convert_variable(ctf_cu_t *cup, Dwarf_Die die) +{ + int ret; + char *name; + Dwarf_Bool b; + Dwarf_Die tdie; + ctf_id_t id; + ctf_dwvar_t *cdv; + + /* Skip "Non-Defining Declarations" */ + if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) == 0) { + if (b != 0) + return (0); + } else if (ret != ENOENT) { + return (ret); + } + + /* + * If we find a DIE of "Declarations Completing Non-Defining + * Declarations", we will use the referenced type's DIE. This isn't + * quite correct, e.g. DW_AT_decl_line will be the forward declaration + * not this site. It's sufficient for what we need, however: in + * particular, we should find DW_AT_external as needed there. + */ + if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_specification, + &tdie)) == 0) { + Dwarf_Off offset; + if ((ret = ctf_dwarf_offset(cup, tdie, &offset)) != 0) + return (ret); + ctf_dprintf("die 0x%llx DW_AT_specification -> die 0x%llx\n", + ctf_die_offset(die), ctf_die_offset(tdie)); + die = tdie; + } else if (ret != ENOENT) { + return (ret); + } + + if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 && + ret != ENOENT) + return (ret); + if (ret == ENOENT) + return (0); + + if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) { + ctf_free(name, strlen(name) + 1); + return (ret); + } + + if ((ret = ctf_dwarf_convert_type(cup, tdie, &id, + CTF_ADD_ROOT)) != 0) + return (ret); + + if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) { + ctf_free(name, strlen(name) + 1); + return (ENOMEM); + } + + cdv->cdv_name = name; + cdv->cdv_type = id; + + if ((ret = ctf_dwarf_isglobal(cup, die, &cdv->cdv_global)) != 0) { + ctf_free(cdv, sizeof (ctf_dwvar_t)); + ctf_free(name, strlen(name) + 1); + return (ret); + } + + ctf_list_append(&cup->cu_vars, cdv); + return (0); +} + +/* + * Walk through our set of top-level types and process them. + */ +static int +ctf_dwarf_walk_toplevel(ctf_cu_t *cup, Dwarf_Die die) +{ + int ret; + Dwarf_Off offset; + Dwarf_Half tag; + + if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0) + return (ret); + + if (offset > cup->cu_maxoff) { + (void) snprintf(cup->cu_errbuf, cup->cu_errlen, + "die offset %llu beyond maximum for header %llu\n", + offset, cup->cu_maxoff); + return (ECTF_CONVBKERR); + } + + if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0) + return (ret); + + ret = 0; + switch (tag) { + case DW_TAG_subprogram: + ctf_dprintf("top level func\n"); + ret = ctf_dwarf_convert_function(cup, die); + break; + case DW_TAG_variable: + ctf_dprintf("top level var\n"); + ret = ctf_dwarf_convert_variable(cup, die); + break; + case DW_TAG_lexical_block: + ctf_dprintf("top level block\n"); + ret = ctf_dwarf_walk_lexical(cup, die); + break; + case DW_TAG_enumeration_type: + case DW_TAG_structure_type: + case DW_TAG_typedef: + case DW_TAG_union_type: + ctf_dprintf("top level type\n"); + ret = ctf_dwarf_convert_type(cup, die, NULL, B_TRUE); + break; + default: + break; + } + + return (ret); +} + + +/* + * We're given a node. At this node we need to convert it and then proceed to + * convert any siblings that are associaed with this die. + */ +static int +ctf_dwarf_convert_die(ctf_cu_t *cup, Dwarf_Die die) +{ + while (die != NULL) { + int ret; + Dwarf_Die sib; + + if ((ret = ctf_dwarf_walk_toplevel(cup, die)) != 0) + return (ret); + + if ((ret = ctf_dwarf_sib(cup, die, &sib)) != 0) + return (ret); + die = sib; + } + return (0); +} + +static int +ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass) +{ + ctf_dwmap_t *map; + + for (map = avl_first(&cup->cu_map); map != NULL; + map = AVL_NEXT(&cup->cu_map, map)) { + int ret; + if (map->cdm_fix == B_FALSE) + continue; + if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id, + addpass)) != 0) + return (ret); + } + + return (0); +} + +static ctf_dwfunc_t * +ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name, + int bind) +{ + ctf_dwfunc_t *cdf; + + if (bind == STB_WEAK) + return (NULL); + + /* Nothing we can do if we can't find a name to compare it to. */ + if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL)) + return (NULL); + + for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; + cdf = ctf_list_next(cdf)) { + if (bind == STB_GLOBAL && cdf->cdf_global == B_FALSE) + continue; + if (bind == STB_LOCAL && cdf->cdf_global == B_TRUE) + continue; + if (strcmp(name, cdf->cdf_name) != 0) + continue; + if (bind == STB_LOCAL && strcmp(file, cup->cu_name) != 0) + continue; + return (cdf); + } + + return (NULL); +} +static ctf_dwvar_t * +ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name, + int bind) +{ + ctf_dwvar_t *cdv; + + /* Nothing we can do if we can't find a name to compare it to. */ + if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL)) + return (NULL); + ctf_dprintf("Still considering %s\n", name); + + for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; + cdv = ctf_list_next(cdv)) { + if (bind == STB_GLOBAL && cdv->cdv_global == B_FALSE) + continue; + if (bind == STB_LOCAL && cdv->cdv_global == B_TRUE) + continue; + if (strcmp(name, cdv->cdv_name) != 0) + continue; + if (bind == STB_LOCAL && strcmp(file, cup->cu_name) != 0) + continue; + return (cdv); + } + + return (NULL); +} + +static int +ctf_dwarf_symtab_iter(ctf_cu_t *cup, ctf_dwarf_symtab_f *func, void *arg) +{ + int ret; + ulong_t i; + ctf_file_t *fp = cup->cu_ctfp; + const char *file = NULL; + uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data; + uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data; + + for (i = 0; i < fp->ctf_nsyms; i++) { + const char *name; + int type; + GElf_Sym gsym; + const GElf_Sym *gsymp; + + if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) { + const Elf32_Sym *symp = (Elf32_Sym *)symbase + i; + type = ELF32_ST_TYPE(symp->st_info); + if (type == STT_FILE) { + file = (char *)(strbase + symp->st_name); + continue; + } + if (type != STT_OBJECT && type != STT_FUNC) + continue; + if (ctf_sym_valid(strbase, type, symp->st_shndx, + symp->st_value, symp->st_name) == B_FALSE) + continue; + name = (char *)(strbase + symp->st_name); + gsym.st_name = symp->st_name; + gsym.st_value = symp->st_value; + gsym.st_size = symp->st_size; + gsym.st_info = symp->st_info; + gsym.st_other = symp->st_other; + gsym.st_shndx = symp->st_shndx; + gsymp = &gsym; + } else { + const Elf64_Sym *symp = (Elf64_Sym *)symbase + i; + type = ELF64_ST_TYPE(symp->st_info); + if (type == STT_FILE) { + file = (char *)(strbase + symp->st_name); + continue; + } + if (type != STT_OBJECT && type != STT_FUNC) + continue; + if (ctf_sym_valid(strbase, type, symp->st_shndx, + symp->st_value, symp->st_name) == B_FALSE) + continue; + name = (char *)(strbase + symp->st_name); + gsymp = symp; + } + + ret = func(cup, gsymp, i, file, name, arg); + if (ret != 0) + return (ret); + } + + return (0); +} + +static int +ctf_dwarf_conv_funcvars_cb(ctf_cu_t *cup, const GElf_Sym *symp, ulong_t idx, + const char *file, const char *name, void *arg) +{ + int ret, bind, type; + + bind = GELF_ST_BIND(symp->st_info); + type = GELF_ST_TYPE(symp->st_info); + + /* + * Come back to weak symbols in another pass + */ + if (bind == STB_WEAK) + return (0); + + if (type == STT_OBJECT) { + ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name, + bind); + ctf_dprintf("match for %s (%d): %p\n", name, idx, cdv); + if (cdv == NULL) + return (0); + ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type); + ctf_dprintf("added object %s\n", name); + } else { + ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name, + bind); + if (cdf == NULL) + return (0); + ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip, + cdf->cdf_argv); + } + + if (ret == CTF_ERR) { + return (ctf_errno(cup->cu_ctfp)); + } + + return (0); +} + +static int +ctf_dwarf_conv_funcvars(ctf_cu_t *cup) +{ + return (ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_funcvars_cb, NULL)); +} + +/* + * If we have a weak symbol, attempt to find the strong symbol it will resolve + * to. Note: the code where this actually happens is in sym_process() in + * cmd/sgs/libld/common/syms.c + * + * Finding the matching symbol is unfortunately not trivial. For a symbol to be + * a candidate, it must: + * + * - have the same type (function, object) + * - have the same value (address) + * - have the same size + * - not be another weak symbol + * - belong to the same section (checked via section index) + * + * To perform this check, we first iterate over the symbol table. For each weak + * symbol that we encounter, we then do a second walk over the symbol table, + * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's + * either a local or global symbol. If we find a global symbol then we go with + * it and stop searching for additional matches. + * + * If instead, we find a local symbol, things are more complicated. The first + * thing we do is to try and see if we have file information about both symbols + * (STT_FILE). If they both have file information and it matches, then we treat + * that as a good match and stop searching for additional matches. + * + * Otherwise, this means we have a non-matching file and a local symbol. We + * treat this as a candidate and if we find a better match (one of the two cases + * above), use that instead. There are two different ways this can happen. + * Either this is a completely different symbol, or it's a once-global symbol + * that was scoped to local via a mapfile. In the former case, curfile is + * likely inaccurate since the linker does not preserve the needed curfile in + * the order of the symbol table (see the comments about locally scoped symbols + * in libld's update_osym()). As we can't tell this case from the former one, + * we use this symbol iff no other matching symbol is found. + * + * What we really need here is a SUNW section containing weak<->strong mappings + * that we can consume. + */ +typedef struct ctf_dwarf_weak_arg { + const GElf_Sym *cweak_symp; + const char *cweak_file; + boolean_t cweak_candidate; + ulong_t cweak_idx; +} ctf_dwarf_weak_arg_t; + +static int +ctf_dwarf_conv_check_weak(ctf_cu_t *cup, const GElf_Sym *symp, + ulong_t idx, const char *file, const char *name, void *arg) +{ + ctf_dwarf_weak_arg_t *cweak = arg; + const GElf_Sym *wsymp = cweak->cweak_symp; + + ctf_dprintf("comparing weak to %s\n", name); + + if (GELF_ST_BIND(symp->st_info) == STB_WEAK) { + return (0); + } + + if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) { + return (0); + } + + if (wsymp->st_value != symp->st_value) { + return (0); + } + + if (wsymp->st_size != symp->st_size) { + return (0); + } + + if (wsymp->st_shndx != symp->st_shndx) { + return (0); + } + + /* + * Check if it's a weak candidate. + */ + if (GELF_ST_BIND(symp->st_info) == STB_LOCAL && + (file == NULL || cweak->cweak_file == NULL || + strcmp(file, cweak->cweak_file) != 0)) { + cweak->cweak_candidate = B_TRUE; + cweak->cweak_idx = idx; + return (0); + } + + /* + * Found a match, break. + */ + cweak->cweak_idx = idx; + return (1); +} + +static int +ctf_dwarf_duplicate_sym(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx) +{ + ctf_id_t id = ctf_lookup_by_symbol(cup->cu_ctfp, matchidx); + + /* + * If we matched something that for some reason didn't have type data, + * we don't consider that a fatal error and silently swallow it. + */ + if (id == CTF_ERR) { + if (ctf_errno(cup->cu_ctfp) == ECTF_NOTYPEDAT) + return (0); + else + return (ctf_errno(cup->cu_ctfp)); + } + + if (ctf_add_object(cup->cu_ctfp, idx, id) == CTF_ERR) + return (ctf_errno(cup->cu_ctfp)); + + return (0); +} + +static int +ctf_dwarf_duplicate_func(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx) +{ + int ret; + ctf_funcinfo_t fip; + ctf_id_t *args = NULL; + + if (ctf_func_info(cup->cu_ctfp, matchidx, &fip) == CTF_ERR) { + if (ctf_errno(cup->cu_ctfp) == ECTF_NOFUNCDAT) + return (0); + else + return (ctf_errno(cup->cu_ctfp)); + } + + if (fip.ctc_argc != 0) { + args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc); + if (args == NULL) + return (ENOMEM); + + if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) == + CTF_ERR) { + ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc); + return (ctf_errno(cup->cu_ctfp)); + } + } + + ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args); + if (args != NULL) + ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc); + if (ret == CTF_ERR) + return (ctf_errno(cup->cu_ctfp)); + + return (0); +} + +static int +ctf_dwarf_conv_weaks_cb(ctf_cu_t *cup, const GElf_Sym *symp, + ulong_t idx, const char *file, const char *name, void *arg) +{ + int ret, type; + ctf_dwarf_weak_arg_t cweak; + + /* + * We only care about weak symbols. + */ + if (GELF_ST_BIND(symp->st_info) != STB_WEAK) + return (0); + + type = GELF_ST_TYPE(symp->st_info); + ASSERT(type == STT_OBJECT || type == STT_FUNC); + + /* + * For each weak symbol we encounter, we need to do a second iteration + * to try and find a match. We should probably think about other + * techniques to try and save us time in the future. + */ + cweak.cweak_symp = symp; + cweak.cweak_file = file; + cweak.cweak_candidate = B_FALSE; + cweak.cweak_idx = 0; + + ctf_dprintf("Trying to find weak equiv for %s\n", name); + + ret = ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_check_weak, &cweak); + VERIFY(ret == 0 || ret == 1); + + /* + * Nothing was ever found, we're not going to add anything for this + * entry. + */ + if (ret == 0 && cweak.cweak_candidate == B_FALSE) { + ctf_dprintf("found no weak match for %s\n", name); + return (0); + } + + /* + * Now, finally go and add the type based on the match. + */ + if (type == STT_OBJECT) { + ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx); + } else { + ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx); + } + + return (ret); +} + +static int +ctf_dwarf_conv_weaks(ctf_cu_t *cup) +{ + return (ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_weaks_cb, NULL)); +} + +/* ARGSUSED */ +static int +ctf_dwarf_convert_one(void *arg, void *unused) +{ + int ret; + ctf_file_t *dedup; + ctf_cu_t *cup = arg; + + ctf_dprintf("converting die: %s\n", cup->cu_name); + ctf_dprintf("max offset: %x\n", cup->cu_maxoff); + VERIFY(cup != NULL); + + ret = ctf_dwarf_convert_die(cup, cup->cu_cu); + ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name, + ret); + if (ret != 0) { + return (ret); + } + if (ctf_update(cup->cu_ctfp) != 0) { + return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, + "failed to update output ctf container")); + } + + ret = ctf_dwarf_fixup_die(cup, B_FALSE); + ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name, + ret); + if (ret != 0) { + return (ret); + } + if (ctf_update(cup->cu_ctfp) != 0) { + return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, + "failed to update output ctf container")); + } + + ret = ctf_dwarf_fixup_die(cup, B_TRUE); + ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name, + ret); + if (ret != 0) { + return (ret); + } + if (ctf_update(cup->cu_ctfp) != 0) { + return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, + "failed to update output ctf container")); + } + + + if ((ret = ctf_dwarf_conv_funcvars(cup)) != 0) { + return (ctf_dwarf_error(cup, NULL, ret, + "failed to convert strong functions and variables")); + } + + if (ctf_update(cup->cu_ctfp) != 0) { + return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, + "failed to update output ctf container")); + } + + if (cup->cu_doweaks == B_TRUE) { + if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) { + return (ctf_dwarf_error(cup, NULL, ret, + "failed to convert weak functions and variables")); + } + + if (ctf_update(cup->cu_ctfp) != 0) { + return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, + "failed to update output ctf container")); + } + } + + ctf_phase_dump(cup->cu_ctfp, "pre-dedup"); + ctf_dprintf("adding inputs for dedup\n"); + if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) { + return (ctf_dwarf_error(cup, NULL, ret, + "failed to add inputs for merge")); + } + + ctf_dprintf("starting merge\n"); + if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) { + return (ctf_dwarf_error(cup, NULL, ret, + "failed to deduplicate die")); + } + ctf_close(cup->cu_ctfp); + cup->cu_ctfp = dedup; + + return (0); +} + +/* + * Note, we expect that if we're returning a ctf_file_t from one of the dies, + * say in the single node case, it's been saved and the entry here has been set + * to NULL, which ctf_close happily ignores. + */ +static void +ctf_dwarf_free_die(ctf_cu_t *cup) +{ + ctf_dwfunc_t *cdf, *ndf; + ctf_dwvar_t *cdv, *ndv; + ctf_dwbitf_t *cdb, *ndb; + ctf_dwmap_t *map; + void *cookie; + Dwarf_Error derr; + + ctf_dprintf("Beginning to free die: %p\n", cup); + cup->cu_elf = NULL; + ctf_dprintf("Trying to free name: %p\n", cup->cu_name); + if (cup->cu_name != NULL) + ctf_free(cup->cu_name, strlen(cup->cu_name) + 1); + ctf_dprintf("Trying to free merge handle: %p\n", cup->cu_cmh); + if (cup->cu_cmh != NULL) { + ctf_merge_fini(cup->cu_cmh); + cup->cu_cmh = NULL; + } + + ctf_dprintf("Trying to free functions\n"); + for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; cdf = ndf) { + ndf = ctf_list_next(cdf); + ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1); + if (cdf->cdf_fip.ctc_argc != 0) { + ctf_free(cdf->cdf_argv, + sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc); + } + ctf_free(cdf, sizeof (ctf_dwfunc_t)); + } + + ctf_dprintf("Trying to free variables\n"); + for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; cdv = ndv) { + ndv = ctf_list_next(cdv); + ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1); + ctf_free(cdv, sizeof (ctf_dwvar_t)); + } + + ctf_dprintf("Trying to free bitfields\n"); + for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; cdb = ndb) { + ndb = ctf_list_next(cdb); + ctf_free(cdb, sizeof (ctf_dwbitf_t)); + } + + ctf_dprintf("Trying to clean up dwarf_t: %p\n", cup->cu_dwarf); + (void) dwarf_finish(cup->cu_dwarf, &derr); + cup->cu_dwarf = NULL; + ctf_close(cup->cu_ctfp); + + cookie = NULL; + while ((map = avl_destroy_nodes(&cup->cu_map, &cookie)) != NULL) { + ctf_free(map, sizeof (ctf_dwmap_t)); + } + avl_destroy(&cup->cu_map); + cup->cu_errbuf = NULL; +} + +static void +ctf_dwarf_free_dies(ctf_cu_t *cdies, int ndies) +{ + int i; + + ctf_dprintf("Beginning to free dies\n"); + for (i = 0; i < ndies; i++) { + ctf_dwarf_free_die(&cdies[i]); + } + + ctf_free(cdies, sizeof (ctf_cu_t) * ndies); +} + +static int +ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, int *ndies, + char *errbuf, size_t errlen) +{ + int ret; + Dwarf_Half vers; + Dwarf_Unsigned nexthdr; + + while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL, + &nexthdr, derr)) != DW_DLV_NO_ENTRY) { + if (ret != DW_DLV_OK) { + (void) snprintf(errbuf, errlen, + "file does not contain valid DWARF data: %s\n", + dwarf_errmsg(*derr)); + return (ECTF_CONVBKERR); + } + + if (vers != DWARF_VERSION_TWO) { + (void) snprintf(errbuf, errlen, + "unsupported DWARF version: %d\n", vers); + return (ECTF_CONVBKERR); + } + *ndies = *ndies + 1; + } + + if (*ndies == 0) { + (void) snprintf(errbuf, errlen, + "file does not contain valid DWARF data: %s\n", + dwarf_errmsg(*derr)); + return (ECTF_CONVBKERR); + } + + return (0); +} + +static int +ctf_dwarf_init_die(int fd, Elf *elf, ctf_cu_t *cup, int ndie, char *errbuf, + size_t errlen) +{ + int ret; + Dwarf_Unsigned hdrlen, abboff, nexthdr; + Dwarf_Half addrsz; + Dwarf_Unsigned offset = 0; + Dwarf_Error derr; + + while ((ret = dwarf_next_cu_header(cup->cu_dwarf, &hdrlen, NULL, + &abboff, &addrsz, &nexthdr, &derr)) != DW_DLV_NO_ENTRY) { + char *name; + Dwarf_Die cu, child; + + /* Based on the counting above, we should be good to go */ + VERIFY(ret == DW_DLV_OK); + if (ndie > 0) { + ndie--; + offset = nexthdr; + continue; + } + + /* + * Compilers are apparently inconsistent. Some emit no DWARF for + * empty files and others emit empty compilation unit. + */ + cup->cu_voidtid = CTF_ERR; + cup->cu_longtid = CTF_ERR; + cup->cu_elf = elf; + cup->cu_maxoff = nexthdr - 1; + cup->cu_ctfp = ctf_fdcreate(fd, &ret); + if (cup->cu_ctfp == NULL) { + ctf_free(cup, sizeof (ctf_cu_t)); + return (ret); + } + avl_create(&cup->cu_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t), + offsetof(ctf_dwmap_t, cdm_avl)); + cup->cu_errbuf = errbuf; + cup->cu_errlen = errlen; + bzero(&cup->cu_vars, sizeof (ctf_list_t)); + bzero(&cup->cu_funcs, sizeof (ctf_list_t)); + bzero(&cup->cu_bitfields, sizeof (ctf_list_t)); + + if ((ret = ctf_dwarf_die_elfenc(elf, cup, errbuf, + errlen)) != 0) { + avl_destroy(&cup->cu_map); + ctf_free(cup, sizeof (ctf_cu_t)); + return (ret); + } + + if ((ret = ctf_dwarf_sib(cup, NULL, &cu)) != 0) { + avl_destroy(&cup->cu_map); + ctf_free(cup, sizeof (ctf_cu_t)); + return (ret); + } + if (cu == NULL) { + (void) snprintf(errbuf, errlen, + "file does not contain DWARF data\n"); + avl_destroy(&cup->cu_map); + ctf_free(cup, sizeof (ctf_cu_t)); + return (ECTF_CONVBKERR); + } + + if ((ret = ctf_dwarf_child(cup, cu, &child)) != 0) { + avl_destroy(&cup->cu_map); + ctf_free(cup, sizeof (ctf_cu_t)); + return (ret); + } + if (child == NULL) { + (void) snprintf(errbuf, errlen, + "file does not contain DWARF data\n"); + avl_destroy(&cup->cu_map); + ctf_free(cup, sizeof (ctf_cu_t)); + return (ECTF_CONVBKERR); + } + + cup->cu_cuoff = offset; + cup->cu_cu = child; + + if ((cup->cu_cmh = ctf_merge_init(fd, &ret)) == NULL) { + avl_destroy(&cup->cu_map); + ctf_free(cup, sizeof (ctf_cu_t)); + return (ret); + } + + if (ctf_dwarf_string(cup, cu, DW_AT_name, &name) == 0) { + size_t len = strlen(name) + 1; + char *b = basename(name); + cup->cu_name = strdup(b); + ctf_free(name, len); + } + break; + } + + return (0); +} + + +ctf_conv_status_t +ctf_dwarf_convert(int fd, Elf *elf, uint_t nthrs, int *errp, ctf_file_t **fpp, + char *errmsg, size_t errlen) +{ + int err, ret, ndies, i; + Dwarf_Debug dw; + Dwarf_Error derr; + ctf_cu_t *cdies = NULL, *cup; + workq_t *wqp = NULL; + + if (errp == NULL) + errp = &err; + *errp = 0; + *fpp = NULL; + + ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr); + if (ret != DW_DLV_OK) { + /* + * We may want to expect DWARF data here and fail conversion if + * it's missing. In this case, if we actually have some amount + * of DWARF, but no section, for now, just go ahead and create + * an empty CTF file. + */ + if (ret == DW_DLV_NO_ENTRY || + dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) { + *fpp = ctf_create(errp); + return (*fpp != NULL ? CTF_CONV_SUCCESS : + CTF_CONV_ERROR); + } + (void) snprintf(errmsg, errlen, + "failed to initialize DWARF: %s\n", + dwarf_errmsg(derr)); + *errp = ECTF_CONVBKERR; + return (CTF_CONV_ERROR); + } + + /* + * Iterate over all of the compilation units and create a ctf_cu_t for + * each of them. This is used to determine if we have zero, one, or + * multiple dies to convert. If we have zero, that's an error. If + * there's only one die, that's the simple case. No merge needed and + * only a single Dwarf_Debug as well. + */ + ndies = 0; + ret = ctf_dwarf_count_dies(dw, &derr, &ndies, errmsg, errlen); + if (ret != 0) { + *errp = ret; + goto out; + } + + (void) dwarf_finish(dw, &derr); + cdies = ctf_alloc(sizeof (ctf_cu_t) * ndies); + if (cdies == NULL) { + *errp = ENOMEM; + return (CTF_CONV_ERROR); + } + + for (i = 0; i < ndies; i++) { + cup = &cdies[i]; + ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, + &cup->cu_dwarf, &derr); + if (ret != 0) { + ctf_free(cdies, sizeof (ctf_cu_t) * ndies); + (void) snprintf(errmsg, errlen, + "failed to initialize DWARF: %s\n", + dwarf_errmsg(derr)); + *errp = ECTF_CONVBKERR; + return (CTF_CONV_ERROR); + } + + ret = ctf_dwarf_init_die(fd, elf, &cdies[i], i, errmsg, errlen); + if (ret != 0) { + *errp = ret; + goto out; + } + cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE; + } + + ctf_dprintf("found %d DWARF die(s)\n", ndies); + + /* + * If we only have one compilation unit, there's no reason to use + * multiple threads, even if the user requested them. After all, they + * just gave us an upper bound. + */ + if (ndies == 1) + nthrs = 1; + + if (workq_init(&wqp, nthrs) == -1) { + *errp = errno; + goto out; + } + + for (i = 0; i < ndies; i++) { + cup = &cdies[i]; + ctf_dprintf("adding die %s: %p, %x %x\n", cup->cu_name, + cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff); + if (workq_add(wqp, cup) == -1) { + *errp = errno; + goto out; + } + } + + ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, errp); + if (ret == WORKQ_ERROR) { + *errp = errno; + goto out; + } else if (ret == WORKQ_UERROR) { + ctf_dprintf("internal convert failed: %s\n", + ctf_errmsg(*errp)); + goto out; + } + + ctf_dprintf("Determining next phase: have %d dies\n", ndies); + if (ndies != 1) { + ctf_merge_t *cmp; + + cmp = ctf_merge_init(fd, &ret); + if (cmp == NULL) { + *errp = ret; + goto out; + } + + ctf_dprintf("setting threads\n"); + if ((ret = ctf_merge_set_nthreads(cmp, nthrs)) != 0) { + ctf_merge_fini(cmp); + *errp = ret; + goto out; + } + + ctf_dprintf("adding dies\n"); + for (i = 0; i < ndies; i++) { + cup = &cdies[i]; + if ((ret = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) { + ctf_merge_fini(cmp); + *errp = ret; + goto out; + } + } + + ctf_dprintf("performing merge\n"); + ret = ctf_merge_merge(cmp, fpp); + if (ret != 0) { + ctf_dprintf("failed merge!\n"); + *fpp = NULL; + ctf_merge_fini(cmp); + *errp = ret; + goto out; + } + ctf_merge_fini(cmp); + *errp = 0; + ctf_dprintf("successfully converted!\n"); + } else { + *errp = 0; + *fpp = cdies->cu_ctfp; + cdies->cu_ctfp = NULL; + ctf_dprintf("successfully converted!\n"); + } + +out: + workq_fini(wqp); + ctf_dwarf_free_dies(cdies, ndies); + return (*fpp != NULL ? CTF_CONV_SUCCESS : CTF_CONV_ERROR); +} diff --git a/usr/src/lib/libctf/common/ctf_elfwrite.c b/usr/src/lib/libctf/common/ctf_elfwrite.c new file mode 100644 index 0000000000..7f668fe528 --- /dev/null +++ b/usr/src/lib/libctf/common/ctf_elfwrite.c @@ -0,0 +1,422 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2015, Joyent, Inc. + */ + +/* + * Routines for writing ctf data to elf files. + */ + +#include <libctf_impl.h> +#include <libctf.h> +#include <gelf.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <libelf.h> + +static int +ctf_write_elf(ctf_file_t *fp, Elf *src, Elf *dst, int flags) +{ + GElf_Ehdr sehdr, dehdr; + Elf_Scn *sscn, *dscn; + Elf_Data *sdata, *ddata; + GElf_Shdr shdr; + int symtab_idx = -1; + off_t new_offset = 0; + off_t ctfnameoff = 0; + int compress = (flags & CTF_ELFWRITE_F_COMPRESS); + int *secxlate = NULL; + int srcidx, dstidx, pad, i; + int curnmoff = 0; + int changing = 0; + int ret; + size_t nshdr, nphdr, strndx; + void *strdatabuf = NULL, *symdatabuf = NULL; + size_t strdatasz = 0, symdatasz = 0; + + void *cdata = NULL; + size_t elfsize, asize; + + if ((flags & ~(CTF_ELFWRITE_F_COMPRESS)) != 0) { + ret = ctf_set_errno(fp, EINVAL); + goto out; + } + + if (gelf_newehdr(dst, gelf_getclass(src)) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + if (gelf_getehdr(src, &sehdr) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + (void) memcpy(&dehdr, &sehdr, sizeof (GElf_Ehdr)); + if (gelf_update_ehdr(dst, &dehdr) == 0) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + /* + * Use libelf to get the number of sections and the string section to + * deal with ELF files that may have a large number of sections. We just + * always use this to make our live easier. + */ + if (elf_getphdrnum(src, &nphdr) != 0) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + if (elf_getshdrnum(src, &nshdr) != 0) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + if (elf_getshdrstrndx(src, &strndx) != 0) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + /* + * Neither the existing debug sections nor the SUNW_ctf sections (new or + * existing) are SHF_ALLOC'd, so they won't be in areas referenced by + * program headers. As such, we can just blindly copy the program + * headers from the existing file to the new file. + */ + if (nphdr != 0) { + (void) elf_flagelf(dst, ELF_C_SET, ELF_F_LAYOUT); + if (gelf_newphdr(dst, nphdr) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + for (i = 0; i < nphdr; i++) { + GElf_Phdr phdr; + + if (gelf_getphdr(src, i, &phdr) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + if (gelf_update_phdr(dst, i, &phdr) == 0) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + } + } + + secxlate = ctf_alloc(sizeof (int) * nshdr); + for (srcidx = dstidx = 0; srcidx < nshdr; srcidx++) { + Elf_Scn *scn = elf_getscn(src, srcidx); + GElf_Shdr shdr; + char *sname; + + if (gelf_getshdr(scn, &shdr) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + sname = elf_strptr(src, strndx, shdr.sh_name); + if (sname == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + if (strcmp(sname, CTF_ELF_SCN_NAME) == 0) { + secxlate[srcidx] = -1; + } else { + secxlate[srcidx] = dstidx++; + curnmoff += strlen(sname) + 1; + } + + new_offset = (off_t)dehdr.e_phoff; + } + + for (srcidx = 1; srcidx < nshdr; srcidx++) { + char *sname; + + sscn = elf_getscn(src, srcidx); + if (gelf_getshdr(sscn, &shdr) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + if (secxlate[srcidx] == -1) { + changing = 1; + continue; + } + + dscn = elf_newscn(dst); + if (dscn == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + /* + * If this file has program headers, we need to explicitly lay + * out sections. If none of the sections prior to this one have + * been removed, then we can just use the existing location. If + * one or more sections have been changed, then we need to + * adjust this one to avoid holes. + */ + if (changing && nphdr != 0) { + pad = new_offset % shdr.sh_addralign; + + if (pad != 0) + new_offset += shdr.sh_addralign - pad; + shdr.sh_offset = new_offset; + } + + shdr.sh_link = secxlate[shdr.sh_link]; + + if (shdr.sh_type == SHT_REL || shdr.sh_type == SHT_RELA) + shdr.sh_info = secxlate[shdr.sh_info]; + + sname = elf_strptr(src, strndx, shdr.sh_name); + if (sname == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + if ((sdata = elf_getdata(sscn, NULL)) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + if ((ddata = elf_newdata(dscn)) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + bcopy(sdata, ddata, sizeof (Elf_Data)); + + if (srcidx == strndx) { + char seclen = strlen(CTF_ELF_SCN_NAME); + + strdatasz = ddata->d_size + shdr.sh_size + + seclen + 1; + ddata->d_buf = strdatabuf = ctf_alloc(strdatasz); + if (ddata->d_buf == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size); + (void) strcpy((caddr_t)ddata->d_buf + shdr.sh_size, + CTF_ELF_SCN_NAME); + ctfnameoff = (off_t)shdr.sh_size; + shdr.sh_size += seclen + 1; + ddata->d_size += seclen + 1; + + if (nphdr != 0) + changing = 1; + } + + if (shdr.sh_type == SHT_SYMTAB && shdr.sh_entsize != 0) { + int nsym = shdr.sh_size / shdr.sh_entsize; + + symtab_idx = secxlate[srcidx]; + + symdatasz = shdr.sh_size; + ddata->d_buf = symdatabuf = ctf_alloc(symdatasz); + if (ddata->d_buf == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + (void) bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size); + + for (i = 0; i < nsym; i++) { + GElf_Sym sym; + short newscn; + + (void) gelf_getsym(ddata, i, &sym); + + if (sym.st_shndx >= SHN_LORESERVE) + continue; + + if ((newscn = secxlate[sym.st_shndx]) != + sym.st_shndx) { + sym.st_shndx = + (newscn == -1 ? 1 : newscn); + + if (gelf_update_sym(ddata, i, &sym) == + 0) { + ret = ctf_set_errno(fp, + ECTF_ELF); + goto out; + } + } + } + } + + if (gelf_update_shdr(dscn, &shdr) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + new_offset = (off_t)shdr.sh_offset; + if (shdr.sh_type != SHT_NOBITS) + new_offset += shdr.sh_size; + } + + if (symtab_idx == -1) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + /* Add the ctf section */ + if ((dscn = elf_newscn(dst)) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + if (gelf_getshdr(dscn, &shdr) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + shdr.sh_name = ctfnameoff; + shdr.sh_type = SHT_PROGBITS; + shdr.sh_size = fp->ctf_size; + shdr.sh_link = symtab_idx; + shdr.sh_addralign = 4; + if (changing && nphdr != 0) { + pad = new_offset % shdr.sh_addralign; + + if (pad) + new_offset += shdr.sh_addralign - pad; + + shdr.sh_offset = new_offset; + new_offset += shdr.sh_size; + } + + if ((ddata = elf_newdata(dscn)) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + if (compress != 0) { + int err; + + if (ctf_zopen(&err) == NULL) { + ret = ctf_set_errno(fp, err); + goto out; + } + + if ((err = ctf_compress(fp, &cdata, &asize, &elfsize)) != 0) { + ret = ctf_set_errno(fp, err); + goto out; + } + ddata->d_buf = cdata; + ddata->d_size = elfsize; + } else { + ddata->d_buf = (void *)fp->ctf_base; + ddata->d_size = fp->ctf_size; + } + ddata->d_align = shdr.sh_addralign; + + if (gelf_update_shdr(dscn, &shdr) == 0) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + /* update the section header location */ + if (nphdr != 0) { + size_t align = gelf_fsize(dst, ELF_T_ADDR, 1, EV_CURRENT); + size_t r = new_offset % align; + + if (r) + new_offset += align - r; + + dehdr.e_shoff = new_offset; + } + + /* commit to disk */ + if (sehdr.e_shstrndx == SHN_XINDEX) + dehdr.e_shstrndx = SHN_XINDEX; + else + dehdr.e_shstrndx = secxlate[sehdr.e_shstrndx]; + if (gelf_update_ehdr(dst, &dehdr) == NULL) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + if (elf_update(dst, ELF_C_WRITE) < 0) { + ret = ctf_set_errno(fp, ECTF_ELF); + goto out; + } + + ret = 0; + +out: + if (strdatabuf != NULL) + ctf_free(strdatabuf, strdatasz); + if (symdatabuf != NULL) + ctf_free(symdatabuf, symdatasz); + if (cdata != NULL) + ctf_data_free(cdata, fp->ctf_size); + if (secxlate != NULL) + ctf_free(secxlate, sizeof (int) * nshdr); + + return (ret); +} + +int +ctf_elffdwrite(ctf_file_t *fp, int ifd, int ofd, int flags) +{ + int ret; + Elf *ielf, *oelf; + + (void) elf_version(EV_CURRENT); + if ((ielf = elf_begin(ifd, ELF_C_READ, NULL)) == NULL) + return (ctf_set_errno(fp, ECTF_ELF)); + + if ((oelf = elf_begin(ofd, ELF_C_WRITE, NULL)) == NULL) + return (ctf_set_errno(fp, ECTF_ELF)); + + ret = ctf_write_elf(fp, ielf, oelf, flags); + + (void) elf_end(ielf); + (void) elf_end(oelf); + + return (ret); +} + +int +ctf_elfwrite(ctf_file_t *fp, const char *input, const char *output, int flags) +{ + struct stat st; + int ifd, ofd, ret; + + if ((ifd = open(input, O_RDONLY)) < 0) + return (ctf_set_errno(fp, errno)); + + if (fstat(ifd, &st) < 0) + return (ctf_set_errno(fp, errno)); + + if ((ofd = open(output, O_RDWR | O_CREAT | O_TRUNC, st.st_mode)) < 0) + return (ctf_set_errno(fp, errno)); + + ret = ctf_elffdwrite(fp, ifd, ofd, flags); + + if (close(ifd) != 0 && ret == 0) + ret = ctf_set_errno(fp, errno); + if (close(ofd) != 0 && ret == 0) + ret = ctf_set_errno(fp, errno); + + return (ret); +} diff --git a/usr/src/lib/libctf/common/ctf_lib.c b/usr/src/lib/libctf/common/ctf_lib.c index e71ebc6d9d..e183b15f52 100644 --- a/usr/src/lib/libctf/common/ctf_lib.c +++ b/usr/src/lib/libctf/common/ctf_lib.c @@ -23,6 +23,9 @@ * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2015, Joyent, Inc. + */ #include <sys/types.h> #include <sys/stat.h> @@ -33,6 +36,8 @@ #include <errno.h> #include <dlfcn.h> #include <gelf.h> +#include <zlib.h> +#include <sys/debug.h> #ifdef _LP64 static const char *_libctf_zlib = "/usr/lib/64/libz.so.1"; @@ -42,6 +47,9 @@ static const char *_libctf_zlib = "/usr/lib/libz.so.1"; static struct { int (*z_uncompress)(uchar_t *, ulong_t *, const uchar_t *, ulong_t); + int (*z_initcomp)(z_stream *, int, const char *, int); + int (*z_compress)(z_stream *, int); + int (*z_finicomp)(z_stream *); const char *(*z_error)(int); void *z_dlp; } zlib; @@ -49,6 +57,18 @@ static struct { static size_t _PAGESIZE; static size_t _PAGEMASK; +static uint64_t ctf_phase = 0; + +#define CTF_COMPRESS_CHUNK (64*1024) + +typedef struct ctf_zdata { + void *czd_buf; + void *czd_next; + ctf_file_t *czd_ctfp; + size_t czd_allocsz; + z_stream czd_zstr; +} ctf_zdata_t; + #pragma init(_libctf_init) void _libctf_init(void) @@ -84,9 +104,14 @@ ctf_zopen(int *errp) return (ctf_set_open_errno(errp, ECTF_ZINIT)); zlib.z_uncompress = (int (*)()) dlsym(zlib.z_dlp, "uncompress"); + zlib.z_initcomp = (int (*)()) dlsym(zlib.z_dlp, "deflateInit_"); + zlib.z_compress = (int (*)()) dlsym(zlib.z_dlp, "deflate"); + zlib.z_finicomp = (int (*)()) dlsym(zlib.z_dlp, "deflateEnd"); zlib.z_error = (const char *(*)()) dlsym(zlib.z_dlp, "zError"); - if (zlib.z_uncompress == NULL || zlib.z_error == NULL) { + if (zlib.z_uncompress == NULL || zlib.z_error == NULL || + zlib.z_initcomp == NULL|| zlib.z_compress == NULL || + zlib.z_finicomp == NULL) { (void) dlclose(zlib.z_dlp); bzero(&zlib, sizeof (zlib)); return (ctf_set_open_errno(errp, ECTF_ZINIT)); @@ -111,6 +136,206 @@ z_strerror(int err) return (zlib.z_error(err)); } +static int +ctf_zdata_init(ctf_zdata_t *czd, ctf_file_t *fp) +{ + ctf_header_t *cthp; + + bzero(czd, sizeof (ctf_zdata_t)); + + czd->czd_allocsz = fp->ctf_size; + czd->czd_buf = ctf_data_alloc(czd->czd_allocsz); + if (czd->czd_buf == MAP_FAILED) + return (ctf_set_errno(fp, ENOMEM)); + + bcopy(fp->ctf_base, czd->czd_buf, sizeof (ctf_header_t)); + czd->czd_ctfp = fp; + cthp = czd->czd_buf; + cthp->cth_flags |= CTF_F_COMPRESS; + czd->czd_next = (void *)((uintptr_t)czd->czd_buf + + sizeof (ctf_header_t)); + + if (zlib.z_initcomp(&czd->czd_zstr, Z_BEST_COMPRESSION, + ZLIB_VERSION, sizeof (z_stream)) != Z_OK) + return (ctf_set_errno(fp, ECTF_ZLIB)); + + return (0); +} + +static int +ctf_zdata_grow(ctf_zdata_t *czd) +{ + size_t off; + size_t newsz; + void *ndata; + + off = (uintptr_t)czd->czd_next - (uintptr_t)czd->czd_buf; + newsz = czd->czd_allocsz + CTF_COMPRESS_CHUNK; + ndata = ctf_data_alloc(newsz); + if (ndata == MAP_FAILED) { + return (ctf_set_errno(czd->czd_ctfp, ENOMEM)); + } + + bcopy(czd->czd_buf, ndata, off); + ctf_data_free(czd->czd_buf, czd->czd_allocsz); + czd->czd_allocsz = newsz; + czd->czd_buf = ndata; + czd->czd_next = (void *)((uintptr_t)ndata + off); + + czd->czd_zstr.next_out = (Bytef *)czd->czd_next; + czd->czd_zstr.avail_out = CTF_COMPRESS_CHUNK; + return (0); +} + +static int +ctf_zdata_compress_buffer(ctf_zdata_t *czd, const void *buf, size_t bufsize) +{ + int err; + + czd->czd_zstr.next_out = czd->czd_next; + czd->czd_zstr.avail_out = czd->czd_allocsz - + ((uintptr_t)czd->czd_next - (uintptr_t)czd->czd_buf); + czd->czd_zstr.next_in = (Bytef *)buf; + czd->czd_zstr.avail_in = bufsize; + + while (czd->czd_zstr.avail_in != 0) { + if (czd->czd_zstr.avail_out == 0) { + czd->czd_next = czd->czd_zstr.next_out; + if ((err = ctf_zdata_grow(czd)) != 0) { + return (err); + } + } + + if ((err = zlib.z_compress(&czd->czd_zstr, Z_NO_FLUSH)) != Z_OK) + return (ctf_set_errno(czd->czd_ctfp, ECTF_ZLIB)); + } + czd->czd_next = czd->czd_zstr.next_out; + + return (0); +} + +static int +ctf_zdata_flush(ctf_zdata_t *czd, boolean_t finish) +{ + int err; + int flag = finish == B_TRUE ? Z_FINISH : Z_FULL_FLUSH; + int bret = finish == B_TRUE ? Z_STREAM_END : Z_BUF_ERROR; + + for (;;) { + if (czd->czd_zstr.avail_out == 0) { + czd->czd_next = czd->czd_zstr.next_out; + if ((err = ctf_zdata_grow(czd)) != 0) { + return (err); + } + } + + err = zlib.z_compress(&czd->czd_zstr, flag); + if (err == bret) { + break; + } + if (err != Z_OK) + return (ctf_set_errno(czd->czd_ctfp, ECTF_ZLIB)); + + } + + czd->czd_next = czd->czd_zstr.next_out; + + return (0); +} + +static int +ctf_zdata_end(ctf_zdata_t *czd) +{ + int ret; + + if ((ret = ctf_zdata_flush(czd, B_TRUE)) != 0) + return (ret); + + if ((ret = zlib.z_finicomp(&czd->czd_zstr)) != 0) + return (ctf_set_errno(czd->czd_ctfp, ECTF_ZLIB)); + + return (0); +} + +static void +ctf_zdata_cleanup(ctf_zdata_t *czd) +{ + ctf_data_free(czd->czd_buf, czd->czd_allocsz); + (void) zlib.z_finicomp(&czd->czd_zstr); +} + +/* + * Compress our CTF data and return both the size of the compressed data and the + * size of the allocation. These may be different due to the nature of + * compression. + * + * In addition, we flush the compression between our two phases such that we + * maintain a different dictionary between the CTF data and the string section. + */ +int +ctf_compress(ctf_file_t *fp, void **buf, size_t *allocsz, size_t *elfsize) +{ + int err; + ctf_zdata_t czd; + ctf_header_t *cthp = (ctf_header_t *)fp->ctf_base; + + if ((err = ctf_zdata_init(&czd, fp)) != 0) + return (err); + + if ((err = ctf_zdata_compress_buffer(&czd, fp->ctf_buf, + cthp->cth_stroff)) != 0) { + ctf_zdata_cleanup(&czd); + return (err); + } + + if ((err = ctf_zdata_flush(&czd, B_FALSE)) != 0) { + ctf_zdata_cleanup(&czd); + return (err); + } + + if ((err = ctf_zdata_compress_buffer(&czd, + fp->ctf_buf + cthp->cth_stroff, cthp->cth_strlen)) != 0) { + ctf_zdata_cleanup(&czd); + return (err); + } + + if ((err = ctf_zdata_end(&czd)) != 0) { + ctf_zdata_cleanup(&czd); + return (err); + } + + *buf = czd.czd_buf; + *allocsz = czd.czd_allocsz; + *elfsize = (uintptr_t)czd.czd_next - (uintptr_t)czd.czd_buf; + + return (0); +} + +int +z_compress(void *dst, size_t *dstlen, const void *src, size_t srclen) +{ + z_stream zs; + int err; + + bzero(&zs, sizeof (z_stream)); + zs.next_in = (uchar_t *)src; + zs.avail_in = srclen; + zs.next_out = dst; + zs.avail_out = *dstlen; + + if ((err = zlib.z_initcomp(&zs, Z_BEST_COMPRESSION, ZLIB_VERSION, + sizeof (z_stream))) != Z_OK) + return (err); + + if ((err = zlib.z_compress(&zs, Z_FINISH)) != Z_STREAM_END) { + (void) zlib.z_finicomp(&zs); + return (err == Z_OK ? Z_BUF_ERROR : err); + } + + *dstlen = zs.total_out; + return (zlib.z_finicomp(&zs)); +} + /* * Convert a 32-bit ELF file header into GElf. */ @@ -189,7 +414,7 @@ ctf_sect_munmap(const ctf_sect_t *sp) * responsible for closing the file descriptor when it is no longer needed. */ ctf_file_t * -ctf_fdopen(int fd, int *errp) +ctf_fdcreate_int(int fd, int *errp, ctf_sect_t *ctfp) { ctf_sect_t ctfsect, symsect, strsect; ctf_file_t *fp = NULL; @@ -221,6 +446,9 @@ ctf_fdopen(int fd, int *errp) */ if (nbytes >= sizeof (ctf_preamble_t) && hdr.ctf.ctp_magic == CTF_MAGIC) { + if (ctfp != NULL) + return (ctf_set_open_errno(errp, EINVAL)); + if (hdr.ctf.ctp_version > CTF_VERSION) return (ctf_set_open_errno(errp, ECTF_CTFVERS)); @@ -370,7 +598,8 @@ ctf_fdopen(int fd, int *errp) continue; /* corrupt sh_name field */ if (shp->sh_type == SHT_PROGBITS && - strcmp(strs + shp->sh_name, _CTF_SECTION) == 0) { + strcmp(strs + shp->sh_name, _CTF_SECTION) == 0 && + ctfp == NULL) { ctfsect.cts_name = strs + shp->sh_name; ctfsect.cts_type = shp->sh_type; ctfsect.cts_flags = shp->sh_flags; @@ -397,18 +626,22 @@ ctf_fdopen(int fd, int *errp) free(sp); /* free section header array */ - if (ctfsect.cts_type == SHT_NULL) { - (void) munmap(strs_map, strs_mapsz); - return (ctf_set_open_errno(errp, ECTF_NOCTFDATA)); - } + if (ctfp == NULL) { + if (ctfsect.cts_type == SHT_NULL && ctfp == NULL) { + (void) munmap(strs_map, strs_mapsz); + return (ctf_set_open_errno(errp, + ECTF_NOCTFDATA)); + } - /* - * Now mmap the CTF data, symtab, and strtab sections and - * call ctf_bufopen() to do the rest of the work. - */ - if (ctf_sect_mmap(&ctfsect, fd) == MAP_FAILED) { - (void) munmap(strs_map, strs_mapsz); - return (ctf_set_open_errno(errp, ECTF_MMAP)); + /* + * Now mmap the CTF data, symtab, and strtab sections + * and call ctf_bufopen() to do the rest of the work. + */ + if (ctf_sect_mmap(&ctfsect, fd) == MAP_FAILED) { + (void) munmap(strs_map, strs_mapsz); + return (ctf_set_open_errno(errp, ECTF_MMAP)); + } + ctfp = &ctfsect; } if (symsect.cts_type != SHT_NULL && @@ -418,12 +651,13 @@ ctf_fdopen(int fd, int *errp) (void) ctf_set_open_errno(errp, ECTF_MMAP); goto bad; /* unmap all and abort */ } - fp = ctf_bufopen(&ctfsect, &symsect, &strsect, errp); + fp = ctf_bufopen(ctfp, &symsect, &strsect, errp); } else - fp = ctf_bufopen(&ctfsect, NULL, NULL, errp); + fp = ctf_bufopen(ctfp, NULL, NULL, errp); bad: if (fp == NULL) { - ctf_sect_munmap(&ctfsect); + if (ctfp == NULL) + ctf_sect_munmap(&ctfsect); ctf_sect_munmap(&symsect); ctf_sect_munmap(&strsect); } else @@ -436,6 +670,12 @@ bad: return (ctf_set_open_errno(errp, ECTF_FMT)); } +ctf_file_t * +ctf_fdopen(int fd, int *errp) +{ + return (ctf_fdcreate_int(fd, errp, NULL)); +} + /* * Open the specified file and return a pointer to a CTF container. The file * can be either an ELF file or raw CTF file. This is just a convenient @@ -502,3 +742,25 @@ ctf_version(int version) return (_libctf_version); } + +/* + * A utility function for folks debugging CTF conversion and merging. + */ +void +ctf_phase_dump(ctf_file_t *fp, const char *phase) +{ + int fd; + static char *base; + char path[MAXPATHLEN]; + + if (base == NULL && (base = getenv("LIBCTF_WRITE_PHASES")) == NULL) + return; + + (void) snprintf(path, sizeof (path), "%s/libctf.%s.%d.ctf", base, + phase != NULL ? phase : "", + ctf_phase); + if ((fd = open(path, O_CREAT | O_TRUNC | O_RDWR, 0777)) < 0) + return; + (void) ctf_write(fp, fd); + (void) close(fd); +} diff --git a/usr/src/lib/libctf/common/ctf_merge.c b/usr/src/lib/libctf/common/ctf_merge.c new file mode 100644 index 0000000000..c93a2a4f7c --- /dev/null +++ b/usr/src/lib/libctf/common/ctf_merge.c @@ -0,0 +1,1551 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. + */ + +/* + * To perform a merge of two CTF containers, we first diff the two containers + * types. For every type that's in the src container, but not in the dst + * container, we note it and add it to dst container. If there are any objects + * or functions associated with src, we go through and update the types that + * they refer to such that they all refer to types in the dst container. + * + * The bulk of the logic for the merge, after we've run the diff, occurs in + * ctf_merge_common(). + * + * In terms of exported APIs, we don't really export a simple merge two + * containers, as the general way this is used, in something like ctfmerge(1), + * is to add all the containers and then let us figure out the best way to merge + * it. + */ + +#include <libctf_impl.h> +#include <sys/debug.h> +#include <sys/list.h> +#include <stddef.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <mergeq.h> +#include <errno.h> + +typedef struct ctf_merge_tinfo { + uint16_t cmt_map; /* Map to the type in out */ + boolean_t cmt_fixup; + boolean_t cmt_forward; + boolean_t cmt_missing; +} ctf_merge_tinfo_t; + +/* + * State required for doing an individual merge of two containers. + */ +typedef struct ctf_merge_types { + ctf_file_t *cm_out; /* Output CTF file */ + ctf_file_t *cm_src; /* Input CTF file */ + ctf_merge_tinfo_t *cm_tmap; /* Type state information */ + boolean_t cm_dedup; /* Are we doing a dedup? */ + boolean_t cm_unique; /* are we doing a uniquify? */ +} ctf_merge_types_t; + +typedef struct ctf_merge_objmap { + list_node_t cmo_node; + const char *cmo_name; /* Symbol name */ + ulong_t cmo_idx; /* Symbol ID */ + ctf_id_t cmo_tid; /* Type ID */ +} ctf_merge_objmap_t; + +typedef struct ctf_merge_funcmap { + list_node_t cmf_node; + const char *cmf_name; /* Symbol name */ + ulong_t cmf_idx; /* Symbol ID */ + ctf_id_t cmf_rtid; /* Type ID */ + uint_t cmf_flags; /* ctf_funcinfo_t ctc_flags */ + uint_t cmf_argc; /* Number of arguments */ + ctf_id_t cmf_args[]; /* Types of arguments */ +} ctf_merge_funcmap_t; + +typedef struct ctf_merge_input { + list_node_t cmi_node; + ctf_file_t *cmi_input; + list_t cmi_omap; + list_t cmi_fmap; + boolean_t cmi_created; +} ctf_merge_input_t; + +struct ctf_merge_handle { + list_t cmh_inputs; /* Input list */ + uint_t cmh_ninputs; /* Number of inputs */ + uint_t cmh_nthreads; /* Number of threads to use */ + ctf_file_t *cmh_unique; /* ctf to uniquify against */ + boolean_t cmh_msyms; /* Should we merge symbols/funcs? */ + int cmh_ofd; /* FD for output file */ + int cmh_flags; /* Flags that control merge behavior */ + char *cmh_label; /* Optional label */ + char *cmh_pname; /* Parent name */ +}; + +static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t); + +static ctf_id_t +ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id) +{ + if (cmp->cm_dedup == B_FALSE) { + VERIFY(cmp->cm_tmap[id].cmt_map != 0); + return (cmp->cm_tmap[id].cmt_map); + } + + while (cmp->cm_tmap[id].cmt_missing == B_FALSE) { + VERIFY(cmp->cm_tmap[id].cmt_map != 0); + id = cmp->cm_tmap[id].cmt_map; + } + VERIFY(cmp->cm_tmap[id].cmt_map != 0); + return (cmp->cm_tmap[id].cmt_map); +} + +static void +ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp, + ctf_id_t oid, void *arg) +{ + ctf_merge_types_t *cmp = arg; + ctf_merge_tinfo_t *cmt = cmp->cm_tmap; + + if (same == B_TRUE) { + if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD && + ctf_type_kind(ofp, oid) != CTF_K_FORWARD) { + VERIFY(cmt[oid].cmt_map == 0); + + /* + * If we're uniquifying types, it's possible for the + * container that we're uniquifying against to have a + * forward which exists in the container being reduced. + * For example, genunix has the machcpu structure as a + * forward which is actually in unix and we uniquify + * unix against genunix. In such cases, we explicitly do + * not do any mapping of the forward information, lest + * we risk losing the real definition. Instead, mark + * that it's missing. + */ + if (cmp->cm_unique == B_TRUE) { + cmt[oid].cmt_missing = B_TRUE; + return; + } + + cmt[oid].cmt_map = iid; + cmt[oid].cmt_forward = B_TRUE; + ctf_dprintf("merge diff forward mapped %d->%d\n", oid, + iid); + return; + } + + /* + * We could have multiple things that a given type ends up + * matching in the world of forwards and pointers to forwards. + * For now just take the first one... + */ + if (cmt[oid].cmt_map != 0) + return; + cmt[oid].cmt_map = iid; + ctf_dprintf("merge diff mapped %d->%d\n", oid, iid); + } else if (ifp == cmp->cm_src) { + VERIFY(cmt[iid].cmt_map == 0); + cmt[iid].cmt_missing = B_TRUE; + ctf_dprintf("merge diff said %d is missing\n", iid); + } +} + +static int +ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int ret, flags; + const ctf_type_t *tp; + const char *name; + ctf_encoding_t en; + + if (ctf_type_encoding(cmp->cm_src, id, &en) != 0) + return (CTF_ERR); + + tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); + name = ctf_strraw(cmp->cm_src, tp->ctt_name); + if (CTF_INFO_ISROOT(tp->ctt_info) != 0) + flags = CTF_ADD_ROOT; + else + flags = CTF_ADD_NONROOT; + + ret = ctf_add_encoded(cmp->cm_out, flags, name, &en, + ctf_type_kind(cmp->cm_src, id)); + + if (ret == CTF_ERR) + return (ret); + + VERIFY(cmp->cm_tmap[id].cmt_map == 0); + cmp->cm_tmap[id].cmt_map = ret; + return (0); +} + +static int +ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int ret, flags; + const ctf_type_t *tp; + ctf_arinfo_t ar; + + if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR) + return (CTF_ERR); + + tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); + if (CTF_INFO_ISROOT(tp->ctt_info) != 0) + flags = CTF_ADD_ROOT; + else + flags = CTF_ADD_NONROOT; + + if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) { + ret = ctf_merge_add_type(cmp, ar.ctr_contents); + if (ret != 0) + return (ret); + ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0); + } + ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents); + + if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) { + ret = ctf_merge_add_type(cmp, ar.ctr_index); + if (ret != 0) + return (ret); + ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0); + } + ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index); + + ret = ctf_add_array(cmp->cm_out, flags, &ar); + if (ret == CTF_ERR) + return (ret); + + VERIFY(cmp->cm_tmap[id].cmt_map == 0); + cmp->cm_tmap[id].cmt_map = ret; + + return (0); +} + +static int +ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int ret, flags; + const ctf_type_t *tp; + ctf_id_t reftype; + const char *name; + + tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); + name = ctf_strraw(cmp->cm_src, tp->ctt_name); + if (CTF_INFO_ISROOT(tp->ctt_info) != 0) + flags = CTF_ADD_ROOT; + else + flags = CTF_ADD_NONROOT; + + reftype = ctf_type_reference(cmp->cm_src, id); + if (reftype == CTF_ERR) + return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); + + if (cmp->cm_tmap[reftype].cmt_map == 0) { + ret = ctf_merge_add_type(cmp, reftype); + if (ret != 0) + return (ret); + ASSERT(cmp->cm_tmap[reftype].cmt_map != 0); + } + reftype = ctf_merge_gettype(cmp, reftype); + + ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype, + ctf_type_kind(cmp->cm_src, id)); + if (ret == CTF_ERR) + return (ret); + + VERIFY(cmp->cm_tmap[id].cmt_map == 0); + cmp->cm_tmap[id].cmt_map = ret; + return (0); +} + +static int +ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int ret, flags; + const ctf_type_t *tp; + const char *name; + ctf_id_t reftype; + + tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); + name = ctf_strraw(cmp->cm_src, tp->ctt_name); + if (CTF_INFO_ISROOT(tp->ctt_info) != 0) + flags = CTF_ADD_ROOT; + else + flags = CTF_ADD_NONROOT; + + reftype = ctf_type_reference(cmp->cm_src, id); + if (reftype == CTF_ERR) + return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); + + if (cmp->cm_tmap[reftype].cmt_map == 0) { + ret = ctf_merge_add_type(cmp, reftype); + if (ret != 0) + return (ret); + ASSERT(cmp->cm_tmap[reftype].cmt_map != 0); + } + reftype = ctf_merge_gettype(cmp, reftype); + + ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype); + if (ret == CTF_ERR) + return (ret); + + VERIFY(cmp->cm_tmap[id].cmt_map == 0); + cmp->cm_tmap[id].cmt_map = ret; + return (0); +} + +typedef struct ctf_merge_enum { + ctf_file_t *cme_fp; + ctf_id_t cme_id; +} ctf_merge_enum_t; + +static int +ctf_merge_add_enumerator(const char *name, int value, void *arg) +{ + ctf_merge_enum_t *cmep = arg; + + return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) == + CTF_ERR); +} + +static int +ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int flags; + const ctf_type_t *tp; + const char *name; + ctf_id_t enumid; + ctf_merge_enum_t cme; + + tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); + name = ctf_strraw(cmp->cm_src, tp->ctt_name); + if (CTF_INFO_ISROOT(tp->ctt_info) != 0) + flags = CTF_ADD_ROOT; + else + flags = CTF_ADD_NONROOT; + + enumid = ctf_add_enum(cmp->cm_out, flags, name); + if (enumid == CTF_ERR) + return (enumid); + + cme.cme_fp = cmp->cm_out; + cme.cme_id = enumid; + if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator, + &cme) != 0) + return (CTF_ERR); + + VERIFY(cmp->cm_tmap[id].cmt_map == 0); + cmp->cm_tmap[id].cmt_map = enumid; + return (0); +} + +static int +ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int ret, flags, i; + const ctf_type_t *tp; + ctf_funcinfo_t ctc; + ctf_id_t *argv; + + tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); + if (CTF_INFO_ISROOT(tp->ctt_info) != 0) + flags = CTF_ADD_ROOT; + else + flags = CTF_ADD_NONROOT; + + if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR) + return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); + + argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc); + if (argv == NULL) + return (ctf_set_errno(cmp->cm_out, ENOMEM)); + if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) == + CTF_ERR) { + ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc); + return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); + } + + if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) { + ret = ctf_merge_add_type(cmp, ctc.ctc_return); + if (ret != 0) + return (ret); + ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0); + } + ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return); + + for (i = 0; i < ctc.ctc_argc; i++) { + if (cmp->cm_tmap[argv[i]].cmt_map == 0) { + ret = ctf_merge_add_type(cmp, argv[i]); + if (ret != 0) + return (ret); + ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0); + } + argv[i] = ctf_merge_gettype(cmp, argv[i]); + } + + ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv); + ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc); + if (ret == CTF_ERR) + return (ret); + + VERIFY(cmp->cm_tmap[id].cmt_map == 0); + cmp->cm_tmap[id].cmt_map = ret; + return (0); +} + +static int +ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int ret, flags; + const ctf_type_t *tp; + const char *name; + + tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); + name = ctf_strraw(cmp->cm_src, tp->ctt_name); + if (CTF_INFO_ISROOT(tp->ctt_info) != 0) + flags = CTF_ADD_ROOT; + else + flags = CTF_ADD_NONROOT; + + /* + * ctf_add_forward tries to check to see if a given forward already + * exists in one of its hash tables. If we're here then we know that we + * have a forward in a container that isn't present in another. + * Therefore, we choose a token hash table to satisfy the API choice + * here. + */ + ret = ctf_add_forward(cmp->cm_out, flags, name, CTF_K_STRUCT); + if (ret == CTF_ERR) + return (CTF_ERR); + + VERIFY(cmp->cm_tmap[id].cmt_map == 0); + cmp->cm_tmap[id].cmt_map = ret; + return (0); +} + +typedef struct ctf_merge_su { + ctf_merge_types_t *cms_cm; + ctf_id_t cms_id; +} ctf_merge_su_t; + +static int +ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg) +{ + ctf_merge_su_t *cms = arg; + + VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0); + type = cms->cms_cm->cm_tmap[type].cmt_map; + + ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id); + return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name, + type, offset) == CTF_ERR); +} + +/* + * During the first pass, we always add the generic structure and union but none + * of its members as they might not all have been mapped yet. Instead we just + * mark all structures and unions as needing to be fixed up. + */ +static int +ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward) +{ + int flags, kind; + const ctf_type_t *tp; + const char *name; + ctf_id_t suid; + + tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); + name = ctf_strraw(cmp->cm_src, tp->ctt_name); + if (CTF_INFO_ISROOT(tp->ctt_info) != 0) + flags = CTF_ADD_ROOT; + else + flags = CTF_ADD_NONROOT; + kind = ctf_type_kind(cmp->cm_src, id); + + if (kind == CTF_K_STRUCT) + suid = ctf_add_struct(cmp->cm_out, flags, name); + else + suid = ctf_add_union(cmp->cm_out, flags, name); + + if (suid == CTF_ERR) + return (suid); + + /* + * If this is a forward reference then its mapping should already + * exist. + */ + if (forward == B_FALSE) { + VERIFY(cmp->cm_tmap[id].cmt_map == 0); + cmp->cm_tmap[id].cmt_map = suid; + ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id, + suid); + } else { + VERIFY(cmp->cm_tmap[id].cmt_map == suid); + } + cmp->cm_tmap[id].cmt_fixup = B_TRUE; + + return (0); +} + +static int +ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int kind, ret; + + /* + * We may end up evaluating a type more than once as we may deal with it + * as we recursively evaluate some kind of reference and then we may see + * it normally. + */ + if (cmp->cm_tmap[id].cmt_map != 0) + return (0); + + kind = ctf_type_kind(cmp->cm_src, id); + switch (kind) { + case CTF_K_INTEGER: + case CTF_K_FLOAT: + ret = ctf_merge_add_number(cmp, id); + break; + case CTF_K_ARRAY: + ret = ctf_merge_add_array(cmp, id); + break; + case CTF_K_POINTER: + case CTF_K_VOLATILE: + case CTF_K_CONST: + case CTF_K_RESTRICT: + ret = ctf_merge_add_reftype(cmp, id); + break; + case CTF_K_TYPEDEF: + ret = ctf_merge_add_typedef(cmp, id); + break; + case CTF_K_ENUM: + ret = ctf_merge_add_enum(cmp, id); + break; + case CTF_K_FUNCTION: + ret = ctf_merge_add_func(cmp, id); + break; + case CTF_K_FORWARD: + ret = ctf_merge_add_forward(cmp, id); + break; + case CTF_K_STRUCT: + case CTF_K_UNION: + ret = ctf_merge_add_sou(cmp, id, B_FALSE); + break; + case CTF_K_UNKNOWN: + /* + * We don't add unknown types, and we later assert that nothing + * should reference them. + */ + return (0); + default: + abort(); + } + + return (ret); +} + +static int +ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id) +{ + ctf_dtdef_t *dtd; + ctf_merge_su_t cms; + ctf_id_t mapid; + ssize_t size; + + mapid = cmp->cm_tmap[id].cmt_map; + VERIFY(mapid != 0); + dtd = ctf_dtd_lookup(cmp->cm_out, mapid); + VERIFY(dtd != NULL); + + ctf_dprintf("Trying to fix up sou %d\n", id); + cms.cms_cm = cmp; + cms.cms_id = mapid; + if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0) + return (CTF_ERR); + + if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR) + return (CTF_ERR); + if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR) + return (CTF_ERR); + + return (0); +} + +static int +ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id) +{ + int kind, ret; + + kind = ctf_type_kind(cmp->cm_src, id); + switch (kind) { + case CTF_K_STRUCT: + case CTF_K_UNION: + ret = ctf_merge_fixup_sou(cmp, id); + break; + default: + VERIFY(0); + ret = CTF_ERR; + } + + return (ret); +} + +/* + * Now that we've successfully merged everything, we're going to clean + * up the merge type table. Traditionally if we had just two different + * files that we were working between, the types would be fully + * resolved. However, because we were comparing with ourself every step + * of the way and not our reduced self, we need to go through and update + * every mapped entry to what it now points to in the deduped file. + */ +static void +ctf_merge_fixup_dedup_map(ctf_merge_types_t *cmp) +{ + int i; + + for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) { + ctf_id_t tid; + + /* + * Missing types always have their id updated to exactly what it + * should be. + */ + if (cmp->cm_tmap[i].cmt_missing == B_TRUE) { + VERIFY(cmp->cm_tmap[i].cmt_map != 0); + continue; + } + + tid = i; + while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) { + VERIFY(cmp->cm_tmap[tid].cmt_map != 0); + tid = cmp->cm_tmap[tid].cmt_map; + } + VERIFY(cmp->cm_tmap[tid].cmt_map != 0); + cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map; + } +} + + +/* + * We're going to do three passes over the containers. + * + * Pass 1 checks for forward references in the output container that we know + * exist in the source container. + * + * Pass 2 adds all the missing types from the source container. As part of this + * we may be adding a type as a forward reference that doesn't exist yet. + * Any types that we encounter in this form, we need to add to a third pass. + * + * Pass 3 is the fixup pass. Here we go through and find all the types that were + * missing in the first. + * + * Importantly, we *must* call ctf_update between the second and third pass, + * otherwise several of the libctf functions will not properly find the data in + * the container. If we're doing a dedup we also fix up the type mapping. + */ +static int +ctf_merge_common(ctf_merge_types_t *cmp) +{ + int ret, i; + + ctf_phase_dump(cmp->cm_src, "merge-common-src"); + ctf_phase_dump(cmp->cm_out, "merge-common-dest"); + + /* Pass 1 */ + for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { + if (cmp->cm_tmap[i].cmt_forward == B_TRUE) { + ret = ctf_merge_add_sou(cmp, i, B_TRUE); + if (ret != 0) { + return (ret); + } + } + } + + /* Pass 2 */ + for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { + if (cmp->cm_tmap[i].cmt_missing == B_TRUE) { + ret = ctf_merge_add_type(cmp, i); + if (ret != 0) { + ctf_dprintf("Failed to merge type %d\n", i); + return (ret); + } + } + } + + ret = ctf_update(cmp->cm_out); + if (ret != 0) + return (ret); + + if (cmp->cm_dedup == B_TRUE) { + ctf_merge_fixup_dedup_map(cmp); + } + + ctf_dprintf("Beginning merge pass 3\n"); + /* Pass 3 */ + for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { + if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) { + ret = ctf_merge_fixup_type(cmp, i); + if (ret != 0) + return (ret); + } + } + + if (cmp->cm_dedup == B_TRUE) { + ctf_merge_fixup_dedup_map(cmp); + } + + return (0); +} + +/* + * Uniquification is slightly different from a stock merge. For starters, we + * don't need to replace any forward references in the output. In this case + * though, the types that already exist are in a parent container to the empty + * output container. + */ +static int +ctf_merge_uniquify_types(ctf_merge_types_t *cmp) +{ + int i, ret; + + for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { + if (cmp->cm_tmap[i].cmt_missing == B_FALSE) + continue; + ret = ctf_merge_add_type(cmp, i); + if (ret != 0) + return (ret); + } + + ret = ctf_update(cmp->cm_out); + if (ret != 0) + return (ret); + + for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { + if (cmp->cm_tmap[i].cmt_fixup == B_FALSE) + continue; + ret = ctf_merge_fixup_type(cmp, i); + if (ret != 0) + return (ret); + } + + return (0); +} + +static int +ctf_merge_types_init(ctf_merge_types_t *cmp) +{ + cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) * + (cmp->cm_src->ctf_typemax + 1)); + if (cmp->cm_tmap == NULL) + return (ctf_set_errno(cmp->cm_out, ENOMEM)); + bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) * + (cmp->cm_src->ctf_typemax + 1)); + return (0); +} + +static void +ctf_merge_types_fini(ctf_merge_types_t *cmp) +{ + ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) * + (cmp->cm_src->ctf_typemax + 1)); +} + +/* + * Merge the types contained inside of two input files. The second input file is + * always going to be the destination. We're guaranteed that it's always + * writeable. + */ +static int +ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued) +{ + int ret; + ctf_merge_types_t cm; + ctf_diff_t *cdp; + ctf_merge_objmap_t *cmo; + ctf_merge_funcmap_t *cmf; + ctf_merge_input_t *scmi = arg; + ctf_merge_input_t *dcmi = arg2; + ctf_file_t *out = dcmi->cmi_input; + ctf_file_t *source = scmi->cmi_input; + + ctf_dprintf("merging %p->%p\n", source, out); + + if (!(out->ctf_flags & LCTF_RDWR)) + return (ctf_set_errno(out, ECTF_RDONLY)); + + if (ctf_getmodel(out) != ctf_getmodel(source)) + return (ctf_set_errno(out, ECTF_DMODEL)); + + if ((ret = ctf_diff_init(out, source, &cdp)) != 0) + return (ret); + + cm.cm_out = out; + cm.cm_src = source; + cm.cm_dedup = B_FALSE; + cm.cm_unique = B_FALSE; + ret = ctf_merge_types_init(&cm); + if (ret != 0) { + ctf_diff_fini(cdp); + return (ctf_set_errno(out, ret)); + } + + ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm); + if (ret != 0) + goto cleanup; + ret = ctf_merge_common(&cm); + ctf_dprintf("merge common returned with %d\n", ret); + if (ret == 0) { + ret = ctf_update(out); + ctf_dprintf("update returned with %d\n", ret); + } else { + goto cleanup; + } + + /* + * Now we need to fix up the object and function maps. + */ + for (cmo = list_head(&scmi->cmi_omap); cmo != NULL; + cmo = list_next(&scmi->cmi_omap, cmo)) { + if (cmo->cmo_tid == 0) + continue; + VERIFY(cm.cm_tmap[cmo->cmo_tid].cmt_map != 0); + cmo->cmo_tid = cm.cm_tmap[cmo->cmo_tid].cmt_map; + } + + for (cmf = list_head(&scmi->cmi_fmap); cmf != NULL; + cmf = list_next(&scmi->cmi_fmap, cmf)) { + int i; + + VERIFY(cm.cm_tmap[cmf->cmf_rtid].cmt_map != 0); + cmf->cmf_rtid = cm.cm_tmap[cmf->cmf_rtid].cmt_map; + for (i = 0; i < cmf->cmf_argc; i++) { + VERIFY(cm.cm_tmap[cmf->cmf_args[i]].cmt_map != 0); + cmf->cmf_args[i] = cm.cm_tmap[cmf->cmf_args[i]].cmt_map; + } + } + + /* + * Now that we've fixed things up, we need to give our function and + * object maps to the destination, such that it can continue to update + * them going forward. + */ + list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap); + list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap); + +cleanup: + if (ret == 0) + *outp = dcmi; + ctf_merge_types_fini(&cm); + ctf_diff_fini(cdp); + if (ret != 0) + return (ctf_errno(out)); + return (0); +} + +/* + * After performing a pass, we need to go through the object and function type + * maps and potentially fix them up based on the new maps that we haev. + */ +static void +ctf_merge_fixup_nontypes(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi) +{ + ctf_merge_objmap_t *cmo; + ctf_merge_funcmap_t *cmf; + + for (cmo = list_head(&cmi->cmi_omap); cmo != NULL; + cmo = list_next(&cmi->cmi_omap, cmo)) { + if (cmo->cmo_tid == 0) + continue; + VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0); + cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map; + } + + for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL; + cmf = list_next(&cmi->cmi_fmap, cmf)) { + int i; + + VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0); + cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map; + for (i = 0; i < cmf->cmf_argc; i++) { + VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != + 0); + cmf->cmf_args[i] = + cmp->cm_tmap[cmf->cmf_args[i]].cmt_map; + } + } +} + +static int +ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp) +{ + int err, ret; + ctf_file_t *out; + ctf_merge_types_t cm; + ctf_diff_t *cdp; + ctf_merge_input_t *cmi; + ctf_file_t *parent = cmh->cmh_unique; + + *outp = NULL; + out = ctf_fdcreate(cmh->cmh_ofd, &err); + if (out == NULL) + return (ctf_set_errno(src, err)); + + out->ctf_parname = cmh->cmh_pname; + if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) { + (void) ctf_set_errno(src, ctf_errno(out)); + ctf_close(out); + return (CTF_ERR); + } + + if (ctf_import(out, parent) != 0) { + (void) ctf_set_errno(src, ctf_errno(out)); + ctf_close(out); + return (CTF_ERR); + } + + if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) { + ctf_close(out); + return (ctf_set_errno(src, ctf_errno(parent))); + } + + cm.cm_out = parent; + cm.cm_src = src; + cm.cm_dedup = B_FALSE; + cm.cm_unique = B_TRUE; + ret = ctf_merge_types_init(&cm); + if (ret != 0) { + ctf_close(out); + ctf_diff_fini(cdp); + return (ctf_set_errno(src, ret)); + } + + ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm); + if (ret == 0) { + cm.cm_out = out; + ret = ctf_merge_uniquify_types(&cm); + if (ret == 0) + ret = ctf_update(out); + } + + if (ret != 0) { + ctf_merge_types_fini(&cm); + ctf_diff_fini(cdp); + return (ctf_set_errno(src, ctf_errno(cm.cm_out))); + } + + for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL; + cmi = list_next(&cmh->cmh_inputs, cmi)) { + ctf_merge_fixup_nontypes(&cm, cmi); + } + + ctf_merge_types_fini(&cm); + ctf_diff_fini(cdp); + *outp = out; + return (0); +} + +static void +ctf_merge_fini_input(ctf_merge_input_t *cmi) +{ + ctf_merge_objmap_t *cmo; + ctf_merge_funcmap_t *cmf; + + while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL) + ctf_free(cmo, sizeof (ctf_merge_objmap_t)); + + while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL) + ctf_free(cmf, sizeof (ctf_merge_funcmap_t) + + sizeof (ctf_id_t) * cmf->cmf_argc); + + if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL) + ctf_close(cmi->cmi_input); + + ctf_free(cmi, sizeof (ctf_merge_input_t)); +} + +void +ctf_merge_fini(ctf_merge_t *cmh) +{ + size_t len; + ctf_merge_input_t *cmi; + + if (cmh->cmh_label != NULL) { + len = strlen(cmh->cmh_label) + 1; + ctf_free(cmh->cmh_label, len); + } + + if (cmh->cmh_pname != NULL) { + len = strlen(cmh->cmh_pname) + 1; + ctf_free(cmh->cmh_pname, len); + } + + while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL) + ctf_merge_fini_input(cmi); + + ctf_free(cmh, sizeof (ctf_merge_t)); +} + +ctf_merge_t * +ctf_merge_init(int fd, int *errp) +{ + int err; + ctf_merge_t *out; + struct stat st; + + if (errp == NULL) + errp = &err; + + if (fd != -1 && fstat(fd, &st) != 0) { + *errp = EINVAL; + return (NULL); + } + + out = ctf_alloc(sizeof (ctf_merge_t)); + if (out == NULL) { + *errp = ENOMEM; + return (NULL); + } + + if (fd == -1) { + out->cmh_msyms = B_FALSE; + } else { + out->cmh_msyms = B_TRUE; + } + + list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t), + offsetof(ctf_merge_input_t, cmi_node)); + out->cmh_ninputs = 0; + out->cmh_nthreads = 1; + out->cmh_unique = NULL; + out->cmh_ofd = fd; + out->cmh_flags = 0; + out->cmh_label = NULL; + out->cmh_pname = NULL; + + return (out); +} + +int +ctf_merge_label(ctf_merge_t *cmh, const char *label) +{ + char *dup; + + if (label == NULL) + return (EINVAL); + + dup = ctf_strdup(label); + if (dup == NULL) + return (EAGAIN); + + if (cmh->cmh_label != NULL) { + size_t len = strlen(cmh->cmh_label) + 1; + ctf_free(cmh->cmh_label, len); + } + + cmh->cmh_label = dup; + return (0); +} + +static int +ctf_merge_add_funcs_cb(const char *name, ulong_t idx, ctf_funcinfo_t *fip, + void *arg) +{ + ctf_merge_input_t *cmi = arg; + ctf_merge_funcmap_t *fmap; + + fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) + + sizeof (ctf_id_t) * fip->ctc_argc); + if (fmap == NULL) + return (ENOMEM); + + fmap->cmf_idx = idx; + fmap->cmf_rtid = fip->ctc_return; + fmap->cmf_flags = fip->ctc_flags; + fmap->cmf_argc = fip->ctc_argc; + fmap->cmf_name = name; + + if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc, + fmap->cmf_args) != 0) { + ctf_free(fmap, sizeof (ctf_merge_funcmap_t) + + sizeof (ctf_id_t) * fip->ctc_argc); + return (ctf_errno(cmi->cmi_input)); + } + + list_insert_tail(&cmi->cmi_fmap, fmap); + return (0); +} + +static int +ctf_merge_add_objs_cb(const char *name, ctf_id_t id, ulong_t idx, void *arg) +{ + ctf_merge_input_t *cmi = arg; + ctf_merge_objmap_t *cmo; + + cmo = ctf_alloc(sizeof (ctf_merge_objmap_t)); + if (cmo == NULL) + return (ENOMEM); + + cmo->cmo_name = name; + cmo->cmo_idx = idx; + cmo->cmo_tid = id; + list_insert_tail(&cmi->cmi_omap, cmo); + return (0); +} + +/* + * Whenever we create an entry to merge, we then go and add a second empty + * ctf_file_t which we use for the purposes of our merging. It's not the best, + * but it's the best that we've got at the moment. + */ +int +ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input) +{ + int ret; + ctf_merge_input_t *cmi; + ctf_file_t *empty; + + if (input->ctf_flags & LCTF_CHILD) + return (ECTF_MCHILD); + + cmi = ctf_alloc(sizeof (ctf_merge_input_t)); + if (cmi == NULL) + return (ENOMEM); + + cmi->cmi_created = B_FALSE; + cmi->cmi_input = input; + list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t), + offsetof(ctf_merge_funcmap_t, cmf_node)); + list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t), + offsetof(ctf_merge_objmap_t, cmo_node)); + + if (cmh->cmh_msyms == B_TRUE) { + if ((ret = ctf_function_iter(input, ctf_merge_add_funcs_cb, + cmi)) != 0) { + ctf_merge_fini_input(cmi); + return (ret); + } + + if ((ret = ctf_object_iter(input, ctf_merge_add_objs_cb, + cmi)) != 0) { + ctf_merge_fini_input(cmi); + return (ret); + } + } + + list_insert_tail(&cmh->cmh_inputs, cmi); + cmh->cmh_ninputs++; + + /* And now the empty one to merge into this */ + cmi = ctf_alloc(sizeof (ctf_merge_input_t)); + if (cmi == NULL) + return (ENOMEM); + list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t), + offsetof(ctf_merge_funcmap_t, cmf_node)); + list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t), + offsetof(ctf_merge_objmap_t, cmo_node)); + + empty = ctf_fdcreate(cmh->cmh_ofd, &ret); + if (empty == NULL) + return (ret); + cmi->cmi_input = empty; + cmi->cmi_created = B_TRUE; + + if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) { + return (ctf_errno(empty)); + } + + list_insert_tail(&cmh->cmh_inputs, cmi); + cmh->cmh_ninputs++; + ctf_dprintf("added containers %p and %p\n", input, empty); + return (0); +} + +int +ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname) +{ + char *dup; + + if (u->ctf_flags & LCTF_CHILD) + return (ECTF_MCHILD); + if (pname == NULL) + return (EINVAL); + dup = ctf_strdup(pname); + if (dup == NULL) + return (EINVAL); + if (cmh->cmh_pname != NULL) { + size_t len = strlen(cmh->cmh_pname) + 1; + ctf_free(cmh->cmh_pname, len); + } + cmh->cmh_pname = dup; + cmh->cmh_unique = u; + return (0); +} + +static int +ctf_merge_symbols(ctf_merge_t *cmh, ctf_file_t *fp) +{ + int err; + ulong_t i; + + uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data; + uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data; + + for (i = 0; i < fp->ctf_nsyms; i++) { + const char *name; + ctf_merge_input_t *cmi; + ctf_merge_objmap_t *cmo; + + if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) { + const Elf32_Sym *symp = (Elf32_Sym *)symbase + i; + int type = ELF32_ST_TYPE(symp->st_info); + if (type != STT_OBJECT) + continue; + if (ctf_sym_valid(strbase, type, symp->st_shndx, + symp->st_value, symp->st_name) == B_FALSE) + continue; + name = (char *)(strbase + symp->st_name); + } else { + const Elf64_Sym *symp = (Elf64_Sym *)symbase + i; + int type = ELF64_ST_TYPE(symp->st_info); + if (type != STT_OBJECT) + continue; + if (ctf_sym_valid(strbase, type, symp->st_shndx, + symp->st_value, symp->st_name) == B_FALSE) + continue; + name = (char *)(strbase + symp->st_name); + } + + cmo = NULL; + for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL; + cmi = list_next(&cmh->cmh_inputs, cmi)) { + for (cmo = list_head(&cmi->cmi_omap); cmo != NULL; + cmo = list_next(&cmi->cmi_omap, cmo)) { + if (strcmp(cmo->cmo_name, name) == 0) + goto found; + } + } +found: + if (cmo != NULL) { + if (cmo->cmo_tid == 0) + continue; + if ((err = ctf_add_object(fp, i, cmo->cmo_tid)) != 0) { + ctf_dprintf("Failed to add symbol %s->%d: %s\n", + name, cmo->cmo_tid, + ctf_errmsg(ctf_errno(fp))); + return (err); + } + } + } + + return (0); +} + +static int +ctf_merge_functions(ctf_merge_t *cmh, ctf_file_t *fp) +{ + int err; + ulong_t i; + ctf_funcinfo_t fi; + + uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data; + uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data; + + for (i = 0; i < fp->ctf_nsyms; i++) { + const char *name; + ctf_merge_input_t *cmi; + ctf_merge_funcmap_t *cmf; + + if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) { + const Elf32_Sym *symp = (Elf32_Sym *)symbase + i; + int type = ELF32_ST_TYPE(symp->st_info); + if (ELF32_ST_TYPE(symp->st_info) != STT_FUNC) + continue; + if (ctf_sym_valid(strbase, type, symp->st_shndx, + symp->st_value, symp->st_name) == B_FALSE) + continue; + name = (char *)(strbase + symp->st_name); + } else { + const Elf64_Sym *symp = (Elf64_Sym *)symbase + i; + int type = ELF64_ST_TYPE(symp->st_info); + if (ELF64_ST_TYPE(symp->st_info) != STT_FUNC) + continue; + if (ctf_sym_valid(strbase, type, symp->st_shndx, + symp->st_value, symp->st_name) == B_FALSE) + continue; + name = (char *)(strbase + symp->st_name); + } + + cmf = NULL; + for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL; + cmi = list_next(&cmh->cmh_inputs, cmi)) { + for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL; + cmf = list_next(&cmi->cmi_fmap, cmf)) { + if (strcmp(cmf->cmf_name, name) == 0) + goto found; + } + } +found: + if (cmf != NULL) { + fi.ctc_return = cmf->cmf_rtid; + fi.ctc_argc = cmf->cmf_argc; + fi.ctc_flags = cmf->cmf_flags; + if ((err = ctf_add_function(fp, i, &fi, + cmf->cmf_args)) != 0) + return (err); + } + } + + return (0); + +} + +int +ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp) +{ + int err, merr; + ctf_merge_input_t *cmi; + ctf_id_t ltype; + mergeq_t *mqp; + ctf_merge_input_t *final; + ctf_file_t *out; + + if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) { + const char *label = ctf_label_topmost(cmh->cmh_unique); + if (label == NULL) + return (ECTF_NOLABEL); + if (strcmp(label, cmh->cmh_label) != 0) + return (ECTF_LCONFLICT); + } + + if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) { + return (errno); + } + + VERIFY(cmh->cmh_ninputs % 2 == 0); + for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL; + cmi = list_next(&cmh->cmh_inputs, cmi)) { + if (mergeq_add(mqp, cmi) == -1) { + err = errno; + mergeq_fini(mqp); + } + } + + err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr); + mergeq_fini(mqp); + + if (err == MERGEQ_ERROR) { + return (errno); + } else if (err == MERGEQ_UERROR) { + return (merr); + } + + /* + * Disassociate the generated ctf_file_t from the original input. That + * way when the input gets cleaned up, we don't accidentally kill the + * final reference to the ctf_file_t. If it gets uniquified then we'll + * kill it. + */ + VERIFY(final->cmi_input != NULL); + out = final->cmi_input; + final->cmi_input = NULL; + + ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique); + if (cmh->cmh_unique != NULL) { + ctf_file_t *u; + err = ctf_uniquify_types(cmh, out, &u); + if (err != 0) { + err = ctf_errno(out); + ctf_close(out); + return (err); + } + ctf_close(out); + out = u; + } + + ltype = out->ctf_typemax; + if ((out->ctf_flags & LCTF_CHILD) && ltype != 0) + ltype += CTF_CHILD_START; + ctf_dprintf("trying to add the label\n"); + if (cmh->cmh_label != NULL && + ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) { + ctf_close(out); + return (ctf_errno(out)); + } + + ctf_dprintf("merging symbols and the like\n"); + if (cmh->cmh_msyms == B_TRUE) { + err = ctf_merge_symbols(cmh, out); + if (err != 0) { + ctf_close(out); + return (ctf_errno(out)); + } + + err = ctf_merge_functions(cmh, out); + if (err != 0) { + ctf_close(out); + return (ctf_errno(out)); + } + } + + err = ctf_update(out); + if (err != 0) { + ctf_close(out); + return (ctf_errno(out)); + } + + *outp = out; + return (0); +} + +/* + * When we get told that something is unique, eg. same is B_FALSE, then that + * tells us that we need to add it to the output. If same is B_TRUE, then we'll + * want to record it in the mapping table so that we know how to redirect types + * to the extant ones. + */ +static void +ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp, + ctf_id_t oid, void *arg) +{ + ctf_merge_types_t *cmp = arg; + ctf_merge_tinfo_t *cmt = cmp->cm_tmap; + + if (same == B_TRUE) { + /* + * The output id here may itself map to something else. + * Therefore, we need to basically walk a chain and see what it + * points to until it itself points to a base type, eg. -1. + * Otherwise we'll dedup to something which no longer exists. + */ + while (cmt[oid].cmt_missing == B_FALSE) + oid = cmt[oid].cmt_map; + cmt[iid].cmt_map = oid; + ctf_dprintf("%d->%d \n", iid, oid); + } else { + VERIFY(cmt[iid].cmt_map == 0); + cmt[iid].cmt_missing = B_TRUE; + ctf_dprintf("%d is missing\n", iid); + } +} + +/* + * Dedup a CTF container. + * + * DWARF and other encoding formats that we use to create CTF data may create + * multiple copies of a given type. However, after doing a conversion, and + * before doing a merge, we'd prefer, if possible, to have every input container + * to be unique. + * + * Doing a deduplication is like a normal merge. However, when we diff the types + * in the container, rather than doing a normal diff, we instead want to diff + * against any already processed types. eg, for a given type i in a container, + * we want to diff it from 0 to i - 1. + */ +int +ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp) +{ + int ret; + ctf_diff_t *cdp = NULL; + ctf_merge_input_t *cmi, *cmc; + ctf_file_t *ifp, *ofp; + ctf_merge_types_t cm; + + if (cmp == NULL || outp == NULL) + return (EINVAL); + + ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs); + if (cmp->cmh_ninputs != 2) + return (EINVAL); + + ctf_dprintf("passed argument sanity check\n"); + + cmi = list_head(&cmp->cmh_inputs); + VERIFY(cmi != NULL); + cmc = list_next(&cmp->cmh_inputs, cmi); + VERIFY(cmc != NULL); + ifp = cmi->cmi_input; + ofp = cmc->cmi_input; + VERIFY(ifp != NULL); + VERIFY(ofp != NULL); + cm.cm_src = ifp; + cm.cm_out = ofp; + cm.cm_dedup = B_TRUE; + cm.cm_unique = B_FALSE; + + if ((ret = ctf_merge_types_init(&cm)) != 0) { + return (ret); + } + + if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0) + goto err; + + ctf_dprintf("Successfully initialized dedup\n"); + if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0) + goto err; + + ctf_dprintf("Successfully diffed types\n"); + ret = ctf_merge_common(&cm); + ctf_dprintf("deduping types result: %d\n", ret); + if (ret == 0) + ret = ctf_update(cm.cm_out); + if (ret != 0) + goto err; + + ctf_dprintf("Successfully deduped types\n"); + ctf_phase_dump(cm.cm_out, "dedup-pre-syms"); + + /* + * Now we need to fix up the object and function maps. + */ + ctf_merge_fixup_nontypes(&cm, cmi); + + if (cmp->cmh_msyms == B_TRUE) { + ret = ctf_merge_symbols(cmp, cm.cm_out); + if (ret != 0) { + ret = ctf_errno(cm.cm_out); + ctf_dprintf("failed to dedup symbols: %s\n", + ctf_errmsg(ret)); + goto err; + } + + ret = ctf_merge_functions(cmp, cm.cm_out); + if (ret != 0) { + ret = ctf_errno(cm.cm_out); + ctf_dprintf("failed to dedup functions: %s\n", + ctf_errmsg(ret)); + goto err; + } + } + + ret = ctf_update(cm.cm_out); + if (ret == 0) { + cmc->cmi_input = NULL; + *outp = cm.cm_out; + } +err: + ctf_merge_types_fini(&cm); + ctf_diff_fini(cdp); + return (ret); +} + +int +ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs) +{ + if (nthrs == 0) + return (EINVAL); + cmp->cmh_nthreads = nthrs; + return (0); +} diff --git a/usr/src/lib/libctf/common/ctf_subr.c b/usr/src/lib/libctf/common/ctf_subr.c index 467b6a8181..26f7e8c4db 100644 --- a/usr/src/lib/libctf/common/ctf_subr.c +++ b/usr/src/lib/libctf/common/ctf_subr.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <ctf_impl.h> #include <libctf.h> #include <sys/mman.h> @@ -56,6 +54,18 @@ ctf_alloc(size_t size) return (malloc(size)); } +void * +mergeq_alloc(size_t size) +{ + return (malloc(size)); +} + +void * +workq_alloc(size_t size) +{ + return (malloc(size)); +} + /*ARGSUSED*/ void ctf_free(void *buf, size_t size) @@ -63,6 +73,20 @@ ctf_free(void *buf, size_t size) free(buf); } +/*ARGSUSED*/ +void +mergeq_free(void *buf, size_t size) +{ + free(buf); +} + +/*ARGSUSED*/ +void +workq_free(void *buf, size_t size) +{ + free(buf); +} + const char * ctf_strerror(int err) { diff --git a/usr/src/lib/libctf/common/libctf.h b/usr/src/lib/libctf/common/libctf.h index 3fd69318de..0951ae0606 100644 --- a/usr/src/lib/libctf/common/libctf.h +++ b/usr/src/lib/libctf/common/libctf.h @@ -23,6 +23,9 @@ * Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2015, Joyent, Inc. + */ /* * This header file defines the interfaces available from the CTF debugger @@ -32,7 +35,7 @@ * the fullness of time after we gain more experience with the interfaces. * * In the meantime, be aware that any program linked with libctf in this - * release of Solaris is almost guaranteed to break in the next release. + * release of illumos is almost guaranteed to break in the next release. * * In short, do not user this header file or libctf for any purpose. */ @@ -40,8 +43,6 @@ #ifndef _LIBCTF_H #define _LIBCTF_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/ctf_api.h> #ifdef __cplusplus @@ -53,6 +54,44 @@ extern "C" { */ extern int _libctf_debug; +typedef enum ctf_diff_flag { + CTF_DIFF_F_IGNORE_INTNAMES = 0x01 +} ctf_diff_flag_t; + +typedef struct ctf_diff ctf_diff_t; +typedef void (*ctf_diff_type_f)(ctf_file_t *, ctf_id_t, boolean_t, ctf_file_t *, + ctf_id_t, void *); +typedef void (*ctf_diff_func_f)(ctf_file_t *, ulong_t, boolean_t, ctf_file_t *, + ulong_t, void *); +typedef void (*ctf_diff_obj_f)(ctf_file_t *, ulong_t, ctf_id_t, boolean_t, + ctf_file_t *, ulong_t, ctf_id_t, void *); + +extern int ctf_diff_init(ctf_file_t *, ctf_file_t *, ctf_diff_t **); +extern uint_t ctf_diff_getflags(ctf_diff_t *); +extern int ctf_diff_setflags(ctf_diff_t *, uint_t); +extern int ctf_diff_types(ctf_diff_t *, ctf_diff_type_f, void *); +extern int ctf_diff_functions(ctf_diff_t *, ctf_diff_func_f, void *); +extern int ctf_diff_objects(ctf_diff_t *, ctf_diff_obj_f, void *); +extern void ctf_diff_fini(ctf_diff_t *); + +#define CTF_CONVERT_F_IGNNONC 0x01 +extern ctf_file_t *ctf_fdconvert(int, const char *, uint_t, uint_t, int *, + char *, size_t); + +typedef struct ctf_merge_handle ctf_merge_t; +extern ctf_merge_t *ctf_merge_init(int, int *); +extern int ctf_merge_add(ctf_merge_t *, ctf_file_t *); +extern int ctf_merge_set_nthreads(ctf_merge_t *, const uint_t); +extern int ctf_merge_label(ctf_merge_t *, const char *); +extern int ctf_merge_uniquify(ctf_merge_t *, ctf_file_t *, const char *); +extern int ctf_merge_merge(ctf_merge_t *, ctf_file_t **); +extern int ctf_merge_dedup(ctf_merge_t *, ctf_file_t **); +extern void ctf_merge_fini(ctf_merge_t *); + +#define CTF_ELFWRITE_F_COMPRESS 0x1 +extern int ctf_elffdwrite(ctf_file_t *, int, int, int); +extern int ctf_elfwrite(ctf_file_t *, const char *, const char *, int); + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libctf/common/libctf_impl.h b/usr/src/lib/libctf/common/libctf_impl.h new file mode 100644 index 0000000000..11193e97d0 --- /dev/null +++ b/usr/src/lib/libctf/common/libctf_impl.h @@ -0,0 +1,59 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LIBCTF_IMPL_H +#define _LIBCTF_IMPL_H + +/* + * Portions of libctf implementations that are only suitable for CTF's userland + * library, eg. converting and merging related routines. + */ + +#include <libelf.h> +#include <libctf.h> +#include <ctf_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum ctf_conv_status { + CTF_CONV_SUCCESS = 0, + CTF_CONV_ERROR = 1, + CTF_CONV_NOTSUP = 2 +} ctf_conv_status_t; + +typedef ctf_conv_status_t (*ctf_convert_f)(int, Elf *, uint_t, int *, + ctf_file_t **, char *, size_t); +extern ctf_conv_status_t ctf_dwarf_convert(int, Elf *, uint_t, int *, + ctf_file_t **, char *, size_t); + +/* + * zlib compression routines + */ +extern int ctf_compress(ctf_file_t *fp, void **, size_t *, size_t *); + +extern int ctf_diff_self(ctf_diff_t *, ctf_diff_type_f, void *); + +/* + * Internal debugging aids + */ +extern void ctf_phase_dump(ctf_file_t *, const char *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBCTF_IMPL_H */ diff --git a/usr/src/lib/libctf/common/mapfile-vers b/usr/src/lib/libctf/common/mapfile-vers index 5573e8db25..b26ee8ceb4 100644 --- a/usr/src/lib/libctf/common/mapfile-vers +++ b/usr/src/lib/libctf/common/mapfile-vers @@ -23,7 +23,7 @@ # # -# Copyright (c) 2013, Joyent, Inc. All rights reserved. +# Copyright 2018 Joyent, Inc. # # @@ -53,9 +53,12 @@ SYMBOL_VERSION SUNWprivate_1.2 { ctf_add_enumerator; ctf_add_float; ctf_add_forward; + ctf_add_funcptr; ctf_add_function; ctf_add_integer; + ctf_add_label; ctf_add_member; + ctf_add_object; ctf_add_pointer; ctf_add_restrict; ctf_add_struct; @@ -64,19 +67,52 @@ SYMBOL_VERSION SUNWprivate_1.2 { ctf_add_union; ctf_add_volatile; ctf_create; + ctf_dataptr; ctf_delete_type; + ctf_diff_fini; + ctf_diff_functions; + ctf_diff_getflags; + ctf_diff_init; + ctf_diff_objects; + ctf_diff_setflags; + ctf_diff_types; ctf_discard; ctf_dup; + ctf_elffdwrite; + ctf_elfwrite; ctf_enum_value; + ctf_fdconvert; + ctf_flags; + ctf_func_args_by_id; + ctf_func_info_by_id; + ctf_function_iter; + ctf_kind_name; ctf_label_info; ctf_label_iter; ctf_label_topmost; + ctf_max_id; ctf_member_info; + ctf_merge_add; + ctf_merge_dedup; + ctf_merge_fini; + ctf_merge_init; + ctf_merge_label; + ctf_merge_merge; + ctf_merge_set_nthreads; + ctf_merge_uniquify; + ctf_nr_syms; + ctf_object_iter; ctf_parent_file; + ctf_parent_label; ctf_parent_name; ctf_set_array; + ctf_set_root; + ctf_set_size; + ctf_string_iter; + ctf_symbol_name; ctf_type_align; ctf_type_cmp; + ctf_type_cname; ctf_type_compat; ctf_type_pointer; ctf_update; |
