From 4226f635096bf9d814aa9fb335518c4855bbe3a3 Mon Sep 17 00:00:00 2001 From: Jason King Date: Sat, 27 May 2017 18:46:17 -0500 Subject: 6375 Add native name demangling support Reviewed by: Robert Mustacchi Reviewed by: Richard Lowe Approved by: Dan McDonald --- usr/src/lib/libdemangle/common/cxx.c | 4217 ++++++++++++++++++++++++++++++++++ 1 file changed, 4217 insertions(+) create mode 100644 usr/src/lib/libdemangle/common/cxx.c (limited to 'usr/src/lib/libdemangle/common/cxx.c') diff --git a/usr/src/lib/libdemangle/common/cxx.c b/usr/src/lib/libdemangle/common/cxx.c new file mode 100644 index 0000000000..66d7170544 --- /dev/null +++ b/usr/src/lib/libdemangle/common/cxx.c @@ -0,0 +1,4217 @@ +/* + * Ported from LLVM's libcxxabi trunk/src/cxa_demangle.cpp + * LICENSE.TXT contents is available as ../THIRDPARTYLICENSE + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + */ + +/* + * Copyright 2018 Jason King. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "demangle-sys.h" +#include "demangle_int.h" +#include "cxx.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof (x) / sizeof (x[0])) +#endif + +#define CPP_QUAL_CONST (1U) +#define CPP_QUAL_VOLATILE (2U) +#define CPP_QUAL_RESTRICT (4U) + +typedef struct cpp_db_s { + sysdem_ops_t *cpp_ops; + jmp_buf cpp_jmp; + name_t cpp_name; + sub_t cpp_subs; + templ_t cpp_templ; + unsigned cpp_cv; + unsigned cpp_ref; + unsigned cpp_depth; + boolean_t cpp_parsed_ctor_dtor_cv; + boolean_t cpp_tag_templates; + boolean_t cpp_fix_forward_references; + boolean_t cpp_try_to_parse_template_args; + locale_t cpp_loc; +} cpp_db_t; + +#define CK(x) \ + do { \ + if (!(x)) { \ + longjmp(db->cpp_jmp, 1); \ + } \ + NOTE(CONSTCOND) \ + } while (0) + +#define TOP_L(db) (&(name_top(&(db)->cpp_name)->strp_l)) +#define RLEN(f, l) ((size_t)((l) - (f))) +#define NAMT(db, n) (nlen(db) - n) + +static inline boolean_t is_xdigit(int); + +static boolean_t nempty(cpp_db_t *); +static size_t nlen(cpp_db_t *); +static void nadd_l(cpp_db_t *, const char *, size_t); +static void njoin(cpp_db_t *, size_t, const char *); +static void nfmt(cpp_db_t *, const char *, const char *); + +static void save_top(cpp_db_t *, size_t); +static void sub(cpp_db_t *, size_t); + +static boolean_t tempty(const cpp_db_t *); +static size_t ttlen(const cpp_db_t *); + +static void tsub(cpp_db_t *, size_t); +static void tpush(cpp_db_t *); +static void tpop(cpp_db_t *); +static void tsave(cpp_db_t *, size_t); + +static boolean_t db_init(cpp_db_t *, sysdem_ops_t *); +static void db_fini(cpp_db_t *); +static void dump(cpp_db_t *, FILE *); + +static void demangle(const char *, const char *, cpp_db_t *); + +static const char *parse_type(const char *, const char *, cpp_db_t *); +static const char *parse_builtin_type(const char *, const char *, cpp_db_t *); +static const char *parse_qual_type(const char *, const char *, cpp_db_t *); +static const char *parse_encoding(const char *, const char *, cpp_db_t *); +static const char *parse_dot_suffix(const char *, const char *, cpp_db_t *); +static const char *parse_block_invoke(const char *, const char *, cpp_db_t *); +static const char *parse_special_name(const char *, const char *, cpp_db_t *); +static const char *parse_name(const char *, const char *, boolean_t *, + cpp_db_t *); +static const char *parse_call_offset(const char *, const char *, locale_t); +static const char *parse_number(const char *, const char *, locale_t); +static const char *parse_nested_name(const char *, const char *, boolean_t *, + cpp_db_t *); +static const char *parse_local_name(const char *, const char *, boolean_t *, + cpp_db_t *); +static const char *parse_unscoped_name(const char *, const char *, cpp_db_t *); +static const char *parse_template_args(const char *, const char *, cpp_db_t *); +static const char *parse_substitution(const char *, const char *, cpp_db_t *); +static const char *parse_discriminator(const char *, const char *, locale_t); +static const char *parse_cv_qualifiers(const char *, const char *, unsigned *); +static const char *parse_template_param(const char *, const char *, cpp_db_t *); +static const char *parse_decltype(const char *, const char *, cpp_db_t *); +static const char *parse_template_args(const char *, const char *, cpp_db_t *); +static const char *parse_unqualified_name(const char *, const char *, + cpp_db_t *); +static const char *parse_template_arg(const char *, const char *, cpp_db_t *); +static const char *parse_expression(const char *, const char *, cpp_db_t *); +static const char *parse_expr_primary(const char *, const char *, cpp_db_t *); +static const char *parse_binary_expr(const char *, const char *, + const char *, cpp_db_t *); +static const char *parse_prefix_expr(const char *, const char *, + const char *, cpp_db_t *); +static const char *parse_gs(const char *, const char *, cpp_db_t *); +static const char *parse_idx_expr(const char *, const char *, cpp_db_t *); +static const char *parse_mm_expr(const char *, const char *, cpp_db_t *); +static const char *parse_pp_expr(const char *, const char *, cpp_db_t *); +static const char *parse_trinary_expr(const char *, const char *, cpp_db_t *); +static const char *parse_new_expr(const char *, const char *, cpp_db_t *); +static const char *parse_del_expr(const char *, const char *, cpp_db_t *); +static const char *parse_cast_expr(const char *, const char *, cpp_db_t *); +static const char *parse_sizeof_param_pack_expr(const char *, const char *, + cpp_db_t *); +static const char *parse_typeid_expr(const char *, const char *, cpp_db_t *); +static const char *parse_throw_expr(const char *, const char *, cpp_db_t *); +static const char *parse_dot_star_expr(const char *, const char *, cpp_db_t *); +static const char *parse_dot_expr(const char *, const char *, cpp_db_t *); +static const char *parse_call_expr(const char *, const char *, cpp_db_t *); +static const char *parse_arrow_expr(const char *, const char *, cpp_db_t *); +static const char *parse_conv_expr(const char *, const char *, cpp_db_t *); +static const char *parse_function_param(const char *, const char *, cpp_db_t *); +static const char *parse_base_unresolved_name(const char *, const char *, + cpp_db_t *); +static const char *parse_unresolved_name(const char *, const char *, + cpp_db_t *); +static const char *parse_noexcept_expr(const char *, const char *, cpp_db_t *); +static const char *parse_alignof(const char *, const char *, cpp_db_t *); +static const char *parse_sizeof(const char *, const char *, cpp_db_t *); +static const char *parse_unnamed_type_name(const char *, const char *, + cpp_db_t *); +static const char *parse_ctor_dtor_name(const char *, const char *, cpp_db_t *); +static const char *parse_source_name(const char *, const char *, cpp_db_t *); +static const char *parse_operator_name(const char *, const char *, cpp_db_t *); +static const char *parse_pack_expansion(const char *, const char *, cpp_db_t *); +static const char *parse_unresolved_type(const char *, const char *, + cpp_db_t *); +static const char *parse_unresolved_qualifier_level(const char *, const char *, + cpp_db_t *); +static const char *parse_destructor_name(const char *, const char *, + cpp_db_t *); +static const char *parse_function_type(const char *, const char *, cpp_db_t *); +static const char *parse_array_type(const char *, const char *, cpp_db_t *); +static const char *parse_pointer_to_member_type(const char *, const char *, + cpp_db_t *); +static const char *parse_vector_type(const char *, const char *, cpp_db_t *); + +size_t cpp_name_max_depth = 1024; /* max depth of name stack */ + +char * +cpp_demangle(const char *src, sysdem_ops_t *ops) +{ + char *result = NULL; + cpp_db_t db; + size_t srclen = strlen(src); + + if (!db_init(&db, ops)) + goto done; + if (setjmp(db.cpp_jmp) != 0) + goto done; + + errno = 0; + demangle(src, src + srclen, &db); + + if (errno == 0 && db.cpp_fix_forward_references && + !templ_empty(&db.cpp_templ) && + !sub_empty(&db.cpp_templ.tpl_items[0])) { + db.cpp_fix_forward_references = B_FALSE; + db.cpp_tag_templates = B_FALSE; + name_clear(&db.cpp_name); + sub_clear(&db.cpp_subs); + + if (setjmp(db.cpp_jmp) != 0) + goto done; + + demangle(src, src + srclen, &db); + + if (db.cpp_fix_forward_references) { + errno = EINVAL; + goto done; + } + } + + if (errno != 0) + goto done; + + if (nempty(&db)) { + errno = EINVAL; + goto done; + } + + njoin(&db, 1, ""); + + if (nlen(&db) > 0) { + str_t *s = TOP_L(&db); + result = zalloc(ops, s->str_len + 1); + if (result == NULL) + goto done; + + (void) memcpy(result, s->str_s, s->str_len); + } + +done: + if (demangle_debug) + dump(&db, stdout); + + db_fini(&db); + return (result); +} + +static void +demangle(const char *first, const char *last, cpp_db_t *db) +{ + const char *t = NULL; + + if (first >= last) { + errno = EINVAL; + return; + } + + if (first[0] != '_') { + t = parse_type(first, last, db); + if (t == first) { + errno = EINVAL; + return; + } + goto done; + } + + if (last - first < 4) { + errno = EINVAL; + return; + } + + if (first[1] == 'Z') { + t = parse_encoding(first + 2, last, db); + + if (t != first + 2 && t != last && t[0] == '.') { + t = parse_dot_suffix(t, last, db); + if (nlen(db) > 1) + njoin(db, 2, ""); + } + + goto done; + } + + if (first[1] != '_' || first[2] != '_' || first[3] != 'Z') + goto done; + + t = parse_encoding(first + 4, last, db); + if (t != first + 4 && t != last) + t = parse_block_invoke(t, last, db); + +done: + if (t != last) + errno = EINVAL; +} + +static const char * +parse_dot_suffix(const char *first, const char *last, cpp_db_t *db) +{ + VERIFY3P(first, <=, last); + + if (first == last || first[0] != '.') + return (first); + + if (nempty(db)) + return (first); + + nadd_l(db, first, RLEN(first, last)); + nfmt(db, " ({0})", NULL); + + return (last); +} + +/* + * _block_invoke + * _block_invoke* + * _block_invoke_+ + */ +static const char * +parse_block_invoke(const char *first, const char *last, cpp_db_t *db) +{ + VERIFY3P(first, <=, last); + + if (last - first < 13) + return (first); + + const char test[] = "_block_invoke"; + const char *t = first; + + if (strncmp(first, test, sizeof (test) - 1) != 0) + return (first); + + t += sizeof (test); + if (t == last) + goto done; + + if (t[0] == '_') { + /* need at least one digit */ + if (t + 1 == last || !isdigit_l(t[1], db->cpp_loc)) + return (first); + t += 2; + } + + while (t < last && isdigit_l(t[0], db->cpp_loc)) + t++; + +done: + if (nempty(db)) + return (first); + + nfmt(db, "invocation function for block in {0}", NULL); + return (t); +} + +/* + * ::= + * ::= + * ::= + */ +static const char * +parse_encoding(const char *first, const char *last, cpp_db_t *db) +{ + VERIFY3P(first, <=, last); + + if (first == last) + return (first); + + const char *t = NULL; + const char *t2 = NULL; + unsigned cv = 0; + unsigned ref = 0; + boolean_t tag_templ_save = db->cpp_tag_templates; + + if (++db->cpp_depth > 1) + db->cpp_tag_templates = B_TRUE; + + if (first[0] == 'G' || first[0] == 'T') { + t = parse_special_name(first, last, db); + goto done; + } + + boolean_t ends_with_template_args = B_FALSE; + t = parse_name(first, last, &ends_with_template_args, db); + if (t == first) + goto fail; + + cv = db->cpp_cv; + ref = db->cpp_ref; + + if (t == last || t[0] == 'E' || t[0] == '.') + goto done; + + db->cpp_tag_templates = B_FALSE; + if (nempty(db) || str_length(TOP_L(db)) == 0) + goto fail; + + if (!db->cpp_parsed_ctor_dtor_cv && ends_with_template_args) { + t2 = parse_type(t, last, db); + if (t2 == t || nlen(db) < 2) + goto fail; + + str_pair_t *sp = name_top(&db->cpp_name); + + if (str_length(&sp->strp_r) == 0) + (void) str_append(&sp->strp_l, " ", 1); + + nfmt(db, "{0:L}{1:L}", "{1:R}{0:R}"); + t = t2; + } + + if (t == last || nempty(db)) + goto fail; + + size_t n = nlen(db); + + if (t[0] == 'v') { + t++; + } else { + for (;;) { + t2 = parse_type(t, last, db); + if (t2 == t || t == last) + break; + + t = t2; + } + } + + /* + * a bit of a hack, but a template substitution can apparently be + * an empty string at the end of an argument list, so avoid + * <...., > + */ + if (NAMT(db, n) > 1 && str_pair_len(name_top(&db->cpp_name)) == 0) + name_pop(&db->cpp_name, NULL); + + njoin(db, NAMT(db, n), ", "); + nfmt(db, "({0})", NULL); + + str_t *s = TOP_L(db); + + if (cv & CPP_QUAL_CONST) { + CK(str_append(s, " const", 0)); + } + if (cv & CPP_QUAL_VOLATILE) { + CK(str_append(s, " volatile", 0)); + } + if (cv & CPP_QUAL_RESTRICT) { + CK(str_append(s, " restrict", 0)); + } + if (ref == 1) { + CK(str_append(s, " &", 0)); + } + if (ref == 2) { + CK(str_append(s, " &&", 0)); + } + + nfmt(db, "{1:L}{0}{1:R}", NULL); + +done: + db->cpp_tag_templates = tag_templ_save; + db->cpp_depth--; + return (t); + +fail: + db->cpp_tag_templates = tag_templ_save; + db->cpp_depth--; + return (first); +} + +/* + * ::= TV # virtual table + * ::= TT # VTT structure (construction vtable index) + * ::= TI # typeinfo structure + * ::= TS # typeinfo name (null-terminated byte string) + * ::= Tc + * # base is the nominal target function of thunk + * # first call-offset is 'this' adjustment + * # second call-offset is result adjustment + * ::= T + * # base is the nominal target function of thunk + * ::= GV # Guard variable for one-time init + * # No + * ::= TW # Thread-local wrapper + * ::= TH # Thread-local initialization + * extension ::= TC _ + * # construction vtable for second-in-first + * extension ::= GR # reference temporary for object + */ +static const char * +parse_special_name(const char *first, const char *last, cpp_db_t *db) +{ + VERIFY3P(first, <=, last); + + const char *t = first; + const char *t1 = NULL; + size_t n = nlen(db); + + if (last - first < 2) + return (first); + + switch (t[0]) { + case 'T': + switch (t[1]) { + case 'V': + nadd_l(db, "vtable for", 0); + t = parse_type(first + 2, last, db); + break; + case 'T': + nadd_l(db, "VTT for", 0); + t = parse_type(first + 2, last, db); + break; + case 'I': + nadd_l(db, "typeinfo for", 0); + t = parse_type(first + 2, last, db); + break; + case 'S': + nadd_l(db, "typeinfo name for", 0); + t = parse_type(first + 2, last, db); + break; + case 'c': + nadd_l(db, "covariant return thunk to", 0); + t1 = parse_call_offset(first + 2, last, db->cpp_loc); + if (t1 == t) + return (first); + t = parse_call_offset(t1, last, db->cpp_loc); + if (t == t1) + return (first); + t1 = parse_encoding(t, last, db); + if (t1 == t) + return (first); + break; + case 'C': + t = parse_type(first + 2, last, db); + if (t == first + 2) + return (first); + t1 = parse_number(t, last, db->cpp_loc); + if (*t1 != '_') + return (first); + t = parse_type(t1 + 1, last, db); + if (t == t1 + 1 || nlen(db) < 2) + return (first); + nfmt(db, "construction vtable for {0}-in-{1}", NULL); + return (t); + case 'W': + nadd_l(db, "thread-local wrapper routine for", 0); + t = parse_name(first + 2, last, NULL, db); + break; + case 'H': + nadd_l(db, "thread-local initialization routine for", + 0); + t = parse_name(first + 2, last, NULL, db); + break; + default: + if (first[1] == 'v') { + nadd_l(db, "virtual thunk to", 0); + } else { + nadd_l(db, "non-virtual thunk to", 0); + } + + t = parse_call_offset(first + 1, last, db->cpp_loc); + if (t == first + 1) + return (first); + t1 = parse_encoding(t, last, db); + if (t == t1) + return (first); + t = t1; + break; + } + break; + case 'G': + switch (first[1]) { + case 'V': + nadd_l(db, "guard variable for", 0); + t = parse_name(first + 2, last, NULL, db); + break; + case 'R': + nadd_l(db, "reference temporary for", 0); + t = parse_name(first + 2, last, NULL, db); + break; + default: + return (first); + } + break; + default: + return (first); + } + + size_t amt = NAMT(db, n); + if (t == first + 2 || amt < 2) + return (first); + + njoin(db, amt, " "); + return (t); +} + +/* + * ::= h _ + * ::= v _ + * + * ::= + * # non-virtual base override + * + * ::= _ + * # virtual base override, with vcall offset + */ +static const char * +parse_call_offset(const char *first, const char *last, locale_t loc) +{ + VERIFY3P(first, <=, last); + + const char *t = NULL; + const char *t1 = NULL; + + if (first == last) + return (first); + + if (first[0] != 'h' && first[0] != 'v') + return (first); + + t = parse_number(first + 1, last, loc); + if (t == first + 1 || t == last || t[0] != '_') + return (first); + + /* skip _ */ + t++; + + if (first[0] == 'h') + return (t); + + t1 = parse_number(t, last, loc); + if (t == t1 || t1 == last || t1[0] != '_') + return (first); + + /* skip _ */ + t1++; + + return (t1); +} + +/* + * ::= // N + * ::= # See Scope Encoding below // Z + * ::= + * ::= + * + * ::= + * ::= + */ +static const char * +parse_name(const char *first, const char *last, + boolean_t *ends_with_template_args, cpp_db_t *db) +{ + VERIFY3P(first, <=, last); + + const char *t = first; + const char *t1 = NULL; + + if (last - first < 2) + return (first); + + /* extension: ignore L here */ + if (t[0] == 'L') + t++; + + switch (t[0]) { + case 'N': + t1 = parse_nested_name(t, last, ends_with_template_args, db); + return ((t == t1) ? first : t1); + case 'Z': + t1 = parse_local_name(t, last, ends_with_template_args, db); + return ((t == t1) ? first : t1); + } + + /* + * + * + * + */ + t1 = parse_unscoped_name(t, last, db); + + /* */ + if (t != t1 && t1[0] != 'I') + return (t1); + + if (t == t1) { + t1 = parse_substitution(t, last, db); + if (t == t1 || t1 == last || t1[0] != 'I') + return (first); + } else { + save_top(db, 1); + } + + t = parse_template_args(t1, last, db); + if (t1 == t || nlen(db) < 2) + return (first); + + nfmt(db, "{1:L}{0}", "{1:R}"); + + if (ends_with_template_args != NULL) + *ends_with_template_args = B_TRUE; + + return (t); +} + +/* BEGIN CSTYLED */ +/* + * := Z E [] + * := Z E s [] + * := Z Ed [ ] _ + */ +/* END CSTYLED */ +const char * +parse_local_name(const char *first, const char *last, + boolean_t *ends_with_template_args, cpp_db_t *db) +{ + VERIFY3P(first, <=, last); + + const char *t = NULL; + const char *t1 = NULL; + const char *t2 = NULL; + + if (first == last || first[0] != 'Z') + return (first); + + t = parse_encoding(first + 1, last, db); + if (t == first + 1 || t == last || t[0] != 'E') + return (first); + + VERIFY(!nempty(db)); + + /* skip E */ + t++; + + if (t[0] == 's') { + nfmt(db, "{0:L}::string literal", "{0:R}"); + return (parse_discriminator(t, last, db->cpp_loc)); + } + + if (t[0] == 'd') { + t1 = parse_number(t + 1, last, db->cpp_loc); + if (t1[0] != '_') + return (first); + t1++; + } else { + t1 = t; + } + + t2 = parse_name(t1, last, ends_with_template_args, db); + if (t2 == t1) + return (first); + + nfmt(db, "{1:L}::{0}", "{1:R}"); + + /* parsed, but ignored */ + if (t[0] != 'd') + t2 = parse_discriminator(t2, last, db->cpp_loc); + + return (t2); +} + +/* BEGIN CSTYLED */ +/* + * ::= N [] [] E + * ::= N [] [] E + * + * ::= + * ::= + * ::= + * ::= + * ::= # empty + * ::= + * ::= + * extension ::= L + * + * ::=