diff options
author | Dan McDonald <danmcd@joyent.com> | 2021-05-14 12:00:48 -0400 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2021-05-14 12:00:48 -0400 |
commit | 1a25930b922d3fede4a252f4f2e0ecb8de2656cb (patch) | |
tree | a2ff441f85489691a94f13a644cb7cfe10b81f9e /usr/src/lib/libdemangle/common/rust-legacy.c | |
parent | 3aa01401155d92a38a0d4e107043c130432e4a43 (diff) | |
parent | 1cd083931cfd3fb8617c1178f62bce417cfa6af2 (diff) | |
download | illumos-joyent-1a25930b922d3fede4a252f4f2e0ecb8de2656cb.tar.gz |
[illumos-gate merge]
commit 1cd083931cfd3fb8617c1178f62bce417cfa6af2
13780 Add support for rust v0 mangling format
commit 4fe48c6ec9f06cbcce19c4cf97f662b64efde582
13798 loader: Update the EFI timer to be called once a second
commit 9e3493cb8a0cfe96c9aef9b7da42c6c9b5c24b43
13374 Port L2ARC Improvements from OpenZFS
Diffstat (limited to 'usr/src/lib/libdemangle/common/rust-legacy.c')
-rw-r--r-- | usr/src/lib/libdemangle/common/rust-legacy.c | 386 |
1 files changed, 386 insertions, 0 deletions
diff --git a/usr/src/lib/libdemangle/common/rust-legacy.c b/usr/src/lib/libdemangle/common/rust-legacy.c new file mode 100644 index 0000000000..5b1518f619 --- /dev/null +++ b/usr/src/lib/libdemangle/common/rust-legacy.c @@ -0,0 +1,386 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + * Copyright 2021 Jason King + */ + +#include <errno.h> +#include <libcustr.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> + +#include "rust.h" + +/* + * Unfortunately, there is currently no official specification for the legacy + * rust name mangling. This is an attempt to document the understanding of the + * mangling used here. It is based off examination of + * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/ + * + * A mangled rust name is: + * <prefix> <name> + * + * <prefix> ::= _Z + * __Z + * + * <name> ::= N <name-segment>+ [<hash>] E + * + * <name-segment> ::= <len> <name-chars>{len} + * + * <len> ::= [1-9][0-9]+ + * + * <name-chars> ::= <[A-Za-z]> <[A-Za-z0-9]>* + * <separator> + * <special> + * + * <separator> ::= '..' # '::' + * + * <special> ::= $SP$ # '@' + * $BP$ # '*' + * $RF$ # '&' + * $LT$ # '<' + * $GT$ # '>' + * $LP$ # '(' + * $RP$ # ')' + * $C$ # ',' + * + * <hash> := <len> h <hex-digits>+ + * + * <hex-digits> := <[0-9a-f]> + */ + +static const struct rust_charmap { + const char *ruc_seq; + char ruc_ch; +} rust_charmap[] = { + { "$SP$", '@' }, + { "$BP$", '*' }, + { "$RF$", '&' }, + { "$LT$", '<' }, + { "$GT$", '>' }, + { "$LP$", '(' }, + { "$RP$", ')' }, + { "$C$", ',' }, +}; +static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap); + +static boolean_t rustleg_valid_sym(const strview_t *); +static boolean_t rustleg_parse_name(rust_state_t *, strview_t *); +static boolean_t rustleg_parse_hash(rust_state_t *, strview_t *); +static boolean_t rustleg_parse_special(rust_state_t *, strview_t *); +static boolean_t rustleg_add_sep(rust_state_t *); + +boolean_t +rust_demangle_legacy(rust_state_t *restrict st, strview_t *restrict sv) +{ + + /* Make sure the whole thing contains valid characters */ + if (!rustleg_valid_sym(sv)) { + st->rs_error = EINVAL; + return (B_FALSE); + } + + if (sv_peek(sv, -1) != 'E') { + DEMDEBUG("ERROR: string does not end with 'E'"); + st->rs_error = EINVAL; + return (B_FALSE); + } + + if (!rustleg_parse_name(st, sv)) + return (B_FALSE); + + if (sv_remaining(sv) != 0) { + DEMDEBUG("ERROR: trailing characters in name"); + st->rs_error = EINVAL; + return (B_FALSE); + } + + return (B_TRUE); +} + +static boolean_t +rustleg_parse_name_segment(rust_state_t *st, strview_t *svp, boolean_t first) +{ + strview_t orig; + strview_t name; + uint64_t len; + size_t rem; + boolean_t last = B_FALSE; + + if (HAS_ERROR(st) || sv_remaining(svp) == 0) + return (B_FALSE); + + sv_init_sv(&orig, svp); + + if (!rust_parse_base10(st, svp, &len)) { + DEMDEBUG("ERROR: no leading length"); + st->rs_error = EINVAL; + return (B_FALSE); + } + + rem = sv_remaining(svp); + + if (rem < len) { + DEMDEBUG("ERROR: segment length (%" PRIu64 ") > remaining " + "bytes in string (%zu)", len, rem); + st->rs_error = EINVAL; + return (B_FALSE); + } + + /* Is this the last segment before the terminating E? */ + if (rem == len + 1) { + VERIFY3U(sv_peek(svp, -1), ==, 'E'); + last = B_TRUE; + } + + if (!first && !rustleg_add_sep(st)) + return (B_FALSE); + + /* Reduce length of seg to the length we parsed */ + (void) sv_init_sv_range(&name, svp, len); + + DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name)); + + /* + * A rust hash starts with 'h', and is the last component of a name + * before the terminating 'E'. It is however not always present + * in every mangled symbol, and a last segment that starts with 'h' + * could be confused for it, so failing to part it just means + * we don't have a trailing hash. + */ + if (sv_peek(&name, 0) == 'h' && last) { + if (rustleg_parse_hash(st, &name)) + goto done; + + /* + * However any error other than 'not a hash' (e.g. ENOMEM) + * means we should fail. + */ + if (st->rs_error != 0) + goto done; + } + + /* A '_' followed by $ is ignored at the start of a name segment */ + if (sv_peek(&name, 0) == '_' && sv_peek(&name, 1) == '$') + (void) sv_consume_n(&name, 1); + + while (sv_remaining(&name) > 0) { + switch (sv_peek(&name, 0)) { + case '$': + if (rustleg_parse_special(st, &name)) + continue; + break; + case '.': + /* Convert '..' to '::' */ + if (sv_peek(&name, 1) != '.') + break; + + if (!rustleg_add_sep(st)) + return (B_FALSE); + + sv_consume_n(&name, 2); + continue; + default: + break; + } + + if (!rust_appendc(st, sv_consume_c(&name))) { + SET_ERROR(st); + return (B_FALSE); + } + } + +done: + sv_consume_n(svp, len); + + VERIFY3P(orig.sv_first, <=, svp->sv_first); + DEMDEBUG("%s: consumed '%.*s'", __func__, + (int)(uintptr_t)(svp->sv_first - orig.sv_first), orig.sv_first); + return (B_TRUE); +} + +/* + * Parse N (<num><name>{num})+ [<num>h<hex digits]E + */ +static boolean_t +rustleg_parse_name(rust_state_t *st, strview_t *svp) +{ + strview_t name; + boolean_t first = B_TRUE; + + sv_init_sv(&name, svp); + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name)); + + if (sv_remaining(svp) == 0) { + DEMDEBUG("%s: empty name", __func__); + return (B_FALSE); + } + + if (!sv_consume_if_c(svp, 'N')) { + DEMDEBUG("%s: does not start with 'N'", __func__); + return (B_FALSE); + } + + while (sv_remaining(svp) > 0 && sv_peek(svp, 0) != 'E') { + if (!rustleg_parse_name_segment(st, svp, first)) + return (B_FALSE); + first = B_FALSE; + } + + if (!sv_consume_if_c(svp, 'E')) { + DEMDEBUG("%s: ERROR no terminating 'E'", __func__); + return (B_FALSE); + } + + VERIFY3P(name.sv_first, <=, svp->sv_first); + DEMDEBUG("%s: consumed '%.*s'", __func__, + (int)(uintptr_t)(svp->sv_first - name.sv_first), name.sv_first); + + return (B_TRUE); +} + +static boolean_t +rustleg_parse_hash(rust_state_t *st, strview_t *svp) +{ + if (HAS_ERROR(st)) + return (B_FALSE); + + VERIFY(sv_consume_if_c(svp, 'h')); + if (!rust_appendc(st, 'h')) + return (B_FALSE); + + while (sv_remaining(svp) > 0) { + char c = sv_consume_c(svp); + + switch (c) { + /* + * The upper-case hex digits (A-F) are excluded as valid + * hash values for several reasons: + * + * 1. It would result in two different possible names for + * the same function, leading to ambiguity in linking (among + * other things). + * + * 2. It would cause potential ambiguity in parsing -- is a + * trailing 'E' part of the hash, or the terminating character + * in the mangled name? + * + * 3. No examples were able to be found in the wild where + * uppercase digits are used, and other rust demanglers all + * seem to assume the hash must contain lower-case hex digits. + */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': case 'a': case 'b': + case 'c': case 'd': case 'e': case 'f': + if (!rust_appendc(st, c)) + return (B_FALSE); + break; + default: + return (B_FALSE); + } + } + + return (B_TRUE); +} + +static boolean_t +rustleg_parse_special(rust_state_t *restrict st, strview_t *restrict svp) +{ + if (HAS_ERROR(st)) + return (B_FALSE); + + if (sv_peek(svp, 0) != '$') + return (B_FALSE); + + for (size_t i = 0; i < rust_charmap_sz; i++) { + if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) { + if (!rust_appendc(st, rust_charmap[i].ruc_ch)) + return (B_FALSE); + return (B_TRUE); + } + } + + /* Handle $uXXXX$ */ + + strview_t sv; + uint32_t val = 0; + uint_t ndigits = 0; + + sv_init_sv(&sv, svp); + + /* We peeked at this earlier, so it should still be there */ + VERIFY(sv_consume_if_c(&sv, '$')); + + if (!sv_consume_if_c(&sv, 'u')) + return (B_FALSE); + + while (sv_remaining(&sv) > 0) { + uint32_t cval = 0; + char c; + + if (ndigits == 4) + return (B_FALSE); + + c = sv_consume_c(&sv); + if (c >= '0' && c <= '9') + cval = c - '0'; + else if (c >= 'a' && c <= 'f') + cval = c - 'a' + 10; + else if (c == '$') + break; + else + return (B_FALSE); + + val <<= 4; + val |= cval; + ndigits++; + } + + if (!rust_append_utf8_c(st, val)) + return (B_FALSE); + + sv_consume_n(svp, ndigits + 3); + return (B_TRUE); +} + +static boolean_t +rustleg_add_sep(rust_state_t *st) +{ + if (HAS_ERROR(st)) + return (B_FALSE); + + return (rust_append(st, "::")); +} + +static boolean_t +rustleg_valid_sym(const strview_t *sv) +{ + size_t i; + + for (i = 0; i < sv->sv_rem; i++) { + char c = sv->sv_first[i]; + + if ((c & 0x80) == 0) + continue; + DEMDEBUG("%s: ERROR found 8-bit character '%c' in '%.*s' " + "at index %zu", __func__, c, SV_PRINT(sv), i); + return (B_FALSE); + } + return (B_TRUE); +} |