diff options
author | Dan McDonald <danmcd@joyent.com> | 2021-05-14 12:00:48 -0400 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2021-05-14 12:00:48 -0400 |
commit | 1a25930b922d3fede4a252f4f2e0ecb8de2656cb (patch) | |
tree | a2ff441f85489691a94f13a644cb7cfe10b81f9e /usr/src/lib/libdemangle/common/rust.c | |
parent | 3aa01401155d92a38a0d4e107043c130432e4a43 (diff) | |
parent | 1cd083931cfd3fb8617c1178f62bce417cfa6af2 (diff) | |
download | illumos-joyent-1a25930b922d3fede4a252f4f2e0ecb8de2656cb.tar.gz |
[illumos-gate merge]
commit 1cd083931cfd3fb8617c1178f62bce417cfa6af2
13780 Add support for rust v0 mangling format
commit 4fe48c6ec9f06cbcce19c4cf97f662b64efde582
13798 loader: Update the EFI timer to be called once a second
commit 9e3493cb8a0cfe96c9aef9b7da42c6c9b5c24b43
13374 Port L2ARC Improvements from OpenZFS
Diffstat (limited to 'usr/src/lib/libdemangle/common/rust.c')
-rw-r--r-- | usr/src/lib/libdemangle/common/rust.c | 727 |
1 files changed, 290 insertions, 437 deletions
diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c index 9b145ca841..ce1fca4859 100644 --- a/usr/src/lib/libdemangle/common/rust.c +++ b/usr/src/lib/libdemangle/common/rust.c @@ -10,564 +10,417 @@ */ /* - * Copyright 2019, Joyent, Inc. * Copyright 2021 Jason King + * Copyright 2019 Joyent, Inc. */ #include <errno.h> +#include <langinfo.h> #include <libcustr.h> #include <limits.h> +#include <stdarg.h> #include <string.h> -#include <sys/ctype.h> /* We want the C locale ISXXX() versions */ -#include <sys/debug.h> -#include <stdio.h> -#include <sys/sysmacros.h> -#include "strview.h" #include "demangle_int.h" +#include "rust.h" -/* - * Unfortunately, there is currently no official specification for the rust - * name mangling. This is an attempt to document the understanding of the - * mangling used here. It is based off examination of - * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/ - * - * A mangled rust name is: - * <prefix> <name> - * - * <prefix> ::= _Z - * __Z - * - * <name> ::= N <name-segment>+ [<hash>] E - * - * <name-segment> ::= <len> <name-chars>{len} - * - * <len> ::= [1-9][0-9]+ - * - * <name-chars> ::= <[A-Za-z]> <[A-Za-z0-9]>* - * <separator> - * <special> - * - * <separator> ::= '..' # '::' - * - * <special> ::= $SP$ # ' ' - * $BP$ # '*' - * $RF$ # '&' - * $LT$ # '<' - * $GT$ # '>' - * $LP$ # '(' - * $RP$ # ')' - * $C$ # ',' - * $u7e$ # '~' - * $u20$ # ' ' - * $u27$ # '\'' - * $u3d$ # '=' - * $u5b$ # '[' - * $u5d$ # ']' - * $u7b$ # '{' - * $u7d$ # '}' - * $u3b$ # ';' - * $u2b$ # '+' - * $u22$ # '"' - * - * <hash> := <len> h <hex-digits>+ - * - * <hex-digits> := <[0-9a-f]> - */ - -typedef struct rustdem_state { - const char *rds_str; - custr_t *rds_demangled; - sysdem_ops_t *rds_ops; - int rds_error; -} rustdem_state_t; - -static const struct rust_charmap { - const char *ruc_seq; - char ruc_ch; -} rust_charmap[] = { - { "$SP$", '@' }, - { "$BP$", '*' }, - { "$RF$", '&' }, - { "$LT$", '<' }, - { "$GT$", '>' }, - { "$LP$", '(' }, - { "$RP$", ')' }, - { "$C$", ',' }, - { "$u7e$", '~' }, - { "$u20$", ' ' }, - { "$u27$", '\'' }, - { "$u3d$", '=' }, - { "$u5b$", '[' }, - { "$u5d$", ']' }, - { "$u7b$", '{' }, - { "$u7d$", '}' }, - { "$u3b$", ';' }, - { "$u2b$", '+' }, - { "$u22$", '"' } -}; -static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap); - -static void *rustdem_alloc(custr_alloc_t *, size_t); -static void rustdem_free(custr_alloc_t *, void *, size_t); - -static boolean_t rustdem_append_c(rustdem_state_t *, char); -static boolean_t rustdem_all_ascii(const strview_t *); - -static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *); -static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *); -static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *); -static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *); -static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *); -static boolean_t rustdem_add_sep(rustdem_state_t *); - -char * -rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops) +static void * +rust_cualloc(custr_alloc_t *cua, size_t len) { - rustdem_state_t st = { - .rds_str = s, - .rds_ops = ops, - }; - custr_alloc_ops_t custr_ops = { - .custr_ao_alloc = rustdem_alloc, - .custr_ao_free = rustdem_free - }; - custr_alloc_t custr_alloc = { - .cua_version = CUSTR_VERSION - }; - strview_t sv; - int ret; - - if (custr_alloc_init(&custr_alloc, &custr_ops) != 0) - return (NULL); - custr_alloc.cua_arg = &st; - - sv_init_str(&sv, s, s + slen); - - if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') { - DEMDEBUG("ERROR: string is either too small or does not end " - "with 'E'"); - errno = EINVAL; - return (NULL); - } - - if (!rustdem_parse_prefix(&st, &sv)) { - DEMDEBUG("ERROR: could not parse prefix"); - errno = EINVAL; - return (NULL); - } - DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv)); - - if (!rustdem_all_ascii(&sv)) { - /* rustdem_all_ascii() provides debug output */ - errno = EINVAL; - return (NULL); - } - - if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0) - return (NULL); - - if (!rustdem_parse_name(&st, &sv)) { - if (st.rds_error == 0) - st.rds_error = EINVAL; - goto fail; - } - - if (sv_remaining(&sv) > 0) { - DEMDEBUG("ERROR: unexpected trailing characters after " - "terminating 'E': '%.*s'", SV_PRINT(&sv)); - st.rds_error = EINVAL; - goto fail; - } - - char *res = xstrdup(ops, custr_cstr(st.rds_demangled)); - if (res == NULL) { - st.rds_error = errno; - goto fail; - } - - custr_free(st.rds_demangled); - DEMDEBUG("result = '%s'", res); - return (res); - -fail: - custr_free(st.rds_demangled); - errno = st.rds_error; - return (NULL); + rust_state_t *st = cua->cua_arg; + return (zalloc(st->rs_ops, len)); } -static boolean_t -rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp) +static void +rust_cufree(custr_alloc_t *cua, void *p, size_t len) { - strview_t pfx; + rust_state_t *st = cua->cua_arg; + xfree(st->rs_ops, p, len); +} - sv_init_sv(&pfx, svp); +static const custr_alloc_ops_t rust_custr_ops = { + .custr_ao_alloc = rust_cualloc, + .custr_ao_free = rust_cufree +}; - DEMDEBUG("checking for '_Z' or '__Z' in '%.*s'", SV_PRINT(&pfx)); +boolean_t +rust_appendc(rust_state_t *st, char c) +{ + custr_t *cus = st->rs_demangled; - if (st->rds_error != 0) + if (HAS_ERROR(st)) return (B_FALSE); - if (!sv_consume_if_c(&pfx, '_')) - return (B_FALSE); + if (st->rs_skip) + return (B_TRUE); - (void) sv_consume_if_c(&pfx, '_'); + switch (c) { + case '\a': + return (rust_append(st, "\\a")); + case '\b': + return (rust_append(st, "\\b")); + case '\f': + return (rust_append(st, "\\f")); + case '\n': + return (rust_append(st, "\\n")); + case '\r': + return (rust_append(st, "\\r")); + case '\t': + return (rust_append(st, "\\t")); + case '\v': + return (rust_append(st, "\\v")); + case '\\': + return (rust_append(st, "\\\\")); + } + + if (c < ' ') + return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c)); - if (!sv_consume_if_c(&pfx, 'Z')) + if (custr_appendc(cus, c) != 0) { + SET_ERROR(st); return (B_FALSE); + } - /* Update svp with new position */ - sv_init_sv(svp, &pfx); return (B_TRUE); } -static boolean_t -rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first) +/* + * Append a UTF-8 code point. If we're not in a UTF-8 locale, this gets + * appended as '\u<hex codepoint>' otherwise the character itself is + * added. + */ +boolean_t +rust_append_utf8_c(rust_state_t *st, uint32_t val) { - strview_t sv; - strview_t name; - uint64_t len; - size_t rem; - boolean_t last = B_FALSE; + custr_t *cus = st->rs_demangled; + uint_t n = 0; + uint8_t c[4] = { 0 }; - if (st->rds_error != 0 || sv_remaining(svp) == 0) + if (HAS_ERROR(st)) return (B_FALSE); - sv_init_sv(&sv, svp); - - if (!rustdem_parse_num(st, &sv, &len)) { - DEMDEBUG("ERROR: no leading length"); - st->rds_error = EINVAL; - return (B_FALSE); + if (!st->rs_isutf8) { + if (val < 0x80) + return (rust_appendc(st, (char)val)); + if (val < 0x10000) + return (rust_append_printf(st, "\\u%04" PRIx32, val)); + return (rust_append_printf(st, "\\U%08" PRIx32, val)); } - rem = sv_remaining(&sv); - - if (rem < len) { - st->rds_error = EINVAL; + if (val < 0x80) { + return (rust_appendc(st, (char)val)); + } else if (val < 0x800) { + c[0] = 0xc0 | ((val >> 6) & 0x1f); + c[1] = 0x80 | (val & 0x3f); + n = 2; + } else if (val < 0x10000) { + c[0] = 0xe0 | ((val >> 12) & 0x0f); + c[1] = 0x80 | ((val >> 6) & 0x3f); + c[2] = 0x80 | (val & 0x3f); + n = 3; + } else if (val < 0x110000) { + c[0] = 0xf0 | ((val >> 18) & 0x7); + c[1] = 0x80 | ((val >> 12) & 0x3f); + c[2] = 0x80 | ((val >> 6) & 0x3f); + c[3] = 0x80 | (val & 0x3f); + n = 4; + } else { + DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__, + val); return (B_FALSE); } - /* Is this the last segment before the terminating E? */ - if (rem == len + 1) { - VERIFY3U(sv_peek(&sv, -1), ==, 'E'); - last = B_TRUE; + for (uint_t i = 0; i < n; i++) { + if (custr_appendc(cus, c[i]) != 0) { + SET_ERROR(st); + return (B_FALSE); + } } - if (!first && !rustdem_add_sep(st)) - return (B_FALSE); - - /* Reduce length of seg to the length we parsed */ - (void) sv_init_sv_range(&name, &sv, len); - - DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name)); - - /* - * A rust hash starts with 'h', and is the last component of a name - * before the terminating 'E'. It is however not always present - * in every mangled symbol, and a last segment that starts with 'h' - * could be confused for it, so failing to parse it just means - * we don't have a trailing hash. - */ - if (sv_peek(&name, 0) == 'h' && last) { - if (rustdem_parse_hash(st, &name)) - goto done; - - /* - * However any error other than 'not a hash' (e.g. ENOMEM) - * means we should fail. - */ - if (st->rds_error != 0) - goto done; - } + return (B_TRUE); +} - while (sv_remaining(&name) > 0) { - switch (sv_peek(&name, 0)) { - case '$': - if (rustdem_parse_special(st, &name)) - continue; - break; - case '_': - if (sv_peek(&name, 1) == '$') { - /* - * Only consume/ignore '_'. Leave - * $ for next round. - */ - sv_consume_n(&name, 1); - continue; - } - break; - case '.': - /* Convert '..' to '::' */ - if (sv_peek(&name, 1) != '.') - break; +boolean_t +rust_append(rust_state_t *st, const char *s) +{ + custr_t *cus = st->rs_demangled; - if (!rustdem_add_sep(st)) - return (B_FALSE); + if (HAS_ERROR(st)) + return (B_FALSE); - sv_consume_n(&name, 2); - continue; - default: - break; - } + if (st->rs_skip) + return (B_TRUE); - if (custr_appendc(st->rds_demangled, - sv_consume_c(&name)) != 0) { - st->rds_error = ENOMEM; - return (B_FALSE); - } + if (custr_append(cus, s) != 0) { + SET_ERROR(st); + return (B_FALSE); } -done: - sv_consume_n(&sv, len); - VERIFY3P(svp->sv_first, <=, sv.sv_first); - DEMDEBUG("%s: consumed '%.*s'", __func__, - (int)(sv.sv_first - svp->sv_first), svp->sv_first); - sv_init_sv(svp, &sv); return (B_TRUE); } -/* - * Parse N (<num><name>{num})+[<num>h<hex digits>]E - */ -static boolean_t -rustdem_parse_name(rustdem_state_t *st, strview_t *svp) +boolean_t +rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv) { - strview_t name; - boolean_t first = B_TRUE; - - if (st->rds_error != 0) + if (HAS_ERROR(st)) return (B_FALSE); - sv_init_sv(&name, svp); - - DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name)); + if (st->rs_skip) { + sv_consume_n(sv, (size_t)n); + return (B_TRUE); + } - if (sv_remaining(&name) == 0) { - DEMDEBUG("%s: empty name", __func__); + if (n > sv_remaining(sv)) { + DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > " + "remaining bytes (%zu)", __func__, n, sv_remaining(sv)); + st->rs_error = ERANGE; return (B_FALSE); } - if (!sv_consume_if_c(&name, 'N')) { - DEMDEBUG("%s: does not start with 'N'", __func__); + if (n > INT_MAX) { + DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n); + st->rs_error = ERANGE; return (B_FALSE); } - while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') { - if (!rustdem_parse_name_segment(st, &name, first)) - return (B_FALSE); - first = B_FALSE; + if (custr_append_printf(st->rs_demangled, "%.*s", + (int)n, sv->sv_first) != 0) { + SET_ERROR(st); + return (B_FALSE); } - VERIFY(sv_consume_if_c(&name, 'E')); - - VERIFY3P(svp->sv_first, <=, name.sv_first); - DEMDEBUG("%s: consumed '%.*s'", __func__, - (int)(name.sv_first - svp->sv_first), svp->sv_first); + sv_consume_n(sv, (size_t)n); - sv_init_sv(svp, &name); return (B_TRUE); } -static boolean_t -rustdem_parse_hash(rustdem_state_t *st, strview_t *svp) +boolean_t +rust_append_printf(rust_state_t *st, const char *fmt, ...) { - strview_t sv; - - sv_init_sv(&sv, svp); + va_list ap; + int ret; - VERIFY(sv_consume_if_c(&sv, 'h')); - if (!rustdem_append_c(st, 'h')) + if (HAS_ERROR(st)) return (B_FALSE); - while (sv_remaining(&sv) > 0) { - char c = sv_consume_c(&sv); + if (st->rs_skip) + return (B_TRUE); - switch (c) { - /* - * The upper-case hex digits (A-F) are excluded as valid - * hash values for several reasons: - * - * 1. It would result in two different possible names for - * the same function, leading to ambiguity in linking (among - * other things). - * - * 2. It would cause potential ambiguity in parsing -- is a - * trailing 'E' part of the hash, or the terminating character - * in the mangled name? - * - * 3. No examples were able to be found in the wild where - * uppercase digits are used, and other rust demanglers all - * seem to assume the hash must contain lower-case hex digits. - */ - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - case '8': case '9': case 'a': case 'b': - case 'c': case 'd': case 'e': case 'f': - if (!rustdem_append_c(st, c)) - return (B_FALSE); - break; - default: - return (B_FALSE); - } - } + va_start(ap, fmt); + ret = custr_append_vprintf(st->rs_demangled, fmt, ap); + va_end(ap); - sv_init_sv(svp, &sv); - return (B_TRUE); + if (ret == 0) + return (B_TRUE); + SET_ERROR(st); + return (B_FALSE); } -/* - * We have to pick an arbitrary limit here; 999,999,999 fits comfortably - * within an int32_t, so let's go with that, as it seems unlikely we'd - * ever see a larger value in context. - */ -#define MAX_DIGITS 9 - -static boolean_t -rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp, +boolean_t +rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv, uint64_t *restrict valp) { - strview_t snum; uint64_t v = 0; - size_t ndigits = 0; char c; - if (st->rds_error != 0) + if (HAS_ERROR(st) || sv_remaining(sv) == 0) return (B_FALSE); - sv_init_sv(&snum, svp); - - DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum)); - - c = sv_peek(&snum, 0); - if (!ISDIGIT(c)) { - DEMDEBUG("%s: ERROR no digits in str\n", __func__); - st->rds_error = EINVAL; - return (B_FALSE); - } + c = sv_peek(sv, 0); /* - * Since there is currently no official specification on rust name - * mangling, only that it has been stated that rust follows what - * C++ mangling does. In the Itanium C++ ABI (what practically - * every non-Windows C++ implementation uses these days), it - * explicitly disallows leading 0s in numeric values (except for - * substition and template indexes, which aren't relevant here). - * We enforce the same restriction -- if a rust implementation allowed - * leading zeros in numbers (basically segment lengths) it'd - * cause all sorts of ambiguity problems with names that likely lead - * to much bigger problems with linking and such, so this seems - * reasonable. + * Since the legacy rust encoding states that it follows the + * Itanium C++ mangling format, we match the behavior of the + * Itanium C++ ABI in disallowing leading 0s in decimal numbers. + * + * For Rust encoding v0, RFC2603 currently has omitted the + * actual definition of <decimal-number>. However examination of + * other implementations written in tandem with the mangling + * implementation suggest that <decimal-number> can be expressed + * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and + * terminates the token, while any other leading digit allows + * parsing to continue until a non-digit is encountered, the + * end of the string is encountered, or overflow is encountered. */ if (c == '0') { - DEMDEBUG("%s: ERROR number starts with leading 0\n", __func__); - st->rds_error = EINVAL; + if (st->rs_encver == RUSTENC_V0) { + sv_consume_n(sv, 1); + *valp = 0; + return (B_TRUE); + } + + DEMDEBUG("%s: ERROR number starts with leading 0\n", + __func__); + st->rs_error = EINVAL; + return (B_FALSE); + } else if (!ISDIGIT(c)) { return (B_FALSE); } - while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) { - c = sv_consume_c(&snum); + while (sv_remaining(sv) > 0) { + uint64_t cval; + c = sv_peek(sv, 0); if (!ISDIGIT(c)) break; + sv_consume_n(sv, 1); - v *= 10; - v += c - '0'; - ndigits++; - } + cval = c - '0'; - if (ndigits > MAX_DIGITS) { - DEMDEBUG("%s: value %llu is too large\n", __func__, v); - st->rds_error = ERANGE; - return (B_FALSE); - } + if (mul_overflow(v, 10, &v)) { + DEMDEBUG("%s: multiplication overflowed\n", __func__); + st->rs_error = EOVERFLOW; + return (B_FALSE); + } - DEMDEBUG("%s: num=%llu", __func__, v); + if (add_overflow(v, cval, &v)) { + DEMDEBUG("%s: addition overflowed\n", __func__); + st->rs_error = EOVERFLOW; + return (B_FALSE); + } + } *valp = v; - sv_consume_n(svp, ndigits); return (B_TRUE); } static boolean_t -rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp) +rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv) { - if (st->rds_error != 0) + DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv)); + + if (HAS_ERROR(st)) return (B_FALSE); - if (sv_peek(svp, 0) != '$') + if (!sv_consume_if_c(sv, '_')) return (B_FALSE); - for (size_t i = 0; i < rust_charmap_sz; i++) { - if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) { - if (!rustdem_append_c(st, rust_charmap[i].ruc_ch)) - return (B_FALSE); - return (B_TRUE); + /* + * MacOS prepends an additional '_' -- allow that in case + * we're given symbols from a MacOS object. + */ + (void) sv_consume_if_c(sv, '_'); + + if (sv_consume_if_c(sv, 'Z')) { + /* + * Legacy names must start with '[_]_Z' + */ + st->rs_encver = RUSTENC_LEGACY; + DEMDEBUG("name is encoded using the rust legacy mangling " + "scheme"); + } else if (sv_consume_if_c(sv, 'R')) { + uint64_t ver = 0; + + /* + * The non-legacy encoding is versioned. After the initial + * 'R' is the version. This isn't spelled out clearly in the + * RFC, but many numeric values encoded take an approach of + * a value of 0 is omitted, and any digits represent the + * value - 1. In other words, in this case, no digits means + * version 0, '_R0...' would be version 1, 'R1...' would + * be version 2, etc. Currently only version 0 is defined, + * but we try to provide a (hopefully) useful message + * when debugging, even if we can't use the version value + * beyond that. + */ + if (rust_parse_base10(st, sv, &ver)) { + DEMDEBUG("%s: ERROR: an unsupported encoding version " + "(%" PRIu64 ") was encountered", ver + 1); + st->rs_error = ENOTSUP; + return (B_FALSE); } + + st->rs_encver = RUSTENC_V0; + DEMDEBUG("name is encoded using the v0 mangling scheme"); + } else { + DEMDEBUG("did not find a valid rust prefix"); + return (B_FALSE); } - return (B_FALSE); + + sv_init_sv(&st->rs_orig, sv); + return (B_TRUE); +} + +static void +rust_fini_state(rust_state_t *st) +{ + custr_free(st->rs_demangled); + custr_alloc_fini(&st->rs_cualloc); } static boolean_t -rustdem_add_sep(rustdem_state_t *st) +rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops) { - if (st->rds_error != 0) + const char *codeset; + + (void) memset(st, 0, sizeof (*st)); + + st->rs_str = s; + st->rs_ops = ops; + + st->rs_cualloc.cua_version = CUSTR_VERSION; + if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0) return (B_FALSE); + st->rs_cualloc.cua_arg = st; - if (!rustdem_append_c(st, ':') || - !rustdem_append_c(st, ':')) + if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) { + custr_alloc_fini(&st->rs_cualloc); return (B_FALSE); + } + + codeset = nl_langinfo(CODESET); + if (codeset != NULL && strcmp(codeset, "UTF-8") == 0) + st->rs_isutf8 = B_TRUE; return (B_TRUE); } -static boolean_t -rustdem_append_c(rustdem_state_t *st, char c) +char * +rust_demangle(const char *s, size_t len, sysdem_ops_t *ops) { - if (st->rds_error != 0) - return (B_FALSE); + rust_state_t st; + strview_t sv = { 0 }; + boolean_t success = B_FALSE; + int e = 0; + char *out = NULL; - if (custr_appendc(st->rds_demangled, c) == 0) - return (B_TRUE); + if (!rust_init_state(&st, s, ops)) + return (NULL); - st->rds_error = errno; - return (B_FALSE); -} + sv_init_str(&sv, s, s + len); -static boolean_t -rustdem_all_ascii(const strview_t *svp) -{ - strview_t p; + if (!rust_parse_prefix(&st, &sv)) { + if (st.rs_error == 0) + st.rs_error = EINVAL; + goto done; + } - sv_init_sv(&p, svp); + DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv)); - while (sv_remaining(&p) > 0) { - char c = sv_consume_c(&p); + switch (st.rs_encver) { + case RUSTENC_LEGACY: + success = rust_demangle_legacy(&st, &sv); + break; + case RUSTENC_V0: + success = rust_demangle_v0(&st, &sv); + break; + } - /* - * #including <sys/ctype.h> conflicts with <ctype.h>. Since - * we want the C locale macros (ISDIGIT, etc), it also means - * we can't use isascii(3C). - */ - if ((c & 0x80) != 0) { - DEMDEBUG("%s: found non-ascii character 0x%02hhx at " - "offset %tu", __func__, c, - (ptrdiff_t)(p.sv_first - svp->sv_first)); - return (B_FALSE); - } +done: + if (success) { + out = xstrdup(ops, custr_cstr(st.rs_demangled)); + if (out == NULL) + SET_ERROR(&st); + } else { + DEMDEBUG("%s: failed, str='%s'", __func__, + custr_cstr(st.rs_demangled)); + + st.rs_error = EINVAL; } - return (B_TRUE); -} -static void * -rustdem_alloc(custr_alloc_t *cao, size_t len) -{ - rustdem_state_t *st = cao->cua_arg; - return (zalloc(st->rds_ops, len)); -} + e = st.rs_error; + rust_fini_state(&st); + if (e > 0) + errno = e; -static void -rustdem_free(custr_alloc_t *cao, void *p, size_t len) -{ - rustdem_state_t *st = cao->cua_arg; - xfree(st->rds_ops, p, len); + return (out); } |