summaryrefslogtreecommitdiff
path: root/usr/src/lib/libdemangle/common/rust.c
diff options
context:
space:
mode:
authorDan McDonald <danmcd@joyent.com>2021-05-14 12:00:48 -0400
committerDan McDonald <danmcd@joyent.com>2021-05-14 12:00:48 -0400
commit1a25930b922d3fede4a252f4f2e0ecb8de2656cb (patch)
treea2ff441f85489691a94f13a644cb7cfe10b81f9e /usr/src/lib/libdemangle/common/rust.c
parent3aa01401155d92a38a0d4e107043c130432e4a43 (diff)
parent1cd083931cfd3fb8617c1178f62bce417cfa6af2 (diff)
downloadillumos-joyent-1a25930b922d3fede4a252f4f2e0ecb8de2656cb.tar.gz
[illumos-gate merge]
commit 1cd083931cfd3fb8617c1178f62bce417cfa6af2 13780 Add support for rust v0 mangling format commit 4fe48c6ec9f06cbcce19c4cf97f662b64efde582 13798 loader: Update the EFI timer to be called once a second commit 9e3493cb8a0cfe96c9aef9b7da42c6c9b5c24b43 13374 Port L2ARC Improvements from OpenZFS
Diffstat (limited to 'usr/src/lib/libdemangle/common/rust.c')
-rw-r--r--usr/src/lib/libdemangle/common/rust.c727
1 files changed, 290 insertions, 437 deletions
diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c
index 9b145ca841..ce1fca4859 100644
--- a/usr/src/lib/libdemangle/common/rust.c
+++ b/usr/src/lib/libdemangle/common/rust.c
@@ -10,564 +10,417 @@
*/
/*
- * Copyright 2019, Joyent, Inc.
* Copyright 2021 Jason King
+ * Copyright 2019 Joyent, Inc.
*/
#include <errno.h>
+#include <langinfo.h>
#include <libcustr.h>
#include <limits.h>
+#include <stdarg.h>
#include <string.h>
-#include <sys/ctype.h> /* We want the C locale ISXXX() versions */
-#include <sys/debug.h>
-#include <stdio.h>
-#include <sys/sysmacros.h>
-#include "strview.h"
#include "demangle_int.h"
+#include "rust.h"
-/*
- * Unfortunately, there is currently no official specification for the rust
- * name mangling. This is an attempt to document the understanding of the
- * mangling used here. It is based off examination of
- * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/
- *
- * A mangled rust name is:
- * <prefix> <name>
- *
- * <prefix> ::= _Z
- * __Z
- *
- * <name> ::= N <name-segment>+ [<hash>] E
- *
- * <name-segment> ::= <len> <name-chars>{len}
- *
- * <len> ::= [1-9][0-9]+
- *
- * <name-chars> ::= <[A-Za-z]> <[A-Za-z0-9]>*
- * <separator>
- * <special>
- *
- * <separator> ::= '..' # '::'
- *
- * <special> ::= $SP$ # ' '
- * $BP$ # '*'
- * $RF$ # '&'
- * $LT$ # '<'
- * $GT$ # '>'
- * $LP$ # '('
- * $RP$ # ')'
- * $C$ # ','
- * $u7e$ # '~'
- * $u20$ # ' '
- * $u27$ # '\''
- * $u3d$ # '='
- * $u5b$ # '['
- * $u5d$ # ']'
- * $u7b$ # '{'
- * $u7d$ # '}'
- * $u3b$ # ';'
- * $u2b$ # '+'
- * $u22$ # '"'
- *
- * <hash> := <len> h <hex-digits>+
- *
- * <hex-digits> := <[0-9a-f]>
- */
-
-typedef struct rustdem_state {
- const char *rds_str;
- custr_t *rds_demangled;
- sysdem_ops_t *rds_ops;
- int rds_error;
-} rustdem_state_t;
-
-static const struct rust_charmap {
- const char *ruc_seq;
- char ruc_ch;
-} rust_charmap[] = {
- { "$SP$", '@' },
- { "$BP$", '*' },
- { "$RF$", '&' },
- { "$LT$", '<' },
- { "$GT$", '>' },
- { "$LP$", '(' },
- { "$RP$", ')' },
- { "$C$", ',' },
- { "$u7e$", '~' },
- { "$u20$", ' ' },
- { "$u27$", '\'' },
- { "$u3d$", '=' },
- { "$u5b$", '[' },
- { "$u5d$", ']' },
- { "$u7b$", '{' },
- { "$u7d$", '}' },
- { "$u3b$", ';' },
- { "$u2b$", '+' },
- { "$u22$", '"' }
-};
-static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap);
-
-static void *rustdem_alloc(custr_alloc_t *, size_t);
-static void rustdem_free(custr_alloc_t *, void *, size_t);
-
-static boolean_t rustdem_append_c(rustdem_state_t *, char);
-static boolean_t rustdem_all_ascii(const strview_t *);
-
-static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *);
-static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_add_sep(rustdem_state_t *);
-
-char *
-rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops)
+static void *
+rust_cualloc(custr_alloc_t *cua, size_t len)
{
- rustdem_state_t st = {
- .rds_str = s,
- .rds_ops = ops,
- };
- custr_alloc_ops_t custr_ops = {
- .custr_ao_alloc = rustdem_alloc,
- .custr_ao_free = rustdem_free
- };
- custr_alloc_t custr_alloc = {
- .cua_version = CUSTR_VERSION
- };
- strview_t sv;
- int ret;
-
- if (custr_alloc_init(&custr_alloc, &custr_ops) != 0)
- return (NULL);
- custr_alloc.cua_arg = &st;
-
- sv_init_str(&sv, s, s + slen);
-
- if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') {
- DEMDEBUG("ERROR: string is either too small or does not end "
- "with 'E'");
- errno = EINVAL;
- return (NULL);
- }
-
- if (!rustdem_parse_prefix(&st, &sv)) {
- DEMDEBUG("ERROR: could not parse prefix");
- errno = EINVAL;
- return (NULL);
- }
- DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv));
-
- if (!rustdem_all_ascii(&sv)) {
- /* rustdem_all_ascii() provides debug output */
- errno = EINVAL;
- return (NULL);
- }
-
- if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0)
- return (NULL);
-
- if (!rustdem_parse_name(&st, &sv)) {
- if (st.rds_error == 0)
- st.rds_error = EINVAL;
- goto fail;
- }
-
- if (sv_remaining(&sv) > 0) {
- DEMDEBUG("ERROR: unexpected trailing characters after "
- "terminating 'E': '%.*s'", SV_PRINT(&sv));
- st.rds_error = EINVAL;
- goto fail;
- }
-
- char *res = xstrdup(ops, custr_cstr(st.rds_demangled));
- if (res == NULL) {
- st.rds_error = errno;
- goto fail;
- }
-
- custr_free(st.rds_demangled);
- DEMDEBUG("result = '%s'", res);
- return (res);
-
-fail:
- custr_free(st.rds_demangled);
- errno = st.rds_error;
- return (NULL);
+ rust_state_t *st = cua->cua_arg;
+ return (zalloc(st->rs_ops, len));
}
-static boolean_t
-rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp)
+static void
+rust_cufree(custr_alloc_t *cua, void *p, size_t len)
{
- strview_t pfx;
+ rust_state_t *st = cua->cua_arg;
+ xfree(st->rs_ops, p, len);
+}
- sv_init_sv(&pfx, svp);
+static const custr_alloc_ops_t rust_custr_ops = {
+ .custr_ao_alloc = rust_cualloc,
+ .custr_ao_free = rust_cufree
+};
- DEMDEBUG("checking for '_Z' or '__Z' in '%.*s'", SV_PRINT(&pfx));
+boolean_t
+rust_appendc(rust_state_t *st, char c)
+{
+ custr_t *cus = st->rs_demangled;
- if (st->rds_error != 0)
+ if (HAS_ERROR(st))
return (B_FALSE);
- if (!sv_consume_if_c(&pfx, '_'))
- return (B_FALSE);
+ if (st->rs_skip)
+ return (B_TRUE);
- (void) sv_consume_if_c(&pfx, '_');
+ switch (c) {
+ case '\a':
+ return (rust_append(st, "\\a"));
+ case '\b':
+ return (rust_append(st, "\\b"));
+ case '\f':
+ return (rust_append(st, "\\f"));
+ case '\n':
+ return (rust_append(st, "\\n"));
+ case '\r':
+ return (rust_append(st, "\\r"));
+ case '\t':
+ return (rust_append(st, "\\t"));
+ case '\v':
+ return (rust_append(st, "\\v"));
+ case '\\':
+ return (rust_append(st, "\\\\"));
+ }
+
+ if (c < ' ')
+ return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c));
- if (!sv_consume_if_c(&pfx, 'Z'))
+ if (custr_appendc(cus, c) != 0) {
+ SET_ERROR(st);
return (B_FALSE);
+ }
- /* Update svp with new position */
- sv_init_sv(svp, &pfx);
return (B_TRUE);
}
-static boolean_t
-rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first)
+/*
+ * Append a Unicode code point. If we're not in a UTF-8 locale, non-ASCII
+ * values are appended as '\u<hex codepoint>' (or '\U<hex codepoint>' above
+ * 0xffff); otherwise the UTF-8 encoding of the character is added.
+ */
+boolean_t
+rust_append_utf8_c(rust_state_t *st, uint32_t val)
{
- strview_t sv;
- strview_t name;
- uint64_t len;
- size_t rem;
- boolean_t last = B_FALSE;
+ custr_t *cus = st->rs_demangled;
+ uint_t n = 0;
+ uint8_t c[4] = { 0 };
- if (st->rds_error != 0 || sv_remaining(svp) == 0)
+ if (HAS_ERROR(st))
return (B_FALSE);
- sv_init_sv(&sv, svp);
-
- if (!rustdem_parse_num(st, &sv, &len)) {
- DEMDEBUG("ERROR: no leading length");
- st->rds_error = EINVAL;
- return (B_FALSE);
+ if (!st->rs_isutf8) {
+ if (val < 0x80)
+ return (rust_appendc(st, (char)val));
+ if (val < 0x10000)
+ return (rust_append_printf(st, "\\u%04" PRIx32, val));
+ return (rust_append_printf(st, "\\U%08" PRIx32, val));
}
- rem = sv_remaining(&sv);
-
- if (rem < len) {
- st->rds_error = EINVAL;
+ if (val < 0x80) {
+ return (rust_appendc(st, (char)val));
+ } else if (val < 0x800) {
+ c[0] = 0xc0 | ((val >> 6) & 0x1f);
+ c[1] = 0x80 | (val & 0x3f);
+ n = 2;
+ } else if (val < 0x10000) {
+ c[0] = 0xe0 | ((val >> 12) & 0x0f);
+ c[1] = 0x80 | ((val >> 6) & 0x3f);
+ c[2] = 0x80 | (val & 0x3f);
+ n = 3;
+ } else if (val < 0x110000) {
+ c[0] = 0xf0 | ((val >> 18) & 0x7);
+ c[1] = 0x80 | ((val >> 12) & 0x3f);
+ c[2] = 0x80 | ((val >> 6) & 0x3f);
+ c[3] = 0x80 | (val & 0x3f);
+ n = 4;
+ } else {
+ DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__,
+ val);
return (B_FALSE);
}
- /* Is this the last segment before the terminating E? */
- if (rem == len + 1) {
- VERIFY3U(sv_peek(&sv, -1), ==, 'E');
- last = B_TRUE;
+ for (uint_t i = 0; i < n; i++) {
+ if (custr_appendc(cus, c[i]) != 0) {
+ SET_ERROR(st);
+ return (B_FALSE);
+ }
}
- if (!first && !rustdem_add_sep(st))
- return (B_FALSE);
-
- /* Reduce length of seg to the length we parsed */
- (void) sv_init_sv_range(&name, &sv, len);
-
- DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name));
-
- /*
- * A rust hash starts with 'h', and is the last component of a name
- * before the terminating 'E'. It is however not always present
- * in every mangled symbol, and a last segment that starts with 'h'
- * could be confused for it, so failing to parse it just means
- * we don't have a trailing hash.
- */
- if (sv_peek(&name, 0) == 'h' && last) {
- if (rustdem_parse_hash(st, &name))
- goto done;
-
- /*
- * However any error other than 'not a hash' (e.g. ENOMEM)
- * means we should fail.
- */
- if (st->rds_error != 0)
- goto done;
- }
+ return (B_TRUE);
+}
- while (sv_remaining(&name) > 0) {
- switch (sv_peek(&name, 0)) {
- case '$':
- if (rustdem_parse_special(st, &name))
- continue;
- break;
- case '_':
- if (sv_peek(&name, 1) == '$') {
- /*
- * Only consume/ignore '_'. Leave
- * $ for next round.
- */
- sv_consume_n(&name, 1);
- continue;
- }
- break;
- case '.':
- /* Convert '..' to '::' */
- if (sv_peek(&name, 1) != '.')
- break;
+boolean_t
+rust_append(rust_state_t *st, const char *s)
+{
+ custr_t *cus = st->rs_demangled;
- if (!rustdem_add_sep(st))
- return (B_FALSE);
+ if (HAS_ERROR(st))
+ return (B_FALSE);
- sv_consume_n(&name, 2);
- continue;
- default:
- break;
- }
+ if (st->rs_skip)
+ return (B_TRUE);
- if (custr_appendc(st->rds_demangled,
- sv_consume_c(&name)) != 0) {
- st->rds_error = ENOMEM;
- return (B_FALSE);
- }
+ if (custr_append(cus, s) != 0) {
+ SET_ERROR(st);
+ return (B_FALSE);
}
-done:
- sv_consume_n(&sv, len);
- VERIFY3P(svp->sv_first, <=, sv.sv_first);
- DEMDEBUG("%s: consumed '%.*s'", __func__,
- (int)(sv.sv_first - svp->sv_first), svp->sv_first);
- sv_init_sv(svp, &sv);
return (B_TRUE);
}
-/*
- * Parse N (<num><name>{num})+[<num>h<hex digits>]E
- */
-static boolean_t
-rustdem_parse_name(rustdem_state_t *st, strview_t *svp)
+boolean_t
+rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv)
{
- strview_t name;
- boolean_t first = B_TRUE;
-
- if (st->rds_error != 0)
+ if (HAS_ERROR(st))
return (B_FALSE);
- sv_init_sv(&name, svp);
-
- DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name));
+ if (st->rs_skip) {
+ sv_consume_n(sv, (size_t)n);
+ return (B_TRUE);
+ }
- if (sv_remaining(&name) == 0) {
- DEMDEBUG("%s: empty name", __func__);
+ if (n > sv_remaining(sv)) {
+ DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > "
+ "remaining bytes (%zu)", __func__, n, sv_remaining(sv));
+ st->rs_error = ERANGE;
return (B_FALSE);
}
- if (!sv_consume_if_c(&name, 'N')) {
- DEMDEBUG("%s: does not start with 'N'", __func__);
+ if (n > INT_MAX) {
+ DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n);
+ st->rs_error = ERANGE;
return (B_FALSE);
}
- while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') {
- if (!rustdem_parse_name_segment(st, &name, first))
- return (B_FALSE);
- first = B_FALSE;
+ if (custr_append_printf(st->rs_demangled, "%.*s",
+ (int)n, sv->sv_first) != 0) {
+ SET_ERROR(st);
+ return (B_FALSE);
}
- VERIFY(sv_consume_if_c(&name, 'E'));
-
- VERIFY3P(svp->sv_first, <=, name.sv_first);
- DEMDEBUG("%s: consumed '%.*s'", __func__,
- (int)(name.sv_first - svp->sv_first), svp->sv_first);
+ sv_consume_n(sv, (size_t)n);
- sv_init_sv(svp, &name);
return (B_TRUE);
}
-static boolean_t
-rustdem_parse_hash(rustdem_state_t *st, strview_t *svp)
+boolean_t
+rust_append_printf(rust_state_t *st, const char *fmt, ...)
{
- strview_t sv;
-
- sv_init_sv(&sv, svp);
+ va_list ap;
+ int ret;
- VERIFY(sv_consume_if_c(&sv, 'h'));
- if (!rustdem_append_c(st, 'h'))
+ if (HAS_ERROR(st))
return (B_FALSE);
- while (sv_remaining(&sv) > 0) {
- char c = sv_consume_c(&sv);
+ if (st->rs_skip)
+ return (B_TRUE);
- switch (c) {
- /*
- * The upper-case hex digits (A-F) are excluded as valid
- * hash values for several reasons:
- *
- * 1. It would result in two different possible names for
- * the same function, leading to ambiguity in linking (among
- * other things).
- *
- * 2. It would cause potential ambiguity in parsing -- is a
- * trailing 'E' part of the hash, or the terminating character
- * in the mangled name?
- *
- * 3. No examples were able to be found in the wild where
- * uppercase digits are used, and other rust demanglers all
- * seem to assume the hash must contain lower-case hex digits.
- */
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- case '8': case '9': case 'a': case 'b':
- case 'c': case 'd': case 'e': case 'f':
- if (!rustdem_append_c(st, c))
- return (B_FALSE);
- break;
- default:
- return (B_FALSE);
- }
- }
+ va_start(ap, fmt);
+ ret = custr_append_vprintf(st->rs_demangled, fmt, ap);
+ va_end(ap);
- sv_init_sv(svp, &sv);
- return (B_TRUE);
+ if (ret == 0)
+ return (B_TRUE);
+ SET_ERROR(st);
+ return (B_FALSE);
}
-/*
- * We have to pick an arbitrary limit here; 999,999,999 fits comfortably
- * within an int32_t, so let's go with that, as it seems unlikely we'd
- * ever see a larger value in context.
- */
-#define MAX_DIGITS 9
-
-static boolean_t
-rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp,
+boolean_t
+rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv,
uint64_t *restrict valp)
{
- strview_t snum;
uint64_t v = 0;
- size_t ndigits = 0;
char c;
- if (st->rds_error != 0)
+ if (HAS_ERROR(st) || sv_remaining(sv) == 0)
return (B_FALSE);
- sv_init_sv(&snum, svp);
-
- DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum));
-
- c = sv_peek(&snum, 0);
- if (!ISDIGIT(c)) {
- DEMDEBUG("%s: ERROR no digits in str\n", __func__);
- st->rds_error = EINVAL;
- return (B_FALSE);
- }
+ c = sv_peek(sv, 0);
/*
- * Since there is currently no official specification on rust name
- * mangling, only that it has been stated that rust follows what
- * C++ mangling does. In the Itanium C++ ABI (what practically
- * every non-Windows C++ implementation uses these days), it
- * explicitly disallows leading 0s in numeric values (except for
- * substition and template indexes, which aren't relevant here).
- * We enforce the same restriction -- if a rust implementation allowed
- * leading zeros in numbers (basically segment lengths) it'd
- * cause all sorts of ambiguity problems with names that likely lead
- * to much bigger problems with linking and such, so this seems
- * reasonable.
+ * Since the legacy rust encoding states that it follows the
+ * Itanium C++ mangling format, we match the behavior of the
+ * Itanium C++ ABI in disallowing leading 0s in decimal numbers.
+ *
+ * For Rust encoding v0, RFC2603 currently has omitted the
+ * actual definition of <decimal-number>. However examination of
+ * other implementations written in tandem with the mangling
+ * implementation suggests that <decimal-number> can be expressed
+ * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and
+ * terminates the token, while any other leading digit allows
+ * parsing to continue until a non-digit is encountered, the
+ * end of the string is encountered, or overflow is encountered.
*/
if (c == '0') {
- DEMDEBUG("%s: ERROR number starts with leading 0\n", __func__);
- st->rds_error = EINVAL;
+ if (st->rs_encver == RUSTENC_V0) {
+ sv_consume_n(sv, 1);
+ *valp = 0;
+ return (B_TRUE);
+ }
+
+ DEMDEBUG("%s: ERROR number starts with leading 0\n",
+ __func__);
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ } else if (!ISDIGIT(c)) {
return (B_FALSE);
}
- while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) {
- c = sv_consume_c(&snum);
+ while (sv_remaining(sv) > 0) {
+ uint64_t cval;
+ c = sv_peek(sv, 0);
if (!ISDIGIT(c))
break;
+ sv_consume_n(sv, 1);
- v *= 10;
- v += c - '0';
- ndigits++;
- }
+ cval = c - '0';
- if (ndigits > MAX_DIGITS) {
- DEMDEBUG("%s: value %llu is too large\n", __func__, v);
- st->rds_error = ERANGE;
- return (B_FALSE);
- }
+ if (mul_overflow(v, 10, &v)) {
+ DEMDEBUG("%s: multiplication overflowed\n", __func__);
+ st->rs_error = EOVERFLOW;
+ return (B_FALSE);
+ }
- DEMDEBUG("%s: num=%llu", __func__, v);
+ if (add_overflow(v, cval, &v)) {
+ DEMDEBUG("%s: addition overflowed\n", __func__);
+ st->rs_error = EOVERFLOW;
+ return (B_FALSE);
+ }
+ }
*valp = v;
- sv_consume_n(svp, ndigits);
return (B_TRUE);
}
static boolean_t
-rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp)
+rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv)
{
- if (st->rds_error != 0)
+ DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv));
+
+ if (HAS_ERROR(st))
return (B_FALSE);
- if (sv_peek(svp, 0) != '$')
+ if (!sv_consume_if_c(sv, '_'))
return (B_FALSE);
- for (size_t i = 0; i < rust_charmap_sz; i++) {
- if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) {
- if (!rustdem_append_c(st, rust_charmap[i].ruc_ch))
- return (B_FALSE);
- return (B_TRUE);
+ /*
+ * MacOS prepends an additional '_' -- allow that in case
+ * we're given symbols from a MacOS object.
+ */
+ (void) sv_consume_if_c(sv, '_');
+
+ if (sv_consume_if_c(sv, 'Z')) {
+ /*
+ * Legacy names must start with '[_]_Z'
+ */
+ st->rs_encver = RUSTENC_LEGACY;
+ DEMDEBUG("name is encoded using the rust legacy mangling "
+ "scheme");
+ } else if (sv_consume_if_c(sv, 'R')) {
+ uint64_t ver = 0;
+
+ /*
+ * The non-legacy encoding is versioned. After the initial
+ * 'R' is the version. This isn't spelled out clearly in the
+ * RFC, but many numeric values encoded take an approach of
+ * a value of 0 is omitted, and any digits represent the
+ * value - 1. In other words, in this case, no digits means
+ * version 0, '_R0...' would be version 1, '_R1...' would
+ * be version 2, etc. Currently only version 0 is defined,
+ * but we try to provide a (hopefully) useful message
+ * when debugging, even if we can't use the version value
+ * beyond that.
+ */
+ if (rust_parse_base10(st, sv, &ver)) {
+ DEMDEBUG("%s: ERROR: an unsupported encoding version "
+ "(%" PRIu64 ") was encountered", __func__, ver + 1);
+ st->rs_error = ENOTSUP;
+ return (B_FALSE);
}
+
+ st->rs_encver = RUSTENC_V0;
+ DEMDEBUG("name is encoded using the v0 mangling scheme");
+ } else {
+ DEMDEBUG("did not find a valid rust prefix");
+ return (B_FALSE);
}
- return (B_FALSE);
+
+ sv_init_sv(&st->rs_orig, sv);
+ return (B_TRUE);
+}
+
+static void
+rust_fini_state(rust_state_t *st)
+{
+ custr_free(st->rs_demangled);
+ custr_alloc_fini(&st->rs_cualloc);
}
static boolean_t
-rustdem_add_sep(rustdem_state_t *st)
+rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops)
{
- if (st->rds_error != 0)
+ const char *codeset;
+
+ (void) memset(st, 0, sizeof (*st));
+
+ st->rs_str = s;
+ st->rs_ops = ops;
+
+ st->rs_cualloc.cua_version = CUSTR_VERSION;
+ if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0)
return (B_FALSE);
+ st->rs_cualloc.cua_arg = st;
- if (!rustdem_append_c(st, ':') ||
- !rustdem_append_c(st, ':'))
+ if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) {
+ custr_alloc_fini(&st->rs_cualloc);
return (B_FALSE);
+ }
+
+ codeset = nl_langinfo(CODESET);
+ if (codeset != NULL && strcmp(codeset, "UTF-8") == 0)
+ st->rs_isutf8 = B_TRUE;
return (B_TRUE);
}
-static boolean_t
-rustdem_append_c(rustdem_state_t *st, char c)
+char *
+rust_demangle(const char *s, size_t len, sysdem_ops_t *ops)
{
- if (st->rds_error != 0)
- return (B_FALSE);
+ rust_state_t st;
+ strview_t sv = { 0 };
+ boolean_t success = B_FALSE;
+ int e = 0;
+ char *out = NULL;
- if (custr_appendc(st->rds_demangled, c) == 0)
- return (B_TRUE);
+ if (!rust_init_state(&st, s, ops))
+ return (NULL);
- st->rds_error = errno;
- return (B_FALSE);
-}
+ sv_init_str(&sv, s, s + len);
-static boolean_t
-rustdem_all_ascii(const strview_t *svp)
-{
- strview_t p;
+ if (!rust_parse_prefix(&st, &sv)) {
+ if (st.rs_error == 0)
+ st.rs_error = EINVAL;
+ goto done;
+ }
- sv_init_sv(&p, svp);
+ DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv));
- while (sv_remaining(&p) > 0) {
- char c = sv_consume_c(&p);
+ switch (st.rs_encver) {
+ case RUSTENC_LEGACY:
+ success = rust_demangle_legacy(&st, &sv);
+ break;
+ case RUSTENC_V0:
+ success = rust_demangle_v0(&st, &sv);
+ break;
+ }
- /*
- * #including <sys/ctype.h> conflicts with <ctype.h>. Since
- * we want the C locale macros (ISDIGIT, etc), it also means
- * we can't use isascii(3C).
- */
- if ((c & 0x80) != 0) {
- DEMDEBUG("%s: found non-ascii character 0x%02hhx at "
- "offset %tu", __func__, c,
- (ptrdiff_t)(p.sv_first - svp->sv_first));
- return (B_FALSE);
- }
+done:
+ if (success) {
+ out = xstrdup(ops, custr_cstr(st.rs_demangled));
+ if (out == NULL)
+ SET_ERROR(&st);
+ } else {
+ DEMDEBUG("%s: failed, str='%s'", __func__,
+ custr_cstr(st.rs_demangled));
+
+ st.rs_error = EINVAL;
}
- return (B_TRUE);
-}
-static void *
-rustdem_alloc(custr_alloc_t *cao, size_t len)
-{
- rustdem_state_t *st = cao->cua_arg;
- return (zalloc(st->rds_ops, len));
-}
+ e = st.rs_error;
+ rust_fini_state(&st);
+ if (e > 0)
+ errno = e;
-static void
-rustdem_free(custr_alloc_t *cao, void *p, size_t len)
-{
- rustdem_state_t *st = cao->cua_arg;
- xfree(st->rds_ops, p, len);
+ return (out);
}