summaryrefslogtreecommitdiff
path: root/usr/src/lib/libdemangle/common/rust-v0.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libdemangle/common/rust-v0.c')
-rw-r--r--usr/src/lib/libdemangle/common/rust-v0.c1449
1 files changed, 1449 insertions, 0 deletions
diff --git a/usr/src/lib/libdemangle/common/rust-v0.c b/usr/src/lib/libdemangle/common/rust-v0.c
new file mode 100644
index 0000000000..598d8457c9
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/rust-v0.c
@@ -0,0 +1,1449 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Jason King
+ */
+
+/* BEGIN CSTYLED */
+
+/*
+ * This implements the 'symbol_name_mangling_v2' demangling for rust as
+ * described in Rust RFC 2603 as opposed to the original (now called
+ * legacy) mangling older versions of rust used (implemented in rust.c).
+ *
+ * The specification can be viewed at:
+ * https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md
+ */
+
+/* END CSTYLED */
+
+#include <errno.h>
+#include <libcustr.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "rust.h"
+
+/*
+ * Help track the amount of additional output added to rs_demangled across
+ * a function call (to allow that portion to be output for debugging).
+ *
+ * SAVE_LEN() stores the current length of rs_demangled in _len.
+ *
+ * CSTR_END() expands to two comma separated arguments: the number of bytes
+ * appended to rs_demangled since _len was saved (as an int), followed by a
+ * pointer to the start of that appended text. It is meant to be paired with
+ * a "%.*s" conversion, as is done in the DEMDEBUG() calls in this file.
+ */
+#define SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled)
+#define CSTR_END(_st, _len) \
+ ((int)(custr_len((_st)->rs_demangled) - (_len))), \
+ custr_cstr((_st)->rs_demangled) + (_len)
+
+/*
+ * How the value of a const generic is interpreted and printed. The class
+ * is derived from the type character of the const by
+ * rustv0_classify_const_type() and consumed by rustv0_parse_const_data().
+ */
+typedef enum const_type_class {
+ CTC_INVALID = -1, /* type character is not usable for a const */
+ CTC_UNSIGNED, /* printed as a decimal number */
+ CTC_SIGNED, /* decimal number, may carry a leading '-' */
+ CTC_CHAR, /* printed as a quoted character */
+ CTC_BOOL, /* printed as "false" or "true" */
+} const_type_class_t;
+
+/*
+ * Sometimes, parsing something is optional. In this case a failure to
+ * parse is fine, however we still want to consider a fatal error as
+ * failure.
+ *
+ * _f is evaluated first. If it is false, the expression is still true
+ * as long as HAS_ERROR(_st) does not report an error.
+ */
+#define OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st))
+
+/*
+ * Forward declarations. The parsing routines below call each other
+ * recursively (e.g. parsing a path may require parsing a type, and parsing
+ * a type may require parsing a path), so they are declared up front.
+ */
+static boolean_t rustv0_valid_sym(const strview_t *);
+static const_type_class_t rustv0_classify_const_type(char);
+static boolean_t rustv0_parse_hex_num(rust_state_t *restrict,
+ strview_t *restrict, uint64_t *restrict);
+static boolean_t rustv0_parse_base62(rust_state_t *restrict,
+ strview_t *restrict, uint64_t *restrict);
+
+static boolean_t rustv0_parse_undisambiguated_identifier(
+ rust_state_t *restrict, strview_t *restrict, boolean_t);
+static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict,
+ strview_t *restrict, uint64_t *restrict);
+
+static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict,
+ boolean_t);
+static boolean_t rustv0_parse_impl_path(rust_state_t *restrict,
+ strview_t *restrict, boolean_t);
+static boolean_t rustv0_parse_nested_path(rust_state_t *restrict,
+ strview_t *restrict, boolean_t);
+static boolean_t rustv0_parse_basic_type(rust_state_t *restrict,
+ strview_t *restrict);
+static boolean_t rustv0_parse_backref(rust_state_t *restrict,
+ strview_t *restrict,
+ boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t),
+ boolean_t);
+static boolean_t rustv0_parse_lifetime(rust_state_t *restrict,
+ strview_t *restrict);
+static boolean_t rustv0_parse_const(rust_state_t *restrict,
+ strview_t *restrict, boolean_t);
+static boolean_t rustv0_parse_fnsig(rust_state_t *restrict,
+ strview_t *restrict);
+static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict,
+ strview_t *restrict);
+static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict,
+ strview_t *restrict, boolean_t);
+
+/*
+ * Entry point for demangling a v0 mangled rust name.
+ *
+ *	<symbol-name> = "_R" [<decimal-number>] <path>
+ *	    [<instantiating-crate>]
+ *
+ * Per the original notes here, rust_demangle() has already consumed the
+ * prefix and verified no [<decimal-number>] is present, so parsing starts
+ * at <path>.
+ *
+ * Returns B_TRUE if the name was demangled. On failure B_FALSE is
+ * returned; st->rs_error is set to EINVAL when the symbol contains invalid
+ * characters or has unexpected trailing data.
+ */
+boolean_t
+rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	boolean_t skip_saved;
+	boolean_t crate_ok;
+
+	/* Reject names containing characters a v0 symbol cannot have */
+	if (!rustv0_valid_sym(sv)) {
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/* The path is the only part that ends up in the output */
+	if (!rustv0_parse_path(st, sv, B_TRUE))
+		return (B_FALSE);
+
+	/* [<instantiating crate>] -- parsed, but the result is not saved */
+	SKIP_BEGIN(st, skip_saved);
+	crate_ok = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE));
+	SKIP_END(st, skip_saved);
+	if (!crate_ok)
+		return (B_FALSE);
+
+	/*
+	 * Either nothing is left, or what is left is a ".llvm." suffix.
+	 * LLVM sometimes appends a '.' followed by extra data to symbols;
+	 * anything starting with ".llvm." is discarded. This matches what
+	 * the rust native demangler crate does.
+	 */
+	if (sv_remaining(sv) == 0 || sv_consume_if(sv, ".llvm."))
+		return (!HAS_ERROR(st));
+
+	/*
+	 * Any other suffix starting with '.' is copied to the output as-is
+	 * (again matching the rust native demangler crate).
+	 */
+	if (sv_peek(sv, 0) == '.')
+		return (rust_append_sv(st, sv_remaining(sv), sv));
+
+	DEMDEBUG("%s: Unexpected trailing data at the end of the "
+	    "name: '%.*s'", __func__, SV_PRINT(sv));
+	st->rs_error = EINVAL;
+	return (B_FALSE);
+}
+
+/*
+ * Parse a (possibly empty) sequence of items that is terminated by 'E'.
+ * Each item is parsed by calling fn(st, sv, bval), and 'sep' is appended
+ * to the output between consecutive items.
+ *
+ * If countp is not NULL, the number of items parsed is added to *countp
+ * once the terminating 'E' has been consumed.
+ *
+ * Returns B_TRUE once the 'E' is consumed; B_FALSE if an item fails to
+ * parse, output cannot be appended, or the input ends before an 'E'.
+ */
+static boolean_t
+rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t),
+    const char *restrict sep, boolean_t bval, size_t *restrict countp)
+{
+	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
+
+	for (size_t nitems = 0; sv_remaining(sv) > 0; nitems++) {
+		if (sv_consume_if_c(sv, 'E')) {
+			if (countp != NULL)
+				*countp += nitems;
+			return (B_TRUE);
+		}
+
+		/* Every item after the first is preceded by the separator */
+		if (nitems != 0 && !rust_append(st, sep))
+			return (B_FALSE);
+
+		if (!fn(st, sv, bval))
+			return (B_FALSE);
+	}
+
+	/*
+	 * We ran out of characters before seeing the 'E' that must
+	 * terminate the list.
+	 */
+	return (B_FALSE);
+}
+
+/*
+ * Parse a single character naming one of the unsigned integer types and
+ * append the rust name of the type (u8, u16, ..., usize) to the output.
+ *
+ * Returns B_FALSE without consuming any input if the next character does
+ * not name an unsigned integer type.
+ */
+static boolean_t
+rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv)
+{
+	static const struct {
+		char		ut_code;
+		const char	*ut_name;
+	} uint_types[] = {
+		{ 'h', "u8" },
+		{ 't', "u16" },
+		{ 'm', "u32" },
+		{ 'y', "u64" },
+		{ 'o', "u128" },
+		{ 'j', "usize" },
+	};
+	const size_t ntypes = sizeof (uint_types) / sizeof (uint_types[0]);
+	strview_t start;
+	char code;
+
+	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	sv_init_sv(&start, sv);
+	code = sv_consume_c(sv);
+
+	for (size_t i = 0; i < ntypes; i++) {
+		if (uint_types[i].ut_code != code)
+			continue;
+
+		DEMDEBUG("%s: %c -> %s", __func__, code,
+		    uint_types[i].ut_name);
+		return (rust_append(st, uint_types[i].ut_name));
+	}
+
+	/* Not an unsigned type -- put the character back */
+	sv_init_sv(sv, &start);
+	return (B_FALSE);
+}
+
+/*
+ * Parse a single character naming a basic type and append the rust
+ * spelling of that type to the output. The unsigned integer types are
+ * handled by rustv0_parse_uint_type(); everything else is looked up here.
+ *
+ * Returns B_FALSE without consuming any input if the next character does
+ * not name a basic type.
+ */
+static boolean_t
+rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	static const struct {
+		char		bt_code;
+		const char	*bt_name;
+	} basic_types[] = {
+		{ 'a', "i8" },
+		{ 'b', "bool" },
+		{ 'c', "char" },
+		{ 'd', "f64" },
+		{ 'e', "str" },
+		{ 'f', "f32" },
+		{ 'i', "isize" },
+		{ 'l', "i32" },
+		{ 'n', "i128" },
+		{ 'p', "_" },
+		{ 's', "i16" },
+		{ 'u', "()" },
+		{ 'v', "..." },
+		{ 'x', "i64" },
+		{ 'z', "!" },
+	};
+	const size_t ntypes = sizeof (basic_types) / sizeof (basic_types[0]);
+	strview_t start;
+	char code;
+
+	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	if (rustv0_parse_uint_type(st, sv))
+		return (B_TRUE);
+
+	sv_init_sv(&start, sv);
+	code = sv_consume_c(sv);
+
+	for (size_t i = 0; i < ntypes; i++) {
+		if (basic_types[i].bt_code != code)
+			continue;
+
+		DEMDEBUG("%s: %c -> %s", __func__, code,
+		    basic_types[i].bt_name);
+		return (rust_append(st, basic_types[i].bt_name));
+	}
+
+	/* Not a basic type -- put the character back */
+	sv_init_sv(sv, &start);
+	return (B_FALSE);
+}
+
+/*
+ * Parse a type and append its rust spelling to the output. The first
+ * character selects the form:
+ *
+ *	"A" <type> <const>		[T; N]
+ *	"S" <type>			[T]
+ *	"T" {<type>} "E"		(T, U, ...)
+ *	"R" [<lifetime>] <type>		&'a T
+ *	"Q" [<lifetime>] <type>		&'a mut T
+ *	"P" <type>			*const T
+ *	"O" <type>			*mut T
+ *	"F" <fn-sig>			fn(...) -> ...
+ *	"D" <dyn-bounds> <lifetime>	dyn Trait + 'a
+ *
+ * Anything else is tried as a backref, then a basic type, then a path.
+ *
+ * The third argument is unused; it is present so this function can be
+ * passed to rustv0_parse_opt_list() and rustv0_parse_backref().
+ *
+ * Returns B_TRUE on success, B_FALSE otherwise.
+ */
+static boolean_t
+rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t dummy __unused)
+{
+	strview_t save;
+	size_t len, tuple_elem_count;
+	boolean_t ret;
+	char c;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	SAVE_LEN(st, len);
+	sv_init_sv(&save, sv);
+
+	switch (c = sv_consume_c(sv)) {
+	case 'A':
+		ret = rust_appendc(st, '[') &&
+		    rustv0_parse_type(st, sv, B_FALSE) &&
+		    rust_append(st, "; ") &&
+		    rustv0_parse_const(st, sv, B_FALSE) &&
+		    rust_appendc(st, ']');
+		break;
+	case 'S':
+		ret = rust_appendc(st, '[') &&
+		    rustv0_parse_type(st, sv, B_FALSE) &&
+		    rust_appendc(st, ']');
+		break;
+	case 'T':
+		/* A one element tuple is written with a trailing comma */
+		tuple_elem_count = 0;
+		ret = rust_appendc(st, '(') &&
+		    rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ",
+		    B_FALSE, &tuple_elem_count) &&
+		    rust_append(st, (tuple_elem_count == 1) ? ",)" : ")");
+		break;
+	case 'R':
+	case 'Q':
+		/* `&T`, `&'... T`, `&mut T` or `&'... mut T` */
+		if (!(ret = rust_appendc(st, '&')))
+			break;
+
+		/*
+		 * lifetime is optional, but we need to add a trailing
+		 * space if present (so we cannot use the OPTIONAL macro).
+		 */
+		if (rustv0_parse_lifetime(st, sv)) {
+			if (!(ret = rust_appendc(st, ' ')))
+				break;
+		} else if (HAS_ERROR(st)) {
+			/*
+			 * ret still holds the (successful) result of
+			 * appending '&'. A fatal error must be reported
+			 * as a failure, not a success.
+			 */
+			ret = B_FALSE;
+			break;
+		}
+
+		ret = rust_append(st, (c == 'Q') ? "mut " : "") &&
+		    rustv0_parse_type(st, sv, B_FALSE);
+		break;
+	case 'P':
+		ret = rust_append(st, "*const ") &&
+		    rustv0_parse_type(st, sv, B_FALSE);
+		break;
+	case 'O':
+		ret = rust_append(st, "*mut ") &&
+		    rustv0_parse_type(st, sv, B_FALSE);
+		break;
+	case 'F':
+		ret = rustv0_parse_fnsig(st, sv);
+		break;
+	case 'D':
+		ret = rust_append(st, "dyn ") &&
+		    rustv0_parse_dynbounds(st, sv);
+		if (!ret)
+			break;
+
+		/*
+		 * Rust RFC2603 shows the lifetime as required, however
+		 * it appears this is optional.
+		 *
+		 * NOTE(review): as written, the code below still fails
+		 * when no 'L' follows the bounds (rustv0_parse_lifetime()
+		 * returns B_FALSE), i.e. the lifetime is in effect
+		 * required. Confirm which behavior is intended.
+		 */
+		DEMDEBUG("%s: pre-lifetime: '%.*s'", __func__, SV_PRINT(sv));
+
+		/*
+		 * We only want to print a non-zero (non "'_")
+		 * lifetime.
+		 */
+		if (sv_consume_if(sv, "L_"))
+			break;
+
+		/*
+		 * But if there is a lifetime we want to print,
+		 * we want to prepend " + " before it.
+		 */
+		if (sv_peek(sv, 0) == 'L' &&
+		    !(ret = rust_append(st, " + ")))
+			break;
+
+		ret = rustv0_parse_lifetime(st, sv);
+		break;
+	default:
+		/* Not a type prefix; put the character back and retry */
+		sv_init_sv(sv, &save);
+
+		ret = rustv0_parse_backref(st, sv, rustv0_parse_type,
+		    B_FALSE) ||
+		    rustv0_parse_basic_type(st, sv);
+		if (ret)
+			break;
+
+		ret = rustv0_parse_path(st, sv, B_FALSE);
+		break;
+	}
+
+	DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len),
+	    ret ? "success" : "fail");
+
+	return (ret);
+}
+
+/*
+ * <path> = "C" <identifier> crate root
+ * | "M" <impl-path> <type> <T>
+ * | "X" <impl-path> <type> <path> <T as Trait> (trait impl)
+ * | "Y" <type> <path> <T as Trait> (trait definition)
+ * | "N" <ns> <path> <identifier> ...::ident (nested path)
+ * | "I" <path> {<generic-arg>} "E" ...<T, U>
+ * | <backref>
+ *
+ * in_value is passed through to nested paths. When it is set, a generic
+ * argument list ("I") is introduced with "::" before the opening '<'.
+ *
+ * Returns B_TRUE if a path was parsed and B_FALSE otherwise.
+ */
+static boolean_t
+rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv,
+ boolean_t in_value)
+{
+ strview_t save;
+ uint64_t disamb = 0;
+ size_t len;
+ boolean_t ret = B_FALSE;
+ boolean_t save_skip;
+ /* Caller's generic argument list state, restored in the "I" case */
+ boolean_t args_stay_save = st->rs_args_stay_open;
+ boolean_t args_open_save = st->rs_args_is_open;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ if (sv_remaining(sv) == 0)
+ return (B_FALSE);
+
+ SAVE_LEN(st, len);
+ sv_init_sv(&save, sv);
+
+ switch (sv_consume_c(sv)) {
+ case 'C':
+ /* The disambiguator precedes the name but is printed after it */
+ if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb)))
+ goto done;
+
+ if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
+ goto done;
+
+ if (st->rs_verbose &&
+ !rust_append_printf(st, "[%" PRIx64 "]", disamb))
+ goto done;
+ break;
+ case 'M':
+ /*
+ * The impl path is parsed between SKIP_BEGIN/SKIP_END
+ * (presumably suppressing its output, as is done for the
+ * instantiating crate in rust_demangle_v0()); only the type
+ * is emitted.
+ */
+ SKIP_BEGIN(st, save_skip);
+ if (!rustv0_parse_impl_path(st, sv, in_value)) {
+ SKIP_END(st, save_skip);
+ goto done;
+ }
+ SKIP_END(st, save_skip);
+
+ if (!rust_appendc(st, '<') ||
+ !rustv0_parse_type(st, sv, B_FALSE) ||
+ !rust_appendc(st, '>'))
+ goto done;
+ break;
+ case 'X':
+ /* Same as 'Y', but preceded by an impl path */
+ SKIP_BEGIN(st, save_skip);
+ if (!rustv0_parse_impl_path(st, sv, in_value)) {
+ SKIP_END(st, save_skip);
+ goto done;
+ }
+ SKIP_END(st, save_skip);
+ /*FALLTHRU*/
+ case 'Y':
+ if (!rust_appendc(st, '<') ||
+ !rustv0_parse_type(st, sv, B_FALSE) ||
+ !rust_append(st, " as ") ||
+ !rustv0_parse_path(st, sv, B_FALSE) ||
+ !rust_appendc(st, '>'))
+ goto done;
+ break;
+ case 'N':
+ if (!rustv0_parse_nested_path(st, sv, in_value))
+ goto done;
+ break;
+ case 'I':
+ /*
+ * Clear the open-list state while the inner path and the
+ * generic arguments are parsed; the caller's values are put
+ * back once the argument list has been parsed.
+ */
+ st->rs_args_stay_open = B_FALSE;
+ st->rs_args_is_open = B_FALSE;
+
+ if (!rustv0_parse_path(st, sv, in_value))
+ goto done;
+
+ if (in_value && !rust_append(st, "::"))
+ goto done;
+
+ if (!rust_appendc(st, '<') ||
+ !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg,
+ ", ", B_FALSE, NULL))
+ goto done;
+
+ st->rs_args_stay_open = args_stay_save;
+ st->rs_args_is_open = args_open_save;
+
+ /*
+ * If we were asked to not close our list, then don't and
+ * indicate that the list is open.
+ */
+ if (st->rs_args_stay_open) {
+ st->rs_args_stay_open = B_FALSE;
+ st->rs_args_is_open = B_TRUE;
+ } else if (!rust_appendc(st, '>')) {
+ goto done;
+ }
+ break;
+ default:
+ /*
+ * Didn't recognize the letter, so it has to be a path. Restore
+ * sv to state prior to switch and continue.
+ */
+ sv_init_sv(sv, &save);
+ if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value))
+ goto done;
+ }
+
+ ret = B_TRUE;
+
+done:
+ DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len),
+ ret ? "success" : "fail");
+
+ return (ret);
+}
+
+/*
+ * Parse an impl path: an optional disambiguator followed by a path. The
+ * value of the disambiguator is parsed but not used.
+ */
+static boolean_t
+rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t in_value)
+{
+	uint64_t ignored = 0;
+
+	if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &ignored)))
+		return (B_FALSE);
+
+	return (rustv0_parse_path(st, sv, in_value));
+}
+
+/*
+ * A bit of a hack -- when printing a nested path, we need to know
+ * if the identifier is there or not in order to correctly format
+ * the output preceding it (when present). This peeks ahead (without
+ * consuming anything from sv) and determines this.
+ *
+ * On success, B_TRUE is returned and *has_namep is set to B_FALSE if the
+ * identifier has a length of zero, or to B_TRUE otherwise. B_FALSE is
+ * returned if there is no input; if the input ends right after the
+ * punycode marker, st->rs_error is also set to EINVAL.
+ */
+static boolean_t
+rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t *has_namep)
+{
+	strview_t save;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	/* All look-ahead is done on a copy so sv itself is left untouched */
+	sv_init_sv(&save, sv);
+
+	/* For checking the length, we don't care if it's punycode or not */
+	(void) sv_consume_if_c(&save, 'u');
+
+	/*
+	 * The length must follow. This has to look at the copy: sv itself
+	 * was already checked above and has not been advanced.
+	 */
+	if (sv_remaining(&save) == 0) {
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/* A length starting with '0' means the identifier is empty */
+	*has_namep = sv_consume_if_c(&save, '0') ? B_FALSE : B_TRUE;
+	return (B_TRUE);
+}
+
+/*
+ * Parse the remainder of a nested path (the leading 'N' has already been
+ * consumed by the caller): a namespace character, the parent path, and
+ * the (optionally disambiguated) identifier.
+ *
+ * A lower case namespace is printed as "parent::ident". An upper case
+ * namespace is printed as "parent::{kind:ident#N}" where kind is
+ * "closure" for 'C', "shim" for 'S' and the namespace character itself
+ * otherwise, and N is the disambiguator. The ':' (or "::" for lower case
+ * namespaces) is omitted when the identifier is empty.
+ */
+static boolean_t
+rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t in_value)
+{
+	const char *kind;
+	uint64_t disambiguator = 0;
+	size_t len = 0;
+	boolean_t named;
+	boolean_t ok = B_FALSE;
+	char ns;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	SAVE_LEN(st, len);
+
+	ns = sv_consume_c(sv);
+
+	if (!rustv0_parse_path(st, sv, in_value) ||
+	    !OPTIONAL(st, rustv0_parse_disambiguator(st, sv,
+	    &disambiguator)) ||
+	    !rustv0_has_name(st, sv, &named))
+		goto out;
+
+	if (!ISUPPER(ns)) {
+		/* Plain "parent::ident" */
+		ok = (!named || rust_append(st, "::")) &&
+		    rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE);
+		goto out;
+	}
+
+	switch (ns) {
+	case 'C':
+		kind = "closure";
+		break;
+	case 'S':
+		kind = "shim";
+		break;
+	default:
+		/* No special name; the namespace character is printed */
+		kind = NULL;
+		break;
+	}
+
+	ok = rust_append(st, "::{") &&
+	    ((kind != NULL) ? rust_append(st, kind) : rust_appendc(st, ns)) &&
+	    (!named || rust_appendc(st, ':')) &&
+	    rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE) &&
+	    rust_append_printf(st, "#%" PRIu64 "}", disambiguator);
+
+out:
+	DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len),
+	    ok ? "success" : "fail");
+
+	return (ok);
+}
+
+/*
+ * <disambiguator> = "s" <base-62-number>
+ *
+ * *valp is set to 0 when no disambiguator is present (and B_FALSE is
+ * returned without an error being set). If the 's' is present but is not
+ * followed by a valid base 62 number, st->rs_error is set to EINVAL.
+ */
+static boolean_t
+rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv,
+    uint64_t *valp)
+{
+	uint64_t decoded = 0;
+
+	/* The shortest possible disambiguator is "s_" */
+	if (HAS_ERROR(st) || sv_remaining(sv) < 2)
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	*valp = 0;
+
+	if (!sv_consume_if_c(sv, 's'))
+		return (B_FALSE);
+
+	if (!rustv0_parse_base62(st, sv, &decoded)) {
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/*
+	 * Rust RFC 2603 details this in Appendix A, but not the main
+	 * portion of the RFC. If no disambiguator is present, the value
+	 * is 0, if the decoded value is 0, the index is 1, ...
+	 * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so one
+	 * more is all that is needed to complete the adjustment.
+	 */
+	*valp = decoded + 1;
+
+	DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp);
+	return (B_TRUE);
+}
+
+/*
+ * <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
+ *
+ * Parse an identifier and append it to the output. A leading 'u' marks
+ * the bytes as punycode, which is decoded by rustv0_puny_decode(). If
+ * repl_underscore is set, every '_' in the identifier is output as '-'
+ * (see below). An identifier of length zero is valid and outputs nothing.
+ */
+static boolean_t
+rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st,
+    strview_t *restrict sv, boolean_t repl_underscore)
+{
+	uint64_t len = 0;
+	boolean_t is_puny;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	is_puny = sv_consume_if_c(sv, 'u') ? B_TRUE : B_FALSE;
+
+	if (!rust_parse_base10(st, sv, &len))
+		return (B_FALSE);
+
+	/* An optional '_' may separate the length from the bytes */
+	(void) sv_consume_if_c(sv, '_');
+
+	if (sv_remaining(sv) < len) {
+		DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") "
+		    "> remaining bytes (%zu)", __func__, len,
+		    sv_remaining(sv));
+		return (B_FALSE);
+	}
+
+	/* Nothing to output for an empty identifier */
+	if (len == 0)
+		return (B_TRUE);
+
+	if (is_puny) {
+		strview_t encoded;
+
+		sv_init_sv_range(&encoded, sv, len);
+		if (!rustv0_puny_decode(st, &encoded, repl_underscore))
+			return (B_FALSE);
+
+		sv_consume_n(sv, len);
+		return (B_TRUE);
+	}
+
+	/*
+	 * rust identifiers do not contain '-'. However ABI identifiers
+	 * are allowed to contain them (e.g. extern "foo-bar" fn ...).
+	 * They are substituted with '_' in the mangled output. When that
+	 * substitution does not need to be reversed, the next 'len' bytes
+	 * of sv can be appended as they are.
+	 */
+	if (!repl_underscore)
+		return (rust_append_sv(st, len, sv));
+
+	/*
+	 * Otherwise copy a character at a time, reversing the substitution.
+	 * It was verified above that len does not exceed the remaining
+	 * input, so this cannot overrun.
+	 */
+	for (uint64_t left = len; left > 0; left--) {
+		char ch = sv_consume_c(sv);
+
+		if (!rust_appendc(st, (ch == '_') ? '-' : ch))
+			return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * <backref> = "B" <base-62-number>
+ *
+ * A backref refers to a position earlier in the mangled name. The item at
+ * that position is (re)parsed by calling fn(st, &target, bval), which
+ * appends its output at the current output position.
+ *
+ * Returns B_FALSE without consuming input if sv does not start with 'B'.
+ * st->rs_error is set to EINVAL if the index is not a valid base 62
+ * number, and to ERANGE if the index does not refer to a position prior
+ * to the backref itself.
+ */
+static boolean_t
+rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv,
+ boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b),
+ boolean_t bval)
+{
+ strview_t backref;
+ strview_t target;
+ uint64_t idx = 0;
+ size_t save_len;
+ size_t len;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ /* Remember where the backref token itself starts */
+ sv_init_sv(&backref, sv);
+
+ if (!sv_consume_if_c(sv, 'B'))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv));
+
+ if (!rustv0_parse_base62(st, sv, &idx)) {
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ }
+
+ /*
+ * Determine how many bytes we've consumed (up to the start of
+ * the current backref token).
+ */
+ VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first);
+ len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first);
+
+ /*
+ * The backref can only refer to an index prior to the start of
+ * the current backref token -- that is must always refer back in
+ * the string, never to the current position or beyond.
+ */
+ if (idx >= len) {
+ DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") "
+ "is out of range [0, %zu)", __func__, idx, len);
+ st->rs_error = ERANGE;
+ return (B_FALSE);
+ }
+
+ /*
+ * Create a strview_t of the original string (sans prefix) by
+ * copying from st->rs_orig. The length of the target strview_t is
+ * capped to end immediately prior to this backref token. Since we
+ * enforce that backrefs must always refer to already processed
+ * portions of the string (i.e. must always refer backwards), and the
+ * length of the strview_t is set to end prior to the start of this
+ * backref token, we guarantee processing of a backref will always
+ * terminate before it can possibly encounter this backref token
+ * and cause a loop -- either the processing terminates normally or
+ * it reaches the end of the capped strview_t.
+ */
+ sv_init_sv_range(&target, &st->rs_orig, len);
+
+ /*
+ * Consume all the input in the target strview_t up to the index
+ */
+ sv_consume_n(&target, idx);
+
+ DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__,
+ idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : "");
+
+ /*
+ * If we're skipping the output, there's no reason to bother reparsing
+ * the output -- we're not going to save it. We still setup everything
+ * so that the debug output is still emitted.
+ */
+ if (st->rs_skip)
+ return (B_TRUE);
+
+ /* save_len is only used to show the backref's output when debugging */
+ SAVE_LEN(st, save_len);
+ if (!fn(st, &target, bval))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len));
+ return (B_TRUE);
+}
+
+/*
+ * Append the name of a lifetime to the output. 'lifetime' is the decoded
+ * lifetime index from the mangled name:
+ *
+ *	0	is output as '_
+ *	n > 0	refers to a bound lifetime relative to the current binder
+ *		depth (st->rs_lt_depth) and is output as 'a ... 'z for the
+ *		first 26 lifetimes, and '_N after that.
+ *
+ * If the index is larger than the current depth, st->rs_error is set to
+ * ERANGE and B_FALSE is returned.
+ */
+static boolean_t
+rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime)
+{
+	uint64_t bound_lt;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	if (!rust_appendc(st, '\''))
+		return (B_FALSE);
+
+	if (lifetime == 0)
+		return (rust_appendc(st, '_'));
+
+	if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) {
+		DEMDEBUG("%s: ERROR: lifetime value %" PRIu64
+		    " > current depth %" PRIu64, __func__, lifetime,
+		    st->rs_lt_depth);
+		/*
+		 * Record the failure as an error. The depth itself must
+		 * be left alone -- callers save and restore it.
+		 */
+		st->rs_error = ERANGE;
+		return (B_FALSE);
+	}
+
+	/*
+	 * Use 'a, 'b, ...
+	 */
+	if (bound_lt < 26)
+		return (rust_appendc(st, (char)bound_lt + 'a'));
+
+	/*
+	 * Otherwise, use '_123, '_456, ...
+	 */
+	return (rust_append_printf(st, "_%" PRIu64, bound_lt));
+}
+
+/*
+ * Parse a lifetime ('L' followed by a base 62 number) and append its name
+ * to the output. Returns B_FALSE if sv does not start with a lifetime.
+ */
+static boolean_t
+rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	uint64_t idx = 0;
+
+	if (sv_consume_if_c(sv, 'L') && rustv0_parse_base62(st, sv, &idx))
+		return (rustv0_append_lifetime(st, idx));
+
+	return (B_FALSE);
+}
+
+/*
+ * Parse the value of a const generic (optional hex digits terminated by
+ * '_', with a leading 'n' allowed for signed types to indicate a negative
+ * value) and append it to the output formatted according to type_class:
+ * integers as decimal numbers, bools as "false"/"true" and chars as a
+ * quoted character.
+ */
+static boolean_t
+rustv0_parse_const_data(rust_state_t *restrict st,
+    const_type_class_t type_class, strview_t *restrict sv)
+{
+	const char *sign = "";
+	uint64_t val = 0;
+	size_t save_len;
+	boolean_t ok = B_FALSE;
+
+	VERIFY3S(type_class, !=, CTC_INVALID);
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+	SAVE_LEN(st, save_len);
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n'))
+		sign = "-";
+
+	/* The digits are optional, the terminating '_' is not */
+	if (!OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) ||
+	    !sv_consume_if_c(sv, '_'))
+		goto out;
+
+	if (type_class == CTC_SIGNED || type_class == CTC_UNSIGNED) {
+		ok = rust_append_printf(st, "%s%" PRIu64, sign, val);
+	} else if (type_class == CTC_BOOL) {
+		if (val > 1) {
+			DEMDEBUG("%s: invalid bool val %" PRIu64, __func__,
+			    val);
+		} else {
+			ok = rust_append_printf(st, "%s",
+			    (val == 0) ? "false" : "true");
+		}
+	} else if (type_class == CTC_CHAR) {
+		if (val > UINT32_MAX) {
+			DEMDEBUG("%s: char value %" PRIu64 " out of range",
+			    __func__, val);
+		} else {
+			ok = rust_appendc(st, '\'') &&
+			    rust_append_utf8_c(st, val) &&
+			    rust_appendc(st, '\'');
+		}
+	}
+	/* Any other class leaves ok as B_FALSE */
+
+out:
+	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len),
+	    ok ? "success" : "fail");
+
+	return (ok);
+}
+
+/*
+ * Parse a const generic argument and append its value to the output. A
+ * const is either a backref to an earlier const, the placeholder 'p'
+ * (output as '_'), or a type followed by either 'p' or the value. When
+ * st->rs_show_const_type is set, the value is followed by ": " and the
+ * name of its type.
+ *
+ * The third argument is unused; it is present so this function can be
+ * passed to rustv0_parse_backref().
+ */
+static boolean_t
+rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t dummy __unused)
+{
+	strview_t type;
+	size_t start_len;
+	const_type_class_t ctype_class;
+	char ctype;
+	boolean_t save_skip;
+	boolean_t ret;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+	SAVE_LEN(st, start_len);
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE))
+		return (B_TRUE);
+
+	if (sv_consume_if_c(sv, 'p')) {
+		ret = rust_appendc(st, '_');
+		goto done;
+	}
+
+	ctype = sv_peek(sv, 0);
+	ctype_class = rustv0_classify_const_type(ctype);
+	if (ctype_class == CTC_INVALID) {
+		DEMDEBUG("%s: const type isn't a valid const generic type",
+		    __func__);
+		return (B_FALSE);
+	}
+
+	/*
+	 * We have a bit of a potential tricky situation. Unlike formatting
+	 * the other tokens, if we want to display the type, we do so
+	 * _after_ the value, even though the type appears first.
+	 *
+	 * This is bit of a hack, but we save off the input position from
+	 * sv before we parse the type. We then parse it without saving
+	 * the resulting value, then parse and output the constant. If
+	 * we wish to then display the type, we can go back and parse
+	 * the type again, this time saving the result.
+	 */
+	sv_init_sv(&type, sv);
+
+	SKIP_BEGIN(st, save_skip);
+	ret = rustv0_parse_type(st, sv, B_FALSE);
+	SKIP_END(st, save_skip);
+
+	if (!ret) {
+		DEMDEBUG("%s: const type isn't valid", __func__);
+		return (B_FALSE);
+	}
+
+	if (sv_consume_if_c(sv, 'p')) {
+		ret = rust_appendc(st, '_');
+	} else {
+		ret = rustv0_parse_const_data(st, ctype_class, sv);
+	}
+	if (!ret)
+		goto done;
+
+	if (st->rs_show_const_type) {
+		/*
+		 * rustv0_classify_const_type() accepts signed, bool and
+		 * char types in addition to the unsigned ones, so the type
+		 * must be printed with the basic type parser. The unsigned
+		 * only parser would turn a valid non-unsigned const into a
+		 * failure here.
+		 */
+		ret = rust_append(st, ": ") &&
+		    rustv0_parse_basic_type(st, &type);
+	}
+
+done:
+	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len),
+	    ret ? "success" : "fail");
+	return (ret);
+}
+
+/*
+ * Parse the ABI of a function signature. This is either 'C' (output as
+ * "C"), or an identifier in which every '_' is output as '-'.
+ */
+static boolean_t
+rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
+
+	if (!sv_consume_if_c(sv, 'C')) {
+		return (rustv0_parse_undisambiguated_identifier(st, sv,
+		    B_TRUE));
+	}
+
+	return (rust_appendc(st, 'C'));
+}
+
+/*
+ * Parse a binder ('G' followed by a base 62 number) and output it as
+ * "for<'a, 'b, ...> ". The number of lifetimes introduced is one more
+ * than the decoded number. st->rs_lt_depth is incremented for every
+ * lifetime introduced; callers are responsible for restoring it.
+ *
+ * Returns B_FALSE if sv does not start with a binder.
+ */
+static boolean_t
+rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	uint64_t count = 0;
+
+	if (!sv_consume_if_c(sv, 'G') ||
+	    !rustv0_parse_base62(st, sv, &count))
+		return (B_FALSE);
+	count++;
+
+	if (!rust_append(st, "for<"))
+		return (B_FALSE);
+
+	for (uint64_t idx = 0; idx < count; idx++) {
+		if (idx != 0 && !rust_append(st, ", "))
+			return (B_FALSE);
+
+		/*
+		 * Lifetime 1 relative to the just incremented depth is
+		 * the lifetime that was just introduced.
+		 */
+		st->rs_lt_depth++;
+		if (!rustv0_append_lifetime(st, 1))
+			return (B_FALSE);
+	}
+
+	return (rust_append(st, "> "));
+}
+
+/*
+ * <fn-sig> := [<binder>] ["U"] ["K" <abi>] {type} "E" <type>
+ *
+ * Output is of the form:
+ *
+ *	for<'a> unsafe extern "abi" fn(T, U) -> R
+ *
+ * Note that while the Rust RFC states the binder is mandatory, based on
+ * actual examples, and comparing with the rust-based demangler, it is in
+ * fact optional.
+ *
+ * Any lifetimes introduced by the binder only apply to the signature; the
+ * lifetime depth is restored before a successful return.
+ */
+static boolean_t
+rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	const uint64_t depth_on_entry = st->rs_lt_depth;
+
+	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
+
+	if (!OPTIONAL(st, rustv0_parse_binder(st, sv)))
+		return (B_FALSE);
+
+	if (sv_consume_if_c(sv, 'U')) {
+		if (!rust_append(st, "unsafe "))
+			return (B_FALSE);
+	}
+
+	if (sv_consume_if_c(sv, 'K')) {
+		if (!rust_append(st, "extern \""))
+			return (B_FALSE);
+		if (!rustv0_parse_abi(st, sv))
+			return (B_FALSE);
+		if (!rust_append(st, "\" "))
+			return (B_FALSE);
+	}
+
+	/* The parameter types */
+	if (!rust_append(st, "fn(") ||
+	    !rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE,
+	    NULL) ||
+	    !rust_appendc(st, ')'))
+		return (B_FALSE);
+
+	/* A return type of () is not printed */
+	if (!sv_consume_if_c(sv, 'u') &&
+	    (!rust_append(st, " -> ") ||
+	    !rustv0_parse_type(st, sv, B_FALSE)))
+		return (B_FALSE);
+
+	st->rs_lt_depth = depth_on_entry;
+
+	return (B_TRUE);
+}
+
+/*
+ * <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
+ *
+ * Output is "Name = Type". It is preceded by ", " if the generic argument
+ * list of the trait is already open ('open' is set), or by "<" (opening
+ * the list) if not.
+ *
+ * Returns B_FALSE without an error if sv does not start with a binding.
+ * Once the 'p' has been seen, a malformed name or type sets st->rs_error
+ * to EINVAL.
+ */
+static boolean_t
+rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st,
+    strview_t *restrict sv, boolean_t open)
+{
+	size_t save_len;
+
+	if (HAS_ERROR(st) || sv_remaining(sv) == 0 ||
+	    !sv_consume_if_c(sv, 'p'))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+	SAVE_LEN(st, save_len);
+
+	if (!rust_append(st, open ? ", " : "<"))
+		return (B_FALSE);
+
+	if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
+		goto invalid;
+
+	if (!rust_append(st, " = "))
+		return (B_FALSE);
+
+	if (!rustv0_parse_type(st, sv, B_FALSE))
+		goto invalid;
+
+	DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len));
+
+	return (B_TRUE);
+
+invalid:
+	st->rs_error = EINVAL;
+	return (B_FALSE);
+}
+
+/*
+ * Parse a single trait of a dyn type: a path followed by zero or more
+ * associated type bindings. The third argument is unused; it is present
+ * so this function can be passed to rustv0_parse_opt_list().
+ */
+static boolean_t
+rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv,
+ boolean_t dummy __unused)
+{
+ boolean_t stay_save = st->rs_args_stay_open;
+ boolean_t open_save = st->rs_args_is_open;
+ boolean_t open = B_FALSE;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ /*
+ * This is a bit subtle, but when formatting a trait in trait,
+ * we want something like this:
+ *
+ * dyn Trait<T, U, Assoc=X>
+ *
+ * instead of
+ *
+ * dyn Trait<T, U, <Assoc=X>>
+ *
+ * So when parsing the path, if we encounter generic arguments, we want
+ * the arg list to remain open at the end of processing the path so
+ * we can append the bindings to it. We set rs_args_stay_open to B_TRUE
+ * to indicate to rustv0_parse_path() that a generic argument list
+ * should not be closed (i.e. don't append a '>' at the end of the
+ * list). If rustv0_parse_path() encounters a list of generic arguments,
+ * it will also set st->rs_args_is_open to indicate it opened the list.
+ * We save this in 'open' so that when we process the associated
+ * bindings, we know if we need to open the list on the first binding
+ * or not -- we don't want 'dyn Trait<>' if there are no bindings,
+ * just 'dyn Trait'.
+ */
+ st->rs_args_stay_open = B_TRUE;
+ st->rs_args_is_open = B_FALSE;
+
+ if (!rustv0_parse_path(st, sv, B_FALSE)) {
+ st->rs_args_stay_open = stay_save;
+ st->rs_args_is_open = open_save;
+ return (B_FALSE);
+ }
+
+ open = st->rs_args_is_open;
+
+ /* Put back the caller's values before parsing the bindings */
+ st->rs_args_stay_open = stay_save;
+ st->rs_args_is_open = open_save;
+
+ /* The first binding opens the list if the path did not */
+ while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) {
+ open = B_TRUE;
+ }
+
+ /* The loop also ends on a fatal error */
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ if (open && !rust_appendc(st, '>'))
+ return (B_FALSE);
+
+ return (!HAS_ERROR(st));
+}
+
+/*
+ * Parse the bounds of a dyn type: an optional binder followed by a list
+ * of traits terminated by 'E'. The traits are separated by " + " in the
+ * output. The lifetime depth is restored before a successful return.
+ */
+static boolean_t
+rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	const uint64_t depth_on_entry = st->rs_lt_depth;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	/*
+	 * This is another case where Rust RFC2603 seems to disagree with
+	 * the implementation. The RFC implies the binder is mandatory,
+	 * while the implementations treat it as optional.
+	 */
+	if (!OPTIONAL(st, rustv0_parse_binder(st, sv)) ||
+	    !rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ",
+	    B_FALSE, NULL))
+		return (B_FALSE);
+
+	st->rs_lt_depth = depth_on_entry;
+
+	return (B_TRUE);
+}
+
+/*
+ * Parse a single generic argument: a const (prefixed with 'K'), a
+ * lifetime, or a type -- tried in that order. The third argument is
+ * unused; it is present so this function can be passed to
+ * rustv0_parse_opt_list().
+ */
+static boolean_t
+rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t dummy __unused)
+{
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_consume_if_c(sv, 'K'))
+		return (rustv0_parse_const(st, sv, B_FALSE));
+
+	return (rustv0_parse_lifetime(st, sv) ? B_TRUE :
+	    rustv0_parse_type(st, sv, B_FALSE));
+}
+
+/*
+ * Parse a hex value into *valp. Note that rust only uses lower case
+ * hex values.
+ *
+ * Returns B_TRUE if at least one digit was consumed. If no digits are
+ * present, B_FALSE is returned and *valp is left unmodified.
+ */
+static boolean_t
+rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv,
+    uint64_t *restrict valp)
+{
+	uint64_t result = 0;
+	size_t count = 0;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	/*
+	 * Unfortunately, Rust RFC 2603 does not explicitly define
+	 * {hex-digits}. We follow what decimal digits does, and treat a
+	 * leading 0 as a terminator.
+	 */
+	for (;;) {
+		uint64_t nibble;
+		char ch;
+
+		if (sv_remaining(sv) == 0)
+			break;
+
+		ch = sv_peek(sv, 0);
+		if (ISDIGIT(ch))
+			nibble = ch - '0';
+		else if (ch >= 'a' && ch <= 'f')
+			nibble = ch - 'a' + 10;
+		else
+			break;
+
+		result = (result << 4) | nibble;
+		sv_consume_n(sv, 1);
+		count++;
+
+		/* A leading zero is the entire number */
+		if (count == 1 && result == 0)
+			break;
+	}
+
+	if (count == 0)
+		return (B_FALSE);
+
+	*valp = result;
+	return (B_TRUE);
+}
+
+/*
+ * Parse a base62 number into *valp. The number is explicitly terminated
+ * by a '_'. The values are also offset by 1 -- that is '_' == 0,
+ * '0_' == 1, ...
+ *
+ * The digits are 0-9 (0 - 9), a-z (10 - 35), and A-Z (36 - 61).
+ *
+ * Returns B_TRUE and sets *valp on success. B_FALSE is returned (and
+ * *valp is not modified) if there is no input, a character that is not a
+ * base62 digit is encountered, the terminating '_' is missing, or the
+ * value does not fit in a uint64_t.
+ */
+static boolean_t
+rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv,
+    uint64_t *restrict valp)
+{
+	uint64_t val = 0;
+	uint64_t digit;
+	char c;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	/* A terminating '_' without any digits is 0 */
+	if (sv_consume_if_c(sv, '_')) {
+		*valp = 0;
+		return (B_TRUE);
+	}
+
+	/* Need at least one valid digit if > 0 */
+	if (!ISALNUM(sv_peek(sv, 0)))
+		return (B_FALSE);
+
+	while (sv_remaining(sv) > 0) {
+		c = sv_consume_c(sv);
+
+		if (c == '_') {
+			/*
+			 * Because a lone '_' was already handled earlier,
+			 * we know we've had at least one other digit and
+			 * can increment the value and return -- provided
+			 * the increment does not wrap.
+			 */
+			if (val == UINT64_MAX) {
+				DEMDEBUG("%s: ERROR: value overflows",
+				    __func__);
+				return (B_FALSE);
+			}
+			*valp = val + 1;
+			return (B_TRUE);
+		} else if (ISDIGIT(c)) {
+			digit = c - '0';
+		} else if (ISLOWER(c)) {
+			digit = c - 'a' + 10;
+		} else if (ISUPPER(c)) {
+			digit = c - 'A' + 36;
+		} else {
+			return (B_FALSE);
+		}
+
+		/*
+		 * Reject values that do not fit in 64 bits instead of
+		 * silently wrapping and producing a bogus (but seemingly
+		 * valid) number.
+		 */
+		if (val > (UINT64_MAX - digit) / 62) {
+			DEMDEBUG("%s: ERROR: value overflows", __func__);
+			return (B_FALSE);
+		}
+		val = val * 62 + digit;
+	}
+
+	/* We reached the end of the string without a terminating _ */
+	return (B_FALSE);
+}
+
+/*
+ * Map the basic type character of a const generic to the class that
+ * determines how its value is formatted. CTC_INVALID is returned for
+ * any character that does not name an integer, bool or char type.
+ */
+static const_type_class_t
+rustv0_classify_const_type(char type)
+{
+	static const struct {
+		const char		*ct_codes;
+		const_type_class_t	ct_class;
+	} classes[] = {
+		{ "htmyoj", CTC_UNSIGNED },
+		{ "ailnsx", CTC_SIGNED },
+		{ "b", CTC_BOOL },
+		{ "c", CTC_CHAR },
+	};
+
+	for (size_t i = 0; i < sizeof (classes) / sizeof (classes[0]); i++) {
+		/* Stops at the NUL, so a NUL 'type' never matches */
+		for (const char *p = classes[i].ct_codes; *p != '\0'; p++) {
+			if (*p == type)
+				return (classes[i].ct_class);
+		}
+	}
+
+	return (CTC_INVALID);
+}
+
+/*
+ * Make sure the name is a plausible mangled rust symbol.
+ * Non-ASCII are never allowed. Rust itself uses [_0-9A-Za-z], however
+ * some things will add a suffix starting with a '.' (e.g. LLVM thin LTO).
+ * As such we proceed in two phases. We first only allow [_0-9A-Za-z] until
+ * we encounter a '.'. At that point, any ASCII character is allowed.
+ */
+static boolean_t
+rustv0_valid_sym(const strview_t *sv)
+{
+	boolean_t in_suffix = B_FALSE;
+
+	for (size_t pos = 0; pos < sv->sv_rem; pos++) {
+		char ch = sv->sv_first[pos];
+
+		/* Always acceptable */
+		if (ch == '_' || ISALNUM(ch))
+			continue;
+
+		/* Start of a suffix; relax the checks from here on */
+		if (ch == '.') {
+			in_suffix = B_TRUE;
+			continue;
+		}
+
+		/* Within a suffix, anything without the high bit set is ok */
+		if (in_suffix && (ch & 0x80) == 0)
+			continue;
+
+		DEMDEBUG("%s: ERROR found invalid character '%c' "
+		    "in '%.*s' at index %zu",
+		    __func__, ch, SV_PRINT(sv), pos);
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}