diff options
Diffstat (limited to 'usr/src/lib/libdemangle/common/rust-v0.c')
-rw-r--r-- | usr/src/lib/libdemangle/common/rust-v0.c | 1449 |
1 files changed, 1449 insertions, 0 deletions
diff --git a/usr/src/lib/libdemangle/common/rust-v0.c b/usr/src/lib/libdemangle/common/rust-v0.c new file mode 100644 index 0000000000..598d8457c9 --- /dev/null +++ b/usr/src/lib/libdemangle/common/rust-v0.c @@ -0,0 +1,1449 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + * Copyright 2021 Jason King + */ + +/* BEGIN CSTYLED */ + +/* + * This implements the 'symbol_name_mangling_v2' demangling for rust as + * described in Rust RFC 2603 as opposed to the original (now called + * legacy) mangling older versions of rust used (implemented in rust.c). + * + * The specification can be viewed at: + * https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md + */ + +/* END CSTYLED */ + +#include <errno.h> +#include <libcustr.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "rust.h" + +/* + * Help track amount of additional output added to rs_demangled across + * a function call (to allow that portion to be output for debugging) + */ +#define SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled) +#define CSTR_END(_st, _len) \ + ((int)(custr_len((_st)->rs_demangled) - (_len))), \ + custr_cstr((_st)->rs_demangled) + (_len) + +typedef enum const_type_class { + CTC_INVALID = -1, + CTC_UNSIGNED, + CTC_SIGNED, + CTC_CHAR, + CTC_BOOL, +} const_type_class_t; + +/* + * Sometimes, parsing something is optional. In this case a failure to + * parse is fine, however we still want to consider a fatal error as + * failure. + */ +#define OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st)) + +static boolean_t rustv0_valid_sym(const strview_t *); +static const_type_class_t rustv0_classify_const_type(char); +static boolean_t rustv0_parse_hex_num(rust_state_t *restrict, + strview_t *restrict, uint64_t *restrict); +static boolean_t rustv0_parse_base62(rust_state_t *restrict, + strview_t *restrict, uint64_t *restrict); + +static boolean_t rustv0_parse_undisambiguated_identifier( + rust_state_t *restrict, strview_t *restrict, boolean_t); +static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict, + strview_t *restrict, uint64_t *restrict); + +static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict, + boolean_t); +static boolean_t rustv0_parse_impl_path(rust_state_t *restrict, + strview_t *restrict, boolean_t); +static boolean_t rustv0_parse_nested_path(rust_state_t *restrict, + strview_t *restrict, boolean_t); +static boolean_t rustv0_parse_basic_type(rust_state_t *restrict, + strview_t *restrict); +static boolean_t rustv0_parse_backref(rust_state_t *restrict, + strview_t *restrict, + boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t), + boolean_t); +static boolean_t rustv0_parse_lifetime(rust_state_t *restrict, + strview_t *restrict); +static boolean_t rustv0_parse_const(rust_state_t *restrict, + strview_t *restrict, boolean_t); +static boolean_t rustv0_parse_fnsig(rust_state_t *restrict, + strview_t *restrict); +static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict, + strview_t *restrict); +static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict, + strview_t *restrict, boolean_t); + +boolean_t +rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv) +{ + boolean_t save_skip; + boolean_t ret; + + /* Make sure all the characters are valid */ + if (!rustv0_valid_sym(sv)) { + st->rs_error = EINVAL; + return (B_FALSE); + } + + /* + * <symbol-name> = "_R" [<decimal-number>] <path> + * [<instantiating-crate>] + * + * We've already parsed the prefix in rust_demangle(), as well + * as made sure there's no [<decimal-number>] present, so + * start with <path>. + */ + if (!rustv0_parse_path(st, sv, B_TRUE)) + return (B_FALSE); + + /* [<instantiating crate>] -- parse but don't save */ + SKIP_BEGIN(st, save_skip); + ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE)); + SKIP_END(st, save_skip); + if (!ret) + return (B_FALSE); + + /* If nothing's left, we know we're done */ + if (sv_remaining(sv) == 0) + return (!HAS_ERROR(st)); + + /* + * LLVM sometimes will suffix symbols starting with a '.' + * followed by extra data. For things that start with + * ".llvm.", we discard the rest of the string. For + * other things that start with '.', we copy the + * results to the final string. This matches + * what the rust native demangler crate does, and + * we don't see a reason to deviate from their + * behavior. + */ + if (sv_consume_if(sv, ".llvm.")) + return (!HAS_ERROR(st)); + + if (sv_peek(sv, 0) != '.') { + DEMDEBUG("%s: Unexpected trailing data at the end of the " + "name: '%.*s'", __func__, SV_PRINT(sv)); + st->rs_error = EINVAL; + return (B_FALSE); + } + + return (rust_append_sv(st, sv_remaining(sv), sv)); +} + +/* + * Parse an optional list terminated by 'E'. Each result of 'fn' is + * separated by 'sep' in the output. + */ +static boolean_t +rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t), + const char *restrict sep, boolean_t bval, size_t *restrict countp) +{ + size_t count = 0; + + DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); + + while (sv_remaining(sv) > 0) { + if (sv_consume_if_c(sv, 'E')) { + if (countp != NULL) + *countp += count; + return (B_TRUE); + } + + if (count > 0 && !rust_append(st, sep)) + return (B_FALSE); + + if (!fn(st, sv, bval)) + return (B_FALSE); + + count++; + } + + /* + * An optional list should terminate with an 'E'. If we get here, + * we ran out of charaters and didn't terminate as we should. + */ + return (B_FALSE); +} + +static boolean_t +rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv) +{ + const char *str = NULL; + strview_t save; + char c; + + if (HAS_ERROR(st) || sv_remaining(sv) == 0) + return (B_FALSE); + + sv_init_sv(&save, sv); + + switch (c = sv_consume_c(sv)) { + case 'h': + str = "u8"; + break; + case 't': + str = "u16"; + break; + case 'm': + str = "u32"; + break; + case 'y': + str = "u64"; + break; + case 'o': + str = "u128"; + break; + case 'j': /* usize */ + str = "usize"; + break; + default: + sv_init_sv(sv, &save); + return (B_FALSE); + } + + DEMDEBUG("%s: %c -> %s", __func__, c, str); + return (rust_append(st, str)); +} + +static boolean_t +rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv) +{ + const char *str = NULL; + strview_t save; + char c; + + if (HAS_ERROR(st) || sv_remaining(sv) == 0) + return (B_FALSE); + + if (rustv0_parse_uint_type(st, sv)) + return (B_TRUE); + + sv_init_sv(&save, sv); + + switch (c = sv_consume_c(sv)) { + case 'a': + str = "i8"; + break; + case 'b': + str = "bool"; + break; + case 'c': + str = "char"; + break; + case 'd': + str = "f64"; + break; + case 'e': + str = "str"; + break; + case 'f': + str = "f32"; + break; + case 'i': + str = "isize"; + break; + case 'l': + str = "i32"; + break; + case 'n': + str = "i128"; + break; + case 'p': + str = "_"; + break; + case 's': + str = "i16"; + break; + case 'u': + str = "()"; + break; + case 'v': + str = "..."; + break; + case 'x': + str = "i64"; + break; + case 'z': + str = "!"; + break; + default: + sv_init_sv(sv, &save); + return (B_FALSE); + } + + DEMDEBUG("%s: %c -> %s", __func__, c, str); + return (rust_append(st, str)); +} + +static boolean_t +rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t dummy __unused) +{ + strview_t save; + size_t len, tuple_elem_count; + boolean_t ret; + char c; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + SAVE_LEN(st, len); + sv_init_sv(&save, sv); + + switch (c = sv_consume_c(sv)) { + case 'A': + ret = rust_appendc(st, '[') && + rustv0_parse_type(st, sv, B_FALSE) && + rust_append(st, "; ") && + rustv0_parse_const(st, sv, B_FALSE) && + rust_appendc(st, ']'); + break; + case 'S': + ret = rust_appendc(st, '[') && + rustv0_parse_type(st, sv, B_FALSE) && + rust_appendc(st, ']'); + break; + case 'T': + tuple_elem_count = 0; + ret = rust_appendc(st, '(') && + rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", + B_FALSE, &tuple_elem_count) && + rust_append(st, (tuple_elem_count == 1) ? ",)" : ")"); + break; + case 'R': + case 'Q': + /* `&mut T` or `&'... mut T` */ + if (!(ret = rust_appendc(st, '&'))) + break; + + /* + * lifetime is optional, but we need to add a trailing + * space if present (so we cannot use the OPTIONAL macro). + */ + if (rustv0_parse_lifetime(st, sv)) { + if (!(ret = rust_appendc(st, ' '))) + break; + } else if (HAS_ERROR(st)) { + break; + } + + ret = rust_append(st, (c == 'Q') ? "mut " : "") && + rustv0_parse_type(st, sv, B_FALSE); + break; + case 'P': + ret = rust_append(st, "*const ") && + rustv0_parse_type(st, sv, B_FALSE); + break; + case 'O': + ret = rust_append(st, "*mut ") && + rustv0_parse_type(st, sv, B_FALSE); + break; + case 'F': + ret = rustv0_parse_fnsig(st, sv); + break; + case 'D': + ret = rust_append(st, "dyn ") && + rustv0_parse_dynbounds(st, sv); + if (!ret) + break; + + /* + * Rust RFC2603 shows the lifetime as required, however + * it appears this is optional. + */ + DEMDEBUG("%s: pre-lifetime: '%*s'", __func__, SV_PRINT(sv)); + + /* + * We only want to print a non-zero (non "'_") + * lifetime. + */ + if (sv_consume_if(sv, "L_")) + break; + + /* + * But if there is a lifetime we want to print, + * we want to prepend " + " before it. + */ + if (sv_peek(sv, 0) == 'L' && + !(ret = rust_append(st, " + "))) + break; + + ret = rustv0_parse_lifetime(st, sv); + break; + default: + sv_init_sv(sv, &save); + + ret = rustv0_parse_backref(st, sv, rustv0_parse_type, + B_FALSE) || + rustv0_parse_basic_type(st, sv); + if (ret) + break; + + ret = rustv0_parse_path(st, sv, B_FALSE); + break; + } + + DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len), + ret ? "success" : "fail"); + + return (ret); +} + +/* + * <path> = "C" <identifier> crate root + * | "M" <impl-path> <type> <T> + * | "X" <impl-path> <type> <path> <T as Trait> (trait impl) + * | "Y" <type> <path> <T as Trait> (trait definition) + * | "N" <ns> <path> <identifier> ...::ident (nested path) + * | "I" <path> {<generic-arg>} "E" ...<T, U> + * | <backref> + */ +static boolean_t +rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t in_value) +{ + strview_t save; + uint64_t disamb = 0; + size_t len; + boolean_t ret = B_FALSE; + boolean_t save_skip; + boolean_t args_stay_save = st->rs_args_stay_open; + boolean_t args_open_save = st->rs_args_is_open; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + SAVE_LEN(st, len); + sv_init_sv(&save, sv); + + switch (sv_consume_c(sv)) { + case 'C': + if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb))) + goto done; + + if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) + goto done; + + if (st->rs_verbose && + !rust_append_printf(st, "[%" PRIx64 "]", disamb)) + goto done; + break; + case 'M': + SKIP_BEGIN(st, save_skip); + if (!rustv0_parse_impl_path(st, sv, in_value)) { + SKIP_END(st, save_skip); + goto done; + } + SKIP_END(st, save_skip); + + if (!rust_appendc(st, '<') || + !rustv0_parse_type(st, sv, B_FALSE) || + !rust_appendc(st, '>')) + goto done; + break; + case 'X': + SKIP_BEGIN(st, save_skip); + if (!rustv0_parse_impl_path(st, sv, in_value)) { + SKIP_END(st, save_skip); + goto done; + } + SKIP_END(st, save_skip); + /*FALLTHRU*/ + case 'Y': + if (!rust_appendc(st, '<') || + !rustv0_parse_type(st, sv, B_FALSE) || + !rust_append(st, " as ") || + !rustv0_parse_path(st, sv, B_FALSE) || + !rust_appendc(st, '>')) + goto done; + break; + case 'N': + if (!rustv0_parse_nested_path(st, sv, in_value)) + goto done; + break; + case 'I': + st->rs_args_stay_open = B_FALSE; + st->rs_args_is_open = B_FALSE; + + if (!rustv0_parse_path(st, sv, in_value)) + goto done; + + if (in_value && !rust_append(st, "::")) + goto done; + + if (!rust_appendc(st, '<') || + !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg, + ", ", B_FALSE, NULL)) + goto done; + + st->rs_args_stay_open = args_stay_save; + st->rs_args_is_open = args_open_save; + + /* + * If we were asked to not close our list, then don't and + * indicate that the list is open. + */ + if (st->rs_args_stay_open) { + st->rs_args_stay_open = B_FALSE; + st->rs_args_is_open = B_TRUE; + } else if (!rust_appendc(st, '>')) { + goto done; + } + break; + default: + /* + * Didn't recognize the letter, so it has to be a path. Restore + * sv to state prior to switch and continue. + */ + sv_init_sv(sv, &save); + if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value)) + goto done; + } + + ret = B_TRUE; + +done: + DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len), + ret ? "success" : "fail"); + + return (ret); +} + +static boolean_t +rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t in_value) +{ + uint64_t val = 0; + + return (OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &val)) && + rustv0_parse_path(st, sv, in_value)); +} + +/* + * A bit of a hack -- when printing a nested path, we need to know + * if the identifier is there or not in order to correctly format + * the output preceeding it (when present). This peeks ahead and + * determines this. + */ +static boolean_t +rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t *has_namep) +{ + strview_t save; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + sv_init_sv(&save, sv); + + /* For checking the length, we don't care if it's punycode or not */ + (void) sv_consume_if_c(&save, 'u'); + + if (sv_remaining(sv) == 0) { + st->rs_error = EINVAL; + return (B_FALSE); + } + + if (sv_consume_if_c(&save, '0')) { + *has_namep = B_FALSE; + return (B_TRUE); + } + + *has_namep = B_TRUE; + return (B_TRUE); +} + +static boolean_t +rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t in_value) +{ + uint64_t disambiguator = 0; + size_t len = 0; + char ns; + boolean_t ret = B_FALSE; + boolean_t has_name; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + SAVE_LEN(st, len); + + ns = sv_consume_c(sv); + + if (!rustv0_parse_path(st, sv, in_value)) + goto done; + + if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator))) + goto done; + + if (!rustv0_has_name(st, sv, &has_name)) + goto done; + + if (ISUPPER(ns)) { + if (!rust_append(st, "::{")) + goto done; + + switch (ns) { + case 'C': + if (!rust_append(st, "closure")) + goto done; + break; + case 'S': + if (!rust_append(st, "shim")) + goto done; + break; + default: + if (!rust_appendc(st, ns)) + goto done; + break; + } + + if (has_name && !rust_appendc(st, ':')) + goto done; + + if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) + goto done; + + ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator); + } else { + if (has_name) { + if (!(ret = rust_append(st, "::"))) + goto done; + } + ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE); + } + +done: + DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len), + ret ? "success" : "fail"); + + return (ret); +} + +/* + * <disambiguator> = "s" <base-64-number> + * + */ +static boolean_t +rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv, + uint64_t *valp) +{ + if (HAS_ERROR(st) || sv_remaining(sv) < 2) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + *valp = 0; + + if (!sv_consume_if_c(sv, 's')) + return (B_FALSE); + + if (!rustv0_parse_base62(st, sv, valp)) { + st->rs_error = EINVAL; + return (B_FALSE); + } + + /* + * Rust RFC 2603 details this in Appendix A, but not the main + * portion of the RFC. If no disambiguator is present, the value + * is 0, if the decoded value is 0, the index is 1, ... + * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we + * only need to add one here to complete the adjustment. + */ + *valp = *valp + 1; + + DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp); + return (B_TRUE); +} + +/* <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> */ +static boolean_t +rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st, + strview_t *restrict sv, boolean_t repl_underscore) +{ + uint64_t len = 0; + boolean_t puny = B_FALSE; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + if (sv_consume_if_c(sv, 'u')) + puny = B_TRUE; + + if (!rust_parse_base10(st, sv, &len)) + return (B_FALSE); + + /* skip optional separator '_' */ + (void) sv_consume_if_c(sv, '_'); + + if (sv_remaining(sv) < len) { + DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") " + "> remaining bytes (%zu)", __func__, len, + sv_remaining(sv)); + return (B_FALSE); + } + + /* 0 length identifiers are acceptable */ + if (len == 0) + return (B_TRUE); + + if (puny) { + strview_t ident; + + sv_init_sv_range(&ident, sv, len); + if (!rustv0_puny_decode(st, &ident, repl_underscore)) + return (B_FALSE); + + sv_consume_n(sv, len); + return (B_TRUE); + } + + /* + * rust identifiers do not contain '-'. However ABI identifiers + * are allowed to contain them (e.g. extern "foo-bar" fn ...). + * They are substituted with '_' in the mangled output. If we + * do not need to reverse this, we can just append 'len' bytes + * of sv. Otherwise we need to go through and reverse this + * substitution. + */ + if (!repl_underscore) + return (rust_append_sv(st, len, sv)); + + /* + * We checked earlier that len < sv_remaining(sv); so this loop + * cannot overrun. + */ + for (size_t i = 0; i < len; i++) { + char c = sv_consume_c(sv); + + if (c == '_') + c = '-'; + + if (!rust_appendc(st, c)) + return (B_FALSE); + } + + return (B_TRUE); +} + +/* <backref> = "B" <base-62-number> */ +static boolean_t +rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b), + boolean_t bval) +{ + strview_t backref; + strview_t target; + uint64_t idx = 0; + size_t save_len; + size_t len; + + if (HAS_ERROR(st)) + return (B_FALSE); + + sv_init_sv(&backref, sv); + + if (!sv_consume_if_c(sv, 'B')) + return (B_FALSE); + + DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv)); + + if (!rustv0_parse_base62(st, sv, &idx)) { + st->rs_error = EINVAL; + return (B_FALSE); + } + + /* + * Determine how many bytes we've consumed (up to the start of + * the current backref token). + */ + VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first); + len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first); + + /* + * The backref can only refer to an index prior to the start of + * the current backref token -- that is must always refer back in + * the string, never to the current position or beyond. + */ + if (idx >= len) { + DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") " + "is out of range [0, %zu)", __func__, idx, len); + st->rs_error = ERANGE; + return (B_FALSE); + } + + /* + * Create a strview_t of the original string (sans prefix) by + * copying from st->rs_orig. The length of the target strview_t is + * capped to end immediately prior to this backref token. Since we + * enforce that backrefs must always refer to already processed + * portions of the string (i.e. must always refer backwards), and the + * length of the strview_t is set to end prior to the start of this + * backref token, we guarantee processing of a backref will always + * terminate before it can possibly encounter this backref token + * and cause a loop -- either the processing terminates normally or + * it reaches the end of the capped strview_t. + */ + sv_init_sv_range(&target, &st->rs_orig, len); + + /* + * Consume all the input in the target strview_t up to the index + */ + sv_consume_n(&target, idx); + + DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__, + idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : ""); + + /* + * If we're skipping the output, there's no reason to bother reparsing + * the output -- we're not going to save it. We still setup everything + * so that the debug output is still emitted. + */ + if (st->rs_skip) + return (B_TRUE); + + SAVE_LEN(st, save_len); + if (!fn(st, &target, bval)) + return (B_FALSE); + + DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len)); + return (B_TRUE); +} + +static boolean_t +rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime) +{ + uint64_t bound_lt; + + if (HAS_ERROR(st)) + return (B_FALSE); + + if (!rust_appendc(st, '\'')) + return (B_FALSE); + + if (lifetime == 0) + return (rust_appendc(st, '_')); + + if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) { + DEMDEBUG("%s: ERROR: lifetime value %" PRIu64 + " > current depth %" PRIu64, __func__, lifetime, + st->rs_lt_depth); + st->rs_lt_depth = ERANGE; + return (B_FALSE); + } + + /* + * Use 'a, 'b, ... + */ + if (bound_lt < 26) { + char c = (char)bound_lt + 'a'; + return (rust_append_printf(st, "%c", c)); + } + + /* + * Otherwise, use '_123, '_456, ... + */ + return (rust_append_printf(st, "_%" PRIu64, bound_lt)); +} + +static boolean_t +rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv) +{ + uint64_t lifetime; + + if (!sv_consume_if_c(sv, 'L')) + return (B_FALSE); + + if (!rustv0_parse_base62(st, sv, &lifetime)) + return (B_FALSE); + + return (rustv0_append_lifetime(st, lifetime)); +} + +static boolean_t +rustv0_parse_const_data(rust_state_t *restrict st, + const_type_class_t type_class, strview_t *restrict sv) +{ + uint64_t val = 0; + size_t save_len; + boolean_t neg = B_FALSE; + boolean_t ret = B_FALSE; + + VERIFY3S(type_class, !=, CTC_INVALID); + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + SAVE_LEN(st, save_len); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n')) + neg = B_TRUE; + + ret = OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) && + sv_consume_if_c(sv, '_'); + if (!ret) + goto done; + + switch (type_class) { + case CTC_SIGNED: + case CTC_UNSIGNED: + ret = rust_append_printf(st, "%s%" PRIu64, neg ? "-" : "", val); + break; + case CTC_BOOL: + if (val > 1) { + DEMDEBUG("%s: invalid bool val %" PRIu64, __func__, + val); + ret = B_FALSE; + break; + } + ret = rust_append_printf(st, "%s", + (val == 0) ? "false" : "true"); + break; + case CTC_CHAR: + if (val > UINT32_MAX) { + DEMDEBUG("%s: char value %" PRIu64 " out of range", + __func__, val); + ret = B_FALSE; + break; + } + + ret = rust_appendc(st, '\'') && rust_append_utf8_c(st, val) && + rust_appendc(st, '\''); + break; + default: + ret = B_FALSE; + } + +done: + DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len), + ret ? "success" : "fail"); + + return (ret); +} + +static boolean_t +rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t dummy __unused) +{ + strview_t type; + size_t start_len; + const_type_class_t ctype_class; + char ctype; + boolean_t save_skip; + boolean_t ret; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + SAVE_LEN(st, start_len); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE)) + return (B_TRUE); + + if (sv_consume_if_c(sv, 'p')) { + ret = rust_appendc(st, '_'); + goto done; + } + + ctype = sv_peek(sv, 0); + ctype_class = rustv0_classify_const_type(ctype); + if (ctype_class == CTC_INVALID) { + DEMDEBUG("%s: const type isn't a valid const generic type", + __func__); + return (B_FALSE); + } + + /* + * This isn't spelled out clearly in Rust RFC 2603, but currently + * only unsigned int types are allowed at this point. However, we + * have a bit of a potential tricky situation. Unlike formatting + * the other tokens, if we want to display the type, we do so + * _after_ the value, even though the type appears first. + * + * This is bit of a hack, but we save off the input position from + * sv before the parse the type. We then parse it without saving + * the resulting value, then parse and output the constant. If + * we wish to then display the type, we can go back and parse + * the type again, this time saving the result. + */ + sv_init_sv(&type, sv); + + SKIP_BEGIN(st, save_skip); + ret = rustv0_parse_type(st, sv, B_FALSE); + SKIP_END(st, save_skip); + + if (!ret) { + DEMDEBUG("%s: const type isn't valid", __func__); + return (B_FALSE); + } + + if (sv_consume_if_c(sv, 'p')) { + ret = rust_appendc(st, '_'); + } else { + ret = rustv0_parse_const_data(st, ctype_class, sv); + } + if (!ret) + goto done; + + if (st->rs_show_const_type) { + ret = rust_append(st, ": ") && + rustv0_parse_uint_type(st, &type); + } + +done: + DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len), + ret ? "success" : "fail"); + return (ret); +} + +static boolean_t +rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv) +{ + DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); + + if (sv_consume_if_c(sv, 'C')) + return (rust_appendc(st, 'C')); + + return (rustv0_parse_undisambiguated_identifier(st, sv, B_TRUE)); +} + +static boolean_t +rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv) +{ + uint64_t n, i; + + if (!sv_consume_if_c(sv, 'G')) + return (B_FALSE); + + if (!rustv0_parse_base62(st, sv, &n)) + return (B_FALSE); + n += 1; + + if (!rust_append(st, "for<")) + return (B_FALSE); + + for (i = 0; i < n; i++) { + if (i > 0 && !rust_append(st, ", ")) + return (B_FALSE); + + st->rs_lt_depth++; + if (!rustv0_append_lifetime(st, 1)) + return (B_FALSE); + } + + if (!rust_append(st, "> ")) + return (B_FALSE); + + return (B_TRUE); +} + +/* + * <fn-sig> := [<binder>] ["U"] ["K" <abi>] {type} "E" <type> + * + * Note that while the Rust RFC states the binder is manditory, based on + * actual examples, and comparing with the rust-based demangler, it is in + * fact optional. + */ +static boolean_t +rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv) +{ + uint64_t save_lt = st->rs_lt_depth; + + DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); + + if (!OPTIONAL(st, rustv0_parse_binder(st, sv))) + return (B_FALSE); + + if (sv_consume_if_c(sv, 'U') && !rust_append(st, "unsafe ")) + return (B_FALSE); + + if (sv_consume_if_c(sv, 'K') && + (!rust_append(st, "extern \"") || !rustv0_parse_abi(st, sv) || + !rust_append(st, "\" "))) + return (B_FALSE); + + if (!rust_append(st, "fn(")) + return (B_FALSE); + + if (!rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE, + NULL)) { + return (B_FALSE); + } + + if (!rust_appendc(st, ')')) + return (B_FALSE); + + /* If the return type is (), don't print it */ + if (!sv_consume_if_c(sv, 'u')) { + if (!rust_append(st, " -> ")) + return (B_FALSE); + + if (!rustv0_parse_type(st, sv, B_FALSE)) + return (B_FALSE); + } + + st->rs_lt_depth = save_lt; + + return (B_TRUE); +} + +/* + * <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> + */ +static boolean_t +rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st, + strview_t *restrict sv, boolean_t open) +{ + size_t save_len; + + if (HAS_ERROR(st)) + return (B_FALSE); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + if (!sv_consume_if_c(sv, 'p')) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + SAVE_LEN(st, save_len); + + if (!rust_append(st, open ? ", " : "<")) + return (B_FALSE); + + if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) { + st->rs_error = EINVAL; + return (B_FALSE); + } + + if (!rust_append(st, " = ")) + return (B_FALSE); + + if (!rustv0_parse_type(st, sv, B_FALSE)) { + st->rs_error = EINVAL; + return (B_FALSE); + } + + DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len)); + + return (B_TRUE); +} + +static boolean_t +rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t dummy __unused) +{ + boolean_t stay_save = st->rs_args_stay_open; + boolean_t open_save = st->rs_args_is_open; + boolean_t open = B_FALSE; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + /* + * This is a bit subtle, but when formatting a trait in trait, + * we want something like this: + * + * dyn Trait<T, U, Assoc=X> + * + * instead of + * + * dyn Trait<T, U, <Assoc=X>> + * + * So when parsing the path, if we encounter generic arguments, we want + * the arg list to remain open at the end of processing the path so + * we can append the bindings to it. We set rs_args_stay_open to B_TRUE + * to indidcate to rustv0_parse_path() that a generic argument list + * should not be closed (i.e. don't append a '>' at the end of the + * list). If rustv0_parse_path() encounters a list of generic arguments, + * it will also set rs->args_is_open to indiciate it opened the list. + * We save this in 'open' so that when we process the associated + * bindings, we know if we need to open the list on the first binding + * or not -- we don't want 'dyn Trait<>' if there are no bindings, + * just 'dyn Trait'. + */ + st->rs_args_stay_open = B_TRUE; + st->rs_args_is_open = B_FALSE; + + if (!rustv0_parse_path(st, sv, B_FALSE)) { + st->rs_args_stay_open = stay_save; + st->rs_args_is_open = open_save; + return (B_FALSE); + } + + open = st->rs_args_is_open; + + st->rs_args_stay_open = stay_save; + st->rs_args_is_open = open_save; + + while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) { + open = B_TRUE; + } + + if (HAS_ERROR(st)) + return (B_FALSE); + + if (open && !rust_appendc(st, '>')) + return (B_FALSE); + + return (!HAS_ERROR(st)); +} + +static boolean_t +rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv) +{ + uint64_t save_lt = st->rs_lt_depth; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + /* + * This is another case where Rust RFC2603 seems to disagree with + * the implementation. The RFC implies this is mandatory, while + * the implementations treat it as optional. + */ + if (!OPTIONAL(st, rustv0_parse_binder(st, sv))) + return (B_FALSE); + + if (!rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ", + B_FALSE, NULL)) + return (B_FALSE); + + st->rs_lt_depth = save_lt; + + return (B_TRUE); +} + +static boolean_t +rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv, + boolean_t dummy __unused) +{ + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + if (sv_consume_if_c(sv, 'K')) + return (rustv0_parse_const(st, sv, B_FALSE)); + + if (rustv0_parse_lifetime(st, sv)) + return (B_TRUE); + + return (rustv0_parse_type(st, sv, B_FALSE)); +} + +/* + * Parse a hex value into *valp. Note that rust only uses lower case + * hex values. + */ +static boolean_t +rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv, + uint64_t *restrict valp) +{ + uint64_t val = 0; + size_t ndigits = 0; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + /* + * Unfortunately, Rust RFC 2603 also doesn't not explicty define + * {hex-digits}. We follow what decimal digits does, and treat a + * leading 0 as a terminator. + */ + while (sv_remaining(sv) > 0) { + char c = sv_peek(sv, 0); + + if (ISDIGIT(c)) { + val *= 16; + val += c - '0'; + } else if (c >= 'a' && c <= 'f') { + val *= 16; + val += c - 'a' + 10; + } else { + break; + } + + sv_consume_n(sv, 1); + + if (++ndigits == 1 && val == 0) + break; + } + + if (ndigits > 0) + *valp = val; + + return ((ndigits > 0) ? B_TRUE : B_FALSE); +} + +/* + * Parse a base62 number into *valp. The number is explicitly terminated + * by a '_'. The values are also offset by 0 -- that is '_' == 0, + * '0_' == 1, ... + */ +static boolean_t +rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv, + uint64_t *restrict valp) +{ + uint64_t val = 0; + char c; + + if (HAS_ERROR(st)) + return (B_FALSE); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + + if (sv_remaining(sv) == 0) + return (B_FALSE); + + /* A terminating '_' without any digits is 0 */ + if (sv_consume_if_c(sv, '_')) { + *valp = 0; + return (B_TRUE); + } + + /* Need at least one valid digit if > 0 */ + if (!ISALNUM(sv_peek(sv, 0))) + return (B_FALSE); + + while (sv_remaining(sv) > 0) { + c = sv_consume_c(sv); + + if (c == '_') { + /* + * Because a lone '_' was already handled earlier, + * we know we've had at least one other digit and + * can increment the value and return. + */ + *valp = val + 1; + return (B_TRUE); + } else if (ISDIGIT(c)) { + val *= 62; + val += c - '0'; + } else if (ISLOWER(c)) { + val *= 62; + val += c - 'a' + 10; + } else if (ISUPPER(c)) { + val *= 62; + val += c - 'A' + 36; + } else { + return (B_FALSE); + } + } + + /* We reached the end of the string without a terminating _ */ + return (B_FALSE); +} + +static const_type_class_t +rustv0_classify_const_type(char type) +{ + switch (type) { + case 'h': case 't': case 'm': case 'y': case 'o': case 'j': + return (CTC_UNSIGNED); + case 'a': case 'i': case 'l': case 'n': case 's': case 'x': + return (CTC_SIGNED); + case 'b': + return (CTC_BOOL); + case 'c': + return (CTC_CHAR); + default: + return (CTC_INVALID); + } +} + +/* + * Make sure the name is a plausible mangled rust symbol. + * Non-ASCII are never allowed. Rust itself uses [_0-9A-Za-z], however + * some things will add a suffix starting with a '.' (e.g. LLVM thin LTO). + * As such we proceed in two phases. We first only allow [_0-9A-Z-az] until + * we encounter a '.'. At that point, any ASCII character is allowed. + */ +static boolean_t +rustv0_valid_sym(const strview_t *sv) +{ + size_t i; + boolean_t check_rust = B_TRUE; + + for (i = 0; i < sv->sv_rem; i++) { + char c = sv->sv_first[i]; + + if (ISALNUM(c) || c == '_') + continue; + + if (c == '.') { + check_rust = B_FALSE; + continue; + } + + if (check_rust || (c & 0x80) != 0) { + DEMDEBUG("%s: ERROR found invalid character '%c' " + "in '%.*s' at index %zu", + __func__, c, SV_PRINT(sv), i); + return (B_FALSE); + } + } + return (B_TRUE); +} |