summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorJason King <jason.brian.king@gmail.com>2019-09-11 16:52:43 -0500
committerJason King <jason.brian.king@gmail.com>2021-05-13 16:10:25 -0500
commit1cd083931cfd3fb8617c1178f62bce417cfa6af2 (patch)
tree360662333b9a2437dc5f149ded4a0eca104849b1 /usr/src
parent4fe48c6ec9f06cbcce19c4cf97f662b64efde582 (diff)
downloadillumos-joyent-1cd083931cfd3fb8617c1178f62bce417cfa6af2.tar.gz
13780 Add support for rust v0 mangling format
Portions contributed by: Eduard-Mihai Burtescu <eddyb@lyken.rs> Reviewed by: Toomas Soome <tsoome@me.com> Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/lib/libdemangle/Makefile.com16
-rw-r--r--usr/src/lib/libdemangle/common/cxx.c114
-rw-r--r--usr/src/lib/libdemangle/common/cxx_util.c2
-rw-r--r--usr/src/lib/libdemangle/common/demangle-sys.h2
-rw-r--r--usr/src/lib/libdemangle/common/demangle.c5
-rw-r--r--usr/src/lib/libdemangle/common/demangle_int.h75
-rw-r--r--usr/src/lib/libdemangle/common/rust-legacy.c386
-rw-r--r--usr/src/lib/libdemangle/common/rust-v0.c1449
-rw-r--r--usr/src/lib/libdemangle/common/rust-v0puny.c264
-rw-r--r--usr/src/lib/libdemangle/common/rust.c727
-rw-r--r--usr/src/lib/libdemangle/common/rust.h87
-rw-r--r--usr/src/lib/libdemangle/common/str.c2
-rw-r--r--usr/src/lib/libdemangle/common/strview.c3
-rw-r--r--usr/src/lib/libdemangle/common/util.c22
-rw-r--r--usr/src/test/util-tests/tests/demangle/rust.c164
15 files changed, 2800 insertions, 518 deletions
diff --git a/usr/src/lib/libdemangle/Makefile.com b/usr/src/lib/libdemangle/Makefile.com
index 76f2e444fc..4cd8ce653c 100644
--- a/usr/src/lib/libdemangle/Makefile.com
+++ b/usr/src/lib/libdemangle/Makefile.com
@@ -11,12 +11,22 @@
#
# Copyright 2018 Jason King
-# Copyright 2018, Joyent, Inc.
+# Copyright 2019 Joyent, Inc.
#
LIBRARY = libdemangle-sys.a
VERS = .1
-OBJECTS = str.o strview.o util.o cxx_util.o cxx.o demangle.o rust.o
+OBJECTS = \
+ cxx.o \
+ cxx_util.o \
+ demangle.o \
+ rust.o \
+ rust-legacy.o \
+ rust-v0puny.o \
+ rust-v0.o \
+ str.o \
+ strview.o \
+ util.o
include ../../Makefile.lib
@@ -29,10 +39,8 @@ CSTD = $(CSTD_GNU99)
CFLAGS += $(CCVERBOSE)
CPPFLAGS += -I$(SRCDIR) -D_REENTRANT -D__EXTENSIONS__
-
.KEEP_STATE:
all: $(LIBS)
-
include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/libdemangle/common/cxx.c b/usr/src/lib/libdemangle/common/cxx.c
index b0e9566e6c..0ec5e51294 100644
--- a/usr/src/lib/libdemangle/common/cxx.c
+++ b/usr/src/lib/libdemangle/common/cxx.c
@@ -12,24 +12,16 @@
/*
* Copyright 2021 Jason King.
*/
-#include <ctype.h>
#include <errno.h>
-#include <locale.h>
#include <note.h>
#include <string.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
-#include <sys/isa_defs.h>
-#include <sys/debug.h>
#include "demangle-sys.h"
#include "demangle_int.h"
#include "cxx.h"
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof (x) / sizeof (x[0]))
-#endif
-
#define CPP_QUAL_CONST (1U)
#define CPP_QUAL_VOLATILE (2U)
#define CPP_QUAL_RESTRICT (4U)
@@ -47,7 +39,6 @@ typedef struct cpp_db_s {
boolean_t cpp_tag_templates;
boolean_t cpp_fix_forward_references;
boolean_t cpp_try_to_parse_template_args;
- locale_t cpp_loc;
} cpp_db_t;
#define CK(x) \
@@ -81,7 +72,7 @@ static void tpush(cpp_db_t *);
static void tpop(cpp_db_t *);
static void tsave(cpp_db_t *, size_t);
-static boolean_t db_init(cpp_db_t *, sysdem_ops_t *);
+static void db_init(cpp_db_t *, sysdem_ops_t *);
static void db_fini(cpp_db_t *);
static void dump(cpp_db_t *, FILE *);
@@ -96,8 +87,8 @@ static const char *parse_block_invoke(const char *, const char *, cpp_db_t *);
static const char *parse_special_name(const char *, const char *, cpp_db_t *);
static const char *parse_name(const char *, const char *, boolean_t *,
cpp_db_t *);
-static const char *parse_call_offset(const char *, const char *, locale_t);
-static const char *parse_number(const char *, const char *, locale_t);
+static const char *parse_call_offset(const char *, const char *);
+static const char *parse_number(const char *, const char *);
static const char *parse_nested_name(const char *, const char *, boolean_t *,
cpp_db_t *);
static const char *parse_local_name(const char *, const char *, boolean_t *,
@@ -105,7 +96,7 @@ static const char *parse_local_name(const char *, const char *, boolean_t *,
static const char *parse_unscoped_name(const char *, const char *, cpp_db_t *);
static const char *parse_template_args(const char *, const char *, cpp_db_t *);
static const char *parse_substitution(const char *, const char *, cpp_db_t *);
-static const char *parse_discriminator(const char *, const char *, locale_t);
+static const char *parse_discriminator(const char *, const char *);
static const char *parse_cv_qualifiers(const char *, const char *, unsigned *);
static const char *parse_template_param(const char *, const char *, cpp_db_t *);
static const char *parse_decltype(const char *, const char *, cpp_db_t *);
@@ -170,8 +161,8 @@ cpp_demangle(const char *src, size_t srclen, sysdem_ops_t *ops)
char *volatile result = NULL;
cpp_db_t db;
- if (!db_init(&db, ops))
- goto done;
+ db_init(&db, ops);
+
if (setjmp(db.cpp_jmp) != 0)
goto done;
@@ -315,12 +306,12 @@ parse_block_invoke(const char *first, const char *last, cpp_db_t *db)
if (t[0] == '_') {
/* need at least one digit */
- if (t + 1 == last || !isdigit_l(t[1], db->cpp_loc))
+ if (t + 1 == last || !ISDIGIT(t[1]))
return (first);
t += 2;
}
- while (t < last && isdigit_l(t[0], db->cpp_loc))
+ while (t < last && ISDIGIT(t[0]))
t++;
done:
@@ -498,10 +489,10 @@ parse_special_name(const char *first, const char *last, cpp_db_t *db)
break;
case 'c':
nadd_l(db, "covariant return thunk to", 0);
- t1 = parse_call_offset(first + 2, last, db->cpp_loc);
+ t1 = parse_call_offset(first + 2, last);
if (t1 == t)
return (first);
- t = parse_call_offset(t1, last, db->cpp_loc);
+ t = parse_call_offset(t1, last);
if (t == t1)
return (first);
t1 = parse_encoding(t, last, db);
@@ -512,7 +503,7 @@ parse_special_name(const char *first, const char *last, cpp_db_t *db)
t = parse_type(first + 2, last, db);
if (t == first + 2)
return (first);
- t1 = parse_number(t, last, db->cpp_loc);
+ t1 = parse_number(t, last);
if (*t1 != '_')
return (first);
t = parse_type(t1 + 1, last, db);
@@ -536,7 +527,7 @@ parse_special_name(const char *first, const char *last, cpp_db_t *db)
nadd_l(db, "non-virtual thunk to", 0);
}
- t = parse_call_offset(first + 1, last, db->cpp_loc);
+ t = parse_call_offset(first + 1, last);
if (t == first + 1)
return (first);
t1 = parse_encoding(t, last, db);
@@ -583,7 +574,7 @@ parse_special_name(const char *first, const char *last, cpp_db_t *db)
* # virtual base override, with vcall offset
*/
static const char *
-parse_call_offset(const char *first, const char *last, locale_t loc)
+parse_call_offset(const char *first, const char *last)
{
VERIFY3P(first, <=, last);
@@ -596,7 +587,7 @@ parse_call_offset(const char *first, const char *last, locale_t loc)
if (first[0] != 'h' && first[0] != 'v')
return (first);
- t = parse_number(first + 1, last, loc);
+ t = parse_number(first + 1, last);
if (t == first + 1 || t == last || t[0] != '_')
return (first);
@@ -606,7 +597,7 @@ parse_call_offset(const char *first, const char *last, locale_t loc)
if (first[0] == 'h')
return (t);
- t1 = parse_number(t, last, loc);
+ t1 = parse_number(t, last);
if (t == t1 || t1 == last || t1[0] != '_')
return (first);
@@ -712,11 +703,11 @@ parse_local_name(const char *first, const char *last,
if (t[0] == 's') {
nfmt(db, "{0:L}::string literal", "{0:R}");
- return (parse_discriminator(t, last, db->cpp_loc));
+ return (parse_discriminator(t, last));
}
if (t[0] == 'd') {
- t1 = parse_number(t + 1, last, db->cpp_loc);
+ t1 = parse_number(t + 1, last);
if (t1[0] != '_')
return (first);
t1++;
@@ -732,7 +723,7 @@ parse_local_name(const char *first, const char *last,
/* parsed, but ignored */
if (t[0] != 'd')
- t2 = parse_discriminator(t2, last, db->cpp_loc);
+ t2 = parse_discriminator(t2, last);
return (t2);
}
@@ -1992,7 +1983,7 @@ parse_function_param(const char *first, const char *last, cpp_db_t *db)
unsigned cv = 0;
if (first[1] == 'L') {
- t2 = parse_number(t1, last, db->cpp_loc);
+ t2 = parse_number(t1, last);
if (t2 == last || t2[0] != 'p')
return (first);
t1 = t2;
@@ -2002,7 +1993,7 @@ parse_function_param(const char *first, const char *last, cpp_db_t *db)
return (first);
t1 = parse_cv_qualifiers(t1, last, &cv);
- t2 = parse_number(t1, last, db->cpp_loc);
+ t2 = parse_number(t1, last);
if (t2 == last || t2[0] != '_')
return (first);
@@ -2439,8 +2430,7 @@ parse_unnamed_type_name(const char *first, const char *last, cpp_db_t *db)
const char *t2 = NULL;
if (first[1] == 't') {
- while (t1 != last && t1[0] != '_' &&
- isdigit_l(t1[0], db->cpp_loc))
+ while (t1 != last && t1[0] != '_' && ISDIGIT(t1[0]))
t1++;
if (t1[0] != '_')
@@ -2483,7 +2473,8 @@ parse_unnamed_type_name(const char *first, const char *last, cpp_db_t *db)
t2 = t1;
while (t2 != last && t2[0] != '_') {
- if (!isdigit_l(*t2++, db->cpp_loc))
+ char c = *t2++;
+ if (!ISDIGIT(c))
return (first);
}
@@ -2653,7 +2644,7 @@ parse_integer_literal(const char *first, const char *last, const char *fmt,
{
VERIFY3P(first, <=, last);
- const char *t = parse_number(first, last, db->cpp_loc);
+ const char *t = parse_number(first, last);
const char *start = first;
if (t == first || t == last || t[0] != 'E')
@@ -2736,11 +2727,9 @@ parse_floating_literal(const char *first, const char *last, cpp_db_t *db)
if (!is_xdigit(t[0]))
return (first);
- unsigned d1 = isdigit_l(t[0], db->cpp_loc) ?
- t[0] - '0' : t[0] - 'a' + 10;
+ unsigned d1 = ISDIGIT(t[0]) ? t[0] - '0' : t[0] - 'a' + 10;
t++;
- unsigned d0 = isdigit_l(t[0], db->cpp_loc) ?
- t[0] - '0' : t[0] - 'a' + 10;
+ unsigned d0 = ISDIGIT(t[0]) ? t[0] - '0' : t[0] - 'a' + 10;
*e = (d1 << 4) + d0;
}
@@ -2749,11 +2738,9 @@ parse_floating_literal(const char *first, const char *last, cpp_db_t *db)
if (!is_xdigit(t[0]))
return (first);
- unsigned d0 = isdigit_l(t[0], db->cpp_loc) ?
- t[0] - '0' : t[0] - 'a' + 10;
+ unsigned d0 = ISDIGIT(t[0]) ? t[0] - '0' : t[0] - 'a' + 10;
t--;
- unsigned d1 = isdigit_l(t[0], db->cpp_loc) ?
- t[0] - '0' : t[0] - 'a' + 10;
+ unsigned d1 = ISDIGIT(t[0]) ? t[0] - '0' : t[0] - 'a' + 10;
*e = (d1 << 4) + d0;
}
@@ -2898,7 +2885,7 @@ parse_expr_primary(const char *first, const char *last, cpp_db_t *db)
return (t + 1);
const char *n;
- for (n = t; n != last && isdigit_l(n[0], db->cpp_loc); n++)
+ for (n = t; n != last && ISDIGIT(n[0]); n++)
;
if (n == last || nempty(db) || n[0] != 'E')
return (first);
@@ -3046,7 +3033,7 @@ parse_operator_name(const char *first, const char *last, cpp_db_t *db)
}
if (first[0] == 'v') {
- if (!isdigit_l(first[1], db->cpp_loc))
+ if (!ISDIGIT(first[1]))
return (first);
t = parse_source_name(first + 2, last, db);
@@ -3155,19 +3142,19 @@ parse_builtin_type(const char *first, const char *last, cpp_db_t *db)
}
static const char *
-parse_base36(const char *first, const char *last, size_t *val, locale_t loc)
+parse_base36(const char *first, const char *last, size_t *val)
{
VERIFY3P(first, <=, last);
const char *t;
for (t = first, *val = 0; t != last; t++) {
- if (!isdigit_l(t[0], loc) && !isupper_l(t[0], loc))
+ if (!ISDIGIT(t[0]) && !ISUPPER(t[0]))
return (t);
*val *= 36;
- if (isdigit_l(t[0], loc))
+ if (ISDIGIT(t[0]))
*val += t[0] - '0';
else
*val += t[0] - 'A' + 10;
@@ -3206,7 +3193,7 @@ parse_substitution(const char *first, const char *last, cpp_db_t *db)
size_t n = 0;
if (t[0] != '_') {
- t = parse_base36(first + 1, last, &n, db->cpp_loc);
+ t = parse_base36(first + 1, last, &n);
if (t == first + 1 || t[0] != '_')
return (first);
@@ -3240,7 +3227,7 @@ parse_source_name(const char *first, const char *last, cpp_db_t *db)
const char *t = NULL;
size_t n = 0;
- for (t = first; t != last && isdigit_l(t[0], db->cpp_loc); t++) {
+ for (t = first; t != last && ISDIGIT(t[0]); t++) {
/* make sure we don't overflow */
size_t nn = n * 10;
if (nn < n)
@@ -3287,8 +3274,8 @@ parse_vector_type(const char *first, const char *last, cpp_db_t *db)
const char *t = first + 2;
const char *t1 = NULL;
- if (isdigit_l(first[2], db->cpp_loc) && first[2] != '0') {
- t1 = parse_number(t, last, db->cpp_loc);
+ if (ISDIGIT(first[2]) && first[2] != '0') {
+ t1 = parse_number(t, last);
if (t1 == last || t1 + 1 == last || t1[0] != '_')
return (first);
@@ -3376,8 +3363,8 @@ parse_array_type(const char *first, const char *last, cpp_db_t *db)
size_t n = nlen(db);
if (t[0] != '_') {
- if (isdigit_l(t[0], db->cpp_loc) && t[0] != '0') {
- t1 = parse_number(t, last, db->cpp_loc);
+ if (ISDIGIT(t[0]) && t[0] != '0') {
+ t1 = parse_number(t, last);
if (t1 == last)
return (first);
@@ -3765,7 +3752,7 @@ parse_template_param(const char *first, const char *last, cpp_db_t *db)
size_t idx = 0;
while (t != last && t[0] != '_') {
- if (!isdigit_l(t[0], db->cpp_loc))
+ if (!ISDIGIT(t[0]))
return (first);
idx *= 10;
@@ -3870,7 +3857,7 @@ parse_template_args(const char *first, const char *last, cpp_db_t *db)
* extension := decimal-digit+ # at the end of string
*/
static const char *
-parse_discriminator(const char *first, const char *last, locale_t loc)
+parse_discriminator(const char *first, const char *last)
{
VERIFY3P(first, <=, last);
@@ -3879,8 +3866,8 @@ parse_discriminator(const char *first, const char *last, locale_t loc)
if (first == last)
return (first);
- if (isdigit_l(first[0], loc)) {
- for (t = first; t != last && isdigit_l(t[0], loc); t++)
+ if (ISDIGIT(first[0])) {
+ for (t = first; t != last && ISDIGIT(t[0]); t++)
;
/* not at the end of the string */
@@ -3893,13 +3880,13 @@ parse_discriminator(const char *first, const char *last, locale_t loc)
}
t = first + 1;
- if (isdigit_l(t[0], loc))
+ if (ISDIGIT(t[0]))
return (t + 1);
if (t[0] != '_' || t + 1 == last)
return (first);
- for (t++; t != last && isdigit_l(t[0], loc); t++)
+ for (t++; t != last && ISDIGIT(t[0]); t++)
;
if (t == last || t[0] != '_')
return (first);
@@ -3937,13 +3924,13 @@ parse_cv_qualifiers(const char *first, const char *last, unsigned *cv)
* <number> ::= [n] <non-negative decimal integer>
*/
static const char *
-parse_number(const char *first, const char *last, locale_t loc)
+parse_number(const char *first, const char *last)
{
VERIFY3P(first, <=, last);
const char *t = first;
- if (first == last || (first[0] != 'n' && !isdigit_l(first[0], loc)))
+ if (first == last || (first[0] != 'n' && !ISDIGIT(first[0])))
return (first);
if (t[0] == 'n')
@@ -3952,7 +3939,7 @@ parse_number(const char *first, const char *last, locale_t loc)
if (t[0] == '0')
return (t + 1);
- while (isdigit_l(t[0], loc))
+ while (ISDIGIT(t[0]))
t++;
return (t);
@@ -4051,7 +4038,7 @@ tsave(cpp_db_t *db, size_t amt)
CK(templ_save(&db->cpp_name, amt, &db->cpp_templ));
}
-static boolean_t
+static void
db_init(cpp_db_t *db, sysdem_ops_t *ops)
{
(void) memset(db, 0, sizeof (*db));
@@ -4062,8 +4049,6 @@ db_init(cpp_db_t *db, sysdem_ops_t *ops)
db->cpp_tag_templates = B_TRUE;
db->cpp_try_to_parse_template_args = B_TRUE;
tpush(db);
- db->cpp_loc = newlocale(LC_CTYPE_MASK, "C", 0);
- return ((db->cpp_loc != NULL) ? B_TRUE : B_FALSE);
}
static void
@@ -4072,7 +4057,6 @@ db_fini(cpp_db_t *db)
name_fini(&db->cpp_name);
sub_fini(&db->cpp_subs);
templ_fini(&db->cpp_templ);
- freelocale(db->cpp_loc);
(void) memset(db, 0, sizeof (*db));
}
diff --git a/usr/src/lib/libdemangle/common/cxx_util.c b/usr/src/lib/libdemangle/common/cxx_util.c
index 91abb504d3..f4ca32fae5 100644
--- a/usr/src/lib/libdemangle/common/cxx_util.c
+++ b/usr/src/lib/libdemangle/common/cxx_util.c
@@ -13,8 +13,6 @@
* Copyright 2017 Jason King
*/
-#include <sys/debug.h>
-#include <sys/sysmacros.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
diff --git a/usr/src/lib/libdemangle/common/demangle-sys.h b/usr/src/lib/libdemangle/common/demangle-sys.h
index 3452d39667..21b2624cf3 100644
--- a/usr/src/lib/libdemangle/common/demangle-sys.h
+++ b/usr/src/lib/libdemangle/common/demangle-sys.h
@@ -26,7 +26,7 @@ extern "C" {
typedef enum sysdem_lang_e {
SYSDEM_LANG_AUTO,
SYSDEM_LANG_CPP,
- SYSDEM_LANG_RUST
+ SYSDEM_LANG_RUST,
} sysdem_lang_t;
typedef struct sysdem_alloc_s {
diff --git a/usr/src/lib/libdemangle/common/demangle.c b/usr/src/lib/libdemangle/common/demangle.c
index bf7c9ab8c7..f8f322757a 100644
--- a/usr/src/lib/libdemangle/common/demangle.c
+++ b/usr/src/lib/libdemangle/common/demangle.c
@@ -11,7 +11,7 @@
/*
* Copyright 2021 Jason King
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#include <stdlib.h>
@@ -86,6 +86,8 @@ is_mangled(const char *str, size_t n)
(void) sv_consume_if_c(&sv, '_');
if (sv_consume_if_c(&sv, 'Z'))
return (B_TRUE);
+ if (sv_consume_if_c(&sv, 'R'))
+ return (B_TRUE);
return (B_FALSE);
}
@@ -101,6 +103,7 @@ char *
sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops)
{
char *res = NULL;
+
/*
* While the language specific demangler code can handle non-NUL
* terminated strings, we currently don't expose this to consumers.
diff --git a/usr/src/lib/libdemangle/common/demangle_int.h b/usr/src/lib/libdemangle/common/demangle_int.h
index 66a34cf41d..d4c227a87f 100644
--- a/usr/src/lib/libdemangle/common/demangle_int.h
+++ b/usr/src/lib/libdemangle/common/demangle_int.h
@@ -11,24 +11,97 @@
/*
* Copyright 2017 Jason King
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _DEMANGLE_INT_H
#define _DEMANGLE_INT_H
+#include <inttypes.h>
#include <stdio.h>
+#include <sys/byteorder.h>
+#include <sys/ctype.h> /* Use ASCII ISXXXX() macros */
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+#include <sys/isa_defs.h>
#include "demangle-sys.h"
#ifdef __cplusplus
extern "C" {
#endif
+#ifdef __CHECKER__
+/*
+ * smatch seems to have a bug which chokes on the builtins, so
+ * we just have it fallback to the non-builtin definitions
+ */
+#elif defined(__GNUC__) && __GNUC__ >= 5
+/*
+ * The overflow builtins exist in every GCC release from 5 onwards, so only
+ * the major version is tested.  Testing the minor version as well would
+ * wrongly exclude the x.0 and x.1 releases of every later major version.
+ */
+#define USE_BUILTIN_OVERFLOW
+#elif defined(__clang__)
+#define USE_BUILTIN_OVERFLOW
+#endif
+
+#ifdef USE_BUILTIN_OVERFLOW
+/*
+ * Overflow-checked unsigned 64-bit arithmetic built on the compiler
+ * builtins.  Each helper returns B_TRUE when the operation overflowed and
+ * B_FALSE otherwise; the result is passed back through v.
+ */
+static inline boolean_t
+mul_overflow(uint64_t a, uint64_t b, uint64_t *v)
+{
+	boolean_t overflowed = __builtin_mul_overflow(a, b, v);
+
+	return (overflowed);
+}
+
+static inline boolean_t
+add_overflow(uint64_t a, uint64_t b, uint64_t *v)
+{
+	boolean_t overflowed = __builtin_add_overflow(a, b, v);
+
+	return (overflowed);
+}
+
+static inline boolean_t
+sub_overflow(uint64_t a, uint64_t b, uint64_t *v)
+{
+	boolean_t overflowed = __builtin_sub_overflow(a, b, v);
+
+	return (overflowed);
+}
+#else
+/*
+ * Portable versions of the helpers above for compilers without the
+ * builtins.  Each returns B_TRUE on overflow, in which case *v is not
+ * written; otherwise the result is stored in *v and B_FALSE is returned.
+ */
+static inline boolean_t
+mul_overflow(uint64_t a, uint64_t b, uint64_t *v)
+{
+	const uint64_t product = a * b;
+
+	/* Dividing the (possibly wrapped) product back out must yield b */
+	if (a == 0 || product / a == b) {
+		*v = product;
+		return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
+static inline boolean_t
+add_overflow(uint64_t a, uint64_t b, uint64_t *v)
+{
+	const uint64_t sum = a + b;
+
+	/* A wrapped sum is smaller than at least one of its operands */
+	if (sum >= a && sum >= b) {
+		*v = sum;
+		return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
+static inline boolean_t
+sub_overflow(uint64_t a, uint64_t b, uint64_t *v)
+{
+	const uint64_t diff = a - b;
+
+	/* A wrapped difference is larger than the minuend */
+	if (diff <= a) {
+		*v = diff;
+		return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+#endif
+
extern sysdem_ops_t *sysdem_ops_default;
char *cpp_demangle(const char *, size_t, sysdem_ops_t *);
char *rust_demangle(const char *, size_t, sysdem_ops_t *);
+struct custr_alloc;
+
void *zalloc(sysdem_ops_t *, size_t);
+void *xcalloc(sysdem_ops_t *, size_t, size_t);
void *xrealloc(sysdem_ops_t *, void *, size_t, size_t);
void xfree(sysdem_ops_t *, void *, size_t);
char *xstrdup(sysdem_ops_t *, const char *);
diff --git a/usr/src/lib/libdemangle/common/rust-legacy.c b/usr/src/lib/libdemangle/common/rust-legacy.c
new file mode 100644
index 0000000000..5b1518f619
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/rust-legacy.c
@@ -0,0 +1,386 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Jason King
+ */
+
+#include <errno.h>
+#include <libcustr.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "rust.h"
+
+/*
+ * Unfortunately, there is currently no official specification for the legacy
+ * rust name mangling. This is an attempt to document the understanding of the
+ * mangling used here. It is based off examination of
+ * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/
+ *
+ * A mangled rust name is:
+ * <prefix> <name>
+ *
+ * <prefix> ::= _Z
+ * __Z
+ *
+ * <name> ::= N <name-segment>+ [<hash>] E
+ *
+ * <name-segment> ::= <len> <name-chars>{len}
+ *
+ * <len> ::= [1-9][0-9]*
+ *
+ * <name-chars> ::= <[A-Za-z]> <[A-Za-z0-9]>*
+ * <separator>
+ * <special>
+ *
+ * <separator> ::= '..' # '::'
+ *
+ * <special> ::= $SP$ # '@'
+ * $BP$ # '*'
+ * $RF$ # '&'
+ * $LT$ # '<'
+ * $GT$ # '>'
+ * $LP$ # '('
+ * $RP$ # ')'
+ * $C$ # ','
+ *
+ * <hash> := <len> h <hex-digits>+
+ *
+ * <hex-digits> := <[0-9a-f]>
+ */
+
+/*
+ * The '$'-delimited escape sequences listed as <special> above, each paired
+ * with the single character that replaces it in the demangled output.
+ */
+static const struct rust_charmap {
+	const char *ruc_seq;
+	char ruc_ch;
+} rust_charmap[] = {
+	{ .ruc_seq = "$SP$", .ruc_ch = '@' },
+	{ .ruc_seq = "$BP$", .ruc_ch = '*' },
+	{ .ruc_seq = "$RF$", .ruc_ch = '&' },
+	{ .ruc_seq = "$LT$", .ruc_ch = '<' },
+	{ .ruc_seq = "$GT$", .ruc_ch = '>' },
+	{ .ruc_seq = "$LP$", .ruc_ch = '(' },
+	{ .ruc_seq = "$RP$", .ruc_ch = ')' },
+	{ .ruc_seq = "$C$", .ruc_ch = ',' },
+};
+static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap);
+
+static boolean_t rustleg_valid_sym(const strview_t *);
+static boolean_t rustleg_parse_name(rust_state_t *, strview_t *);
+static boolean_t rustleg_parse_hash(rust_state_t *, strview_t *);
+static boolean_t rustleg_parse_special(rust_state_t *, strview_t *);
+static boolean_t rustleg_add_sep(rust_state_t *);
+
+/*
+ * Entry point for demangling a legacy-format rust symbol held in sv, with
+ * the output accumulated in st.  Returns B_TRUE only when the symbol
+ * contains no 8-bit characters, ends in 'E', parses as a <name>, and has
+ * nothing left over afterwards.
+ */
+boolean_t
+rust_demangle_legacy(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	boolean_t ok = B_FALSE;
+
+	if (!rustleg_valid_sym(sv)) {
+		/* The whole symbol must consist of valid characters */
+		st->rs_error = EINVAL;
+	} else if (sv_peek(sv, -1) != 'E') {
+		DEMDEBUG("ERROR: string does not end with 'E'");
+		st->rs_error = EINVAL;
+	} else if (!rustleg_parse_name(st, sv)) {
+		/* rs_error is left exactly as the name parser set it */
+	} else if (sv_remaining(sv) != 0) {
+		DEMDEBUG("ERROR: trailing characters in name");
+		st->rs_error = EINVAL;
+	} else {
+		ok = B_TRUE;
+	}
+
+	return (ok);
+}
+
+/*
+ * Parse one <name-segment> -- a decimal length followed by that many
+ * characters -- from svp and append its demangled form to the output in st.
+ * Unless 'first' is set, a '::' separator is emitted before the segment.
+ * Within the segment, '..' becomes '::', the '$...$' escapes are translated,
+ * and a trailing hash (if this is the final segment) is copied verbatim.
+ *
+ * Returns B_TRUE once the segment has been consumed from svp, and B_FALSE
+ * on any failure.
+ */
+static boolean_t
+rustleg_parse_name_segment(rust_state_t *st, strview_t *svp, boolean_t first)
+{
+	strview_t orig;
+	strview_t name;
+	uint64_t len;
+	size_t rem;
+	boolean_t last = B_FALSE;
+
+	if (HAS_ERROR(st) || sv_remaining(svp) == 0)
+		return (B_FALSE);
+
+	sv_init_sv(&orig, svp);
+
+	if (!rust_parse_base10(st, svp, &len)) {
+		DEMDEBUG("ERROR: no leading length");
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	rem = sv_remaining(svp);
+
+	if (rem < len) {
+		DEMDEBUG("ERROR: segment length (%" PRIu64 ") > remaining "
+		    "bytes in string (%zu)", len, rem);
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/* Is this the last segment before the terminating E? */
+	if (rem == len + 1) {
+		VERIFY3U(sv_peek(svp, -1), ==, 'E');
+		last = B_TRUE;
+	}
+
+	if (!first && !rustleg_add_sep(st))
+		return (B_FALSE);
+
+	/* Reduce length of seg to the length we parsed */
+	(void) sv_init_sv_range(&name, svp, len);
+
+	DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name));
+
+	/*
+	 * A rust hash starts with 'h', and is the last component of a name
+	 * before the terminating 'E'. It is however not always present
+	 * in every mangled symbol, and a last segment that starts with 'h'
+	 * could be confused for it, so failing to parse it just means
+	 * we don't have a trailing hash.
+	 */
+	if (sv_peek(&name, 0) == 'h' && last) {
+		if (rustleg_parse_hash(st, &name))
+			goto done;
+
+		/*
+		 * However any error other than 'not a hash' (e.g. ENOMEM)
+		 * means we should fail, so report failure here instead of
+		 * falling into the success path below.
+		 */
+		if (st->rs_error != 0)
+			return (B_FALSE);
+	}
+
+	/* A '_' followed by $ is ignored at the start of a name segment */
+	if (sv_peek(&name, 0) == '_' && sv_peek(&name, 1) == '$')
+		(void) sv_consume_n(&name, 1);
+
+	while (sv_remaining(&name) > 0) {
+		switch (sv_peek(&name, 0)) {
+		case '$':
+			/* An unrecognized '$' sequence is copied literally */
+			if (rustleg_parse_special(st, &name))
+				continue;
+			break;
+		case '.':
+			/* Convert '..' to '::' */
+			if (sv_peek(&name, 1) != '.')
+				break;
+
+			if (!rustleg_add_sep(st))
+				return (B_FALSE);
+
+			sv_consume_n(&name, 2);
+			continue;
+		default:
+			break;
+		}
+
+		if (!rust_appendc(st, sv_consume_c(&name))) {
+			SET_ERROR(st);
+			return (B_FALSE);
+		}
+	}
+
+done:
+	/* 'name' was a separate view, so advance the caller's view too */
+	sv_consume_n(svp, len);
+
+	VERIFY3P(orig.sv_first, <=, svp->sv_first);
+	DEMDEBUG("%s: consumed '%.*s'", __func__,
+	    (int)(uintptr_t)(svp->sv_first - orig.sv_first), orig.sv_first);
+	return (B_TRUE);
+}
+
+/*
+ * Parse N (<num><name>{num})+ [<num>h<hex digits>]E
+ *
+ * The leading 'N', every name segment up to the closing 'E', and the 'E'
+ * itself are consumed from svp.  Returns B_FALSE if any piece is missing
+ * or a segment fails to parse.
+ */
+static boolean_t
+rustleg_parse_name(rust_state_t *st, strview_t *svp)
+{
+	strview_t start;
+
+	/* Remember where we began for the debug output below */
+	sv_init_sv(&start, svp);
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&start));
+
+	if (sv_remaining(svp) == 0) {
+		DEMDEBUG("%s: empty name", __func__);
+		return (B_FALSE);
+	}
+
+	if (!sv_consume_if_c(svp, 'N')) {
+		DEMDEBUG("%s: does not start with 'N'", __func__);
+		return (B_FALSE);
+	}
+
+	/* Only the first segment is emitted without a leading separator */
+	for (boolean_t is_first = B_TRUE;
+	    sv_remaining(svp) > 0 && sv_peek(svp, 0) != 'E';
+	    is_first = B_FALSE) {
+		if (!rustleg_parse_name_segment(st, svp, is_first))
+			return (B_FALSE);
+	}
+
+	if (!sv_consume_if_c(svp, 'E')) {
+		DEMDEBUG("%s: ERROR no terminating 'E'", __func__);
+		return (B_FALSE);
+	}
+
+	VERIFY3P(start.sv_first, <=, svp->sv_first);
+	DEMDEBUG("%s: consumed '%.*s'", __func__,
+	    (int)(uintptr_t)(svp->sv_first - start.sv_first), start.sv_first);
+
+	return (B_TRUE);
+}
+
+/*
+ * Try to parse svp (which must begin with 'h') as a trailing hash: an 'h'
+ * followed only by lower-case hex digits.  On success the whole of svp is
+ * consumed and copied to the output in st, and B_TRUE is returned.
+ *
+ * If svp is not a hash, B_FALSE is returned and nothing is consumed or
+ * appended, so the caller can go on to treat the same text as an ordinary
+ * name segment.  B_FALSE is also returned if appending to the output fails.
+ */
+static boolean_t
+rustleg_parse_hash(rust_state_t *st, strview_t *svp)
+{
+	strview_t sv;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	/*
+	 * Validate using a scratch copy first.  Emitting characters while
+	 * still validating would leave partial output behind (and swallow
+	 * the offending character) when the segment merely starts with 'h'.
+	 */
+	sv_init_sv(&sv, svp);
+	VERIFY(sv_consume_if_c(&sv, 'h'));
+
+	while (sv_remaining(&sv) > 0) {
+		char c = sv_consume_c(&sv);
+
+		switch (c) {
+		/*
+		 * The upper-case hex digits (A-F) are excluded as valid
+		 * hash values for several reasons:
+		 *
+		 * 1. It would result in two different possible names for
+		 * the same function, leading to ambiguity in linking (among
+		 * other things).
+		 *
+		 * 2. It would cause potential ambiguity in parsing -- is a
+		 * trailing 'E' part of the hash, or the terminating character
+		 * in the mangled name?
+		 *
+		 * 3. No examples were able to be found in the wild where
+		 * uppercase digits are used, and other rust demanglers all
+		 * seem to assume the hash must contain lower-case hex digits.
+		 */
+		case '0': case '1': case '2': case '3':
+		case '4': case '5': case '6': case '7':
+		case '8': case '9': case 'a': case 'b':
+		case 'c': case 'd': case 'e': case 'f':
+			break;
+		default:
+			return (B_FALSE);
+		}
+	}
+
+	/* Known to be a hash: copy the 'h' and every digit to the output */
+	while (sv_remaining(svp) > 0) {
+		if (!rust_appendc(st, sv_consume_c(svp)))
+			return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Translate the '$'-delimited escape at the front of svp.  Two forms are
+ * recognized: the fixed sequences in rust_charmap, and $uXXXX$, where
+ * XXXX is one to four lower-case hex digits giving a code point that is
+ * appended as UTF-8.
+ *
+ * On success the escape is consumed from svp, its replacement is appended
+ * to the output in st, and B_TRUE is returned.  B_FALSE is returned if the
+ * text is not a recognized escape or if appending fails.  The $u form is
+ * examined through a copy, so rejecting it consumes nothing from svp.
+ */
+static boolean_t
+rustleg_parse_special(rust_state_t *restrict st, strview_t *restrict svp)
+{
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	if (sv_peek(svp, 0) != '$')
+		return (B_FALSE);
+
+	for (size_t i = 0; i < rust_charmap_sz; i++) {
+		if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) {
+			if (!rust_appendc(st, rust_charmap[i].ruc_ch))
+				return (B_FALSE);
+			return (B_TRUE);
+		}
+	}
+
+	/* Handle $uXXXX$ */
+
+	strview_t sv;
+	uint32_t val = 0;
+	uint_t ndigits = 0;
+	boolean_t terminated = B_FALSE;
+
+	sv_init_sv(&sv, svp);
+
+	/* We peeked at this earlier, so it should still be there */
+	VERIFY(sv_consume_if_c(&sv, '$'));
+
+	if (!sv_consume_if_c(&sv, 'u'))
+		return (B_FALSE);
+
+	while (sv_remaining(&sv) > 0) {
+		uint32_t cval = 0;
+		char c = sv_consume_c(&sv);
+
+		if (c == '$') {
+			terminated = B_TRUE;
+			break;
+		}
+
+		if (c >= '0' && c <= '9')
+			cval = c - '0';
+		else if (c >= 'a' && c <= 'f')
+			cval = c - 'a' + 10;
+		else
+			return (B_FALSE);
+
+		/*
+		 * Enforce the limit only when a fifth digit shows up, so
+		 * that a full four digit escape can reach its closing '$'.
+		 */
+		if (ndigits == 4)
+			return (B_FALSE);
+
+		val <<= 4;
+		val |= cval;
+		ndigits++;
+	}
+
+	/* Running out of input or having no digits is not an escape */
+	if (!terminated || ndigits == 0)
+		return (B_FALSE);
+
+	if (!rust_append_utf8_c(st, val))
+		return (B_FALSE);
+
+	/* '$' + 'u' + the digits + the closing '$' */
+	sv_consume_n(svp, ndigits + 3);
+	return (B_TRUE);
+}
+
+/*
+ * Append the '::' path separator to the output in st.  Nothing is appended
+ * (and B_FALSE is returned) if st already has an error.
+ */
+static boolean_t
+rustleg_add_sep(rust_state_t *st)
+{
+	if (!HAS_ERROR(st))
+		return (rust_append(st, "::"));
+
+	return (B_FALSE);
+}
+
+/*
+ * Check that no byte remaining in sv has its high bit set.  Returns B_TRUE
+ * if every byte is 7-bit, B_FALSE on the first one that is not.
+ */
+static boolean_t
+rustleg_valid_sym(const strview_t *sv)
+{
+	for (size_t idx = 0; idx < sv->sv_rem; idx++) {
+		const char ch = sv->sv_first[idx];
+
+		if ((ch & 0x80) != 0) {
+			DEMDEBUG("%s: ERROR found 8-bit character '%c' in "
+			    "'%.*s' at index %zu", __func__, ch,
+			    SV_PRINT(sv), idx);
+			return (B_FALSE);
+		}
+	}
+
+	return (B_TRUE);
+}
diff --git a/usr/src/lib/libdemangle/common/rust-v0.c b/usr/src/lib/libdemangle/common/rust-v0.c
new file mode 100644
index 0000000000..598d8457c9
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/rust-v0.c
@@ -0,0 +1,1449 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Jason King
+ */
+
+/* BEGIN CSTYLED */
+
+/*
+ * This implements the 'symbol_name_mangling_v2' demangling for rust as
+ * described in Rust RFC 2603 as opposed to the original (now called
+ * legacy) mangling older versions of rust used (implemented in rust.c).
+ *
+ * The specification can be viewed at:
+ * https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md
+ */
+
+/* END CSTYLED */
+
+#include <errno.h>
+#include <libcustr.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "rust.h"
+
+/*
+ * Help track amount of additional output added to rs_demangled across
+ * a function call (to allow that portion to be output for debugging)
+ */
+#define SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled)
+#define CSTR_END(_st, _len) \
+ ((int)(custr_len((_st)->rs_demangled) - (_len))), \
+ custr_cstr((_st)->rs_demangled) + (_len)
+
+/*
+ * How the value of a const generic is interpreted, as reported by
+ * rustv0_classify_const_type() for the const's type character.
+ */
+typedef enum const_type_class {
+ CTC_INVALID = -1, /* not an accepted const type character */
+ CTC_UNSIGNED, /* 'h', 't', 'm', 'y', 'o', 'j' */
+ CTC_SIGNED, /* 'a', 'i', 'l', 'n', 's', 'x' */
+ CTC_CHAR, /* 'c' */
+ CTC_BOOL, /* 'b' */
+} const_type_class_t;
+
+/*
+ * Sometimes, parsing something is optional. In this case a failure to
+ * parse is fine, however we still want to consider a fatal error as
+ * failure.
+ */
+#define OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st))
+
+static boolean_t rustv0_valid_sym(const strview_t *);
+static const_type_class_t rustv0_classify_const_type(char);
+static boolean_t rustv0_parse_hex_num(rust_state_t *restrict,
+ strview_t *restrict, uint64_t *restrict);
+static boolean_t rustv0_parse_base62(rust_state_t *restrict,
+ strview_t *restrict, uint64_t *restrict);
+
+static boolean_t rustv0_parse_undisambiguated_identifier(
+ rust_state_t *restrict, strview_t *restrict, boolean_t);
+static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict,
+ strview_t *restrict, uint64_t *restrict);
+
+static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict,
+ boolean_t);
+static boolean_t rustv0_parse_impl_path(rust_state_t *restrict,
+ strview_t *restrict, boolean_t);
+static boolean_t rustv0_parse_nested_path(rust_state_t *restrict,
+ strview_t *restrict, boolean_t);
+static boolean_t rustv0_parse_basic_type(rust_state_t *restrict,
+ strview_t *restrict);
+static boolean_t rustv0_parse_backref(rust_state_t *restrict,
+ strview_t *restrict,
+ boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t),
+ boolean_t);
+static boolean_t rustv0_parse_lifetime(rust_state_t *restrict,
+ strview_t *restrict);
+static boolean_t rustv0_parse_const(rust_state_t *restrict,
+ strview_t *restrict, boolean_t);
+static boolean_t rustv0_parse_fnsig(rust_state_t *restrict,
+ strview_t *restrict);
+static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict,
+ strview_t *restrict);
+static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict,
+ strview_t *restrict, boolean_t);
+
+/*
+ * Entry point for demangling a rust v0 (Rust RFC 2603) symbol name.  The
+ * demangled output is accumulated in st.
+ *
+ * Returns B_TRUE on success and B_FALSE on failure.  This function itself
+ * sets st->rs_error to EINVAL when the name contains a character that
+ * rustv0_valid_sym() rejects, or when unexpected data trails the name.
+ */
+boolean_t
+rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv)
+{
+ boolean_t save_skip;
+ boolean_t ret;
+
+ /* Make sure all the characters are valid */
+ if (!rustv0_valid_sym(sv)) {
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ }
+
+ /*
+ * <symbol-name> = "_R" [<decimal-number>] <path>
+ * [<instantiating-crate>]
+ *
+ * We've already parsed the prefix in rust_demangle(), as well
+ * as made sure there's no [<decimal-number>] present, so
+ * start with <path>.
+ */
+ if (!rustv0_parse_path(st, sv, B_TRUE))
+ return (B_FALSE);
+
+ /* [<instantiating crate>] -- parse but don't save */
+ SKIP_BEGIN(st, save_skip);
+ ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE));
+ SKIP_END(st, save_skip);
+ if (!ret)
+ return (B_FALSE);
+
+ /* If nothing's left, we know we're done */
+ if (sv_remaining(sv) == 0)
+ return (!HAS_ERROR(st));
+
+ /*
+ * LLVM sometimes will suffix symbols starting with a '.'
+ * followed by extra data. For things that start with
+ * ".llvm.", we discard the rest of the string. For
+ * other things that start with '.', we copy the
+ * results to the final string. This matches
+ * what the rust native demangler crate does, and
+ * we don't see a reason to deviate from their
+ * behavior.
+ */
+ if (sv_consume_if(sv, ".llvm."))
+ return (!HAS_ERROR(st));
+
+ /* Anything else left over must be a '.' suffix */
+ if (sv_peek(sv, 0) != '.') {
+ DEMDEBUG("%s: Unexpected trailing data at the end of the "
+ "name: '%.*s'", __func__, SV_PRINT(sv));
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ }
+
+ return (rust_append_sv(st, sv_remaining(sv), sv));
+}
+
+/*
+ * Parse a possibly empty sequence of items that ends with an 'E'.  Every
+ * item is parsed by 'fn' (which is handed 'bval'), and 'sep' is written
+ * between the output of consecutive items.  When 'countp' isn't NULL, the
+ * number of items parsed is added to *countp once the terminating 'E' is
+ * found.
+ */
+static boolean_t
+rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t),
+    const char *restrict sep, boolean_t bval, size_t *restrict countp)
+{
+	size_t nitems;
+
+	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
+
+	for (nitems = 0; sv_remaining(sv) > 0; nitems++) {
+		if (sv_consume_if_c(sv, 'E')) {
+			if (countp != NULL)
+				*countp += nitems;
+			return (B_TRUE);
+		}
+
+		/* Only items after the first are preceded by a separator */
+		if (nitems > 0 && !rust_append(st, sep))
+			return (B_FALSE);
+
+		if (!fn(st, sv, bval))
+			return (B_FALSE);
+	}
+
+	/*
+	 * The input ran out before the terminating 'E' showed up, so the
+	 * list is malformed.
+	 */
+	return (B_FALSE);
+}
+
+/*
+ * Parse one of the unsigned integer basic types and append its rust name.
+ * If the next character isn't one of them, sv is left where it was and
+ * B_FALSE is returned.
+ */
+static boolean_t
+rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv)
+{
+	static const struct {
+		char		ut_code;
+		const char	*ut_name;
+	} uint_types[] = {
+		{ 'h', "u8" },
+		{ 't', "u16" },
+		{ 'm', "u32" },
+		{ 'y', "u64" },
+		{ 'o', "u128" },
+		{ 'j', "usize" },
+	};
+	const size_t ntypes = sizeof (uint_types) / sizeof (uint_types[0]);
+	strview_t orig;
+	char code;
+
+	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	sv_init_sv(&orig, sv);
+	code = sv_consume_c(sv);
+
+	for (size_t i = 0; i < ntypes; i++) {
+		if (uint_types[i].ut_code != code)
+			continue;
+
+		DEMDEBUG("%s: %c -> %s", __func__, code,
+		    uint_types[i].ut_name);
+		return (rust_append(st, uint_types[i].ut_name));
+	}
+
+	/* Not an unsigned integer type -- put the character back */
+	sv_init_sv(sv, &orig);
+	return (B_FALSE);
+}
+
+/*
+ * Parse a single character <basic-type> and append its rust name.  The
+ * unsigned integer types are delegated to rustv0_parse_uint_type(); the
+ * remaining ones are looked up here.  If the next character isn't a basic
+ * type, sv is left where it was and B_FALSE is returned.
+ */
+static boolean_t
+rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	static const struct {
+		char		bt_code;
+		const char	*bt_name;
+	} basic_types[] = {
+		{ 'a', "i8" },
+		{ 'b', "bool" },
+		{ 'c', "char" },
+		{ 'd', "f64" },
+		{ 'e', "str" },
+		{ 'f', "f32" },
+		{ 'i', "isize" },
+		{ 'l', "i32" },
+		{ 'n', "i128" },
+		{ 'p', "_" },
+		{ 's', "i16" },
+		{ 'u', "()" },
+		{ 'v', "..." },
+		{ 'x', "i64" },
+		{ 'z', "!" },
+	};
+	const size_t ntypes = sizeof (basic_types) / sizeof (basic_types[0]);
+	strview_t orig;
+	char code;
+
+	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	if (rustv0_parse_uint_type(st, sv))
+		return (B_TRUE);
+
+	sv_init_sv(&orig, sv);
+	code = sv_consume_c(sv);
+
+	for (size_t i = 0; i < ntypes; i++) {
+		if (basic_types[i].bt_code != code)
+			continue;
+
+		DEMDEBUG("%s: %c -> %s", __func__, code,
+		    basic_types[i].bt_name);
+		return (rust_append(st, basic_types[i].bt_name));
+	}
+
+	/* Not a basic type -- put the character back */
+	sv_init_sv(sv, &orig);
+	return (B_FALSE);
+}
+
+/*
+ * Parse a <type> and append its demangled form to the output.
+ *
+ * The leading character selects the form: 'A' array, 'S' slice, 'T' tuple,
+ * 'R'/'Q' reference (shared/mut), 'P'/'O' raw pointer (const/mut), 'F'
+ * function signature and 'D' dyn trait object.  Anything else is tried as
+ * a backref, then as a basic type, and finally as a path.
+ *
+ * The boolean argument is unused; it is only there so this function can
+ * be passed to rustv0_parse_opt_list() and rustv0_parse_backref().
+ *
+ * Returns B_TRUE if a type was parsed and appended, B_FALSE otherwise.
+ */
+static boolean_t
+rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t dummy __unused)
+{
+	strview_t save;
+	size_t len, tuple_elem_count;
+	boolean_t ret = B_FALSE;
+	char c;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	SAVE_LEN(st, len);
+	sv_init_sv(&save, sv);
+
+	switch (c = sv_consume_c(sv)) {
+	case 'A':
+		/* [T; N] */
+		ret = rust_appendc(st, '[') &&
+		    rustv0_parse_type(st, sv, B_FALSE) &&
+		    rust_append(st, "; ") &&
+		    rustv0_parse_const(st, sv, B_FALSE) &&
+		    rust_appendc(st, ']');
+		break;
+	case 'S':
+		/* [T] */
+		ret = rust_appendc(st, '[') &&
+		    rustv0_parse_type(st, sv, B_FALSE) &&
+		    rust_appendc(st, ']');
+		break;
+	case 'T':
+		/* A tuple with exactly one element is written as (T,) */
+		tuple_elem_count = 0;
+		ret = rust_appendc(st, '(') &&
+		    rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ",
+		    B_FALSE, &tuple_elem_count) &&
+		    rust_append(st, (tuple_elem_count == 1) ? ",)" : ")");
+		break;
+	case 'R':
+	case 'Q':
+		/* `&T`, `&'... T`, `&mut T` or `&'... mut T` */
+		if (!(ret = rust_appendc(st, '&')))
+			break;
+
+		/*
+		 * lifetime is optional, but we need to add a trailing
+		 * space if present (so we cannot use the OPTIONAL macro).
+		 */
+		if (rustv0_parse_lifetime(st, sv)) {
+			if (!(ret = rust_appendc(st, ' ')))
+				break;
+		} else if (HAS_ERROR(st)) {
+			/*
+			 * ret is still B_TRUE from appending the '&'; a
+			 * fatal error must not be reported as success.
+			 */
+			ret = B_FALSE;
+			break;
+		}
+
+		ret = rust_append(st, (c == 'Q') ? "mut " : "") &&
+		    rustv0_parse_type(st, sv, B_FALSE);
+		break;
+	case 'P':
+		ret = rust_append(st, "*const ") &&
+		    rustv0_parse_type(st, sv, B_FALSE);
+		break;
+	case 'O':
+		ret = rust_append(st, "*mut ") &&
+		    rustv0_parse_type(st, sv, B_FALSE);
+		break;
+	case 'F':
+		ret = rustv0_parse_fnsig(st, sv);
+		break;
+	case 'D':
+		ret = rust_append(st, "dyn ") &&
+		    rustv0_parse_dynbounds(st, sv);
+		if (!ret)
+			break;
+
+		/*
+		 * Rust RFC2603 shows the lifetime as required.
+		 * NOTE(review): an earlier version of this comment said the
+		 * lifetime appears to be optional, however if no 'L' follows,
+		 * rustv0_parse_lifetime() below returns B_FALSE and the type
+		 * fails to parse -- confirm which is intended.
+		 */
+		DEMDEBUG("%s: pre-lifetime: '%.*s'", __func__, SV_PRINT(sv));
+
+		/*
+		 * We only want to print a non-zero (non "'_")
+		 * lifetime.
+		 */
+		if (sv_consume_if(sv, "L_"))
+			break;
+
+		/*
+		 * But if there is a lifetime we want to print,
+		 * we want to prepend " + " before it.
+		 */
+		if (sv_peek(sv, 0) == 'L' &&
+		    !(ret = rust_append(st, " + ")))
+			break;
+
+		ret = rustv0_parse_lifetime(st, sv);
+		break;
+	default:
+		/* Not one of the tagged forms; rewind and try the rest */
+		sv_init_sv(sv, &save);
+
+		ret = rustv0_parse_backref(st, sv, rustv0_parse_type,
+		    B_FALSE) ||
+		    rustv0_parse_basic_type(st, sv);
+		if (ret)
+			break;
+
+		ret = rustv0_parse_path(st, sv, B_FALSE);
+		break;
+	}
+
+	DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len),
+	    ret ? "success" : "fail");
+
+	return (ret);
+}
+
+/*
+ * <path> = "C" <identifier> crate root
+ * | "M" <impl-path> <type> <T>
+ * | "X" <impl-path> <type> <path> <T as Trait> (trait impl)
+ * | "Y" <type> <path> <T as Trait> (trait definition)
+ * | "N" <ns> <path> <identifier> ...::ident (nested path)
+ * | "I" <path> {<generic-arg>} "E" ...<T, U>
+ * | <backref>
+ *
+ * Parse a <path> and append its demangled form to the output. When
+ * in_value is set, a generic argument list is preceded by "::" (i.e.
+ * "...::<T, U>" instead of "...<T, U>").
+ *
+ * For the "I" form, st->rs_args_stay_open (as set by the caller) controls
+ * whether the closing '>' is appended; if it is left off,
+ * st->rs_args_is_open is set to let the caller know.
+ *
+ * Returns B_TRUE if a path was parsed, B_FALSE otherwise.
+ */
+static boolean_t
+rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv,
+ boolean_t in_value)
+{
+ strview_t save;
+ uint64_t disamb = 0;
+ size_t len;
+ boolean_t ret = B_FALSE;
+ boolean_t save_skip;
+ boolean_t args_stay_save = st->rs_args_stay_open;
+ boolean_t args_open_save = st->rs_args_is_open;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ if (sv_remaining(sv) == 0)
+ return (B_FALSE);
+
+ SAVE_LEN(st, len);
+ sv_init_sv(&save, sv);
+
+ switch (sv_consume_c(sv)) {
+ case 'C':
+ if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb)))
+ goto done;
+
+ if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
+ goto done;
+
+ /* The crate disambiguator is only shown in verbose mode */
+ if (st->rs_verbose &&
+ !rust_append_printf(st, "[%" PRIx64 "]", disamb))
+ goto done;
+ break;
+ case 'M':
+ /* The impl path is parsed, but with output skipped */
+ SKIP_BEGIN(st, save_skip);
+ if (!rustv0_parse_impl_path(st, sv, in_value)) {
+ SKIP_END(st, save_skip);
+ goto done;
+ }
+ SKIP_END(st, save_skip);
+
+ if (!rust_appendc(st, '<') ||
+ !rustv0_parse_type(st, sv, B_FALSE) ||
+ !rust_appendc(st, '>'))
+ goto done;
+ break;
+ case 'X':
+ /* As with 'M', the impl path is parsed with output skipped */
+ SKIP_BEGIN(st, save_skip);
+ if (!rustv0_parse_impl_path(st, sv, in_value)) {
+ SKIP_END(st, save_skip);
+ goto done;
+ }
+ SKIP_END(st, save_skip);
+ /*FALLTHRU*/
+ case 'Y':
+ if (!rust_appendc(st, '<') ||
+ !rustv0_parse_type(st, sv, B_FALSE) ||
+ !rust_append(st, " as ") ||
+ !rustv0_parse_path(st, sv, B_FALSE) ||
+ !rust_appendc(st, '>'))
+ goto done;
+ break;
+ case 'N':
+ if (!rustv0_parse_nested_path(st, sv, in_value))
+ goto done;
+ break;
+ case 'I':
+ /*
+ * The open list request from our caller only applies to
+ * this list, not to any lists nested within the inner path
+ * or the arguments. The caller's values are restored below.
+ */
+ st->rs_args_stay_open = B_FALSE;
+ st->rs_args_is_open = B_FALSE;
+
+ if (!rustv0_parse_path(st, sv, in_value))
+ goto done;
+
+ if (in_value && !rust_append(st, "::"))
+ goto done;
+
+ if (!rust_appendc(st, '<') ||
+ !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg,
+ ", ", B_FALSE, NULL))
+ goto done;
+
+ st->rs_args_stay_open = args_stay_save;
+ st->rs_args_is_open = args_open_save;
+
+ /*
+ * If we were asked to not close our list, then don't and
+ * indicate that the list is open.
+ */
+ if (st->rs_args_stay_open) {
+ st->rs_args_stay_open = B_FALSE;
+ st->rs_args_is_open = B_TRUE;
+ } else if (!rust_appendc(st, '>')) {
+ goto done;
+ }
+ break;
+ default:
+ /*
+ * Didn't recognize the letter, so it has to be a backref.
+ * Restore sv to state prior to switch and continue.
+ */
+ sv_init_sv(sv, &save);
+ if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value))
+ goto done;
+ }
+
+ ret = B_TRUE;
+
+done:
+ DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len),
+ ret ? "success" : "fail");
+
+ return (ret);
+}
+
+/*
+ * <impl-path> = [<disambiguator>] <path>
+ *
+ * The value of the disambiguator (if there is one) is parsed and then
+ * discarded.
+ */
+static boolean_t
+rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t in_value)
+{
+	uint64_t disamb = 0;
+
+	/* A missing disambiguator is fine, a fatal error is not */
+	if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb)))
+		return (B_FALSE);
+
+	return (rustv0_parse_path(st, sv, in_value));
+}
+
+/*
+ * A bit of a hack -- when printing a nested path, we need to know
+ * if the identifier is there or not in order to correctly format
+ * the output preceding it (when present).  This peeks ahead, without
+ * consuming anything from sv, and sets *has_namep to B_FALSE when the
+ * identifier's length starts with '0' (i.e. the identifier is empty) and
+ * to B_TRUE otherwise.
+ *
+ * Returns B_TRUE if *has_namep was set.  Returns B_FALSE if st already has
+ * an error or there is no input left.  If all that is left is the punycode
+ * 'u' marker, st->rs_error is also set to EINVAL.
+ */
+static boolean_t
+rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t *has_namep)
+{
+	strview_t peek;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	/* Work on a copy so the caller's position isn't disturbed */
+	sv_init_sv(&peek, sv);
+
+	/* For checking the length, we don't care if it's punycode or not */
+	(void) sv_consume_if_c(&peek, 'u');
+
+	/*
+	 * This has to look at the copy: sv itself was already shown to be
+	 * non-empty above, and it is the copy that may have just been
+	 * emptied by skipping the 'u'.
+	 */
+	if (sv_remaining(&peek) == 0) {
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	*has_namep = sv_consume_if_c(&peek, '0') ? B_FALSE : B_TRUE;
+	return (B_TRUE);
+}
+
+/*
+ * Parse the portion of a nested path that follows the "N" tag:
+ *
+ * <ns> <path> [<disambiguator>] <undisambiguated-identifier>
+ *
+ * When <ns> is upper case, the result is written as "::{kind:name#N}",
+ * where kind is "closure" for 'C', "shim" for 'S' and the namespace
+ * character itself for anything else, the ":name" part is only present
+ * when the identifier isn't empty, and N is the disambiguator. Otherwise
+ * the result is "::name" (or nothing when the identifier is empty).
+ */
+static boolean_t
+rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv,
+ boolean_t in_value)
+{
+ uint64_t disambiguator = 0;
+ size_t len = 0;
+ char ns;
+ boolean_t ret = B_FALSE;
+ boolean_t has_name;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ if (sv_remaining(sv) == 0)
+ return (B_FALSE);
+
+ SAVE_LEN(st, len);
+
+ ns = sv_consume_c(sv);
+
+ if (!rustv0_parse_path(st, sv, in_value))
+ goto done;
+
+ if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator)))
+ goto done;
+
+ /* Peek ahead to see if the identifier that follows is empty */
+ if (!rustv0_has_name(st, sv, &has_name))
+ goto done;
+
+ if (ISUPPER(ns)) {
+ if (!rust_append(st, "::{"))
+ goto done;
+
+ switch (ns) {
+ case 'C':
+ if (!rust_append(st, "closure"))
+ goto done;
+ break;
+ case 'S':
+ if (!rust_append(st, "shim"))
+ goto done;
+ break;
+ default:
+ if (!rust_appendc(st, ns))
+ goto done;
+ break;
+ }
+
+ if (has_name && !rust_appendc(st, ':'))
+ goto done;
+
+ if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
+ goto done;
+
+ ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator);
+ } else {
+ /* An empty identifier doesn't get a separator */
+ if (has_name) {
+ if (!(ret = rust_append(st, "::")))
+ goto done;
+ }
+ ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE);
+ }
+
+done:
+ DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len),
+ ret ? "success" : "fail");
+
+ return (ret);
+}
+
+/*
+ * <disambiguator> = "s" <base-62-number>
+ *
+ * Parse a disambiguator and store its (adjusted) value in *valp. *valp is
+ * only written once at least two characters of input are known to remain.
+ *
+ * Returns B_FALSE without setting an error if no disambiguator is present.
+ * If the leading 's' is present but the number that follows is invalid,
+ * st->rs_error is set to EINVAL.
+ */
+static boolean_t
+rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv,
+ uint64_t *valp)
+{
+ if (HAS_ERROR(st) || sv_remaining(sv) < 2)
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ *valp = 0;
+
+ if (!sv_consume_if_c(sv, 's'))
+ return (B_FALSE);
+
+ if (!rustv0_parse_base62(st, sv, valp)) {
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ }
+
+ /*
+ * Rust RFC 2603 details this in Appendix A, but not the main
+ * portion of the RFC. If no disambiguator is present, the value
+ * is 0, if the decoded value is 0, the index is 1, ...
+ * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we
+ * only need to add one here to complete the adjustment.
+ */
+ *valp = *valp + 1;
+
+ DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp);
+ return (B_TRUE);
+}
+
+/*
+ * <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
+ *
+ * Parse an identifier and append it to the output. A leading 'u' means
+ * the bytes are punycode and are decoded with rustv0_puny_decode(). When
+ * repl_underscore is set, every '_' in a non-punycode identifier is
+ * written as '-' (for punycode identifiers, repl_underscore is passed on
+ * to rustv0_puny_decode()).
+ */
+static boolean_t
+rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st,
+ strview_t *restrict sv, boolean_t repl_underscore)
+{
+ uint64_t len = 0;
+ boolean_t puny = B_FALSE;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ if (sv_remaining(sv) == 0)
+ return (B_FALSE);
+
+ if (sv_consume_if_c(sv, 'u'))
+ puny = B_TRUE;
+
+ if (!rust_parse_base10(st, sv, &len))
+ return (B_FALSE);
+
+ /* skip optional separator '_' */
+ (void) sv_consume_if_c(sv, '_');
+
+ if (sv_remaining(sv) < len) {
+ DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") "
+ "> remaining bytes (%zu)", __func__, len,
+ sv_remaining(sv));
+ return (B_FALSE);
+ }
+
+ /* 0 length identifiers are acceptable */
+ if (len == 0)
+ return (B_TRUE);
+
+ if (puny) {
+ strview_t ident;
+
+ /* Decode just the identifier's bytes, then step over them */
+ sv_init_sv_range(&ident, sv, len);
+ if (!rustv0_puny_decode(st, &ident, repl_underscore))
+ return (B_FALSE);
+
+ sv_consume_n(sv, len);
+ return (B_TRUE);
+ }
+
+ /*
+ * rust identifiers do not contain '-'. However ABI identifiers
+ * are allowed to contain them (e.g. extern "foo-bar" fn ...).
+ * They are substituted with '_' in the mangled output. If we
+ * do not need to reverse this, we can just append 'len' bytes
+ * of sv. Otherwise we need to go through and reverse this
+ * substitution.
+ */
+ if (!repl_underscore)
+ return (rust_append_sv(st, len, sv));
+
+ /*
+ * We checked earlier that len <= sv_remaining(sv); so this loop
+ * cannot overrun.
+ */
+ for (size_t i = 0; i < len; i++) {
+ char c = sv_consume_c(sv);
+
+ if (c == '_')
+ c = '-';
+
+ if (!rust_appendc(st, c))
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * <backref> = "B" <base-62-number>
+ *
+ * Parse a backref and re-run 'fn' (handing it 'bval') at the position in
+ * the original string (st->rs_orig) the backref points to, so that the
+ * output of the referenced item is appended again.
+ *
+ * Returns B_FALSE without setting an error if sv doesn't start with 'B'.
+ * An invalid number sets st->rs_error to EINVAL, and an index that isn't
+ * before the backref itself sets it to ERANGE.
+ */
+static boolean_t
+rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv,
+ boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b),
+ boolean_t bval)
+{
+ strview_t backref;
+ strview_t target;
+ uint64_t idx = 0;
+ size_t save_len;
+ size_t len;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ /* Remember where the backref token itself starts */
+ sv_init_sv(&backref, sv);
+
+ if (!sv_consume_if_c(sv, 'B'))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv));
+
+ if (!rustv0_parse_base62(st, sv, &idx)) {
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ }
+
+ /*
+ * Determine how many bytes we've consumed (up to the start of
+ * the current backref token).
+ */
+ VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first);
+ len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first);
+
+ /*
+ * The backref can only refer to an index prior to the start of
+ * the current backref token -- that is must always refer back in
+ * the string, never to the current position or beyond.
+ */
+ if (idx >= len) {
+ DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") "
+ "is out of range [0, %zu)", __func__, idx, len);
+ st->rs_error = ERANGE;
+ return (B_FALSE);
+ }
+
+ /*
+ * Create a strview_t of the original string (sans prefix) by
+ * copying from st->rs_orig. The length of the target strview_t is
+ * capped to end immediately prior to this backref token. Since we
+ * enforce that backrefs must always refer to already processed
+ * portions of the string (i.e. must always refer backwards), and the
+ * length of the strview_t is set to end prior to the start of this
+ * backref token, we guarantee processing of a backref will always
+ * terminate before it can possibly encounter this backref token
+ * and cause a loop -- either the processing terminates normally or
+ * it reaches the end of the capped strview_t.
+ */
+ sv_init_sv_range(&target, &st->rs_orig, len);
+
+ /*
+ * Consume all the input in the target strview_t up to the index
+ */
+ sv_consume_n(&target, idx);
+
+ DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__,
+ idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : "");
+
+ /*
+ * If we're skipping the output, there's no reason to bother reparsing
+ * the output -- we're not going to save it. We still setup everything
+ * so that the debug output is still emitted.
+ */
+ if (st->rs_skip)
+ return (B_TRUE);
+
+ SAVE_LEN(st, save_len);
+ if (!fn(st, &target, bval))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len));
+ return (B_TRUE);
+}
+
+/*
+ * Append the name of a lifetime to the output.  A value of 0 is printed
+ * as '_.  Any other value is relative to the current binder depth
+ * (st->rs_lt_depth): the result of depth - lifetime is printed as
+ * 'a through 'z for 0 through 25, and as '_<number> beyond that.
+ *
+ * A lifetime that is larger than the current depth is invalid; in that
+ * case st->rs_error is set to ERANGE and B_FALSE is returned.
+ */
+static boolean_t
+rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime)
+{
+	uint64_t bound_lt;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	if (!rust_appendc(st, '\''))
+		return (B_FALSE);
+
+	if (lifetime == 0)
+		return (rust_appendc(st, '_'));
+
+	if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) {
+		DEMDEBUG("%s: ERROR: lifetime value %" PRIu64
+		    " > current depth %" PRIu64, __func__, lifetime,
+		    st->rs_lt_depth);
+		/*
+		 * Record the failure in the error field.  The depth must
+		 * be left alone -- it is a counter, not an errno.
+		 */
+		st->rs_error = ERANGE;
+		return (B_FALSE);
+	}
+
+	/*
+	 * Use 'a, 'b, ...
+	 */
+	if (bound_lt < 26) {
+		char c = (char)bound_lt + 'a';
+		return (rust_append_printf(st, "%c", c));
+	}
+
+	/*
+	 * Otherwise, use '_123, '_456, ...
+	 */
+	return (rust_append_printf(st, "_%" PRIu64, bound_lt));
+}
+
+/*
+ * <lifetime> = "L" <base-62-number>
+ *
+ * Parse a lifetime and append its name to the output.
+ *
+ * Returns B_FALSE without setting an error (and without consuming
+ * anything) when sv doesn't start with 'L', so callers can treat the
+ * lifetime as optional.  Once the 'L' has been consumed, an invalid number
+ * is fatal: st->rs_error is set to EINVAL, the same way an invalid number
+ * is handled for disambiguators and backrefs.  This keeps callers from
+ * going on to parse something else from the middle of a broken lifetime.
+ */
+static boolean_t
+rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	uint64_t lifetime = 0;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	if (!sv_consume_if_c(sv, 'L'))
+		return (B_FALSE);
+
+	if (!rustv0_parse_base62(st, sv, &lifetime)) {
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	return (rustv0_append_lifetime(st, lifetime));
+}
+
+/*
+ * Parse the value portion of a const generic (an optional hex number
+ * followed by a mandatory '_') and append it to the output formatted
+ * according to type_class:
+ *
+ * CTC_UNSIGNED decimal number
+ * CTC_SIGNED decimal number, preceded by '-' if the value started
+ * with 'n'
+ * CTC_BOOL "false" for 0, "true" for 1 (anything else fails)
+ * CTC_CHAR the value written via rust_append_utf8_c(), in single
+ * quotes (values that do not fit in 32 bits fail)
+ *
+ * type_class must not be CTC_INVALID.
+ */
+static boolean_t
+rustv0_parse_const_data(rust_state_t *restrict st,
+ const_type_class_t type_class, strview_t *restrict sv)
+{
+ uint64_t val = 0;
+ size_t save_len;
+ boolean_t neg = B_FALSE;
+ boolean_t ret = B_FALSE;
+
+ VERIFY3S(type_class, !=, CTC_INVALID);
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+ SAVE_LEN(st, save_len);
+
+ if (sv_remaining(sv) == 0)
+ return (B_FALSE);
+
+ if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n'))
+ neg = B_TRUE;
+
+ /* If there are no hex digits, val keeps its initial value of 0 */
+ ret = OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) &&
+ sv_consume_if_c(sv, '_');
+ if (!ret)
+ goto done;
+
+ switch (type_class) {
+ case CTC_SIGNED:
+ case CTC_UNSIGNED:
+ ret = rust_append_printf(st, "%s%" PRIu64, neg ? "-" : "", val);
+ break;
+ case CTC_BOOL:
+ if (val > 1) {
+ DEMDEBUG("%s: invalid bool val %" PRIu64, __func__,
+ val);
+ ret = B_FALSE;
+ break;
+ }
+ ret = rust_append_printf(st, "%s",
+ (val == 0) ? "false" : "true");
+ break;
+ case CTC_CHAR:
+ if (val > UINT32_MAX) {
+ DEMDEBUG("%s: char value %" PRIu64 " out of range",
+ __func__, val);
+ ret = B_FALSE;
+ break;
+ }
+
+ ret = rust_appendc(st, '\'') && rust_append_utf8_c(st, val) &&
+ rust_appendc(st, '\'');
+ break;
+ default:
+ ret = B_FALSE;
+ }
+
+done:
+ DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len),
+ ret ? "success" : "fail");
+
+ return (ret);
+}
+
+/*
+ * Parse a <const> and append its value to the output.  A const is either
+ * a backref to an earlier const, the placeholder 'p' (printed as '_'), or
+ * a type followed by either the placeholder or the value's data.  If
+ * st->rs_show_const_type is set, the value is followed by ": " and the
+ * name of its type.
+ *
+ * The boolean argument is unused; it is only there so this function can
+ * be passed to rustv0_parse_backref().
+ */
+static boolean_t
+rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t dummy __unused)
+{
+	strview_t type;
+	size_t start_len;
+	const_type_class_t ctype_class;
+	char ctype;
+	boolean_t save_skip;
+	boolean_t ret;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+	SAVE_LEN(st, start_len);
+
+	if (sv_remaining(sv) == 0)
+		return (B_FALSE);
+
+	if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE))
+		return (B_TRUE);
+
+	/*
+	 * A backref that was present but invalid has already consumed
+	 * input and flagged an error; don't try to reinterpret what's
+	 * left as something else.
+	 */
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	if (sv_consume_if_c(sv, 'p')) {
+		ret = rust_appendc(st, '_');
+		goto done;
+	}
+
+	ctype = sv_peek(sv, 0);
+	ctype_class = rustv0_classify_const_type(ctype);
+	if (ctype_class == CTC_INVALID) {
+		DEMDEBUG("%s: const type isn't a valid const generic type",
+		    __func__);
+		return (B_FALSE);
+	}
+
+	/*
+	 * We have a bit of a potential tricky situation.  Unlike formatting
+	 * the other tokens, if we want to display the type, we do so
+	 * _after_ the value, even though the type appears first.
+	 *
+	 * This is bit of a hack, but we save off the input position from
+	 * sv before we parse the type.  We then parse it without saving
+	 * the resulting value, then parse and output the constant.  If
+	 * we wish to then display the type, we can go back and parse
+	 * the type again, this time saving the result.
+	 */
+	sv_init_sv(&type, sv);
+
+	SKIP_BEGIN(st, save_skip);
+	ret = rustv0_parse_type(st, sv, B_FALSE);
+	SKIP_END(st, save_skip);
+
+	if (!ret) {
+		DEMDEBUG("%s: const type isn't valid", __func__);
+		return (B_FALSE);
+	}
+
+	if (sv_consume_if_c(sv, 'p')) {
+		ret = rust_appendc(st, '_');
+	} else {
+		ret = rustv0_parse_const_data(st, ctype_class, sv);
+	}
+	if (!ret)
+		goto done;
+
+	/*
+	 * rustv0_classify_const_type() accepts signed integer, bool and
+	 * char types in addition to the unsigned integer ones, so the type
+	 * has to be printed with rustv0_parse_basic_type() (which tries the
+	 * unsigned integer types first).  Only handling the unsigned types
+	 * here would fail every other const whenever the type is shown.
+	 */
+	if (st->rs_show_const_type) {
+		ret = rust_append(st, ": ") &&
+		    rustv0_parse_basic_type(st, &type);
+	}
+
+done:
+	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len),
+	    ret ? "success" : "fail");
+	return (ret);
+}
+
+/*
+ * Parse the ABI of a function signature and append its name.  The ABI is
+ * either a 'C' (printed as is) or an identifier in which every '_' is
+ * turned back into '-'.
+ */
+static boolean_t
+rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
+
+	if (!sv_consume_if_c(sv, 'C')) {
+		return (rustv0_parse_undisambiguated_identifier(st, sv,
+		    B_TRUE));
+	}
+
+	return (rust_appendc(st, 'C'));
+}
+
+/*
+ * <binder> = "G" <base-62-number>
+ *
+ * Parse a binder and print it as "for<'a, 'b, ...> ".  The number of
+ * lifetimes bound is the parsed number plus one.  st->rs_lt_depth is
+ * incremented for each of them; the callers save the depth beforehand and
+ * restore it once they are done with the scope of the binder.
+ *
+ * Returns B_FALSE without setting an error (and without consuming
+ * anything) when sv doesn't start with 'G', so callers can treat the
+ * binder as optional.  Once the 'G' has been consumed, an invalid number
+ * is fatal: st->rs_error is set to EINVAL, the same way an invalid number
+ * is handled for disambiguators and backrefs.
+ */
+static boolean_t
+rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	uint64_t n = 0;
+	uint64_t i;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	if (!sv_consume_if_c(sv, 'G'))
+		return (B_FALSE);
+
+	if (!rustv0_parse_base62(st, sv, &n)) {
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+	n += 1;
+
+	if (!rust_append(st, "for<"))
+		return (B_FALSE);
+
+	for (i = 0; i < n; i++) {
+		if (i > 0 && !rust_append(st, ", "))
+			return (B_FALSE);
+
+		/*
+		 * Lifetime 1 is relative to the depth, so after bumping
+		 * the depth it names the lifetime that was just bound.
+		 */
+		st->rs_lt_depth++;
+		if (!rustv0_append_lifetime(st, 1))
+			return (B_FALSE);
+	}
+
+	if (!rust_append(st, "> "))
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * <fn-sig> := [<binder>] ["U"] ["K" <abi>] {type} "E" <type>
+ *
+ * Note that while the Rust RFC states the binder is mandatory, based on
+ * actual examples, and comparing with the rust-based demangler, it is in
+ * fact optional.
+ *
+ * The output has the form:
+ *
+ * [for<...> ][unsafe ][extern "abi" ]fn(args...)[ -> ret]
+ *
+ * The lifetime depth (st->rs_lt_depth) in effect on entry is restored
+ * when the signature is successfully parsed.
+ */
+static boolean_t
+rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv)
+{
+ uint64_t save_lt = st->rs_lt_depth;
+
+ DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
+
+ if (!OPTIONAL(st, rustv0_parse_binder(st, sv)))
+ return (B_FALSE);
+
+ if (sv_consume_if_c(sv, 'U') && !rust_append(st, "unsafe "))
+ return (B_FALSE);
+
+ if (sv_consume_if_c(sv, 'K') &&
+ (!rust_append(st, "extern \"") || !rustv0_parse_abi(st, sv) ||
+ !rust_append(st, "\" ")))
+ return (B_FALSE);
+
+ if (!rust_append(st, "fn("))
+ return (B_FALSE);
+
+ if (!rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE,
+ NULL)) {
+ return (B_FALSE);
+ }
+
+ if (!rust_appendc(st, ')'))
+ return (B_FALSE);
+
+ /* If the return type is (), don't print it */
+ if (!sv_consume_if_c(sv, 'u')) {
+ if (!rust_append(st, " -> "))
+ return (B_FALSE);
+
+ if (!rustv0_parse_type(st, sv, B_FALSE))
+ return (B_FALSE);
+ }
+
+ st->rs_lt_depth = save_lt;
+
+ return (B_TRUE);
+}
+
+/*
+ * <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
+ *
+ * Parse an associated type binding and append it as "name = type". The
+ * binding is preceded by ", " if 'open' is set (the argument list already
+ * has been started), and by "<" otherwise.
+ *
+ * Returns B_FALSE without setting an error if sv doesn't start with 'p'.
+ * Once the 'p' has been consumed, a failure to parse the identifier or the
+ * type sets st->rs_error to EINVAL.
+ */
+static boolean_t
+rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st,
+ strview_t *restrict sv, boolean_t open)
+{
+ size_t save_len;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ if (sv_remaining(sv) == 0)
+ return (B_FALSE);
+
+ if (!sv_consume_if_c(sv, 'p'))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+ SAVE_LEN(st, save_len);
+
+ if (!rust_append(st, open ? ", " : "<"))
+ return (B_FALSE);
+
+ if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) {
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ }
+
+ if (!rust_append(st, " = "))
+ return (B_FALSE);
+
+ if (!rustv0_parse_type(st, sv, B_FALSE)) {
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ }
+
+ DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len));
+
+ return (B_TRUE);
+}
+
+/*
+ * Parse a single trait of a dyn type: a path followed by zero or more
+ * associated type bindings. The bindings are appended to the generic
+ * argument list of the path (see the comment in the body).
+ *
+ * The boolean argument is unused; it is only there so this function can
+ * be passed to rustv0_parse_opt_list().
+ */
+static boolean_t
+rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv,
+ boolean_t dummy __unused)
+{
+ boolean_t stay_save = st->rs_args_stay_open;
+ boolean_t open_save = st->rs_args_is_open;
+ boolean_t open = B_FALSE;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ /*
+ * This is a bit subtle, but when formatting a trait in trait,
+ * we want something like this:
+ *
+ * dyn Trait<T, U, Assoc=X>
+ *
+ * instead of
+ *
+ * dyn Trait<T, U, <Assoc=X>>
+ *
+ * So when parsing the path, if we encounter generic arguments, we want
+ * the arg list to remain open at the end of processing the path so
+ * we can append the bindings to it. We set rs_args_stay_open to B_TRUE
+ * to indicate to rustv0_parse_path() that a generic argument list
+ * should not be closed (i.e. don't append a '>' at the end of the
+ * list). If rustv0_parse_path() encounters a list of generic arguments,
+ * it will also set st->rs_args_is_open to indicate it opened the list.
+ * We save this in 'open' so that when we process the associated
+ * bindings, we know if we need to open the list on the first binding
+ * or not -- we don't want 'dyn Trait<>' if there are no bindings,
+ * just 'dyn Trait'.
+ */
+ st->rs_args_stay_open = B_TRUE;
+ st->rs_args_is_open = B_FALSE;
+
+ if (!rustv0_parse_path(st, sv, B_FALSE)) {
+ st->rs_args_stay_open = stay_save;
+ st->rs_args_is_open = open_save;
+ return (B_FALSE);
+ }
+
+ open = st->rs_args_is_open;
+
+ st->rs_args_stay_open = stay_save;
+ st->rs_args_is_open = open_save;
+
+ /* After the first binding, the list is always open */
+ while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) {
+ open = B_TRUE;
+ }
+
+ /* The loop ends on a missing binding as well as on a fatal error */
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ if (open && !rust_appendc(st, '>'))
+ return (B_FALSE);
+
+ return (!HAS_ERROR(st));
+}
+
+/*
+ * <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
+ *
+ * Parse the bounds of a dyn type, writing the traits separated by " + ".
+ * The lifetime depth in effect on entry is put back once the bounds have
+ * been parsed successfully.
+ */
+static boolean_t
+rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	const uint64_t depth = st->rs_lt_depth;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	/*
+	 * Rust RFC2603 implies that the binder is mandatory, but the
+	 * implementations treat it as optional, so a missing binder is
+	 * only a failure if it came with a fatal error.
+	 */
+	if (OPTIONAL(st, rustv0_parse_binder(st, sv)) &&
+	    rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ",
+	    B_FALSE, NULL)) {
+		st->rs_lt_depth = depth;
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Parse a single generic argument: a const (introduced by 'K'), a
+ * lifetime, or a type -- tried in that order.
+ *
+ * The boolean argument is unused; it is only there so this function can
+ * be passed to rustv0_parse_opt_list().
+ */
+static boolean_t
+rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv,
+    boolean_t dummy __unused)
+{
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+	if (!sv_consume_if_c(sv, 'K')) {
+		if (rustv0_parse_lifetime(st, sv))
+			return (B_TRUE);
+
+		return (rustv0_parse_type(st, sv, B_FALSE));
+	}
+
+	return (rustv0_parse_const(st, sv, B_FALSE));
+}
+
+/*
+ * Parse a hex value into *valp. Note that rust only uses lower case
+ * hex values.
+ *
+ * Returns B_TRUE if at least one digit was consumed. Otherwise B_FALSE is
+ * returned and *valp is left untouched.
+ *
+ * NOTE(review): the value is accumulated in a uint64_t without any
+ * overflow check, so a number with more than 16 significant hex digits
+ * silently wraps.
+ */
+static boolean_t
+rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv,
+ uint64_t *restrict valp)
+{
+ uint64_t val = 0;
+ size_t ndigits = 0;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ if (sv_remaining(sv) == 0)
+ return (B_FALSE);
+
+ /*
+ * Unfortunately, Rust RFC 2603 doesn't explicitly define
+ * {hex-digits}. We follow what decimal digits does, and treat a
+ * leading 0 as a terminator.
+ */
+ while (sv_remaining(sv) > 0) {
+ char c = sv_peek(sv, 0);
+
+ if (ISDIGIT(c)) {
+ val *= 16;
+ val += c - '0';
+ } else if (c >= 'a' && c <= 'f') {
+ val *= 16;
+ val += c - 'a' + 10;
+ } else {
+ /* Not a hex digit; leave it for the caller */
+ break;
+ }
+
+ sv_consume_n(sv, 1);
+
+ /* A leading '0' is the whole number */
+ if (++ndigits == 1 && val == 0)
+ break;
+ }
+
+ if (ndigits > 0)
+ *valp = val;
+
+ return ((ndigits > 0) ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Parse a base62 number into *valp. The number is explicitly terminated
+ * by a '_'. The values are also offset by one -- that is '_' == 0,
+ * '0_' == 1, ...
+ *
+ * The digits are 0-9 (values 0-9), a-z (values 10-35) and A-Z (values
+ * 36-61).
+ *
+ * Returns B_FALSE (without setting st->rs_error) if the number is invalid
+ * or isn't terminated; it is up to the caller to decide if that is fatal.
+ * Note that input may already have been consumed from sv in that case.
+ *
+ * NOTE(review): the value is accumulated in a uint64_t without any
+ * overflow check, so an overly long number silently wraps.
+ */
+static boolean_t
+rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv,
+ uint64_t *restrict valp)
+{
+ uint64_t val = 0;
+ char c;
+
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
+
+ if (sv_remaining(sv) == 0)
+ return (B_FALSE);
+
+ /* A terminating '_' without any digits is 0 */
+ if (sv_consume_if_c(sv, '_')) {
+ *valp = 0;
+ return (B_TRUE);
+ }
+
+ /* Need at least one valid digit if > 0 */
+ if (!ISALNUM(sv_peek(sv, 0)))
+ return (B_FALSE);
+
+ while (sv_remaining(sv) > 0) {
+ c = sv_consume_c(sv);
+
+ if (c == '_') {
+ /*
+ * Because a lone '_' was already handled earlier,
+ * we know we've had at least one other digit and
+ * can increment the value and return.
+ */
+ *valp = val + 1;
+ return (B_TRUE);
+ } else if (ISDIGIT(c)) {
+ val *= 62;
+ val += c - '0';
+ } else if (ISLOWER(c)) {
+ val *= 62;
+ val += c - 'a' + 10;
+ } else if (ISUPPER(c)) {
+ val *= 62;
+ val += c - 'A' + 36;
+ } else {
+ return (B_FALSE);
+ }
+ }
+
+ /* We reached the end of the string without a terminating _ */
+ return (B_FALSE);
+}
+
+/*
+ * Map the basic type character of a const generic to the class used to
+ * format its value.  CTC_INVALID is returned for any character that isn't
+ * an integer, bool or char type.
+ */
+static const_type_class_t
+rustv0_classify_const_type(char type)
+{
+	/* u8, u16, u32, u64, u128, usize */
+	if (type == 'h' || type == 't' || type == 'm' ||
+	    type == 'y' || type == 'o' || type == 'j')
+		return (CTC_UNSIGNED);
+
+	/* i8, isize, i32, i128, i16, i64 */
+	if (type == 'a' || type == 'i' || type == 'l' ||
+	    type == 'n' || type == 's' || type == 'x')
+		return (CTC_SIGNED);
+
+	if (type == 'b')
+		return (CTC_BOOL);
+
+	if (type == 'c')
+		return (CTC_CHAR);
+
+	return (CTC_INVALID);
+}
+
+/*
+ * Make sure the name is a plausible mangled rust symbol.
+ * Non-ASCII characters are never allowed.  Rust itself uses [_0-9A-Za-z],
+ * however some things will add a suffix starting with a '.' (e.g. LLVM
+ * thin LTO).  As such we proceed in two phases: only [_0-9A-Za-z] is
+ * allowed until the first '.' is seen, and from there on any ASCII
+ * character is.
+ */
+static boolean_t
+rustv0_valid_sym(const strview_t *sv)
+{
+	boolean_t in_suffix = B_FALSE;
+
+	for (size_t idx = 0; idx < sv->sv_rem; idx++) {
+		char c = sv->sv_first[idx];
+
+		if (c == '.') {
+			in_suffix = B_TRUE;
+			continue;
+		}
+
+		if (c == '_' || ISALNUM(c))
+			continue;
+
+		/* Inside the suffix, anything with the high bit clear goes */
+		if (in_suffix && (c & 0x80) == 0)
+			continue;
+
+		DEMDEBUG("%s: ERROR found invalid character '%c' "
+		    "in '%.*s' at index %zu",
+		    __func__, c, SV_PRINT(sv), idx);
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
diff --git a/usr/src/lib/libdemangle/common/rust-v0puny.c b/usr/src/lib/libdemangle/common/rust-v0puny.c
new file mode 100644
index 0000000000..9659902ac1
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/rust-v0puny.c
@@ -0,0 +1,264 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Jason King
+ */
+
+#include <inttypes.h>
+#include <libcustr.h>
+#include <limits.h>
+#include <string.h>
+#include <sys/byteorder.h>
+#include "rust.h"
+#include "strview.h"
+
+/*
+ * The rust v0 encoding (rust RFC 2603) uses a slightly modified
+ * version of punycode to encode characters that are not ASCII.
+ * The big difference is that '_' is used to separate the ASCII codepoints
+ * from the non-ASCII code points instead of '-'.
+ *
+ * The decoding is taken almost directly from (IETF) RFC 3492
+ */
+
+#define BASE 36
+#define TMIN 1
+#define TMAX 26
+#define SKEW 38
+#define DAMP 700
+#define INITIAL_BIAS 72
+#define INITIAL_N 0x80
+#define DELIMITER '_'
+
+static inline uint32_t char_val(char);
+
+/*
+ * The bias adaptation function of IETF RFC 3492 (section 6.1).
+ *
+ * delta is the distance covered by the code point that was just
+ * decoded, npoints is the number of code points decoded so far
+ * (including that one; it must not be zero), and first is true only
+ * for the first adaptation of an identifier. Returns the new bias.
+ */
+static size_t
+rustv0_puny_adapt(size_t delta, size_t npoints, boolean_t first)
+{
+	const size_t span = BASE - TMIN;
+	const size_t limit = (span * TMAX) / 2;
+	size_t k;
+
+	if (first)
+		delta /= DAMP;
+	else
+		delta /= 2;
+
+	delta += delta / npoints;
+
+	for (k = 0; delta > limit; k += BASE)
+		delta /= span;
+
+	return (k + (((span + 1) * delta) / (delta + SKEW)));
+}
+
+/*
+ * Decode the rust v0 punycode identifier held in src and append the
+ * resulting code points to the demangled output of st.
+ *
+ * st			demangle state; it receives the output (through
+ *			rust_append_utf8_c()) and supplies the allocator.
+ * src			the encoded identifier; it is consumed while decoding.
+ * repl_underscore	when true, every '_' among the basic (ASCII) code
+ *			points is emitted as '-'.
+ *
+ * Returns B_TRUE on success. B_FALSE is returned if the temporary
+ * buffer cannot be allocated (rs_error is then set from errno) or if
+ * the input is malformed: no non-basic code points, an invalid digit,
+ * input that ends in the middle of a code point, arithmetic overflow,
+ * or a code point that cannot be emitted.
+ */
+boolean_t
+rustv0_puny_decode(rust_state_t *restrict st, strview_t *restrict src,
+    boolean_t repl_underscore)
+{
+	uint32_t *buf;
+	size_t bufalloc; /* in units of uint32_t */
+	size_t buflen;
+	size_t nbasic;
+	size_t i, old_i, k, w;
+	size_t n = INITIAL_N;
+	size_t bias = INITIAL_BIAS;
+	size_t delim_idx = 0;
+	boolean_t ret = B_FALSE;
+	char c;
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(src));
+
+	/*
+	 * The decoded string should never contain more codepoints than
+	 * the original string, so creating a temporary buffer large
+	 * enough to hold sv_remaining(src) uint32_t's should be
+	 * large enough.
+	 *
+	 * This also serves as a size check -- xcalloc will fail if the
+	 * resulting size of the buf (sizeof (uint32_t) * bufalloc) >=
+	 * SIZE_MAX. If xcalloc succeeds, we therefore know that
+	 * buflen cannot overflow.
+	 */
+	buflen = 0;
+	bufalloc = sv_remaining(src) + 1;
+	buf = xcalloc(st->rs_ops, bufalloc, sizeof (uint32_t));
+	if (buf == NULL) {
+		SET_ERROR(st);
+		return (B_FALSE);
+	}
+
+	/*
+	 * Find the position of the last delimiter (if any).
+	 * IETF RFC 3492 3.1 states that the delimiter is present if and only
+	 * if there are a non-zero number of basic (ASCII) code points. Since
+	 * the delimiter itself is a basic code point, the last one present
+	 * in the original string is the actual delimiter between the basic
+	 * and non-basic code points. Earlier occurrences of the delimiter
+	 * are treated as normal basic code points. For plain punycode, an
+	 * all ASCII string encoded with punycode would terminate with a
+	 * final delimiter, and a name with all non-basic code points would
+	 * not have a delimiter at all. With the rust v0 encoding, punycode
+	 * encoded identifiers have a 'u' prefix prior to the identifier
+	 * length (['u'] <decimal-number> <bytes>), so we should never
+	 * encounter an all ASCII name that's encoded with punycode (we error
+	 * on this). For an all non-basic codepoint identifier, no delimiter
+	 * will be present, and we treat that the same as the delimiter being
+	 * in the first position of the string, and consume it (if present)
+	 * when we transition from copying the basic code points (which there
+	 * will be none in this situation) to non-basic code points.
+	 */
+	for (i = 0; i < src->sv_rem; i++) {
+		if (src->sv_first[i] == DELIMITER) {
+			delim_idx = i;
+		}
+	}
+	VERIFY3U(delim_idx, <, bufalloc);
+
+	if (delim_idx + 1 == sv_remaining(src)) {
+		DEMDEBUG("%s: encountered an all-ASCII name encoded with "
+		    "punycode", __func__);
+		goto done;
+	}
+
+	/* Copy all the basic characters up to the delimiter into buf */
+	for (nbasic = 0; nbasic < delim_idx; nbasic++) {
+		c = sv_consume_c(src);
+
+		/* The rust prefix check should guarantee this */
+		VERIFY3U(c, <, 0x80);
+
+		/*
+		 * Normal rust identifiers do not contain '-' in them.
+		 * However ABI identifiers could contain a dash. Those
+		 * are translated to _, and we need to replace accordingly
+		 * when asked.
+		 */
+		if (repl_underscore && c == '_')
+			c = '-';
+
+		buf[nbasic] = c;
+		buflen++;
+	}
+	/* nbasic is a size_t, so it must be printed with %zu */
+	DEMDEBUG("%s: %zu ASCII codepoints copied", __func__, nbasic);
+
+	/*
+	 * Consume delimiter between basic and non-basic code points if present.
+	 * See above for explanation why it may not be present.
+	 */
+	(void) sv_consume_if_c(src, DELIMITER);
+
+	DEMDEBUG("%s: non-ASCII codepoints to decode: %.*s", __func__,
+	    SV_PRINT(src));
+
+	for (i = 0; sv_remaining(src) > 0; i++) {
+		VERIFY3U(i, <=, buflen);
+
+		/*
+		 * Guarantee we have enough space to insert another codepoint.
+		 * Our buffer sizing above should prevent this from ever
+		 * tripping, but check this out of paranoia.
+		 */
+		VERIFY3U(buflen, <, bufalloc - 1);
+
+		/* decode the next codepoint */
+		for (old_i = i, k = BASE, w = 1; ; k += BASE) {
+			size_t t;
+			uint32_t digit;
+
+			if (sv_remaining(src) == 0)
+				goto done;
+
+			digit = char_val(sv_consume_c(src));
+			if (digit >= BASE)
+				goto done;
+
+			/*
+			 * RFC 3492 6.2 requires a decoder to fail on
+			 * overflow instead of silently wrapping. w is
+			 * never zero here (it starts at 1 and is only
+			 * updated after the overflow check below).
+			 */
+			if (digit > (SIZE_MAX - i) / w) {
+				DEMDEBUG("%s: ERROR: overflow while decoding "
+				    "a code point", __func__);
+				goto done;
+			}
+			i = i + digit * w;
+
+			if (k <= bias)
+				t = TMIN;
+			else if (k >= bias + TMAX)
+				t = TMAX;
+			else
+				t = k - bias;
+
+			if (digit < t)
+				break;
+
+			/* t is at most TMAX, so BASE - t is never zero */
+			if (w > SIZE_MAX / (BASE - t)) {
+				DEMDEBUG("%s: ERROR: overflow while decoding "
+				    "a code point", __func__);
+				goto done;
+			}
+			w = w * (BASE - t);
+		}
+		buflen++;
+
+		bias = rustv0_puny_adapt(i - old_i, buflen,
+		    (old_i == 0) ? B_TRUE : B_FALSE);
+
+		if (i / buflen > SIZE_MAX - n) {
+			DEMDEBUG("%s: ERROR: code point value overflowed",
+			    __func__);
+			goto done;
+		}
+		n = n + i / buflen;
+		i = i % buflen;
+
+		/* n is a size_t, so it must be printed with %zx */
+		DEMDEBUG("%s: insert \\u%04zx at index %zu (len = %zu)",
+		    __func__, n, i, buflen);
+
+		/*
+		 * At the start of this loop iteration, we guaranteed
+		 * buflen < bufalloc - 1. Therefore we know there is room
+		 * to move over the contents of buf at i to make room
+		 * for the codepoint. We also just guaranteed that i
+		 * is in the range [0, buflen), so this should always be
+		 * safe.
+		 */
+		(void) memmove(buf + i + 1, buf + i,
+		    (buflen - i) * sizeof (uint32_t));
+
+#if _LP64
+		/*
+		 * This is always false for ILP32 and smatch will also complain,
+		 * so we just omit it for ILP32.
+		 */
+		if (n > UINT32_MAX) {
+			DEMDEBUG("%s: ERROR: utf8 value is out of range",
+			    __func__);
+			goto done;
+		}
+#endif
+
+		buf[i] = (uint32_t)n;
+	}
+
+	DEMDEBUG("%s: inserted %zu non-basic code points", __func__,
+	    buflen - nbasic);
+
+	for (i = 0; i < buflen; i++) {
+		if (!rust_append_utf8_c(st, buf[i]))
+			goto done;
+	}
+	ret = B_TRUE;
+
+done:
+	xfree(st->rs_ops, buf, bufalloc * sizeof (uint32_t));
+	return (ret);
+}
+
+/*
+ * Translate a punycode digit into its numeric value. Lowercase letters
+ * 'a'-'z' map to 0-25 and decimal digits '0'-'9' map to 26-35. Rust's
+ * punycode encoding always uses lowercase, so uppercase letters (like
+ * every other character) are invalid; BASE (36) is returned for those.
+ */
+static inline uint32_t
+char_val(char c)
+{
+	if (ISLOWER(c))
+		return (c - 'a');
+
+	if (ISDIGIT(c))
+		return (c - '0' + 26);
+
+	DEMDEBUG("%s: ERROR: invalid character 0x%02x encountered",
+	    __func__, (uint32_t)c);
+	return (BASE);
+}
diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c
index 9b145ca841..ce1fca4859 100644
--- a/usr/src/lib/libdemangle/common/rust.c
+++ b/usr/src/lib/libdemangle/common/rust.c
@@ -10,564 +10,417 @@
*/
/*
- * Copyright 2019, Joyent, Inc.
* Copyright 2021 Jason King
+ * Copyright 2019 Joyent, Inc.
*/
#include <errno.h>
+#include <langinfo.h>
#include <libcustr.h>
#include <limits.h>
+#include <stdarg.h>
#include <string.h>
-#include <sys/ctype.h> /* We want the C locale ISXXX() versions */
-#include <sys/debug.h>
-#include <stdio.h>
-#include <sys/sysmacros.h>
-#include "strview.h"
#include "demangle_int.h"
+#include "rust.h"
-/*
- * Unfortunately, there is currently no official specification for the rust
- * name mangling. This is an attempt to document the understanding of the
- * mangling used here. It is based off examination of
- * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/
- *
- * A mangled rust name is:
- * <prefix> <name>
- *
- * <prefix> ::= _Z
- * __Z
- *
- * <name> ::= N <name-segment>+ [<hash>] E
- *
- * <name-segment> ::= <len> <name-chars>{len}
- *
- * <len> ::= [1-9][0-9]+
- *
- * <name-chars> ::= <[A-Za-z]> <[A-Za-z0-9]>*
- * <separator>
- * <special>
- *
- * <separator> ::= '..' # '::'
- *
- * <special> ::= $SP$ # ' '
- * $BP$ # '*'
- * $RF$ # '&'
- * $LT$ # '<'
- * $GT$ # '>'
- * $LP$ # '('
- * $RP$ # ')'
- * $C$ # ','
- * $u7e$ # '~'
- * $u20$ # ' '
- * $u27$ # '\''
- * $u3d$ # '='
- * $u5b$ # '['
- * $u5d$ # ']'
- * $u7b$ # '{'
- * $u7d$ # '}'
- * $u3b$ # ';'
- * $u2b$ # '+'
- * $u22$ # '"'
- *
- * <hash> := <len> h <hex-digits>+
- *
- * <hex-digits> := <[0-9a-f]>
- */
-
-typedef struct rustdem_state {
- const char *rds_str;
- custr_t *rds_demangled;
- sysdem_ops_t *rds_ops;
- int rds_error;
-} rustdem_state_t;
-
-static const struct rust_charmap {
- const char *ruc_seq;
- char ruc_ch;
-} rust_charmap[] = {
- { "$SP$", '@' },
- { "$BP$", '*' },
- { "$RF$", '&' },
- { "$LT$", '<' },
- { "$GT$", '>' },
- { "$LP$", '(' },
- { "$RP$", ')' },
- { "$C$", ',' },
- { "$u7e$", '~' },
- { "$u20$", ' ' },
- { "$u27$", '\'' },
- { "$u3d$", '=' },
- { "$u5b$", '[' },
- { "$u5d$", ']' },
- { "$u7b$", '{' },
- { "$u7d$", '}' },
- { "$u3b$", ';' },
- { "$u2b$", '+' },
- { "$u22$", '"' }
-};
-static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap);
-
-static void *rustdem_alloc(custr_alloc_t *, size_t);
-static void rustdem_free(custr_alloc_t *, void *, size_t);
-
-static boolean_t rustdem_append_c(rustdem_state_t *, char);
-static boolean_t rustdem_all_ascii(const strview_t *);
-
-static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *);
-static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_add_sep(rustdem_state_t *);
-
-char *
-rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops)
+/*
+ * custr allocation callback. The rust_state_t registered as cua_arg
+ * (see rust_init_state()) supplies the sysdem_ops_t that zalloc() uses
+ * to obtain len bytes.
+ */
+static void *
+rust_cualloc(custr_alloc_t *cua, size_t len)
{
- rustdem_state_t st = {
- .rds_str = s,
- .rds_ops = ops,
- };
- custr_alloc_ops_t custr_ops = {
- .custr_ao_alloc = rustdem_alloc,
- .custr_ao_free = rustdem_free
- };
- custr_alloc_t custr_alloc = {
- .cua_version = CUSTR_VERSION
- };
- strview_t sv;
- int ret;
-
- if (custr_alloc_init(&custr_alloc, &custr_ops) != 0)
- return (NULL);
- custr_alloc.cua_arg = &st;
-
- sv_init_str(&sv, s, s + slen);
-
- if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') {
- DEMDEBUG("ERROR: string is either too small or does not end "
- "with 'E'");
- errno = EINVAL;
- return (NULL);
- }
-
- if (!rustdem_parse_prefix(&st, &sv)) {
- DEMDEBUG("ERROR: could not parse prefix");
- errno = EINVAL;
- return (NULL);
- }
- DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv));
-
- if (!rustdem_all_ascii(&sv)) {
- /* rustdem_all_ascii() provides debug output */
- errno = EINVAL;
- return (NULL);
- }
-
- if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0)
- return (NULL);
-
- if (!rustdem_parse_name(&st, &sv)) {
- if (st.rds_error == 0)
- st.rds_error = EINVAL;
- goto fail;
- }
-
- if (sv_remaining(&sv) > 0) {
- DEMDEBUG("ERROR: unexpected trailing characters after "
- "terminating 'E': '%.*s'", SV_PRINT(&sv));
- st.rds_error = EINVAL;
- goto fail;
- }
-
- char *res = xstrdup(ops, custr_cstr(st.rds_demangled));
- if (res == NULL) {
- st.rds_error = errno;
- goto fail;
- }
-
- custr_free(st.rds_demangled);
- DEMDEBUG("result = '%s'", res);
- return (res);
-
-fail:
- custr_free(st.rds_demangled);
- errno = st.rds_error;
- return (NULL);
+ rust_state_t *st = cua->cua_arg;
+ return (zalloc(st->rs_ops, len));
}
-static boolean_t
-rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp)
+/*
+ * custr free callback. Releases the len byte allocation at p through
+ * xfree(), using the sysdem_ops_t of the rust_state_t registered as
+ * cua_arg.
+ */
+static void
+rust_cufree(custr_alloc_t *cua, void *p, size_t len)
{
- strview_t pfx;
+ rust_state_t *st = cua->cua_arg;
+ xfree(st->rs_ops, p, len);
+}
- sv_init_sv(&pfx, svp);
+/*
+ * Allocator callbacks handed to custr_alloc_init() by rust_init_state()
+ * so that the output string is allocated and freed with the caller's
+ * sysdem_ops_t.
+ */
+static const custr_alloc_ops_t rust_custr_ops = {
+ .custr_ao_alloc = rust_cualloc,
+ .custr_ao_free = rust_cufree
+};
- DEMDEBUG("checking for '_Z' or '__Z' in '%.*s'", SV_PRINT(&pfx));
+/*
+ * Append a single character to the demangled output.
+ *
+ * The characters that have a C escape sequence (\a \b \f \n \r \t \v)
+ * and the backslash itself are emitted as that two character escape
+ * sequence; any other value below ' ' is emitted as '\xNN'. All other
+ * characters are appended unchanged.
+ *
+ * Returns B_FALSE if an error was already recorded in st or if the
+ * append fails (rs_error is then set from errno). While st->rs_skip is
+ * set, nothing is emitted and B_TRUE is returned.
+ */
+boolean_t
+rust_appendc(rust_state_t *st, char c)
+{
+ custr_t *cus = st->rs_demangled;
- if (st->rds_error != 0)
+ if (HAS_ERROR(st))
return (B_FALSE);
- if (!sv_consume_if_c(&pfx, '_'))
- return (B_FALSE);
+ if (st->rs_skip)
+ return (B_TRUE);
- (void) sv_consume_if_c(&pfx, '_');
+ switch (c) {
+ case '\a':
+ return (rust_append(st, "\\a"));
+ case '\b':
+ return (rust_append(st, "\\b"));
+ case '\f':
+ return (rust_append(st, "\\f"));
+ case '\n':
+ return (rust_append(st, "\\n"));
+ case '\r':
+ return (rust_append(st, "\\r"));
+ case '\t':
+ return (rust_append(st, "\\t"));
+ case '\v':
+ return (rust_append(st, "\\v"));
+ case '\\':
+ return (rust_append(st, "\\\\"));
+ }
+
+ /*
+ * NOTE(review): where plain char is signed, bytes >= 0x80 are
+ * negative and therefore also take this path -- confirm that is
+ * the intent.
+ */
+ if (c < ' ')
+ return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c));
- if (!sv_consume_if_c(&pfx, 'Z'))
+ if (custr_appendc(cus, c) != 0) {
+ SET_ERROR(st);
return (B_FALSE);
+ }
- /* Update svp with new position */
- sv_init_sv(svp, &pfx);
return (B_TRUE);
}
-static boolean_t
-rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first)
+/*
+ * Append a single Unicode code point to the demangled output.
+ *
+ * Code points below 0x80 are handed to rust_appendc(). For anything
+ * larger, a UTF-8 locale gets the UTF-8 encoding of the code point,
+ * while any other locale gets an escape of the form '\uXXXX' (or
+ * '\UXXXXXXXX' for values of 0x10000 and above).
+ *
+ * Like the other rust_append*() functions, nothing is emitted and
+ * B_TRUE is returned while st->rs_skip is set. B_FALSE is returned if
+ * an error was already recorded in st, if appending fails (rs_error is
+ * then set from errno), or, in a UTF-8 locale, if val is above
+ * 0x10ffff.
+ */
+boolean_t
+rust_append_utf8_c(rust_state_t *st, uint32_t val)
{
- strview_t sv;
- strview_t name;
- uint64_t len;
- size_t rem;
- boolean_t last = B_FALSE;
+ custr_t *cus = st->rs_demangled;
+ uint_t n = 0;
+ uint8_t c[4] = { 0 };
- if (st->rds_error != 0 || sv_remaining(svp) == 0)
+ if (HAS_ERROR(st))
return (B_FALSE);
+ /*
+ * The multi-byte path below writes to the custr directly, so it
+ * has to honor rs_skip itself instead of relying on rust_appendc()
+ * or rust_append_printf() to do so.
+ */
+ if (st->rs_skip)
+ return (B_TRUE);
+
- sv_init_sv(&sv, svp);
-
- if (!rustdem_parse_num(st, &sv, &len)) {
- DEMDEBUG("ERROR: no leading length");
- st->rds_error = EINVAL;
- return (B_FALSE);
+ if (!st->rs_isutf8) {
+ if (val < 0x80)
+ return (rust_appendc(st, (char)val));
+ if (val < 0x10000)
+ return (rust_append_printf(st, "\\u%04" PRIx32, val));
+ return (rust_append_printf(st, "\\U%08" PRIx32, val));
}
- rem = sv_remaining(&sv);
-
- if (rem < len) {
- st->rds_error = EINVAL;
+ if (val < 0x80) {
+ return (rust_appendc(st, (char)val));
+ } else if (val < 0x800) {
+ c[0] = 0xc0 | ((val >> 6) & 0x1f);
+ c[1] = 0x80 | (val & 0x3f);
+ n = 2;
+ } else if (val < 0x10000) {
+ c[0] = 0xe0 | ((val >> 12) & 0x0f);
+ c[1] = 0x80 | ((val >> 6) & 0x3f);
+ c[2] = 0x80 | (val & 0x3f);
+ n = 3;
+ } else if (val < 0x110000) {
+ c[0] = 0xf0 | ((val >> 18) & 0x7);
+ c[1] = 0x80 | ((val >> 12) & 0x3f);
+ c[2] = 0x80 | ((val >> 6) & 0x3f);
+ c[3] = 0x80 | (val & 0x3f);
+ n = 4;
+ } else {
+ DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__,
+ val);
return (B_FALSE);
}
- /* Is this the last segment before the terminating E? */
- if (rem == len + 1) {
- VERIFY3U(sv_peek(&sv, -1), ==, 'E');
- last = B_TRUE;
+ for (uint_t i = 0; i < n; i++) {
+ if (custr_appendc(cus, c[i]) != 0) {
+ SET_ERROR(st);
+ return (B_FALSE);
+ }
}
- if (!first && !rustdem_add_sep(st))
- return (B_FALSE);
-
- /* Reduce length of seg to the length we parsed */
- (void) sv_init_sv_range(&name, &sv, len);
-
- DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name));
-
- /*
- * A rust hash starts with 'h', and is the last component of a name
- * before the terminating 'E'. It is however not always present
- * in every mangled symbol, and a last segment that starts with 'h'
- * could be confused for it, so failing to parse it just means
- * we don't have a trailing hash.
- */
- if (sv_peek(&name, 0) == 'h' && last) {
- if (rustdem_parse_hash(st, &name))
- goto done;
-
- /*
- * However any error other than 'not a hash' (e.g. ENOMEM)
- * means we should fail.
- */
- if (st->rds_error != 0)
- goto done;
- }
+ return (B_TRUE);
+}
- while (sv_remaining(&name) > 0) {
- switch (sv_peek(&name, 0)) {
- case '$':
- if (rustdem_parse_special(st, &name))
- continue;
- break;
- case '_':
- if (sv_peek(&name, 1) == '$') {
- /*
- * Only consume/ignore '_'. Leave
- * $ for next round.
- */
- sv_consume_n(&name, 1);
- continue;
- }
- break;
- case '.':
- /* Convert '..' to '::' */
- if (sv_peek(&name, 1) != '.')
- break;
+/*
+ * Append the NUL-terminated string s to the demangled output.
+ *
+ * Returns B_FALSE if an error was already recorded in st or if the
+ * append fails (rs_error is then set from errno). While st->rs_skip is
+ * set, nothing is emitted and B_TRUE is returned.
+ */
+boolean_t
+rust_append(rust_state_t *st, const char *s)
+{
+ custr_t *cus = st->rs_demangled;
- if (!rustdem_add_sep(st))
- return (B_FALSE);
+ if (HAS_ERROR(st))
+ return (B_FALSE);
- sv_consume_n(&name, 2);
- continue;
- default:
- break;
- }
+ if (st->rs_skip)
+ return (B_TRUE);
- if (custr_appendc(st->rds_demangled,
- sv_consume_c(&name)) != 0) {
- st->rds_error = ENOMEM;
- return (B_FALSE);
- }
+ if (custr_append(cus, s) != 0) {
+ SET_ERROR(st);
+ return (B_FALSE);
}
-done:
- sv_consume_n(&sv, len);
- VERIFY3P(svp->sv_first, <=, sv.sv_first);
- DEMDEBUG("%s: consumed '%.*s'", __func__,
- (int)(sv.sv_first - svp->sv_first), svp->sv_first);
- sv_init_sv(svp, &sv);
return (B_TRUE);
}
-/*
- * Parse N (<num><name>{num})+[<num>h<hex digits>]E
- */
-static boolean_t
-rustdem_parse_name(rustdem_state_t *st, strview_t *svp)
+/*
+ * Append the first n bytes of sv to the demangled output and consume
+ * them from sv.
+ *
+ * Returns B_FALSE if an error was already recorded in st, if n exceeds
+ * the bytes remaining in sv or INT_MAX (rs_error is set to ERANGE), or
+ * if the append fails (rs_error is then set from errno). While
+ * st->rs_skip is set, the n bytes are consumed without being emitted.
+ */
+boolean_t
+rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv)
{
- strview_t name;
- boolean_t first = B_TRUE;
-
- if (st->rds_error != 0)
+ if (HAS_ERROR(st))
return (B_FALSE);
- sv_init_sv(&name, svp);
-
- DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name));
- if (sv_remaining(&name) == 0) {
- DEMDEBUG("%s: empty name", __func__);
+ /*
+ * n comes from the mangled name, so it must be validated before
+ * anything is consumed from sv -- this includes the skip case
+ * below, which would otherwise hand an unchecked (and, on ILP32,
+ * truncated) length to sv_consume_n().
+ */
+ if (n > sv_remaining(sv)) {
+ DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > "
+ "remaining bytes (%zu)", __func__, n, sv_remaining(sv));
+ st->rs_error = ERANGE;
return (B_FALSE);
}
+ if (st->rs_skip) {
+ sv_consume_n(sv, (size_t)n);
+ return (B_TRUE);
+ }
+
- if (!sv_consume_if_c(&name, 'N')) {
- DEMDEBUG("%s: does not start with 'N'", __func__);
+ /* The length is passed as the int precision of "%.*s" below */
+ if (n > INT_MAX) {
+ DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n);
+ st->rs_error = ERANGE;
return (B_FALSE);
}
- while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') {
- if (!rustdem_parse_name_segment(st, &name, first))
- return (B_FALSE);
- first = B_FALSE;
+ if (custr_append_printf(st->rs_demangled, "%.*s",
+ (int)n, sv->sv_first) != 0) {
+ SET_ERROR(st);
+ return (B_FALSE);
}
- VERIFY(sv_consume_if_c(&name, 'E'));
-
- VERIFY3P(svp->sv_first, <=, name.sv_first);
- DEMDEBUG("%s: consumed '%.*s'", __func__,
- (int)(name.sv_first - svp->sv_first), svp->sv_first);
+ sv_consume_n(sv, (size_t)n);
- sv_init_sv(svp, &name);
return (B_TRUE);
}
-static boolean_t
-rustdem_parse_hash(rustdem_state_t *st, strview_t *svp)
+/*
+ * Append printf(3C)-style formatted output to the demangled output.
+ *
+ * Returns B_FALSE if an error was already recorded in st or if the
+ * append fails (rs_error is then set from errno). While st->rs_skip is
+ * set, nothing is emitted and B_TRUE is returned.
+ */
+boolean_t
+rust_append_printf(rust_state_t *st, const char *fmt, ...)
{
- strview_t sv;
-
- sv_init_sv(&sv, svp);
+ va_list ap;
+ int ret;
- VERIFY(sv_consume_if_c(&sv, 'h'));
- if (!rustdem_append_c(st, 'h'))
+ if (HAS_ERROR(st))
return (B_FALSE);
- while (sv_remaining(&sv) > 0) {
- char c = sv_consume_c(&sv);
+ if (st->rs_skip)
+ return (B_TRUE);
- switch (c) {
- /*
- * The upper-case hex digits (A-F) are excluded as valid
- * hash values for several reasons:
- *
- * 1. It would result in two different possible names for
- * the same function, leading to ambiguity in linking (among
- * other things).
- *
- * 2. It would cause potential ambiguity in parsing -- is a
- * trailing 'E' part of the hash, or the terminating character
- * in the mangled name?
- *
- * 3. No examples were able to be found in the wild where
- * uppercase digits are used, and other rust demanglers all
- * seem to assume the hash must contain lower-case hex digits.
- */
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- case '8': case '9': case 'a': case 'b':
- case 'c': case 'd': case 'e': case 'f':
- if (!rustdem_append_c(st, c))
- return (B_FALSE);
- break;
- default:
- return (B_FALSE);
- }
- }
+ va_start(ap, fmt);
+ ret = custr_append_vprintf(st->rs_demangled, fmt, ap);
+ va_end(ap);
- sv_init_sv(svp, &sv);
- return (B_TRUE);
+ if (ret == 0)
+ return (B_TRUE);
+ SET_ERROR(st);
+ return (B_FALSE);
}
-/*
- * We have to pick an arbitrary limit here; 999,999,999 fits comfortably
- * within an int32_t, so let's go with that, as it seems unlikely we'd
- * ever see a larger value in context.
- */
-#define MAX_DIGITS 9
-
-static boolean_t
-rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp,
+/*
+ * Parse a decimal number from the front of sv and store it in *valp.
+ *
+ * On success the digits are consumed from sv and B_TRUE is returned.
+ * B_FALSE is returned, with rs_error left untouched, if an error was
+ * already recorded in st, if sv is empty, or if sv does not start with
+ * a digit. B_FALSE is also returned when the legacy encoding is in use
+ * and the number starts with '0' (rs_error is set to EINVAL), or when
+ * the value does not fit in a uint64_t (rs_error is set to EOVERFLOW).
+ * In the failure cases *valp is not written.
+ */
+boolean_t
+rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv,
uint64_t *restrict valp)
{
- strview_t snum;
uint64_t v = 0;
- size_t ndigits = 0;
char c;
- if (st->rds_error != 0)
+ if (HAS_ERROR(st) || sv_remaining(sv) == 0)
return (B_FALSE);
- sv_init_sv(&snum, svp);
-
- DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum));
-
- c = sv_peek(&snum, 0);
- if (!ISDIGIT(c)) {
- DEMDEBUG("%s: ERROR no digits in str\n", __func__);
- st->rds_error = EINVAL;
- return (B_FALSE);
- }
+ c = sv_peek(sv, 0);
/*
- * Since there is currently no official specification on rust name
- * mangling, only that it has been stated that rust follows what
- * C++ mangling does. In the Itanium C++ ABI (what practically
- * every non-Windows C++ implementation uses these days), it
- * explicitly disallows leading 0s in numeric values (except for
- * substition and template indexes, which aren't relevant here).
- * We enforce the same restriction -- if a rust implementation allowed
- * leading zeros in numbers (basically segment lengths) it'd
- * cause all sorts of ambiguity problems with names that likely lead
- * to much bigger problems with linking and such, so this seems
- * reasonable.
+ * Since the legacy rust encoding states that it follows the
+ * Itanium C++ mangling format, we match the behavior of the
+ * Itanium C++ ABI in disallowing leading 0s in decimal numbers.
+ *
+ * For Rust encoding v0, RFC2603 currently has omitted the
+ * actual definition of <decimal-number>. However examination of
+ * other implementations written in tandem with the mangling
+ * implementation suggest that <decimal-number> can be expressed
+ * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and
+ * terminates the token, while any other leading digit allows
+ * parsing to continue until a non-digit is encountered, the
+ * end of the string is encountered, or overflow is encountered.
*/
if (c == '0') {
- DEMDEBUG("%s: ERROR number starts with leading 0\n", __func__);
- st->rds_error = EINVAL;
+ if (st->rs_encver == RUSTENC_V0) {
+ sv_consume_n(sv, 1);
+ *valp = 0;
+ return (B_TRUE);
+ }
+
+ DEMDEBUG("%s: ERROR number starts with leading 0\n",
+ __func__);
+ st->rs_error = EINVAL;
+ return (B_FALSE);
+ } else if (!ISDIGIT(c)) {
return (B_FALSE);
}
- while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) {
- c = sv_consume_c(&snum);
+ /* Accumulate digits; the first non-digit is left unconsumed */
+ while (sv_remaining(sv) > 0) {
+ uint64_t cval;
+ c = sv_peek(sv, 0);
if (!ISDIGIT(c))
break;
+ sv_consume_n(sv, 1);
- v *= 10;
- v += c - '0';
- ndigits++;
- }
+ cval = c - '0';
- if (ndigits > MAX_DIGITS) {
- DEMDEBUG("%s: value %llu is too large\n", __func__, v);
- st->rds_error = ERANGE;
- return (B_FALSE);
- }
+ if (mul_overflow(v, 10, &v)) {
+ DEMDEBUG("%s: multiplication overflowed\n", __func__);
+ st->rs_error = EOVERFLOW;
+ return (B_FALSE);
+ }
- DEMDEBUG("%s: num=%llu", __func__, v);
+ if (add_overflow(v, cval, &v)) {
+ DEMDEBUG("%s: addition overflowed\n", __func__);
+ st->rs_error = EOVERFLOW;
+ return (B_FALSE);
+ }
+ }
*valp = v;
- sv_consume_n(svp, ndigits);
return (B_TRUE);
}
+/*
+ * Consume the mangling prefix from sv and record the encoding it
+ * selects in st->rs_encver: '_Z' selects the legacy encoding and '_R'
+ * selects v0. One additional leading '_' is tolerated in either case.
+ *
+ * On success, st->rs_orig is set to the remainder of the name and
+ * B_TRUE is returned. B_FALSE is returned if an error was already
+ * recorded in st, if no valid prefix is found (rs_error is left
+ * untouched), if the '_R' prefix is followed by a version number
+ * (rs_error is set to ENOTSUP), or if parsing that number failed with
+ * an error.
+ */
static boolean_t
-rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp)
+rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv)
{
- if (st->rds_error != 0)
+ DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv));
+
+ if (HAS_ERROR(st))
return (B_FALSE);
- if (sv_peek(svp, 0) != '$')
+ if (!sv_consume_if_c(sv, '_'))
return (B_FALSE);
- for (size_t i = 0; i < rust_charmap_sz; i++) {
- if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) {
- if (!rustdem_append_c(st, rust_charmap[i].ruc_ch))
- return (B_FALSE);
- return (B_TRUE);
+ /*
+ * MacOS prepends an additional '_' -- allow that in case
+ * we're given symbols from a MacOS object.
+ */
+ (void) sv_consume_if_c(sv, '_');
+
+ if (sv_consume_if_c(sv, 'Z')) {
+ /*
+ * Legacy names must start with '[_]_Z'
+ */
+ st->rs_encver = RUSTENC_LEGACY;
+ DEMDEBUG("name is encoded using the rust legacy mangling "
+ "scheme");
+ } else if (sv_consume_if_c(sv, 'R')) {
+ uint64_t ver = 0;
+
+ /*
+ * The non-legacy encoding is versioned. After the initial
+ * 'R' is the version. This isn't spelled out clearly in the
+ * RFC, but many numeric values encoded take an approach of
+ * a value of 0 is omitted, and any digits represent the
+ * value - 1. In other words, in this case, no digits means
+ * version 0, '_R0...' would be version 1, '_R1...' would
+ * be version 2, etc. Currently only version 0 is defined,
+ * but we try to provide a (hopefully) useful message
+ * when debugging, even if we can't use the version value
+ * beyond that.
+ *
+ * st->rs_encver is still zero (RUSTENC_V0) from
+ * rust_init_state() at this point, so rust_parse_base10()
+ * applies the v0 rules for a leading '0'.
+ */
+ if (rust_parse_base10(st, sv, &ver)) {
+ DEMDEBUG("%s: ERROR: an unsupported encoding version "
+ "(%" PRIu64 ") was encountered", __func__, ver + 1);
+ st->rs_error = ENOTSUP;
+ return (B_FALSE);
}
+
+ /*
+ * No version digits is the expected case, but
+ * rust_parse_base10() also fails (and records an error) when
+ * the digits overflow. Do not report success in that case.
+ */
+ if (HAS_ERROR(st))
+ return (B_FALSE);
+
+ st->rs_encver = RUSTENC_V0;
+ DEMDEBUG("name is encoded using the v0 mangling scheme");
+ } else {
+ DEMDEBUG("did not find a valid rust prefix");
+ return (B_FALSE);
}
- return (B_FALSE);
+
+ sv_init_sv(&st->rs_orig, sv);
+ return (B_TRUE);
+}
+
+/*
+ * Release what rust_init_state() set up: the output string and the
+ * custr allocator.
+ */
+static void
+rust_fini_state(rust_state_t *st)
+{
+ custr_free(st->rs_demangled);
+ custr_alloc_fini(&st->rs_cualloc);
}
+/*
+ * Zero *st and prepare it for demangling s: record the string and the
+ * allocator ops, set up the custr allocator and the (empty) output
+ * string, and note whether the current locale's codeset is UTF-8.
+ *
+ * Returns B_FALSE if the allocator or the output string cannot be
+ * created; nothing is left allocated in that case.
+ */
static boolean_t
-rustdem_add_sep(rustdem_state_t *st)
+rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops)
{
- if (st->rds_error != 0)
+ const char *codeset;
+
+ (void) memset(st, 0, sizeof (*st));
+
+ st->rs_str = s;
+ st->rs_ops = ops;
+
+ st->rs_cualloc.cua_version = CUSTR_VERSION;
+ if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0)
return (B_FALSE);
+ /* The allocation callbacks find rs_ops through this pointer */
+ st->rs_cualloc.cua_arg = st;
- if (!rustdem_append_c(st, ':') ||
- !rustdem_append_c(st, ':'))
+ if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) {
+ custr_alloc_fini(&st->rs_cualloc);
return (B_FALSE);
+ }
+
+ /* Decides how rust_append_utf8_c() emits non-ASCII code points */
+ codeset = nl_langinfo(CODESET);
+ if (codeset != NULL && strcmp(codeset, "UTF-8") == 0)
+ st->rs_isutf8 = B_TRUE;
return (B_TRUE);
}
-static boolean_t
-rustdem_append_c(rustdem_state_t *st, char c)
+/*
+ * Demangle the rust symbol name in the len bytes starting at s.
+ *
+ * The prefix of the name selects the decoder (legacy or v0). On
+ * success a copy of the demangled name, allocated with xstrdup()
+ * using ops, is returned. On failure NULL is returned and errno is
+ * set: to the error recorded while demangling if there is one (e.g.
+ * an allocation failure), and to EINVAL otherwise.
+ */
+char *
+rust_demangle(const char *s, size_t len, sysdem_ops_t *ops)
{
- if (st->rds_error != 0)
- return (B_FALSE);
+ rust_state_t st;
+ strview_t sv = { 0 };
+ boolean_t success = B_FALSE;
+ int e = 0;
+ char *out = NULL;
- if (custr_appendc(st->rds_demangled, c) == 0)
- return (B_TRUE);
+ if (!rust_init_state(&st, s, ops))
+ return (NULL);
- st->rds_error = errno;
- return (B_FALSE);
-}
+ sv_init_str(&sv, s, s + len);
-static boolean_t
-rustdem_all_ascii(const strview_t *svp)
-{
- strview_t p;
+ if (!rust_parse_prefix(&st, &sv)) {
+ if (st.rs_error == 0)
+ st.rs_error = EINVAL;
+ goto done;
+ }
- sv_init_sv(&p, svp);
+ DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv));
- while (sv_remaining(&p) > 0) {
- char c = sv_consume_c(&p);
+ switch (st.rs_encver) {
+ case RUSTENC_LEGACY:
+ success = rust_demangle_legacy(&st, &sv);
+ break;
+ case RUSTENC_V0:
+ success = rust_demangle_v0(&st, &sv);
+ break;
+ }
- /*
- * #including <sys/ctype.h> conflicts with <ctype.h>. Since
- * we want the C locale macros (ISDIGIT, etc), it also means
- * we can't use isascii(3C).
- */
- if ((c & 0x80) != 0) {
- DEMDEBUG("%s: found non-ascii character 0x%02hhx at "
- "offset %tu", __func__, c,
- (ptrdiff_t)(p.sv_first - svp->sv_first));
- return (B_FALSE);
- }
+done:
+ if (success) {
+ out = xstrdup(ops, custr_cstr(st.rs_demangled));
+ if (out == NULL)
+ SET_ERROR(&st);
+ } else {
+ DEMDEBUG("%s: failed, str='%s'", __func__,
+ custr_cstr(st.rs_demangled));
+
+ /*
+ * Keep a more specific error (such as ENOMEM) if one was
+ * recorded; only fall back to EINVAL when nothing else
+ * explains the failure.
+ */
+ if (st.rs_error == 0)
+ st.rs_error = EINVAL;
}
- return (B_TRUE);
-}
-static void *
-rustdem_alloc(custr_alloc_t *cao, size_t len)
-{
- rustdem_state_t *st = cao->cua_arg;
- return (zalloc(st->rds_ops, len));
-}
+ /* rust_fini_state() may change errno, so save the error first */
+ e = st.rs_error;
+ rust_fini_state(&st);
+ if (e > 0)
+ errno = e;
-static void
-rustdem_free(custr_alloc_t *cao, void *p, size_t len)
-{
- rustdem_state_t *st = cao->cua_arg;
- xfree(st->rds_ops, p, len);
+ return (out);
}
diff --git a/usr/src/lib/libdemangle/common/rust.h b/usr/src/lib/libdemangle/common/rust.h
new file mode 100644
index 0000000000..fbe609ab9d
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/rust.h
@@ -0,0 +1,87 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Jason King
+ */
+
+#ifndef _RUST_H
+#define _RUST_H
+
+#include <errno.h>
+#include <sys/types.h>
+#include "demangle-sys.h"
+#include "demangle_int.h"
+#include "strview.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The mangling scheme of the name being demangled, as determined from
+ * the prefix of the name by rust_parse_prefix().
+ */
+typedef enum rustenc_version {
+ RUSTENC_LEGACY = -1, /* '[_]_Z' prefix */
+ RUSTENC_V0 = 0 /* '[_]_R' prefix */
+} rustenc_version_t;
+
+/*
+ * State shared by the rust demanglers for the duration of a single
+ * rust_demangle() call.
+ */
+typedef struct rust_state {
+ const char *rs_str; /* The original string */
+ custr_t *rs_demangled; /* output built by rust_append*() */
+ sysdem_ops_t *rs_ops; /* caller supplied allocator ops */
+ custr_alloc_t rs_cualloc; /* custr allocator using rs_ops */
+ strview_t rs_orig; /* strview of original string, sans prefix */
+ int rs_error; /* errno value; 0 if no error */
+ rustenc_version_t rs_encver; /* set by rust_parse_prefix() */
+ uint64_t rs_lt_depth; /* presumably lifetime depth -- TODO confirm */
+ boolean_t rs_skip; /* rust_append*() emit nothing when set */
+ boolean_t rs_args_stay_open; /* TODO: document (used by v0 parser) */
+ boolean_t rs_args_is_open; /* TODO: document (used by v0 parser) */
+ boolean_t rs_verbose; /* TODO: document */
+ boolean_t rs_show_const_type; /* TODO: document */
+ boolean_t rs_isutf8; /* locale codeset is "UTF-8" */
+} rust_state_t;
+#define HAS_ERROR(_st) ((_st)->rs_error != 0)
+#define SET_ERROR(_st) ((_st)->rs_error = errno)
+
+/*
+ * In certain circumstances, we need to parse an item, but not emit any
+ * output. These macros assist in that. To use:
+ *
+ * rust_state_t *st;
+ * boolean_t saved_state;
+ * ...
+ * SKIP_BEGIN(st, saved_state);
+ * ... stuff to not emit
+ * SKIP_END(st, saved_state);
+ */
+#define SKIP_BEGIN(_st, _save) \
+ (_save) = (_st)->rs_skip, \
+ (_st)->rs_skip = B_TRUE
+#define SKIP_END(_st, _n) (_st)->rs_skip = (_n)
+
+boolean_t rust_appendc(rust_state_t *, char);
+boolean_t rust_append(rust_state_t *, const char *);
+boolean_t rust_append_printf(rust_state_t *, const char *, ...) __PRINTFLIKE(2);
+boolean_t rust_append_sv(rust_state_t *restrict, uint64_t, strview_t *restrict);
+boolean_t rust_append_utf8_c(rust_state_t *, uint32_t);
+boolean_t rust_parse_base10(rust_state_t *restrict, strview_t *restrict,
+ uint64_t *restrict);
+boolean_t rust_demangle_legacy(rust_state_t *restrict, strview_t *restrict);
+boolean_t rust_demangle_v0(rust_state_t *restrict, strview_t *restrict);
+
+boolean_t rustv0_puny_decode(rust_state_t *restrict, strview_t *restrict,
+ boolean_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RUST_H */
diff --git a/usr/src/lib/libdemangle/common/str.c b/usr/src/lib/libdemangle/common/str.c
index 014ce8a737..8608a17b5f 100644
--- a/usr/src/lib/libdemangle/common/str.c
+++ b/usr/src/lib/libdemangle/common/str.c
@@ -12,8 +12,6 @@
/*
* Copyright 2017 Jason King
*/
-#include <sys/debug.h>
-#include <sys/sysmacros.h>
#include <string.h>
#include "str.h"
#include "demangle_int.h"
diff --git a/usr/src/lib/libdemangle/common/strview.c b/usr/src/lib/libdemangle/common/strview.c
index e4576ee17a..1653484172 100644
--- a/usr/src/lib/libdemangle/common/strview.c
+++ b/usr/src/lib/libdemangle/common/strview.c
@@ -10,10 +10,11 @@
*/
/*
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#include <string.h>
+#include <sys/types.h>
#include <sys/debug.h>
#include "strview.h"
diff --git a/usr/src/lib/libdemangle/common/util.c b/usr/src/lib/libdemangle/common/util.c
index 739c554826..17eefe82d7 100644
--- a/usr/src/lib/libdemangle/common/util.c
+++ b/usr/src/lib/libdemangle/common/util.c
@@ -14,7 +14,8 @@
* Copyright 2019, Joyent, Inc.
*/
-#include <sys/debug.h>
+#include <errno.h>
+#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "demangle-sys.h"
@@ -42,6 +43,25 @@ zalloc(sysdem_ops_t *ops, size_t len)
return (p);
}
+void * /* NULL with errno = ENOMEM when n * elsize cannot be represented */
+xcalloc(sysdem_ops_t *ops, size_t n, size_t elsize)
+{
+ uint64_t sz; /* receives the product n * elsize from mul_overflow() */
+
+ if (mul_overflow(n, elsize, &sz)) { /* nonzero result is treated as overflow */
+ errno = ENOMEM;
+ return (NULL);
+ }
+
+#ifndef _LP64
+ if (sz > SIZE_MAX) { /* non-LP64 build: the 64-bit product may not fit in size_t */
+ errno = ENOMEM;
+ return (NULL);
+ }
+#endif
+
+ return (zalloc(ops, sz)); /* the allocation itself is delegated to zalloc() */
+}
void
xfree(sysdem_ops_t *ops, void *p, size_t len)
{
diff --git a/usr/src/test/util-tests/tests/demangle/rust.c b/usr/src/test/util-tests/tests/demangle/rust.c
index db2fae28e4..051bf2456b 100644
--- a/usr/src/test/util-tests/tests/demangle/rust.c
+++ b/usr/src/test/util-tests/tests/demangle/rust.c
@@ -26,7 +26,7 @@
* DEALINGS IN THE SOFTWARE.
*/
/*
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
* Copyright 2021 Jason King
*/
@@ -34,12 +34,16 @@
* Test cases taken from rustc-demangle 0.1.9
*/
#include <errno.h>
+#include <err.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/sysmacros.h>
#include <demangle-sys.h>
+#define TEST_LOCALE "C.UTF-8" /* LC_CTYPE locale main() must set, else it exits via errx() */
+
typedef struct rust_test_case {
const char *mangled;
const char *demangled;
@@ -51,6 +55,7 @@ typedef struct rust_test_grp {
const char *name;
rust_test_case_t tests[];
} rust_test_grp_t;
+
#define GROUP(_n, ...) \
static rust_test_grp_t _n = { \
.name = #_n, \
@@ -60,6 +65,8 @@ typedef struct rust_test_grp {
} \
}
+/* BEGIN CSTYLED */
+
GROUP(demangle,
T_ERR("test"),
T("_ZN4testE", "test"),
@@ -77,7 +84,6 @@ GROUP(demangle_many_dollars,
T("_ZN13test$u20$test4foobE", "test test::foob"),
T("_ZN12test$BP$test4foobE", "test*test::foob"));
-/* BEGIN CSTYLED */
GROUP(demangle_osx,
T("__ZN5alloc9allocator6Layout9for_value17h02a996811f781011E",
"alloc::allocator::Layout::for_value::h02a996811f781011"),
@@ -104,6 +110,143 @@ GROUP(handle_assoc_types,
/* C++ mangled names that aren't valid rust names */
GROUP(cplusplus_as_rust, T_ERR("_ZN7mozilla3dom13BrowserParent22RecvUpdateContentCacheERKNS_12ContentCacheE"));
+GROUP(v0_crate_with_leading_digit,
+ T("_RNvC6_123foo3bar", "123foo::bar"));
+
+GROUP(v0_utf8_idents,
+ T("_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y",
+ "utf8_idents::საჭმელად_გემრიელი_სადილი"));
+
+GROUP(v0_closure,
+ T("_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_",
+ "cc::spawn::{closure#0}::{closure#0}"),
+ T("_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_",
+ "<core::slice::Iter<u8> as core::iter::iterator::Iterator>::rposition::<core::slice::memchr::memrchr::{closure#1}>::{closure#0}"));
+
+GROUP(v0_dyn_trait,
+ T("_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std",
+ "alloc::alloc::box_free::<dyn alloc::boxed::FnBox<(), Output = ()>>"));
+
+GROUP(v0_const_generics,
+ T("_RMC0INtC8arrayvec8ArrayVechKj7b_E", "<arrayvec::ArrayVec<u8, 123>>"),
+ T("_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E", "<const_generic::Unsigned<11>>"),
+ T("_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E", "<const_generic::Signed<152>>"),
+ T("_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E", "<const_generic::Signed<-11>>"),
+ T("_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E", "<const_generic::Bool<false>>"),
+ T("_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E", "<const_generic::Bool<true>>"),
+ T("_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E", "<const_generic::Char<'v'>>"),
+ T("_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E", "<const_generic::Char<'\\n'>>"),
+ T("_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E", "<const_generic::Char<'∂'>>"));
+
+GROUP(v0_exponential_explosion,
+ T("_RMC0TTTTTTpB8_EB7_EB6_EB5_EB4_EB3_E",
+ "<((((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _)))), "
+ "((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _))))), "
+ "(((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _)))), "
+ "((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _))))))>"));
+
+GROUP(v0_thinlto,
+ T("_RC3foo.llvm.9D1C9369", "foo"),
+ T("_RC3foo.llvm.9D1C9369@@16", "foo"),
+ T("_RNvC9backtrace3foo.llvm.A5310EB9", "backtrace::foo"));
+
+GROUP(v0_demangle_extra_suffix,
+ T("_RNvNtNtNtNtCs92dm3009vxr_4rand4rngs7adapter9reseeding4fork23FORK_HANDLER_REGISTERED.0.0",
+ "rand::rngs::adapter::reseeding::fork::FORK_HANDLER_REGISTERED.0.0"));
+
+/*
+ * From Rust RFC2603
+ */
+GROUP(v0_generic_func,
+ T("_RINvNtC3std3mem8align_ofdE", "std::mem::align_of::<f64>"),
+ T("_RINvNtC3std3mem8align_ofNtNtC3std3mem12DiscriminantE",
+ "std::mem::align_of::<std::mem::Discriminant>"),
+ T("_RINvNtC3std3mem8align_ofQTReuEE",
+ "std::mem::align_of::<&mut (&str, ())>"));
+
+GROUP(v0_eddyb,
+ T("_RNvXsa_NtNtCs7hxHya3g3Sg_4core3ptr6uniqueINtB5_6UniqueNtNtNtCshRVCqTKO4VO_5cargo4util4toml10TomlTargetEINtNtB9_7convert4FromINtNtB7_8non_null7NonNullBQ_EE4fromBW_",
+ "<core::ptr::unique::Unique<cargo::util::toml::TomlTarget> as core::convert::From<core::ptr::non_null::NonNull<cargo::util::toml::TomlTarget>>>::from"),
+ T("_RNvXsG_NtNtCs2ZCqZGLqlfc_3std3ffi6os_strNtB5_5OsStrINtNtCs7hxHya3g3Sg_4core7convert5AsRefBC_E6as_ref",
+ "<std::ffi::os_str::OsStr as core::convert::AsRef<std::ffi::os_str::OsStr>>::as_ref"),
+ T("_RNvMs_NtCs7hxHya3g3Sg_4core6resultINtB4_6ResultNtNtB6_5alloc6LayoutNtBL_9LayoutErrE6unwrapCsdJWFNQ9j01_12aho_corasick",
+ "<core::result::Result<core::alloc::Layout, core::alloc::LayoutErr>>::unwrap"),
+ T("_RINvNtCs7hxHya3g3Sg_4core3mem7size_ofFUKCEPaECs2ZCqZGLqlfc_3std",
+ "core::mem::size_of::<unsafe extern \"C\" fn() -> *const i8>"),
+ T("_RINvCsc1o8JKpgQAm_4test28___rust_begin_short_backtraceFEuEB2_",
+ "test::__rust_begin_short_backtrace::<fn()>"),
+ T("_ZN4core5array104_$LT$impl$u20$core..iter..traits..collect..IntoIterator$u20$for$u20$$RF$$u5b$$RF$str$u3b$$u20$_$u5d$$GT$9into_iter17hc066f1a15f41761dE",
+ "core::array::<impl core::iter::traits::collect::IntoIterator for &[&str; _]>::into_iter::hc066f1a15f41761d"));
+
+GROUP(v0_afl_fast,
+ T_ERR("_RMC0TTTTTTPB8_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTpB8_yB7_eB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTpB4_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTpB4_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTTB8_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTSB8_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTRB8_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTQB8_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTOB8_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTpB8_yB7_hB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTpB8_yB7_llvmEB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTpB1_yB7_eB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTTTpB1_tB7_fB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTC0TTTTTPpB0_SB7_llvmTPpB8_SB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC3TTTTTtpB_yB7_EB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTRLpB8_llvB_vmEB_EB5FEB4EB5FEB4_EB3_E"),
+ T_ERR("_RMC0TTTTQLp.B_llvmEB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TRMC0TTTTQLp.B_YBTTTQLp.B_YB7_EBd_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTQLp.B_llvmEB6_EB5_EB4_E!3_E"),
+ T_ERR("_RMC0TRMC0TTTTQLp.B_bB7_EB6_EB5_EB4_E"),
+ T_ERR("_RMC0TTTTRLp.B_llvmEB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTQLpC0TTTfQLp.B_B_EB84_EB3_E"),
+ T_ERR("_RMC0TTTTQLp.TfQLp.B_jC0TTTfQLp.B_llvT_EB3_E"),
+ T_ERR("_RMC0TTTTQLpB8_TTTTTQLp_B_llvmEB6_E3_E"),
+ T_ERR("_RIC0TRLpB8B8_B8_llvmEB6_EB5_llvmEB6_EB5_EB4_EL3_E"),
+ T_ERR("_RNCINkXs25NNNNNNNNNNNNNNNNNNNNNNNN_INyB9_4IterhENuNgNoBb_4iter8iteraionNCN1_6hr7m0E0Bb_"),
+ T_ERR("_RNCXNkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB4_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6hr7m0E0Bb_"),
+ T_ERR("_RNCXNkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBZ_4iter8iterator8Iterator9rpositionNCNgNpB2_6hr7m0E0Bb_"),
+ T_ERR("_RYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYyYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYyYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYNfYB_YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYNfYB_"),
+ T_ERR("_RNCXNkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB2_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpo25_NgCsbmNqQUJIY6D_4core5sliceIN4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6NqQUJIY6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpositionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0EsitionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0E0Bb_"),
+ T_ERR("_RNCXNkXs25_NCCsbmNqQUJIY6D_4core5sliceINyB2_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpo25_NgCsbmNqQUJIY6D_4core5sliceIN4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6NqQUJIY6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpositionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0EsitionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0E0Bb_"),
+ T_ERR("_RMC0TTTTRL_B4_llvmEB6_EB5_EB4_EB3_E"),
+ T_ERR("_RMC0TTTTRRMC0TB7_llvmEB6_EB5_EB4_EB3_EL_B7_llvmEB6_EB5_EB4_EB3_E"),
+ T_ERR("_RIC0TTTTQIC0L_B7_llvmEB6_E75_EB4_EB3_E"),
+ T_ERR("_RNCINkXs25_NSCsbmNqQUJIY6D_4core53liceINyBK_4IterhDNCINkXs25_NSCsbmD_4core5sRNCINkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB9_4IterhDNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhENuNgNoBN_4iter8iterator8Iterato29rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8IliceINyB1_4IterhENuNgNoBN_4iter8iterator8Iterator9rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8Iter9rposillvmtionNCNgNpB1_Bb_"),
+ T_ERR("_RNCINkXs25_NSCsbmNqQUJIY6D_4core93liceINyBK_4IterhDNCINkXs25_NSCsbmD_4core5sRNCINkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB9_4IterhDNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhENuNgNoBN_4iter8iterator8Iterato29rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8IliceINyB1_4IterhENuNgNoBN_4iter8iterator8Iterator9rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8Iter9rposillvmtionNCNgNpB1_Bb_"),
+ T_ERR("_RNCINkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB9_4IterhDNCINkXs25_NSCsbmD_4core5sRNCINkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB9_4IterhDNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhENuNgNoBN_4iter8iterator8Iterato29rposillvmtiB_NCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8IliceINyB1_4IterhENuNgNoBN_4iter8iterator8Iterator9rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8Iter9rposillvmtionNCNgNpB1_Bb_"),
+ T_ERR("_RNCINkXs25_NSCsbmNqQUJIY6D_4coreu425_NSNgNoBN_4iter8iteratotliceINyB9_4IterhDNCINkXs25_NSCsbmD_4core5sRNCINkXs25_NSCsbeNqQUJIY6D_4core5sliceINyB9_4IterhLNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_48888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888eeeeeeeeeeeeeee88888888888888888888888888888888888888G88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iteravmtionNCNgNpB1_Bb_"),
+ T_ERR("_ZN9EB0_EB3_E"),
+ T_ERR("_ZN9INTTB7_$B6_SB5_E"),
+ T_ERR("_ZN9INTTB7 E.6_SBEEEEEEEEEEEEEEEEB7_EB6_EB5_EB0EB6_EB5_EB0_EB3_EEB0_EB3_E"),
+ T_ERR("_ZN9I3TTB7_$B8_C0TTT9I3TTB7_$B8_$$5$B_E"),
+ T_ERR("_ZN9$C$TB7_$B8_C0TTT9I3TB7_$B8_$$5$B_E"),
+ T_ERR("_ZN9......=E"),
+ T_ERR("_RMC0TTTTQLpfQNp.B_aaaaaTOTfQL_aaaaaB_"),
+ T_ERR("_RMC0TTTTRLpB8_lRMC0B_aaB5_EB4_B5_EEB3_E"),
+ T_ERR("_RMC0TTTTRLp_aalRMC0B_aaB5_EB4_B5_EEB3_E"),
+ T_ERR("_RMC0TTTTRLp_C0TaalRMC0B6_EB_aaB4_B5_EEB3_E"),
+ T_ERR("_RMC0TTTTRL0_aalRMC0B_aaB5_EB4_B5_EEB3_E"),
+ T_ERR("_RMC6aEB8_XB4_YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYAly_IYB_lYYYYYYYAly_HYB_"),
+ T_ERR("_RMC6aEB8_XB4_YYYYYYYYYYYYYYYYYYYYYYYMC6aEB8ZXB4_YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYAlypHYYYYYYYYYYYYYYYYYYAlyNHYB_"),
+ T_ERR("_RMC6aEB8_NB4_YYNYYYNYYYYYYYYYYYxYYYYYYYRAC6aEB8_NBV_YYNYYYNYYYYYYYYYYYxYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYlYNHYB_YYY"),
+ T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$BP$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"),
+ T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$SP$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"),
+ T_ERR("__ZN9?@EEEEEJE"),
+ T_ERR("_RMC0TTTATjpB8_EB7_TB_aaB5_EB4_EB3_E"),
+ T_ERR("_ZN949$TE7_llv4C$TE$C$7_llvm$C$TT9'3TB_$__................................................................................................................................................................................................................................................................................................................................................................................................$B$.E..........................................:.........................................................................................................................................P...............................................@..................................................................................................................................................................................................................................................................TTB7_E.6_SB_E.6_S65__ZQCI<_EB=E"),
+ T_ERR("_RMC6aEB8_XB4_YYYYYYYYYYYYYYYYYYYYYYYYNSCsbmJIY6D_4core0MC6aEB8_XB4_YYYYYYYYYYYYYYYYYYsliceINyB9_rhDNuNgNoBN_4iter8iteravmt}ore5sliceINyB9_4IterhDNuNgNCINOXs25_NSCsbmJIY6D_4core5sliceI_yB9_4IterhDNuNgNoB__4llvmwionNB9_4INkXs25_NSCrhDYYYYYYYYYYYYYYYYYYYYYYYYYYYaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa)))))_aa)))))))))))))))))))))"),
+ T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$LP$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"),
+ T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$LT$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"),
+ T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$GT$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"),
+ T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$RP$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"),
+ T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$RF$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"),
+ T_ERR("_RNCXNkXs25_NgCsbmNqQUJIY6D_4core5sliceINyBK_4IterhENuNGNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpo25_NgCsbmNqQUJIY6D_4core5sliceIN4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6NqQUJIY6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpositionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0EsitionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0E0Bb_"),
+ T_ERR("_RIC6kIIIIIB4_lB5_EB4NEB3_A"),
+ T_ERR("_ZN9I3TTB7_$B8_$B$TT9I398C$T$B8_$B$TT9I398C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TB$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9ITB7$LT$B$TT9I3TB7m$__ZB8T_aa$B8K$C$TT9I3_ZN9$C$TB7_$LT$B$TT9I3TB7m$__ZN9$ $TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN:$C$TB7_$B8_$B$TT9I3TB7m$__ZN$RF$9$C$TB_$BZN9A7$TB7_8B8_$B$TT6$C$$B$%B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$LT$B$TT9I3TB7m$__ZN9$ UE"),
+ T_ERR("_RIC6aOB_aaB4_RIC6aOB_aaB8_gB._NaEB5_gB8_gB4_NaEB5_))))))))))))))))))))))da)))))))C6aEB8_XB4_DC6aXB4_DC6aEJ8_gB_NaEB5_gB8_gB4_NaEB5_))))))))))))))))))))))_a))))))))sitUonNCNgNpB1_6hr7m0E0Bb_)))sitionNCNgNpB1_6hr7m0E0Bb_"));
+
/* END CSTYLED */
static rust_test_grp_t *rust_tests[] = {
@@ -116,8 +259,18 @@ static rust_test_grp_t *rust_tests[] = {
&invalid_no_chop,
&handle_assoc_types,
&cplusplus_as_rust,
+ &v0_crate_with_leading_digit,
+ &v0_utf8_idents,
+ &v0_closure,
+ &v0_dyn_trait,
+ &v0_const_generics,
+ &v0_exponential_explosion,
+ &v0_thinlto,
+ &v0_demangle_extra_suffix,
+ &v0_generic_func,
+ &v0_eddyb,
+ &v0_afl_fast,
};
-
static const size_t n_rust_tests = ARRAY_SIZE(rust_tests);
static boolean_t
@@ -195,8 +348,13 @@ run_test(rust_test_grp_t *test)
int
main(int argc, char **argv)
{
+ const char *l;
boolean_t ok = B_TRUE;
+ l = setlocale(LC_CTYPE, TEST_LOCALE);
+ if (l == NULL || strcmp(l, TEST_LOCALE) != 0)
+ errx(EXIT_FAILURE, "failed to set locale to %s", TEST_LOCALE);
+
for (size_t i = 0; i < n_rust_tests; i++)
ok &= run_test(rust_tests[i]);