diff options
Diffstat (limited to 'usr/src')
| -rw-r--r-- | usr/src/lib/libdemangle/Makefile.com | 16 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/cxx.c | 114 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/cxx_util.c | 2 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/demangle-sys.h | 2 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/demangle.c | 5 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/demangle_int.h | 75 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/rust-legacy.c | 386 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/rust-v0.c | 1449 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/rust-v0puny.c | 264 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/rust.c | 727 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/rust.h | 87 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/str.c | 2 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/strview.c | 3 | ||||
| -rw-r--r-- | usr/src/lib/libdemangle/common/util.c | 22 | ||||
| -rw-r--r-- | usr/src/test/util-tests/tests/demangle/rust.c | 164 | 
15 files changed, 2800 insertions, 518 deletions
| diff --git a/usr/src/lib/libdemangle/Makefile.com b/usr/src/lib/libdemangle/Makefile.com index 76f2e444fc..4cd8ce653c 100644 --- a/usr/src/lib/libdemangle/Makefile.com +++ b/usr/src/lib/libdemangle/Makefile.com @@ -11,12 +11,22 @@  #  # Copyright 2018 Jason King -# Copyright 2018, Joyent, Inc. +# Copyright 2019 Joyent, Inc.  #  LIBRARY = libdemangle-sys.a  VERS    = .1 -OBJECTS = str.o strview.o util.o cxx_util.o cxx.o demangle.o rust.o +OBJECTS =		\ +	cxx.o		\ +	cxx_util.o	\ +	demangle.o	\ +	rust.o		\ +	rust-legacy.o	\ +	rust-v0puny.o	\ +	rust-v0.o	\ +	str.o		\ +	strview.o	\ +	util.o  include ../../Makefile.lib @@ -29,10 +39,8 @@ CSTD =		$(CSTD_GNU99)  CFLAGS +=	$(CCVERBOSE)  CPPFLAGS +=	-I$(SRCDIR) -D_REENTRANT -D__EXTENSIONS__ -  .KEEP_STATE:  all:		$(LIBS) -  include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libdemangle/common/cxx.c b/usr/src/lib/libdemangle/common/cxx.c index b0e9566e6c..0ec5e51294 100644 --- a/usr/src/lib/libdemangle/common/cxx.c +++ b/usr/src/lib/libdemangle/common/cxx.c @@ -12,24 +12,16 @@  /*   * Copyright 2021 Jason King.   
*/ -#include <ctype.h>  #include <errno.h> -#include <locale.h>  #include <note.h>  #include <string.h>  #include <setjmp.h>  #include <stdio.h>  #include <stdlib.h> -#include <sys/isa_defs.h> -#include <sys/debug.h>  #include "demangle-sys.h"  #include "demangle_int.h"  #include "cxx.h" -#ifndef	ARRAY_SIZE -#define	ARRAY_SIZE(x) (sizeof (x) / sizeof (x[0])) -#endif -  #define	CPP_QUAL_CONST		(1U)  #define	CPP_QUAL_VOLATILE	(2U)  #define	CPP_QUAL_RESTRICT	(4U) @@ -47,7 +39,6 @@ typedef struct cpp_db_s {  	boolean_t	cpp_tag_templates;  	boolean_t	cpp_fix_forward_references;  	boolean_t	cpp_try_to_parse_template_args; -	locale_t	cpp_loc;  } cpp_db_t;  #define	CK(x)						\ @@ -81,7 +72,7 @@ static void tpush(cpp_db_t *);  static void tpop(cpp_db_t *);  static void tsave(cpp_db_t *, size_t); -static boolean_t db_init(cpp_db_t *, sysdem_ops_t *); +static void db_init(cpp_db_t *, sysdem_ops_t *);  static void db_fini(cpp_db_t *);  static void dump(cpp_db_t *, FILE *); @@ -96,8 +87,8 @@ static const char *parse_block_invoke(const char *, const char *, cpp_db_t *);  static const char *parse_special_name(const char *, const char *, cpp_db_t *);  static const char *parse_name(const char *, const char *, boolean_t *,      cpp_db_t *); -static const char *parse_call_offset(const char *, const char *, locale_t); -static const char *parse_number(const char *, const char *, locale_t); +static const char *parse_call_offset(const char *, const char *); +static const char *parse_number(const char *, const char *);  static const char *parse_nested_name(const char *, const char *, boolean_t *,      cpp_db_t *);  static const char *parse_local_name(const char *, const char *, boolean_t *, @@ -105,7 +96,7 @@ static const char *parse_local_name(const char *, const char *, boolean_t *,  static const char *parse_unscoped_name(const char *, const char *, cpp_db_t *);  static const char *parse_template_args(const char *, const char *, cpp_db_t *);  static const char *parse_substitution(const 
char *, const char *, cpp_db_t *); -static const char *parse_discriminator(const char *, const char *, locale_t); +static const char *parse_discriminator(const char *, const char *);  static const char *parse_cv_qualifiers(const char *, const char *, unsigned *);  static const char *parse_template_param(const char *, const char *, cpp_db_t *);  static const char *parse_decltype(const char *, const char *, cpp_db_t *); @@ -170,8 +161,8 @@ cpp_demangle(const char *src, size_t srclen, sysdem_ops_t *ops)  	char *volatile result = NULL;  	cpp_db_t db; -	if (!db_init(&db, ops)) -		goto done; +	db_init(&db, ops); +  	if (setjmp(db.cpp_jmp) != 0)  		goto done; @@ -315,12 +306,12 @@ parse_block_invoke(const char *first, const char *last, cpp_db_t *db)  	if (t[0] == '_') {  		/* need at least one digit */ -		if (t + 1 == last || !isdigit_l(t[1], db->cpp_loc)) +		if (t + 1 == last || ISDIGIT(t[1]))  			return (first);  		t += 2;  	} -	while (t < last && isdigit_l(t[0], db->cpp_loc)) +	while (t < last && ISDIGIT(t[0]))  		t++;  done: @@ -498,10 +489,10 @@ parse_special_name(const char *first, const char *last, cpp_db_t *db)  			break;  		case 'c':  			nadd_l(db, "covariant return thunk to", 0); -			t1 = parse_call_offset(first + 2, last, db->cpp_loc); +			t1 = parse_call_offset(first + 2, last);  			if (t1 == t)  				return (first); -			t = parse_call_offset(t1, last, db->cpp_loc); +			t = parse_call_offset(t1, last);  			if (t == t1)  				return (first);  			t1 = parse_encoding(t, last, db); @@ -512,7 +503,7 @@ parse_special_name(const char *first, const char *last, cpp_db_t *db)  			t = parse_type(first + 2, last, db);  			if (t == first + 2)  				return (first); -			t1 = parse_number(t, last, db->cpp_loc); +			t1 = parse_number(t, last);  			if (*t1 != '_')  				return (first);  			t = parse_type(t1 + 1, last, db); @@ -536,7 +527,7 @@ parse_special_name(const char *first, const char *last, cpp_db_t *db)  				nadd_l(db, "non-virtual thunk to", 0);  			} -			t = 
parse_call_offset(first + 1, last, db->cpp_loc); +			t = parse_call_offset(first + 1, last);  			if (t == first + 1)  				return (first);  			t1 = parse_encoding(t, last, db); @@ -583,7 +574,7 @@ parse_special_name(const char *first, const char *last, cpp_db_t *db)   *               # virtual base override, with vcall offset   */  static const char * -parse_call_offset(const char *first, const char *last, locale_t loc) +parse_call_offset(const char *first, const char *last)  {  	VERIFY3P(first, <=, last); @@ -596,7 +587,7 @@ parse_call_offset(const char *first, const char *last, locale_t loc)  	if (first[0] != 'h' && first[0] != 'v')  		return (first); -	t = parse_number(first + 1, last, loc); +	t = parse_number(first + 1, last);  	if (t == first + 1 || t == last || t[0] != '_')  		return (first); @@ -606,7 +597,7 @@ parse_call_offset(const char *first, const char *last, locale_t loc)  	if (first[0] == 'h')  		return (t); -	t1 = parse_number(t, last, loc); +	t1 = parse_number(t, last);  	if (t == t1 || t1 == last || t1[0] != '_')  		return (first); @@ -712,11 +703,11 @@ parse_local_name(const char *first, const char *last,  	if (t[0] == 's') {  		nfmt(db, "{0:L}::string literal", "{0:R}"); -		return (parse_discriminator(t, last, db->cpp_loc)); +		return (parse_discriminator(t, last));  	}  	if (t[0] == 'd') { -		t1 = parse_number(t + 1, last, db->cpp_loc); +		t1 = parse_number(t + 1, last);  		if (t1[0] != '_')  			return (first);  		t1++; @@ -732,7 +723,7 @@ parse_local_name(const char *first, const char *last,  	/* parsed, but ignored */  	if (t[0] != 'd') -		t2 = parse_discriminator(t2, last, db->cpp_loc); +		t2 = parse_discriminator(t2, last);  	return (t2);  } @@ -1992,7 +1983,7 @@ parse_function_param(const char *first, const char *last, cpp_db_t *db)  	unsigned cv = 0;  	if (first[1] == 'L') { -		t2 = parse_number(t1, last, db->cpp_loc); +		t2 = parse_number(t1, last);  		if (t2 == last || t2[0] != 'p')  			return (first);  		t1 = t2; @@ -2002,7 +1993,7 @@ 
parse_function_param(const char *first, const char *last, cpp_db_t *db)  		return (first);  	t1 = parse_cv_qualifiers(t1, last, &cv); -	t2 = parse_number(t1, last, db->cpp_loc); +	t2 = parse_number(t1, last);  	if (t2 == last || t2[0] != '_')  		return (first); @@ -2439,8 +2430,7 @@ parse_unnamed_type_name(const char *first, const char *last, cpp_db_t *db)  	const char *t2 = NULL;  	if (first[1] == 't') { -		while (t1 != last && t1[0] != '_' && -		    isdigit_l(t1[0], db->cpp_loc)) +		while (t1 != last && t1[0] != '_' && ISDIGIT(t1[0]))  			t1++;  		if (t1[0] != '_') @@ -2483,7 +2473,8 @@ parse_unnamed_type_name(const char *first, const char *last, cpp_db_t *db)  	t2 = t1;  	while (t2 != last && t2[0] != '_') { -		if (!isdigit_l(*t2++, db->cpp_loc)) +		char c = *t2++; +		if (!ISDIGIT(c))  			return (first);  	} @@ -2653,7 +2644,7 @@ parse_integer_literal(const char *first, const char *last, const char *fmt,  {  	VERIFY3P(first, <=, last); -	const char *t = parse_number(first, last, db->cpp_loc); +	const char *t = parse_number(first, last);  	const char *start = first;  	if (t == first || t == last || t[0] != 'E') @@ -2736,11 +2727,9 @@ parse_floating_literal(const char *first, const char *last, cpp_db_t *db)  		if (!is_xdigit(t[0]))  			return (first); -		unsigned d1 = isdigit_l(t[0], db->cpp_loc) ? -		    t[0] - '0' : t[0] - 'a' + 10; +		unsigned d1 = ISDIGIT(t[0]) ?  t[0] - '0' : t[0] - 'a' + 10;  		t++; -		unsigned d0 = isdigit_l(t[0], db->cpp_loc) ? -		    t[0] - '0' : t[0] - 'a' + 10; +		unsigned d0 = ISDIGIT(t[0]) ?  t[0] - '0' : t[0] - 'a' + 10;  		*e = (d1 << 4) + d0;  	} @@ -2749,11 +2738,9 @@ parse_floating_literal(const char *first, const char *last, cpp_db_t *db)  		if (!is_xdigit(t[0]))  			return (first); -		unsigned d0 = isdigit_l(t[0], db->cpp_loc) ? -		    t[0] - '0' : t[0] - 'a' + 10; +		unsigned d0 = ISDIGIT(t[0]) ?  t[0] - '0' : t[0] - 'a' + 10;  		t--; -		unsigned d1 = isdigit_l(t[0], db->cpp_loc) ? 
-		    t[0] - '0' : t[0] - 'a' + 10; +		unsigned d1 = ISDIGIT(t[0]) ?  t[0] - '0' : t[0] - 'a' + 10;  		*e = (d1 << 4) + d0;  	} @@ -2898,7 +2885,7 @@ parse_expr_primary(const char *first, const char *last, cpp_db_t *db)  			return (t + 1);  		const char *n; -		for (n = t; n != last && isdigit_l(n[0], db->cpp_loc); n++) +		for (n = t; n != last && ISDIGIT(n[0]); n++)  			;  		if (n == last || nempty(db) || n[0] != 'E')  			return (first); @@ -3046,7 +3033,7 @@ parse_operator_name(const char *first, const char *last, cpp_db_t *db)  	}  	if (first[0] == 'v') { -		if (!isdigit_l(first[1], db->cpp_loc)) +		if (!ISDIGIT(first[1]))  			return (first);  		t = parse_source_name(first + 2, last, db); @@ -3155,19 +3142,19 @@ parse_builtin_type(const char *first, const char *last, cpp_db_t *db)  }  static const char * -parse_base36(const char *first, const char *last, size_t *val, locale_t loc) +parse_base36(const char *first, const char *last, size_t *val)  {  	VERIFY3P(first, <=, last);  	const char *t;  	for (t = first, *val = 0; t != last; t++) { -		if (!isdigit_l(t[0], loc) && !isupper_l(t[0], loc)) +		if (!ISDIGIT(t[0]) && !ISUPPER(t[0]))  			return (t);  		*val *= 36; -		if (isdigit_l(t[0], loc)) +		if (ISDIGIT(t[0]))  			*val += t[0] - '0';  		else  			*val += t[0] - 'A' + 10; @@ -3206,7 +3193,7 @@ parse_substitution(const char *first, const char *last, cpp_db_t *db)  	size_t n = 0;  	if (t[0] != '_') { -		t = parse_base36(first + 1, last, &n, db->cpp_loc); +		t = parse_base36(first + 1, last, &n);  		if (t == first + 1 || t[0] != '_')  			return (first); @@ -3240,7 +3227,7 @@ parse_source_name(const char *first, const char *last, cpp_db_t *db)  	const char *t = NULL;  	size_t n = 0; -	for (t = first; t != last && isdigit_l(t[0], db->cpp_loc); t++) { +	for (t = first; t != last && ISDIGIT(t[0]); t++) {  		/* make sure we don't overflow */  		size_t nn = n * 10;  		if (nn < n) @@ -3287,8 +3274,8 @@ parse_vector_type(const char *first, const char *last, cpp_db_t *db)  	
const char *t = first + 2;  	const char *t1 = NULL; -	if (isdigit_l(first[2], db->cpp_loc) && first[2] != '0') { -		t1 = parse_number(t, last, db->cpp_loc); +	if (ISDIGIT(first[2]) && first[2] != '0') { +		t1 = parse_number(t, last);  		if (t1 == last || t1 + 1 == last || t1[0] != '_')  			return (first); @@ -3376,8 +3363,8 @@ parse_array_type(const char *first, const char *last, cpp_db_t *db)  	size_t n = nlen(db);  	if (t[0] != '_') { -		if (isdigit_l(t[0], db->cpp_loc) && t[0] != '0') { -			t1 = parse_number(t, last, db->cpp_loc); +		if (ISDIGIT(t[0]) && t[0] != '0') { +			t1 = parse_number(t, last);  			if (t1 == last)  				return (first); @@ -3765,7 +3752,7 @@ parse_template_param(const char *first, const char *last, cpp_db_t *db)  	size_t idx = 0;  	while (t != last && t[0] != '_') { -		if (!isdigit_l(t[0], db->cpp_loc)) +		if (!ISDIGIT(t[0]))  			return (first);  		idx *= 10; @@ -3870,7 +3857,7 @@ parse_template_args(const char *first, const char *last, cpp_db_t *db)   *  extension      := decimal-digit+               # at the end of string   */  static const char * -parse_discriminator(const char *first, const char *last, locale_t loc) +parse_discriminator(const char *first, const char *last)  {  	VERIFY3P(first, <=, last); @@ -3879,8 +3866,8 @@ parse_discriminator(const char *first, const char *last, locale_t loc)  	if (first == last)  		return (first); -	if (isdigit_l(first[0], loc)) { -		for (t = first; t != last && isdigit_l(t[0], loc); t++) +	if (ISDIGIT(first[0])) { +		for (t = first; t != last && ISDIGIT(t[0]); t++)  			;  		/* not at the end of the string */ @@ -3893,13 +3880,13 @@ parse_discriminator(const char *first, const char *last, locale_t loc)  	}  	t = first + 1; -	if (isdigit_l(t[0], loc)) +	if (ISDIGIT(t[0]))  		return (t + 1);  	if (t[0] != '_' || t + 1 == last)  		return (first); -	for (t++; t != last && isdigit_l(t[0], loc); t++) +	for (t++; t != last && ISDIGIT(t[0]); t++)  		;  	if (t == last || t[0] != '_')  		return (first); @@ 
-3937,13 +3924,13 @@ parse_cv_qualifiers(const char *first, const char *last, unsigned *cv)   * <number> ::= [n] <non-negative decimal integer>   */  static const char * -parse_number(const char *first, const char *last, locale_t loc) +parse_number(const char *first, const char *last)  {  	VERIFY3P(first, <=, last);  	const char *t = first; -	if (first == last || (first[0] != 'n' && !isdigit_l(first[0], loc))) +	if (first == last || (first[0] != 'n' && !ISDIGIT(first[0])))  		return (first);  	if (t[0] == 'n') @@ -3952,7 +3939,7 @@ parse_number(const char *first, const char *last, locale_t loc)  	if (t[0] == '0')  		return (t + 1); -	while (isdigit_l(t[0], loc)) +	while (ISDIGIT(t[0]))  		t++;  	return (t); @@ -4051,7 +4038,7 @@ tsave(cpp_db_t *db, size_t amt)  	CK(templ_save(&db->cpp_name, amt, &db->cpp_templ));  } -static boolean_t +static void  db_init(cpp_db_t *db, sysdem_ops_t *ops)  {  	(void) memset(db, 0, sizeof (*db)); @@ -4062,8 +4049,6 @@ db_init(cpp_db_t *db, sysdem_ops_t *ops)  	db->cpp_tag_templates = B_TRUE;  	db->cpp_try_to_parse_template_args = B_TRUE;  	tpush(db); -	db->cpp_loc = newlocale(LC_CTYPE_MASK, "C", 0); -	return ((db->cpp_loc != NULL) ? 
B_TRUE : B_FALSE);  }  static void @@ -4072,7 +4057,6 @@ db_fini(cpp_db_t *db)  	name_fini(&db->cpp_name);  	sub_fini(&db->cpp_subs);  	templ_fini(&db->cpp_templ); -	freelocale(db->cpp_loc);  	(void) memset(db, 0, sizeof (*db));  } diff --git a/usr/src/lib/libdemangle/common/cxx_util.c b/usr/src/lib/libdemangle/common/cxx_util.c index 91abb504d3..f4ca32fae5 100644 --- a/usr/src/lib/libdemangle/common/cxx_util.c +++ b/usr/src/lib/libdemangle/common/cxx_util.c @@ -13,8 +13,6 @@   * Copyright 2017 Jason King   */ -#include <sys/debug.h> -#include <sys/sysmacros.h>  #include <string.h>  #include <errno.h>  #include <stdlib.h> diff --git a/usr/src/lib/libdemangle/common/demangle-sys.h b/usr/src/lib/libdemangle/common/demangle-sys.h index 3452d39667..21b2624cf3 100644 --- a/usr/src/lib/libdemangle/common/demangle-sys.h +++ b/usr/src/lib/libdemangle/common/demangle-sys.h @@ -26,7 +26,7 @@ extern "C" {  typedef enum sysdem_lang_e {  	SYSDEM_LANG_AUTO,  	SYSDEM_LANG_CPP, -	SYSDEM_LANG_RUST +	SYSDEM_LANG_RUST,  } sysdem_lang_t;  typedef struct sysdem_alloc_s { diff --git a/usr/src/lib/libdemangle/common/demangle.c b/usr/src/lib/libdemangle/common/demangle.c index bf7c9ab8c7..f8f322757a 100644 --- a/usr/src/lib/libdemangle/common/demangle.c +++ b/usr/src/lib/libdemangle/common/demangle.c @@ -11,7 +11,7 @@  /*   * Copyright 2021 Jason King - * Copyright 2019, Joyent, Inc. + * Copyright 2019 Joyent, Inc.   */  #include <stdlib.h> @@ -86,6 +86,8 @@ is_mangled(const char *str, size_t n)  	(void) sv_consume_if_c(&sv, '_');  	if (sv_consume_if_c(&sv, 'Z'))  		return (B_TRUE); +	if (sv_consume_if_c(&sv, 'R')) +		return (B_TRUE);  	return (B_FALSE);  } @@ -101,6 +103,7 @@ char *  sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops)  {  	char *res = NULL; +  	/*  	 * While the language specific demangler code can handle non-NUL  	 * terminated strings, we currently don't expose this to consumers. 
diff --git a/usr/src/lib/libdemangle/common/demangle_int.h b/usr/src/lib/libdemangle/common/demangle_int.h index 66a34cf41d..d4c227a87f 100644 --- a/usr/src/lib/libdemangle/common/demangle_int.h +++ b/usr/src/lib/libdemangle/common/demangle_int.h @@ -11,24 +11,97 @@  /*   * Copyright 2017 Jason King - * Copyright 2019, Joyent, Inc. + * Copyright 2019 Joyent, Inc.   */  #ifndef _DEMANGLE_INT_H  #define	_DEMANGLE_INT_H +#include <inttypes.h>  #include <stdio.h> +#include <sys/byteorder.h> +#include <sys/ctype.h> /* Use ASCII ISXXXX() macros */ +#include <sys/debug.h> +#include <sys/sysmacros.h> +#include <sys/isa_defs.h>  #include "demangle-sys.h"  #ifdef __cplusplus  extern "C" {  #endif +#ifdef __CHECKER__ +/* + * smatch seems to have a bug which chokes on the builtins, so + * we just have it fallback to the non-builtin definitions + */ +#elif __GNUC__ >= 5 && __GNUC_MINOR__ > 1 +#define	USE_BUILTIN_OVERFLOW +#elif defined(__clang__) +#define	USE_BUILTIN_OVERFLOW +#endif + +#ifdef USE_BUILTIN_OVERFLOW +static inline boolean_t +mul_overflow(uint64_t a, uint64_t b, uint64_t *v) +{ +	return (__builtin_mul_overflow(a, b, v)); +} + +static inline boolean_t +add_overflow(uint64_t a, uint64_t b, uint64_t *v) +{ +	return (__builtin_add_overflow(a, b, v)); +} + +static inline boolean_t +sub_overflow(uint64_t a, uint64_t b, uint64_t *v) +{ +	return (__builtin_sub_overflow(a, b, v)); +} +#else +static inline boolean_t +mul_overflow(uint64_t a, uint64_t b, uint64_t *v) +{ +	uint64_t val = a * b; + +	if (a != 0 && val / a != b) +		return (B_TRUE); +	*v = val; +	return (B_FALSE); +} + +static inline boolean_t +add_overflow(uint64_t a, uint64_t b, uint64_t *v) +{ +	uint64_t val = a + b; + +	if (val < a || val < b) +		return (B_TRUE); +	*v = val; +	return (B_FALSE); +} + +static inline boolean_t +sub_overflow(uint64_t a, uint64_t b, uint64_t *v) +{ +	uint64_t val = a - b; + +	if (val > a) +		return (B_TRUE); +	*v = val; +	return (B_FALSE); +} +#endif +  extern sysdem_ops_t 
*sysdem_ops_default;  char *cpp_demangle(const char *, size_t, sysdem_ops_t *);  char *rust_demangle(const char *, size_t, sysdem_ops_t *); +struct custr_alloc; +  void *zalloc(sysdem_ops_t *, size_t); +void *xcalloc(sysdem_ops_t *, size_t, size_t);  void *xrealloc(sysdem_ops_t *, void *, size_t, size_t);  void xfree(sysdem_ops_t *, void *, size_t);  char *xstrdup(sysdem_ops_t *, const char *); diff --git a/usr/src/lib/libdemangle/common/rust-legacy.c b/usr/src/lib/libdemangle/common/rust-legacy.c new file mode 100644 index 0000000000..5b1518f619 --- /dev/null +++ b/usr/src/lib/libdemangle/common/rust-legacy.c @@ -0,0 +1,386 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + * Copyright 2021 Jason King + */ + +#include <errno.h> +#include <libcustr.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> + +#include "rust.h" + +/* + * Unfortunately, there is currently no official specification for the legacy + * rust name mangling.  This is an attempt to document the understanding of the + * mangling used here.  It is based off examination of + *     https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/ + * + * A mangled rust name is: + *     <prefix> <name> + * + * <prefix>	::=	_Z + *			__Z + * + * <name>	::= N <name-segment>+ [<hash>] E + * + * <name-segment> ::= <len> <name-chars>{len} + * + * <len>	::= [1-9][0-9]* + * + * <name-chars>	::=	<[A-Za-z]> <[A-Za-z0-9]>* + *			<separator> + *			<special> + * + * <separator>	::=	'..'	
# '::' + * + * <special>	::=	$SP$	# '@' + *			$BP$	# '*' + *			$RF$	# '&' + *			$LT$	# '<' + *			$GT$	# '>' + *			$LP$	# '(' + *			$RP$	# ')' + *			$C$	# ',' + * + * <hash>	:= <len> h <hex-digits>+ + * + * <hex-digits>	:= <[0-9a-f]> + */ + +static const struct rust_charmap { +	const char	*ruc_seq; +	char		ruc_ch; +} rust_charmap[] = { +	{ "$SP$", '@' }, +	{ "$BP$", '*' }, +	{ "$RF$", '&' }, +	{ "$LT$", '<' }, +	{ "$GT$", '>' }, +	{ "$LP$", '(' }, +	{ "$RP$", ')' }, +	{ "$C$", ',' }, +}; +static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap); + +static boolean_t rustleg_valid_sym(const strview_t *); +static boolean_t rustleg_parse_name(rust_state_t *, strview_t *); +static boolean_t rustleg_parse_hash(rust_state_t *, strview_t *); +static boolean_t rustleg_parse_special(rust_state_t *, strview_t *); +static boolean_t rustleg_add_sep(rust_state_t *); + +boolean_t +rust_demangle_legacy(rust_state_t *restrict st, strview_t *restrict sv) +{ + +	/* Make sure the whole thing contains valid characters */ +	if (!rustleg_valid_sym(sv)) { +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	if (sv_peek(sv, -1) != 'E') { +		DEMDEBUG("ERROR: string does not end with 'E'"); +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	if (!rustleg_parse_name(st, sv)) +		return (B_FALSE); + +	if (sv_remaining(sv) != 0) { +		DEMDEBUG("ERROR: trailing characters in name"); +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	return (B_TRUE); +} + +static boolean_t +rustleg_parse_name_segment(rust_state_t *st, strview_t *svp, boolean_t first) +{ +	strview_t orig; +	strview_t name; +	uint64_t len; +	size_t rem; +	boolean_t last = B_FALSE; + +	if (HAS_ERROR(st) || sv_remaining(svp) == 0) +		return (B_FALSE); + +	sv_init_sv(&orig, svp); + +	if (!rust_parse_base10(st, svp, &len)) { +		DEMDEBUG("ERROR: no leading length"); +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	rem = sv_remaining(svp); + +	if (rem < len) { +		DEMDEBUG("ERROR: segment length (%" PRIu64 ") > remaining 
" +		    "bytes in string (%zu)", len, rem); +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	/* Is this the last segment before the terminating E? */ +	if (rem == len + 1) { +		VERIFY3U(sv_peek(svp, -1), ==, 'E'); +		last = B_TRUE; +	} + +	if (!first && !rustleg_add_sep(st)) +		return (B_FALSE); + +	/* Reduce length of seg to the length we parsed */ +	(void) sv_init_sv_range(&name, svp, len); + +	DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name)); + +	/* +	 * A rust hash starts with 'h', and is the last component of a name +	 * before the terminating 'E'. It is however not always present +	 * in every mangled symbol, and a last segment that starts with 'h' +	 * could be confused for it, so failing to parse it just means +	 * we don't have a trailing hash. +	 */ +	if (sv_peek(&name, 0) == 'h' && last) { +		if (rustleg_parse_hash(st, &name)) +			goto done; + +		/* +		 * However any error other than 'not a hash' (e.g. ENOMEM) +		 * means we should fail. +		 */ +		if (st->rs_error != 0) +			goto done; +	} + +	/* A '_' followed by $ is ignored at the start of a name segment */ +	if (sv_peek(&name, 0) == '_' && sv_peek(&name, 1) == '$') +		(void) sv_consume_n(&name, 1); + +	while (sv_remaining(&name) > 0) { +		switch (sv_peek(&name, 0)) { +		case '$': +			if (rustleg_parse_special(st, &name)) +				continue; +			break; +		case '.': +			/* Convert '..' 
to '::' */ +			if (sv_peek(&name, 1) != '.') +				break; + +			if (!rustleg_add_sep(st)) +				return (B_FALSE); + +			sv_consume_n(&name, 2); +			continue; +		default: +			break; +		} + +		if (!rust_appendc(st, sv_consume_c(&name))) { +			SET_ERROR(st); +			return (B_FALSE); +		} +	} + +done: +	sv_consume_n(svp, len); + +	VERIFY3P(orig.sv_first, <=, svp->sv_first); +	DEMDEBUG("%s: consumed '%.*s'", __func__, +	    (int)(uintptr_t)(svp->sv_first - orig.sv_first), orig.sv_first); +	return (B_TRUE); +} + +/* + * Parse N (<num><name>{num})+ [<num>h<hex digits>]E + */ +static boolean_t +rustleg_parse_name(rust_state_t *st, strview_t *svp) +{ +	strview_t name; +	boolean_t first = B_TRUE; + +	sv_init_sv(&name, svp); + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name)); + +	if (sv_remaining(svp) == 0) { +		DEMDEBUG("%s: empty name", __func__); +		return (B_FALSE); +	} + +	if (!sv_consume_if_c(svp, 'N')) { +		DEMDEBUG("%s: does not start with 'N'", __func__); +		return (B_FALSE); +	} + +	while (sv_remaining(svp) > 0 && sv_peek(svp, 0) != 'E') { +		if (!rustleg_parse_name_segment(st, svp, first)) +			return (B_FALSE); +		first = B_FALSE; +	} + +	if (!sv_consume_if_c(svp, 'E')) { +		DEMDEBUG("%s: ERROR no terminating 'E'", __func__); +		return (B_FALSE); +	} + +	VERIFY3P(name.sv_first, <=, svp->sv_first); +	DEMDEBUG("%s: consumed '%.*s'", __func__, +	    (int)(uintptr_t)(svp->sv_first - name.sv_first), name.sv_first); + +	return (B_TRUE); +} + +static boolean_t +rustleg_parse_hash(rust_state_t *st, strview_t *svp) +{ +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	VERIFY(sv_consume_if_c(svp, 'h')); +	if (!rust_appendc(st, 'h')) +		return (B_FALSE); + +	while (sv_remaining(svp) > 0) { +		char c = sv_consume_c(svp); + +		switch (c) { +		/* +		 * The upper-case hex digits (A-F) are excluded as valid +		 * hash values for several reasons: +		 * +		 * 1. 
It would result in two different possible names for +		 * the same function, leading to ambiguity in linking (among +		 * other things). +		 * +		 * 2. It would cause potential ambiguity in parsing -- is a +		 * trailing 'E' part of the hash, or the terminating character +		 * in the mangled name? +		 * +		 * 3. No examples were able to be found in the wild where +		 * uppercase digits are used, and other rust demanglers all +		 * seem to assume the hash must contain lower-case hex digits. +		 */ +		case '0': case '1': case '2': case '3': +		case '4': case '5': case '6': case '7': +		case '8': case '9': case 'a': case 'b': +		case 'c': case 'd': case 'e': case 'f': +			if (!rust_appendc(st, c)) +				return (B_FALSE); +			break; +		default: +			return (B_FALSE); +		} +	} + +	return (B_TRUE); +} + +static boolean_t +rustleg_parse_special(rust_state_t *restrict st, strview_t *restrict svp) +{ +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	if (sv_peek(svp, 0) != '$') +		return (B_FALSE); + +	for (size_t i = 0; i < rust_charmap_sz; i++) { +		if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) { +			if (!rust_appendc(st, rust_charmap[i].ruc_ch)) +				return (B_FALSE); +			return (B_TRUE); +		} +	} + +	/* Handle $uXXXX$ */ + +	strview_t sv; +	uint32_t val = 0; +	uint_t ndigits = 0; + +	sv_init_sv(&sv, svp); + +	/* We peeked at this earlier, so it should still be there */ +	VERIFY(sv_consume_if_c(&sv, '$')); + +	if (!sv_consume_if_c(&sv, 'u')) +		return (B_FALSE); + +	while (sv_remaining(&sv) > 0) { +		uint32_t cval = 0; +		char c; + +		if (ndigits == 4) +			return (B_FALSE); + +		c = sv_consume_c(&sv); +		if (c >= '0' && c <= '9') +			cval = c - '0'; +		else if (c >= 'a' && c <= 'f') +			cval = c - 'a' + 10; +		else if (c == '$') +			break; +		else +			return (B_FALSE); + +		val <<= 4; +		val |= cval; +		ndigits++; +	} + +	if (!rust_append_utf8_c(st, val)) +		return (B_FALSE); + +	sv_consume_n(svp, ndigits + 3); +	return (B_TRUE); +} + +static boolean_t 
+rustleg_add_sep(rust_state_t *st) +{ +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	return (rust_append(st, "::")); +} + +static boolean_t +rustleg_valid_sym(const strview_t *sv) +{ +	size_t i; + +	for (i = 0; i < sv->sv_rem; i++) { +		char c = sv->sv_first[i]; + +		if ((c & 0x80) == 0) +			continue; +		DEMDEBUG("%s: ERROR found 8-bit character '%c' in '%.*s' " +		    "at index %zu", __func__, c, SV_PRINT(sv), i); +		return (B_FALSE); +	} +	return (B_TRUE); +} diff --git a/usr/src/lib/libdemangle/common/rust-v0.c b/usr/src/lib/libdemangle/common/rust-v0.c new file mode 100644 index 0000000000..598d8457c9 --- /dev/null +++ b/usr/src/lib/libdemangle/common/rust-v0.c @@ -0,0 +1,1449 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + * Copyright 2021 Jason King + */ + +/* BEGIN CSTYLED */ + +/* + * This implements the 'symbol_name_mangling_v2' demangling for rust as + * described in Rust RFC 2603 as opposed to the original (now called + * legacy) mangling older versions of rust used (implemented in rust-legacy.c). 
+ * + * The specification can be viewed at: + *     https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md + */ + +/* END CSTYLED */ + +#include <errno.h> +#include <libcustr.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "rust.h" + +/* + * Help track amount of additional output added to rs_demangled across + * a function call (to allow that portion to be output for debugging) + */ +#define	SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled) +#define	CSTR_END(_st, _len)					\ +	((int)(custr_len((_st)->rs_demangled) - (_len))),	\ +	custr_cstr((_st)->rs_demangled) + (_len) + +typedef enum const_type_class { +	CTC_INVALID = -1, +	CTC_UNSIGNED, +	CTC_SIGNED, +	CTC_CHAR, +	CTC_BOOL, +} const_type_class_t; + +/* + * Sometimes, parsing something is optional.  In this case a failure to + * parse is fine, however we still want to consider a fatal error as + * failure. + */ +#define	OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st)) + +static boolean_t rustv0_valid_sym(const strview_t *); +static const_type_class_t rustv0_classify_const_type(char); +static boolean_t rustv0_parse_hex_num(rust_state_t *restrict, +    strview_t *restrict, uint64_t *restrict); +static boolean_t rustv0_parse_base62(rust_state_t *restrict, +    strview_t *restrict, uint64_t *restrict); + +static boolean_t rustv0_parse_undisambiguated_identifier( +    rust_state_t *restrict, strview_t *restrict, boolean_t); +static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict, +    strview_t *restrict, uint64_t *restrict); + +static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict, +    boolean_t); +static boolean_t rustv0_parse_impl_path(rust_state_t *restrict, +    strview_t *restrict, boolean_t); +static boolean_t rustv0_parse_nested_path(rust_state_t *restrict, +    strview_t *restrict, boolean_t); +static boolean_t rustv0_parse_basic_type(rust_state_t *restrict, +    strview_t *restrict); 
+static boolean_t rustv0_parse_backref(rust_state_t *restrict, +    strview_t *restrict, +    boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t), +    boolean_t); +static boolean_t rustv0_parse_lifetime(rust_state_t *restrict, +    strview_t *restrict); +static boolean_t rustv0_parse_const(rust_state_t *restrict, +    strview_t *restrict, boolean_t); +static boolean_t rustv0_parse_fnsig(rust_state_t *restrict, +    strview_t *restrict); +static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict, +    strview_t *restrict); +static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict, +    strview_t *restrict, boolean_t); + +boolean_t +rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv) +{ +	boolean_t save_skip; +	boolean_t ret; + +	/* Make sure all the characters are valid */ +	if (!rustv0_valid_sym(sv)) { +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	/* +	 * <symbol-name> = "_R" [<decimal-number>] <path> +	 *	[<instantiating-crate>] +	 * +	 * We've already parsed the prefix in rust_demangle(), as well +	 * as made sure there's no [<decimal-number>] present, so +	 * start with <path>. +	 */ +	if (!rustv0_parse_path(st, sv, B_TRUE)) +		return (B_FALSE); + +	/* [<instantiating crate>] -- parse but don't save */ +	SKIP_BEGIN(st, save_skip); +	ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE)); +	SKIP_END(st, save_skip); +	if (!ret) +		return (B_FALSE); + +	/* If nothing's left, we know we're done */ +	if (sv_remaining(sv) == 0) +		return (!HAS_ERROR(st)); + +	/* +	 * LLVM sometimes will suffix symbols starting with a '.' +	 * followed by extra data. For things that start with +	 * ".llvm.", we discard the rest of the string.  For +	 * other things that start with '.', we copy the +	 * results to the final string. This matches +	 * what the rust native demangler crate does, and +	 * we don't see a reason to deviate from their +	 * behavior. 
+	 */ +	if (sv_consume_if(sv, ".llvm.")) +		return (!HAS_ERROR(st)); + +	if (sv_peek(sv, 0) != '.') { +		DEMDEBUG("%s: Unexpected trailing data at the end of the " +		    "name: '%.*s'", __func__, SV_PRINT(sv)); +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	return (rust_append_sv(st, sv_remaining(sv), sv)); +} + +/* + * Parse an optional list terminated by 'E'. 'fn' is called once per + * element (with 'bval' passed through as its boolean argument), and each + * result of 'fn' is separated by 'sep' in the output. On success, the + * number of elements parsed is added to *countp (when countp is not NULL). + * Returns B_FALSE if 'fn' or an append fails, or if the input runs out + * before the terminating 'E' is seen. + */ +static boolean_t +rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t), +    const char *restrict sep, boolean_t bval, size_t *restrict countp) +{ +	size_t count = 0; + +	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); + +	while (sv_remaining(sv) > 0) { +		if (sv_consume_if_c(sv, 'E')) { +			if (countp != NULL) +				*countp += count; +			return (B_TRUE); +		} + +		if (count > 0 && !rust_append(st, sep)) +			return (B_FALSE); + +		if (!fn(st, sv, bval)) +			return (B_FALSE); + +		count++; +	} + +	/* +	 * An optional list should terminate with an 'E'.  If we get here, +	 * we ran out of characters and didn't terminate as we should. 
+	 */ +	return (B_FALSE); +} + +static boolean_t +rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv) +{ +	const char *str = NULL; +	strview_t save; +	char c; + +	if (HAS_ERROR(st) || sv_remaining(sv) == 0) +		return (B_FALSE); + +	sv_init_sv(&save, sv); + +	switch (c = sv_consume_c(sv)) { +	case 'h': +		str = "u8"; +		break; +	case 't': +		str = "u16"; +		break; +	case 'm': +		str = "u32"; +		break; +	case 'y': +		str = "u64"; +		break; +	case 'o': +		str = "u128"; +		break; +	case 'j':	/* usize */ +		str = "usize"; +		break; +	default: +		sv_init_sv(sv, &save); +		return (B_FALSE); +	} + +	DEMDEBUG("%s: %c -> %s", __func__, c, str); +	return (rust_append(st, str)); +} + +static boolean_t +rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv) +{ +	const char *str = NULL; +	strview_t save; +	char c; + +	if (HAS_ERROR(st) || sv_remaining(sv) == 0) +		return (B_FALSE); + +	if (rustv0_parse_uint_type(st, sv)) +		return (B_TRUE); + +	sv_init_sv(&save, sv); + +	switch (c = sv_consume_c(sv)) { +	case 'a': +		str = "i8"; +		break; +	case 'b': +		str = "bool"; +		break; +	case 'c': +		str = "char"; +		break; +	case 'd': +		str = "f64"; +		break; +	case 'e': +		str = "str"; +		break; +	case 'f': +		str = "f32"; +		break; +	case 'i': +		str = "isize"; +		break; +	case 'l': +		str = "i32"; +		break; +	case 'n': +		str = "i128"; +		break; +	case 'p': +		str = "_"; +		break; +	case 's': +		str = "i16"; +		break; +	case 'u': +		str = "()"; +		break; +	case 'v': +		str = "..."; +		break; +	case 'x': +		str = "i64"; +		break; +	case 'z': +		str = "!"; +		break; +	default: +		sv_init_sv(sv, &save); +		return (B_FALSE); +	} + +	DEMDEBUG("%s: %c -> %s", __func__, c, str); +	return (rust_append(st, str)); +} + +static boolean_t +rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t dummy __unused) +{ +	strview_t save; +	size_t len, tuple_elem_count; +	boolean_t ret; +	char c; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + 
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	SAVE_LEN(st, len); +	sv_init_sv(&save, sv); + +	switch (c = sv_consume_c(sv)) { +	case 'A': +		ret = rust_appendc(st, '[') && +		    rustv0_parse_type(st, sv, B_FALSE) && +		    rust_append(st, "; ") && +		    rustv0_parse_const(st, sv, B_FALSE) && +		    rust_appendc(st, ']'); +		break; +	case 'S': +		ret = rust_appendc(st, '[') && +		    rustv0_parse_type(st, sv, B_FALSE) && +		    rust_appendc(st, ']'); +		break; +	case 'T': +		tuple_elem_count = 0; +		ret = rust_appendc(st, '(') && +		    rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", +		    B_FALSE, &tuple_elem_count) && +		    rust_append(st, (tuple_elem_count == 1) ? ",)" : ")"); +		break; +	case 'R': +	case 'Q': +		/* `&mut T` or `&'... mut T` */ +		if (!(ret = rust_appendc(st, '&'))) +			break; + +		/* +		 * lifetime is optional, but we need to add a trailing +		 * space if present (so we cannot use the OPTIONAL macro). +		 */ +		if (rustv0_parse_lifetime(st, sv)) { +			if (!(ret = rust_appendc(st, ' '))) +				break; +		} else if (HAS_ERROR(st)) { +			break; +		} + +		ret = rust_append(st, (c == 'Q') ? "mut " : "") && +		    rustv0_parse_type(st, sv, B_FALSE); +		break; +	case 'P': +		ret = rust_append(st, "*const ") && +		    rustv0_parse_type(st, sv, B_FALSE); +		break; +	case 'O': +		ret = rust_append(st, "*mut ") && +		    rustv0_parse_type(st, sv, B_FALSE); +		break; +	case 'F': +		ret = rustv0_parse_fnsig(st, sv); +		break; +	case 'D': +		ret = rust_append(st, "dyn ") && +		    rustv0_parse_dynbounds(st, sv); +		if (!ret) +			break; + +		/* +		 * Rust RFC2603 shows the lifetime as required, however +		 * it appears this is optional. +		 */ +		DEMDEBUG("%s: pre-lifetime: '%*s'", __func__, SV_PRINT(sv)); + +		/* +		 * We only want to print a non-zero (non "'_") +		 * lifetime. 
+		 */ +		if (sv_consume_if(sv, "L_")) +			break; + +		/* +		 * But if there is a lifetime we want to print, +		 * we want to prepend " + " before it. +		 */ +		if (sv_peek(sv, 0) == 'L' && +		    !(ret = rust_append(st, " + "))) +			break; + +		ret = rustv0_parse_lifetime(st, sv); +		break; +	default: +		sv_init_sv(sv, &save); + +		ret = rustv0_parse_backref(st, sv, rustv0_parse_type, +		    B_FALSE) || +		    rustv0_parse_basic_type(st, sv); +		if (ret) +			break; + +		ret = rustv0_parse_path(st, sv, B_FALSE); +		break; +	} + +	DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len), +	    ret ? "success" : "fail"); + +	return (ret); +} + +/* + * <path> = "C" <identifier>		crate root + *	| "M" <impl-path> <type>	<T> + *	| "X" <impl-path> <type> <path>	<T as Trait> (trait impl) + *	| "Y" <type> <path>		<T as Trait> (trait definition) + *	| "N" <ns> <path> <identifier>	...::ident (nested path) + *	| "I" <path> {<generic-arg>} "E" ...<T, U> + *	| <backref> + */ +static boolean_t +rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t in_value) +{ +	strview_t save; +	uint64_t disamb = 0; +	size_t len; +	boolean_t ret = B_FALSE; +	boolean_t save_skip; +	boolean_t args_stay_save = st->rs_args_stay_open; +	boolean_t args_open_save = st->rs_args_is_open; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	SAVE_LEN(st, len); +	sv_init_sv(&save, sv); + +	switch (sv_consume_c(sv)) { +	case 'C': +		if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb))) +			goto done; + +		if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) +			goto done; + +		if (st->rs_verbose && +		    !rust_append_printf(st, "[%" PRIx64 "]", disamb)) +			goto done; +		break; +	case 'M': +		SKIP_BEGIN(st, save_skip); +		if (!rustv0_parse_impl_path(st, sv, in_value)) { +			SKIP_END(st, save_skip); +			goto done; +		} +		SKIP_END(st, save_skip); + +		if 
(!rust_appendc(st, '<') || +		    !rustv0_parse_type(st, sv, B_FALSE) || +		    !rust_appendc(st, '>')) +			goto done; +		break; +	case 'X': +		SKIP_BEGIN(st, save_skip); +		if (!rustv0_parse_impl_path(st, sv, in_value)) { +			SKIP_END(st, save_skip); +			goto done; +		} +		SKIP_END(st, save_skip); +		/*FALLTHRU*/ +	case 'Y': +		if (!rust_appendc(st, '<') || +		    !rustv0_parse_type(st, sv, B_FALSE) || +		    !rust_append(st, " as ") || +		    !rustv0_parse_path(st, sv, B_FALSE) || +		    !rust_appendc(st, '>')) +			goto done; +		break; +	case 'N': +		if (!rustv0_parse_nested_path(st, sv, in_value)) +			goto done; +		break; +	case 'I': +		st->rs_args_stay_open = B_FALSE; +		st->rs_args_is_open = B_FALSE; + +		if (!rustv0_parse_path(st, sv, in_value)) +			goto done; + +		if (in_value && !rust_append(st, "::")) +			goto done; + +		if (!rust_appendc(st, '<') || +		    !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg, +		    ", ", B_FALSE, NULL)) +			goto done; + +		st->rs_args_stay_open = args_stay_save; +		st->rs_args_is_open = args_open_save; + +		/* +		 * If we were asked to not close our list, then don't and +		 * indicate that the list is open. +		 */ +		if (st->rs_args_stay_open) { +			st->rs_args_stay_open = B_FALSE; +			st->rs_args_is_open = B_TRUE; +		} else if (!rust_appendc(st, '>')) { +			goto done; +		} +		break; +	default: +		/* +		 * Didn't recognize the letter, so it has to be a backref. +		 * Restore sv to state prior to switch and continue. +		 */ +		sv_init_sv(sv, &save); +		if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value)) +			goto done; +	} + +	ret = B_TRUE; + +done: +	DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len), +	    ret ? 
"success" : "fail"); + +	return (ret); +} + +static boolean_t +rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t in_value) +{ +	uint64_t val = 0; + +	return (OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &val)) && +	    rustv0_parse_path(st, sv, in_value)); +} + +/* + * A bit of a hack -- when printing a nested path, we need to know + * if the identifier is there or not in order to correctly format + * the output preceding it (when present). This peeks ahead and + * determines this. + * + * The peek is done on a copy of sv, so no input is consumed. On success, + * *has_namep is B_FALSE when the identifier's length starts with '0' + * (after an optional 'u' prefix) and B_TRUE otherwise. + */ +static boolean_t +rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t *has_namep) +{ +	strview_t save; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	sv_init_sv(&save, sv); + +	/* For checking the length, we don't care if it's punycode or not */ +	(void) sv_consume_if_c(&save, 'u'); + +	/* A lone 'u' with no length after it is malformed */ +	if (sv_remaining(&save) == 0) { +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	if (sv_consume_if_c(&save, '0')) { +		*has_namep = B_FALSE; +		return (B_TRUE); +	} + +	*has_namep = B_TRUE; +	return (B_TRUE); +} + +static boolean_t +rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t in_value) +{ +	uint64_t disambiguator = 0; +	size_t len = 0; +	char ns; +	boolean_t ret = B_FALSE; +	boolean_t has_name; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	SAVE_LEN(st, len); + +	ns = sv_consume_c(sv); + +	if (!rustv0_parse_path(st, sv, in_value)) +		goto done; + +	if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator))) +		goto done; + +	if (!rustv0_has_name(st, sv, &has_name)) +		goto done; + +	if (ISUPPER(ns)) { +		if (!rust_append(st, "::{")) +			goto done; + +		switch (ns) { +		case 'C': +			if (!rust_append(st, "closure")) +				goto done; +			break; +		case 'S': 
+			if (!rust_append(st, "shim")) +				goto done; +			break; +		default: +			if (!rust_appendc(st, ns)) +				goto done; +			break; +		} + +		if (has_name && !rust_appendc(st, ':')) +			goto done; + +		if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) +			goto done; + +		ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator); +	} else { +		if (has_name) { +			if (!(ret = rust_append(st, "::"))) +				goto done; +		} +		ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE); +	} + +done: +	DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len), +	    ret ? "success" : "fail"); + +	return (ret); +} + +/* + * <disambiguator> = "s" <base-62-number> + * + * Returns B_FALSE without setting an error when the input does not start + * with 's', so callers may treat the disambiguator as optional. A + * malformed number following the 's' is an error (EINVAL). + */ +static boolean_t +rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv, +    uint64_t *valp) +{ +	if (HAS_ERROR(st) || sv_remaining(sv) < 2) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	*valp = 0; + +	if (!sv_consume_if_c(sv, 's')) +		return (B_FALSE); + +	if (!rustv0_parse_base62(st, sv, valp)) { +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	/* +	 * Rust RFC 2603 details this in Appendix A, but not the main +	 * portion of the RFC. If no disambiguator is present, the value +	 * is 0, if the decoded value is 0, the index is 1, ... +	 * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we +	 * only need to add one here to complete the adjustment. 
+	 */ +	*valp = *valp + 1; + +	DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp); +	return (B_TRUE); +} + +/* <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> */ +static boolean_t +rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st, +    strview_t *restrict sv, boolean_t repl_underscore) +{ +	uint64_t len = 0; +	boolean_t puny = B_FALSE; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	if (sv_consume_if_c(sv, 'u')) +		puny = B_TRUE; + +	if (!rust_parse_base10(st, sv, &len)) +		return (B_FALSE); + +	/* skip optional separator '_' */ +	(void) sv_consume_if_c(sv, '_'); + +	if (sv_remaining(sv) < len) { +		DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") " +		    "> remaining bytes (%zu)", __func__, len, +		    sv_remaining(sv)); +		return (B_FALSE); +	} + +	/* 0 length identifiers are acceptable */ +	if (len == 0) +		return (B_TRUE); + +	if (puny) { +		strview_t ident; + +		sv_init_sv_range(&ident, sv, len); +		if (!rustv0_puny_decode(st, &ident, repl_underscore)) +			return (B_FALSE); + +		sv_consume_n(sv, len); +		return (B_TRUE); +	} + +	/* +	 * rust identifiers do not contain '-'. However ABI identifiers +	 * are allowed to contain them (e.g. extern "foo-bar" fn ...). +	 * They are substituted with '_' in the mangled output. If we +	 * do not need to reverse this, we can just append 'len' bytes +	 * of sv.  Otherwise we need to go through and reverse this +	 * substitution. +	 */ +	if (!repl_underscore) +		return (rust_append_sv(st, len, sv)); + +	/* +	 * We checked earlier that len < sv_remaining(sv); so this loop +	 * cannot overrun. 
+	 */ +	for (size_t i = 0; i < len; i++) { +		char c = sv_consume_c(sv); + +		if (c == '_') +			c = '-'; + +		if (!rust_appendc(st, c)) +			return (B_FALSE); +	} + +	return (B_TRUE); +} + +/* <backref> = "B" <base-62-number> */ +static boolean_t +rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b), +    boolean_t bval) +{ +	strview_t backref; +	strview_t target; +	uint64_t idx = 0; +	size_t save_len; +	size_t len; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	sv_init_sv(&backref, sv); + +	if (!sv_consume_if_c(sv, 'B')) +		return (B_FALSE); + +	DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv)); + +	if (!rustv0_parse_base62(st, sv, &idx)) { +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	/* +	 * Determine how many bytes we've consumed (up to the start of +	 * the current backref token). +	 */ +	VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first); +	len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first); + +	/* +	 * The backref can only refer to an index prior to the start of +	 * the current backref token -- that is must always refer back in +	 * the string, never to the current position or beyond. +	 */ +	if (idx >= len) { +		DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") " +		    "is out of range [0, %zu)", __func__, idx, len); +		st->rs_error = ERANGE; +		return (B_FALSE); +	} + +	/* +	 * Create a strview_t of the original string (sans prefix) by +	 * copying from st->rs_orig. The length of the target strview_t is +	 * capped to end immediately prior to this backref token. Since we +	 * enforce that backrefs must always refer to already processed +	 * portions of the string (i.e. 
must always refer backwards), and the +	 * length of the strview_t is set to end prior to the start of this +	 * backref token, we guarantee processing of a backref will always +	 * terminate before it can possibly encounter this backref token +	 * and cause a loop -- either the processing terminates normally or +	 * it reaches the end of the capped strview_t. +	 */ +	sv_init_sv_range(&target, &st->rs_orig, len); + +	/* +	 * Consume all the input in the target strview_t up to the index +	 */ +	sv_consume_n(&target, idx); + +	DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__, +	    idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : ""); + +	/* +	 * If we're skipping the output, there's no reason to bother reparsing +	 * the output -- we're not going to save it. We still setup everything +	 * so that the debug output is still emitted. +	 */ +	if (st->rs_skip) +		return (B_TRUE); + +	SAVE_LEN(st, save_len); +	if (!fn(st, &target, bval)) +		return (B_FALSE); + +	DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len)); +	return (B_TRUE); +} + +static boolean_t +rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime) +{ +	uint64_t bound_lt; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	if (!rust_appendc(st, '\'')) +		return (B_FALSE); + +	if (lifetime == 0) +		return (rust_appendc(st, '_')); + +	if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) { +		DEMDEBUG("%s: ERROR: lifetime value %" PRIu64 +		    " > current depth %" PRIu64, __func__, lifetime, +		    st->rs_lt_depth); +		st->rs_lt_depth = ERANGE; +		return (B_FALSE); +	} + +	/* +	 * Use 'a, 'b, ... +	 */ +	if (bound_lt < 26) { +		char c = (char)bound_lt + 'a'; +		return (rust_append_printf(st, "%c", c)); +	} + +	/* +	 * Otherwise, use '_123, '_456, ... 
+	 */ +	return (rust_append_printf(st, "_%" PRIu64, bound_lt)); +} + +static boolean_t +rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv) +{ +	uint64_t lifetime; + +	if (!sv_consume_if_c(sv, 'L')) +		return (B_FALSE); + +	if (!rustv0_parse_base62(st, sv, &lifetime)) +		return (B_FALSE); + +	return (rustv0_append_lifetime(st, lifetime)); +} + +static boolean_t +rustv0_parse_const_data(rust_state_t *restrict st, +    const_type_class_t type_class, strview_t *restrict sv) +{ +	uint64_t val = 0; +	size_t save_len; +	boolean_t neg = B_FALSE; +	boolean_t ret = B_FALSE; + +	VERIFY3S(type_class, !=, CTC_INVALID); + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); +	SAVE_LEN(st, save_len); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n')) +		neg = B_TRUE; + +	ret = OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) && +	    sv_consume_if_c(sv, '_'); +	if (!ret) +		goto done; + +	switch (type_class) { +	case CTC_SIGNED: +	case CTC_UNSIGNED: +		ret = rust_append_printf(st, "%s%" PRIu64, neg ? "-" : "", val); +		break; +	case CTC_BOOL: +		if (val > 1) { +			DEMDEBUG("%s: invalid bool val %" PRIu64, __func__, +			    val); +			ret = B_FALSE; +			break; +		} +		ret = rust_append_printf(st, "%s", +		    (val == 0) ? "false" : "true"); +		break; +	case CTC_CHAR: +		if (val > UINT32_MAX) { +			DEMDEBUG("%s: char value %" PRIu64 " out of range", +			    __func__, val); +			ret = B_FALSE; +			break; +		} + +		ret = rust_appendc(st, '\'') && rust_append_utf8_c(st, val) && +		    rust_appendc(st, '\''); +		break; +	default: +		ret = B_FALSE; +	} + +done: +	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len), +	    ret ? 
"success" : "fail"); + +	return (ret); +} + +static boolean_t +rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t dummy __unused) +{ +	strview_t type; +	size_t start_len; +	const_type_class_t ctype_class; +	char ctype; +	boolean_t save_skip; +	boolean_t ret; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); +	SAVE_LEN(st, start_len); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE)) +		return (B_TRUE); + +	if (sv_consume_if_c(sv, 'p')) { +		ret = rust_appendc(st, '_'); +		goto done; +	} + +	ctype = sv_peek(sv, 0); +	ctype_class = rustv0_classify_const_type(ctype); +	if (ctype_class == CTC_INVALID) { +		DEMDEBUG("%s: const type isn't a valid const generic type", +		    __func__); +		return (B_FALSE); +	} + +	/* +	 * This isn't spelled out clearly in Rust RFC 2603, but currently +	 * only unsigned int types are allowed at this point. However, we +	 * have a bit of a potential tricky situation. Unlike formatting +	 * the other tokens, if we want to display the type, we do so +	 * _after_ the value, even though the type appears first. +	 * +	 * This is bit of a hack, but we save off the input position from +	 * sv before the parse the type. We then parse it without saving +	 * the resulting value, then parse and output the constant. If +	 * we wish to then display the type, we can go back and parse +	 * the type again, this time saving the result. 
+	 */ +	sv_init_sv(&type, sv); + +	SKIP_BEGIN(st, save_skip); +	ret = rustv0_parse_type(st, sv, B_FALSE); +	SKIP_END(st, save_skip); + +	if (!ret) { +		DEMDEBUG("%s: const type isn't valid", __func__); +		return (B_FALSE); +	} + +	if (sv_consume_if_c(sv, 'p')) { +		ret = rust_appendc(st, '_'); +	} else { +		ret = rustv0_parse_const_data(st, ctype_class, sv); +	} +	if (!ret) +		goto done; + +	if (st->rs_show_const_type) { +		ret = rust_append(st, ": ") && +		    rustv0_parse_uint_type(st, &type); +	} + +done: +	DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len), +	    ret ? "success" : "fail"); +	return (ret); +} + +static boolean_t +rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv) +{ +	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_consume_if_c(sv, 'C')) +		return (rust_appendc(st, 'C')); + +	return (rustv0_parse_undisambiguated_identifier(st, sv, B_TRUE)); +} + +static boolean_t +rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv) +{ +	uint64_t n, i; + +	if (!sv_consume_if_c(sv, 'G')) +		return (B_FALSE); + +	if (!rustv0_parse_base62(st, sv, &n)) +		return (B_FALSE); +	n += 1; + +	if (!rust_append(st, "for<")) +		return (B_FALSE); + +	for (i = 0; i < n; i++) { +		if (i > 0 && !rust_append(st, ", ")) +			return (B_FALSE); + +		st->rs_lt_depth++; +		if (!rustv0_append_lifetime(st, 1)) +			return (B_FALSE); +	} + +	if (!rust_append(st, "> ")) +		return (B_FALSE); + +	return (B_TRUE); +} + +/* + * <fn-sig> := [<binder>] ["U"] ["K" <abi>] {type} "E" <type> + * + * Note that while the Rust RFC states the binder is manditory, based on + * actual examples, and comparing with the rust-based demangler, it is in + * fact optional. 
+ */ +static boolean_t +rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv) +{ +	uint64_t save_lt = st->rs_lt_depth; + +	DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); + +	if (!OPTIONAL(st, rustv0_parse_binder(st, sv))) +		return (B_FALSE); + +	if (sv_consume_if_c(sv, 'U') && !rust_append(st, "unsafe ")) +		return (B_FALSE); + +	if (sv_consume_if_c(sv, 'K') && +	    (!rust_append(st, "extern \"") || !rustv0_parse_abi(st, sv) || +	    !rust_append(st, "\" "))) +		return (B_FALSE); + +	if (!rust_append(st, "fn(")) +		return (B_FALSE); + +	if (!rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE, +	    NULL)) { +		return (B_FALSE); +	} + +	if (!rust_appendc(st, ')')) +		return (B_FALSE); + +	/* If the return type is (), don't print it */ +	if (!sv_consume_if_c(sv, 'u')) { +		if (!rust_append(st, " -> ")) +			return (B_FALSE); + +		if (!rustv0_parse_type(st, sv, B_FALSE)) +			return (B_FALSE); +	} + +	/* Undo any depth increase made by the binder parsed above */ +	st->rs_lt_depth = save_lt; + +	return (B_TRUE); +} + +/* + * <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> + * + * 'open' indicates if a generic argument list is already open in the + * output: if so the binding is preceded by ", ", otherwise by "<". + */ +static boolean_t +rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st, +    strview_t *restrict sv, boolean_t open) +{ +	size_t save_len; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	if (!sv_consume_if_c(sv, 'p')) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); +	SAVE_LEN(st, save_len); + +	if (!rust_append(st, open ? 
", " : "<")) +		return (B_FALSE); + +	if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) { +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	if (!rust_append(st, " = ")) +		return (B_FALSE); + +	if (!rustv0_parse_type(st, sv, B_FALSE)) { +		st->rs_error = EINVAL; +		return (B_FALSE); +	} + +	DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len)); + +	return (B_TRUE); +} + +static boolean_t +rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t dummy __unused) +{ +	boolean_t stay_save = st->rs_args_stay_open; +	boolean_t open_save = st->rs_args_is_open; +	boolean_t open = B_FALSE; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	/* +	 * This is a bit subtle, but when formatting a trait in trait, +	 * we want something like this: +	 * +	 *	dyn Trait<T, U, Assoc=X> +	 * +	 * instead of +	 * +	 *	dyn Trait<T, U, <Assoc=X>> +	 * +	 * So when parsing the path, if we encounter generic arguments, we want +	 * the arg list to remain open at the end of processing the path so +	 * we can append the bindings to it. We set rs_args_stay_open to B_TRUE +	 * to indicate to rustv0_parse_path() that a generic argument list +	 * should not be closed (i.e. don't append a '>' at the end of the +	 * list). If rustv0_parse_path() encounters a list of generic arguments, +	 * it will also set rs_args_is_open to indicate it opened the list. +	 * We save this in 'open' so that when we process the associated +	 * bindings, we know if we need to open the list on the first binding +	 * or not -- we don't want 'dyn Trait<>' if there are no bindings, +	 * just 'dyn Trait'. 
+	 */ +	st->rs_args_stay_open = B_TRUE; +	st->rs_args_is_open = B_FALSE; + +	if (!rustv0_parse_path(st, sv, B_FALSE)) { +		st->rs_args_stay_open = stay_save; +		st->rs_args_is_open = open_save; +		return (B_FALSE); +	} + +	open = st->rs_args_is_open; + +	st->rs_args_stay_open = stay_save; +	st->rs_args_is_open = open_save; + +	while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) { +		open = B_TRUE; +	} + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	if (open && !rust_appendc(st, '>')) +		return (B_FALSE); + +	return (!HAS_ERROR(st)); +} + +static boolean_t +rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv) +{ +	uint64_t save_lt = st->rs_lt_depth; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	/* +	 * This is another case where Rust RFC2603 seems to disagree with +	 * the implementation. The RFC implies this is mandatory, while +	 * the implementations treat it as optional. +	 */ +	if (!OPTIONAL(st, rustv0_parse_binder(st, sv))) +		return (B_FALSE); + +	if (!rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ", +	    B_FALSE, NULL)) +		return (B_FALSE); + +	st->rs_lt_depth = save_lt; + +	return (B_TRUE); +} + +static boolean_t +rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv, +    boolean_t dummy __unused) +{ +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_consume_if_c(sv, 'K')) +		return (rustv0_parse_const(st, sv, B_FALSE)); + +	if (rustv0_parse_lifetime(st, sv)) +		return (B_TRUE); + +	return (rustv0_parse_type(st, sv, B_FALSE)); +} + +/* + * Parse a hex value into *valp. Note that rust only uses lower case + * hex values. 
+ */ +static boolean_t +rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv, +    uint64_t *restrict valp) +{ +	uint64_t val = 0; +	size_t ndigits = 0; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	/* +	 * Unfortunately, Rust RFC 2603 also does not explicitly define +	 * {hex-digits}. We follow what decimal digits does, and treat a +	 * leading 0 as a terminator. +	 * +	 * NOTE(review): val is accumulated in a uint64_t without an +	 * overflow check, so values wider than 64 bits will wrap. +	 */ +	while (sv_remaining(sv) > 0) { +		char c = sv_peek(sv, 0); + +		if (ISDIGIT(c)) { +			val *= 16; +			val += c - '0'; +		} else if (c >= 'a' && c <= 'f') { +			val *= 16; +			val += c - 'a' + 10; +		} else { +			break; +		} + +		sv_consume_n(sv, 1); + +		if (++ndigits == 1 && val == 0) +			break; +	} + +	if (ndigits > 0) +		*valp = val; + +	return ((ndigits > 0) ? B_TRUE : B_FALSE); +} + +/* + * Parse a base62 number into *valp.  The number is explicitly terminated + * by a '_'.  The values are also offset by 1 -- that is '_' == 0, + * '0_' == 1, ...  Digits are 0-9, a-z, A-Z (in increasing value). + * Returns B_FALSE (leaving *valp untouched) on an invalid digit, a missing + * terminator, or a value that does not fit in a uint64_t. + */ +static boolean_t +rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv, +    uint64_t *restrict valp) +{ +	uint64_t val = 0; +	char c; + +	if (HAS_ERROR(st)) +		return (B_FALSE); + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); + +	if (sv_remaining(sv) == 0) +		return (B_FALSE); + +	/* A terminating '_' without any digits is 0 */ +	if (sv_consume_if_c(sv, '_')) { +		*valp = 0; +		return (B_TRUE); +	} + +	/* Need at least one valid digit if > 0 */ +	if (!ISALNUM(sv_peek(sv, 0))) +		return (B_FALSE); + +	while (sv_remaining(sv) > 0) { +		uint64_t digit; + +		c = sv_consume_c(sv); + +		if (c == '_') { +			/* +			 * Because a lone '_' was already handled earlier, +			 * we know we've had at least one other digit and +			 * can increment the value and return. 
+			 */ +			if (val == UINT64_MAX) +				return (B_FALSE); +			*valp = val + 1; +			return (B_TRUE); +		} else if (ISDIGIT(c)) { +			digit = c - '0'; +		} else if (ISLOWER(c)) { +			digit = c - 'a' + 10; +		} else if (ISUPPER(c)) { +			digit = c - 'A' + 36; +		} else { +			return (B_FALSE); +		} + +		/* Fail rather than wrap if val * 62 + digit overflows */ +		if (val > (UINT64_MAX - digit) / 62) +			return (B_FALSE); +		val = val * 62 + digit; +	} + +	/* We reached the end of the string without a terminating _ */ +	return (B_FALSE); +} + +/* + * Map the type character of a const generic to the class of value that + * follows it; CTC_INVALID is returned for anything else. + */ +static const_type_class_t +rustv0_classify_const_type(char type) +{ +	switch (type) { +	case 'h': case 't': case 'm': case 'y': case 'o': case 'j': +		return (CTC_UNSIGNED); +	case 'a': case 'i': case 'l': case 'n': case 's': case 'x': +		return (CTC_SIGNED); +	case 'b': +		return (CTC_BOOL); +	case 'c': +		return (CTC_CHAR); +	default: +		return (CTC_INVALID); +	} +} + +/* + * Make sure the name is a plausible mangled rust symbol. + * Non-ASCII are never allowed.  Rust itself uses [_0-9A-Za-z], however + * some things will add a suffix starting with a '.' (e.g. LLVM thin LTO). + * As such we proceed in two phases. We first only allow [_0-9A-Za-z] until + * we encounter a '.'. At that point, any ASCII character is allowed. 
+ */ +static boolean_t +rustv0_valid_sym(const strview_t *sv) +{ +	size_t i; +	boolean_t check_rust = B_TRUE; + +	for (i = 0; i < sv->sv_rem; i++) { +		char c = sv->sv_first[i]; + +		if (ISALNUM(c) || c == '_') +			continue; + +		if (c == '.') { +			check_rust = B_FALSE; +			continue; +		} + +		if (check_rust || (c & 0x80) != 0) { +			DEMDEBUG("%s: ERROR found invalid character '%c' " +			    "in '%.*s' at index %zu", +			    __func__, c, SV_PRINT(sv), i); +			return (B_FALSE); +		} +	} +	return (B_TRUE); +} diff --git a/usr/src/lib/libdemangle/common/rust-v0puny.c b/usr/src/lib/libdemangle/common/rust-v0puny.c new file mode 100644 index 0000000000..9659902ac1 --- /dev/null +++ b/usr/src/lib/libdemangle/common/rust-v0puny.c @@ -0,0 +1,264 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + * Copyright 2021 Jason King + */ + +#include <inttypes.h> +#include <libcustr.h> +#include <limits.h> +#include <string.h> +#include <sys/byteorder.h> +#include "rust.h" +#include "strview.h" + +/* + * The rust v0 encoding (rust RFC 2603) uses a slightly modified + * version of punycode to encode characters that are not ASCII. + * The big difference is that '_' is used to separate the ASCII codepoints + * from the non-ASCII code points instead of '-'. 
+ * + * The decoding is taken almost directly from (IETF) RFC 3492 + */ + +#define	BASE		36 +#define	TMIN		1 +#define	TMAX		26 +#define	SKEW		38 +#define	DAMP		700 +#define	INITIAL_BIAS	72 +#define	INITIAL_N	0x80 +#define	DELIMITER	'_' + +static inline uint32_t char_val(char); + +static size_t +rustv0_puny_adapt(size_t delta, size_t npoints, boolean_t first) +{ +	size_t k = 0; + +	delta = first ? delta / DAMP : delta / 2; +	delta += delta / npoints; +	while (delta > ((BASE - TMIN) * TMAX) / 2) { +		delta /= (BASE - TMIN); +		k += BASE; +	} + +	return (k + (((BASE - TMIN + 1) * delta) / (delta + SKEW))); +} + +boolean_t +rustv0_puny_decode(rust_state_t *restrict st, strview_t *restrict src, +    boolean_t repl_underscore) +{ +	uint32_t *buf; +	size_t bufalloc; /* in units of uint32_t */ +	size_t buflen; +	size_t nbasic; +	size_t i, old_i, k, w; +	size_t n = INITIAL_N; +	size_t bias = INITIAL_BIAS; +	size_t delim_idx = 0; +	boolean_t ret = B_FALSE; +	char c; + +	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(src)); + +	/* +	 * The decoded string should never contain more codepoints than +	 * the original string, so creating a temporary buffer large +	 * enought to hold sv_remaining(src) uint32_t's should be +	 * large enough. +	 * +	 * This also serves as a size check -- xcalloc will fail if the +	 * resulting size of the buf (sizeof (uint32_t) * bufalloc) >= +	 * SIZE_MAX. If xcalloc succeeds, we therefore know that that +	 * buflen cannot overflow. +	 */ +	buflen = 0; +	bufalloc = sv_remaining(src) + 1; +	buf = xcalloc(st->rs_ops, bufalloc, sizeof (uint32_t)); +	if (buf == NULL) { +		SET_ERROR(st); +		return (B_FALSE); +	} + +	/* +	 * Find the position of the last delimiter (if any). +	 * IETF RFC 3492 3.1 states that the delimiter is present if and only +	 * if there are a non-zero number of basic (ASCII) code points. 
Since +	 * the delimiter itself is a basic code point, the last one present +	 * in the original string is the actual delimiter between the basic +	 * and non-basic code points. Earlier occurrences of the delimiter +	 * are treated as normal basic code points. For plain punycode, an +	 * all ASCII string encoded with punycode would terminate with a +	 * final delimiter, and a name with all non-basic code points would +	 * not have a delimiter at all. With the rust v0 encoding, punycode +	 * encoded identifiers have a 'u' prefix prior to the identifier +	 * length (['u'] <decimal-number> <bytes>), so we should never +	 * encounter an all ASCII name that's encoded with punycode (we error +	 * on this).  For an all non-basic codepoint identifier, no delimiter +	 * will be present, and we treat that the same as the delimiter being +	 * in the first position of the string, and consume it (if present) +	 * when we transition from copying the basic code points (which there +	 * will be none in this situation) to non-basic code points. +	 */ +	for (i = 0; i < src->sv_rem; i++) { +		if (src->sv_first[i] == DELIMITER) { +			delim_idx = i; +		} +	} +	VERIFY3U(delim_idx, <, bufalloc); + +	if (delim_idx + 1 == sv_remaining(src)) { +		DEMDEBUG("%s: encountered an all-ASCII name encoded with " +		    "punycode", __func__); +		goto done; +	} + +	/* Copy all the basic characters up to the delimiter into buf */ +	for (nbasic = 0; nbasic < delim_idx; nbasic++) { +		c = sv_consume_c(src); + +		/* The rust prefix check should guarantee this */ +		VERIFY3U(c, <, 0x80); + +		/* +		 * Normal rust identifiers do not contain '-' in them. +		 * However ABI identifiers could contain a dash. Those +		 * are translated to _, and we need to replace accordingly +		 * when asked. 
+		 */ +		if (repl_underscore && c == '_') +			c = '-'; + +		buf[nbasic] = c; +		buflen++; +	} +	DEMDEBUG("%s: %" PRIu32 " ASCII codepoints copied", __func__, nbasic); + +	/* +	 * Consume delimiter between basic and non-basic code points if present. +	 * See above for explanation why it may not be present. +	 */ +	(void) sv_consume_if_c(src, DELIMITER); + +	DEMDEBUG("%s: non-ASCII codepoints to decode: %.*s", __func__, +	    SV_PRINT(src)); + +	for (i = 0; sv_remaining(src) > 0; i++) { +		VERIFY3U(i, <=, buflen); + +		/* +		 * Guarantee we have enough space to insert another codepoint. +		 * Our buffer sizing above should prevent this from ever +		 * tripping, but check this out of paranoia. +		 */ +		VERIFY3U(buflen, <, bufalloc - 1); + +		/* decode the next codepoint */ +		for (old_i = i, k = BASE, w = 1; ; k += BASE) { +			size_t t; +			uint32_t digit; + +			if (sv_remaining(src) == 0) +				goto done; + +			digit = char_val(sv_consume_c(src)); +			if (digit >= BASE) +				goto done; + +			i = i + digit * w; + +			if (k <= bias) +				t = TMIN; +			else if (k >= bias + TMAX) +				t = TMAX; +			else +				t = k - bias; + +			if (digit < t) +				break; + +			w = w * (BASE - t); +		} +		buflen++; + +		bias = rustv0_puny_adapt(i - old_i, buflen, +		    (old_i == 0) ? B_TRUE : B_FALSE); +		n = n + i / buflen; +		i = i % buflen; + +		DEMDEBUG("%s: insert \\u%04" PRIx32 " at index %zu (len = %zu)", +		    __func__, n, i, buflen); + +		/* +		 * At the start of this while loop, we guaranteed +		 * buflen < bufalloc - 1. Therefore we know there is room +		 * to move over the contents of buf at i to make room +		 * for the codepoint. We also just guaranteed that i +		 * is in the range [0, buflen), so this should always be +		 * safe. +		 */ +		(void) memmove(buf + i + 1, buf + i, +		    (buflen - i) * sizeof (uint32_t)); + +#if _LP64 +		/* +		 * This is always false for ILP32 and smatch will also complain, +		 * so we just omit it for ILP32. 
+		 */ +		if (n > UINT32_MAX) { +			DEMDEBUG("%s: ERROR: utf8 value is out of range", +			    __func__); +			goto done; +		} +#endif + +		buf[i] = (uint32_t)n; +	} + +	DEMDEBUG("%s: inserted %zu non-basic code points", __func__, +	    buflen - nbasic); + +	for (i = 0; i < buflen; i++) { +		if (!rust_append_utf8_c(st, buf[i])) +			goto done; +	} +	ret = B_TRUE; + +done: +	xfree(st->rs_ops, buf, bufalloc * sizeof (uint32_t)); +	return (ret); +} + +/* + * Convert [0-9][a-z] to a value [0..35]. Rust's punycode encoding always + * uses lowercase, so we treat uppercase (and any other characters) as + * invalid, and return BASE (36) to indicate a bad value. + */ +static inline uint32_t +char_val(char c) +{ +	uint32_t v = c; + +	if (ISLOWER(c)) { +		return (c - 'a'); +	} else if (ISDIGIT(c)) { +		return (c - '0' + 26); +	} else { +		DEMDEBUG("%s: ERROR: invalid character 0x%02x encountered", +		    __func__, v); +		return (BASE); +	} +} diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c index 9b145ca841..ce1fca4859 100644 --- a/usr/src/lib/libdemangle/common/rust.c +++ b/usr/src/lib/libdemangle/common/rust.c @@ -10,564 +10,417 @@   */  /* - * Copyright 2019, Joyent, Inc.   * Copyright 2021 Jason King + * Copyright 2019 Joyent, Inc.   */  #include <errno.h> +#include <langinfo.h>  #include <libcustr.h>  #include <limits.h> +#include <stdarg.h>  #include <string.h> -#include <sys/ctype.h>	/* We want the C locale ISXXX() versions */ -#include <sys/debug.h> -#include <stdio.h> -#include <sys/sysmacros.h> -#include "strview.h"  #include "demangle_int.h" +#include "rust.h" -/* - * Unfortunately, there is currently no official specification for the rust - * name mangling.  This is an attempt to document the understanding of the - * mangling used here.  
It is based off examination of - *     https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/ - * - * A mangled rust name is: - *     <prefix> <name> - * - * <prefix>	::=	_Z - *			__Z - * - * <name>	::= N <name-segment>+ [<hash>] E - * - * <name-segment> ::= <len> <name-chars>{len} - * - * <len>	::= [1-9][0-9]+ - * - * <name-chars>	::=	<[A-Za-z]> <[A-Za-z0-9]>* - *			<separator> - *			<special> - * - * <separator>	::=	'..'	# '::' - * - * <special>	::=	$SP$	# ' ' - *			$BP$	# '*' - *			$RF$	# '&' - *			$LT$	# '<' - *			$GT$	# '>' - *			$LP$	# '(' - *			$RP$	# ')' - *			$C$	# ',' - *			$u7e$	# '~' - *			$u20$	# ' ' - *			$u27$	# '\'' - *			$u3d$	# '=' - *			$u5b$	# '[' - *			$u5d$	# ']' - *			$u7b$	# '{' - *			$u7d$	# '}' - *			$u3b$	# ';' - *			$u2b$	# '+' - *			$u22$	# '"' - * - * <hash>	:= <len> h <hex-digits>+ - * - * <hex-digits>	:= <[0-9a-f]> - */ - -typedef struct rustdem_state { -	const char	*rds_str; -	custr_t		*rds_demangled; -	sysdem_ops_t	*rds_ops; -	int		rds_error; -} rustdem_state_t; - -static const struct rust_charmap { -	const char	*ruc_seq; -	char		ruc_ch; -} rust_charmap[] = { -	{ "$SP$", '@' }, -	{ "$BP$", '*' }, -	{ "$RF$", '&' }, -	{ "$LT$", '<' }, -	{ "$GT$", '>' }, -	{ "$LP$", '(' }, -	{ "$RP$", ')' }, -	{ "$C$", ',' }, -	{ "$u7e$", '~' }, -	{ "$u20$", ' ' }, -	{ "$u27$", '\'' }, -	{ "$u3d$", '=' }, -	{ "$u5b$", '[' }, -	{ "$u5d$", ']' }, -	{ "$u7b$", '{' }, -	{ "$u7d$", '}' }, -	{ "$u3b$", ';' }, -	{ "$u2b$", '+' }, -	{ "$u22$", '"' } -}; -static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap); - -static void *rustdem_alloc(custr_alloc_t *, size_t); -static void rustdem_free(custr_alloc_t *, void *, size_t); - -static boolean_t rustdem_append_c(rustdem_state_t *, char); -static boolean_t rustdem_all_ascii(const strview_t *); - -static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *); -static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *); -static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *); 
-static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *); -static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *); -static boolean_t rustdem_add_sep(rustdem_state_t *); - -char * -rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops) +static void * +rust_cualloc(custr_alloc_t *cua, size_t len)  { -	rustdem_state_t st = { -		.rds_str = s, -		.rds_ops = ops, -	}; -	custr_alloc_ops_t custr_ops = { -		.custr_ao_alloc = rustdem_alloc, -		.custr_ao_free = rustdem_free -	}; -	custr_alloc_t custr_alloc = { -		.cua_version = CUSTR_VERSION -	}; -	strview_t sv; -	int ret; - -	if (custr_alloc_init(&custr_alloc, &custr_ops) != 0) -		return (NULL); -	custr_alloc.cua_arg = &st; - -	sv_init_str(&sv, s, s + slen); - -	if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') { -		DEMDEBUG("ERROR: string is either too small or does not end " -		    "with 'E'"); -		errno = EINVAL; -		return (NULL); -	} - -	if (!rustdem_parse_prefix(&st, &sv)) { -		DEMDEBUG("ERROR: could not parse prefix"); -		errno = EINVAL; -		return (NULL); -	} -	DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv)); - -	if (!rustdem_all_ascii(&sv)) { -		/* rustdem_all_ascii() provides debug output */ -		errno = EINVAL; -		return (NULL); -	} - -	if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0) -		return (NULL); - -	if (!rustdem_parse_name(&st, &sv)) { -		if (st.rds_error == 0) -			st.rds_error = EINVAL; -		goto fail; -	} - -	if (sv_remaining(&sv) > 0) { -		DEMDEBUG("ERROR: unexpected trailing characters after " -		    "terminating 'E': '%.*s'", SV_PRINT(&sv)); -		st.rds_error = EINVAL; -		goto fail; -	} - -	char *res = xstrdup(ops, custr_cstr(st.rds_demangled)); -	if (res == NULL) { -		st.rds_error = errno; -		goto fail; -	} - -	custr_free(st.rds_demangled); -	DEMDEBUG("result = '%s'", res); -	return (res); - -fail: -	custr_free(st.rds_demangled); -	errno = st.rds_error; -	return (NULL); +	rust_state_t *st = cua->cua_arg; +	return (zalloc(st->rs_ops, 
len));  } -static boolean_t -rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp) +static void +rust_cufree(custr_alloc_t *cua, void *p, size_t len)  { -	strview_t pfx; +	rust_state_t *st = cua->cua_arg; +	xfree(st->rs_ops, p, len); +} -	sv_init_sv(&pfx, svp); +static const custr_alloc_ops_t rust_custr_ops = { +	.custr_ao_alloc = rust_cualloc, +	.custr_ao_free = rust_cufree +}; -	DEMDEBUG("checking for '_Z' or '__Z' in '%.*s'", SV_PRINT(&pfx)); +boolean_t +rust_appendc(rust_state_t *st, char c) +{ +	custr_t *cus = st->rs_demangled; -	if (st->rds_error != 0) +	if (HAS_ERROR(st))  		return (B_FALSE); -	if (!sv_consume_if_c(&pfx, '_')) -		return (B_FALSE); +	if (st->rs_skip) +		return (B_TRUE); -	(void) sv_consume_if_c(&pfx, '_'); +	switch (c) { +	case '\a': +		return (rust_append(st, "\\a")); +	case '\b': +		return (rust_append(st, "\\b")); +	case '\f': +		return (rust_append(st, "\\f")); +	case '\n': +		return (rust_append(st, "\\n")); +	case '\r': +		return (rust_append(st, "\\r")); +	case '\t': +		return (rust_append(st, "\\t")); +	case '\v': +		return (rust_append(st, "\\v")); +	case '\\': +		return (rust_append(st, "\\\\")); +	} + +	if (c < ' ') +		return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c)); -	if (!sv_consume_if_c(&pfx, 'Z')) +	if (custr_appendc(cus, c) != 0) { +		SET_ERROR(st);  		return (B_FALSE); +	} -	/* Update svp with new position */ -	sv_init_sv(svp, &pfx);  	return (B_TRUE);  } -static boolean_t -rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first) +/* + * Append a UTF-8 code point. If we're not in a UTF-8 locale, this gets + * appended as '\u<hex codepoint>' otherwise the character itself is + * added. 
+ */ +boolean_t +rust_append_utf8_c(rust_state_t *st, uint32_t val)  { -	strview_t sv; -	strview_t name; -	uint64_t len; -	size_t rem; -	boolean_t last = B_FALSE; +	custr_t *cus = st->rs_demangled; +	uint_t n = 0; +	uint8_t c[4] = { 0 }; -	if (st->rds_error != 0 || sv_remaining(svp) == 0) +	if (HAS_ERROR(st))  		return (B_FALSE); -	sv_init_sv(&sv, svp); - -	if (!rustdem_parse_num(st, &sv, &len)) { -		DEMDEBUG("ERROR: no leading length"); -		st->rds_error = EINVAL; -		return (B_FALSE); +	if (!st->rs_isutf8) { +		if (val < 0x80) +			return (rust_appendc(st, (char)val)); +		if (val < 0x10000) +			return (rust_append_printf(st, "\\u%04" PRIx32, val)); +		return (rust_append_printf(st, "\\U%08" PRIx32, val));  	} -	rem = sv_remaining(&sv); - -	if (rem < len) { -		st->rds_error = EINVAL; +	if (val < 0x80) { +		return (rust_appendc(st, (char)val)); +	} else if (val < 0x800) { +		c[0] = 0xc0 | ((val >> 6) & 0x1f); +		c[1] = 0x80 | (val & 0x3f); +		n = 2; +	} else if (val < 0x10000) { +		c[0] = 0xe0 | ((val >> 12) & 0x0f); +		c[1] = 0x80 | ((val >> 6) & 0x3f); +		c[2] = 0x80 | (val & 0x3f); +		n = 3; +	} else if (val < 0x110000) { +		c[0] = 0xf0 | ((val >> 18) & 0x7); +		c[1] = 0x80 | ((val >> 12) & 0x3f); +		c[2] = 0x80 | ((val >> 6) & 0x3f); +		c[3] = 0x80 | (val & 0x3f); +		n = 4; +	} else { +		DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__, +		    val);  		return (B_FALSE);  	} -	/* Is this the last segment before the terminating E? 
*/ -	if (rem == len + 1) { -		VERIFY3U(sv_peek(&sv, -1), ==, 'E'); -		last = B_TRUE; +	for (uint_t i = 0; i < n; i++) { +		if (custr_appendc(cus, c[i]) != 0) { +			SET_ERROR(st); +			return (B_FALSE); +		}  	} -	if (!first && !rustdem_add_sep(st)) -		return (B_FALSE); - -	/* Reduce length of seg to the length we parsed */ -	(void) sv_init_sv_range(&name, &sv, len); - -	DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name)); - -	/* -	 * A rust hash starts with 'h', and is the last component of a name -	 * before the terminating 'E'. It is however not always present -	 * in every mangled symbol, and a last segment that starts with 'h' -	 * could be confused for it, so failing to parse it just means -	 * we don't have a trailing hash. -	 */ -	if (sv_peek(&name, 0) == 'h' && last) { -		if (rustdem_parse_hash(st, &name)) -			goto done; - -		/* -		 * However any error other than 'not a hash' (e.g. ENOMEM) -		 * means we should fail. -		 */ -		if (st->rds_error != 0) -			goto done; -	} +	return (B_TRUE); +} -	while (sv_remaining(&name) > 0) { -		switch (sv_peek(&name, 0)) { -		case '$': -			if (rustdem_parse_special(st, &name)) -				continue; -			break; -		case '_': -			if (sv_peek(&name, 1) == '$') { -				/* -				 * Only consume/ignore '_'.  Leave -				 * $ for next round. -				 */ -				sv_consume_n(&name, 1); -				continue; -			} -			break; -		case '.': -			/* Convert '..' 
to '::' */ -			if (sv_peek(&name, 1) != '.') -				break; +boolean_t +rust_append(rust_state_t *st, const char *s) +{ +	custr_t *cus = st->rs_demangled; -			if (!rustdem_add_sep(st)) -				return (B_FALSE); +	if (HAS_ERROR(st)) +		return (B_FALSE); -			sv_consume_n(&name, 2); -			continue; -		default: -			break; -		} +	if (st->rs_skip) +		return (B_TRUE); -		if (custr_appendc(st->rds_demangled, -		    sv_consume_c(&name)) != 0) { -			st->rds_error = ENOMEM; -			return (B_FALSE); -		} +	if (custr_append(cus, s) != 0) { +		SET_ERROR(st); +		return (B_FALSE);  	} -done: -	sv_consume_n(&sv, len); -	VERIFY3P(svp->sv_first, <=, sv.sv_first); -	DEMDEBUG("%s: consumed '%.*s'", __func__, -	    (int)(sv.sv_first - svp->sv_first), svp->sv_first); -	sv_init_sv(svp, &sv);  	return (B_TRUE);  } -/* - * Parse N (<num><name>{num})+[<num>h<hex digits>]E - */ -static boolean_t -rustdem_parse_name(rustdem_state_t *st, strview_t *svp) +boolean_t +rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv)  { -	strview_t name; -	boolean_t first = B_TRUE; - -	if (st->rds_error != 0) +	if (HAS_ERROR(st))  		return (B_FALSE); -	sv_init_sv(&name, svp); - -	DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name)); +	if (st->rs_skip) { +		sv_consume_n(sv, (size_t)n); +		return (B_TRUE); +	} -	if (sv_remaining(&name) == 0) { -		DEMDEBUG("%s: empty name", __func__); +	if (n > sv_remaining(sv)) { +		DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > " +		    "remaining bytes (%zu)", __func__, n, sv_remaining(sv)); +		st->rs_error = ERANGE;  		return (B_FALSE);  	} -	if (!sv_consume_if_c(&name, 'N')) { -		DEMDEBUG("%s: does not start with 'N'", __func__); +	if (n > INT_MAX) { +		DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n); +		st->rs_error = ERANGE;  		return (B_FALSE);  	} -	while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') { -		if (!rustdem_parse_name_segment(st, &name, first)) -			return (B_FALSE); -		first = B_FALSE; +	if 
(custr_append_printf(st->rs_demangled, "%.*s", +	    (int)n, sv->sv_first) != 0) { +		SET_ERROR(st); +		return (B_FALSE);  	} -	VERIFY(sv_consume_if_c(&name, 'E')); - -	VERIFY3P(svp->sv_first, <=, name.sv_first); -	DEMDEBUG("%s: consumed '%.*s'", __func__, -	    (int)(name.sv_first - svp->sv_first), svp->sv_first); +	sv_consume_n(sv, (size_t)n); -	sv_init_sv(svp, &name);  	return (B_TRUE);  } -static boolean_t -rustdem_parse_hash(rustdem_state_t *st, strview_t *svp) +boolean_t +rust_append_printf(rust_state_t *st, const char *fmt, ...)  { -	strview_t sv; - -	sv_init_sv(&sv, svp); +	va_list ap; +	int ret; -	VERIFY(sv_consume_if_c(&sv, 'h')); -	if (!rustdem_append_c(st, 'h')) +	if (HAS_ERROR(st))  		return (B_FALSE); -	while (sv_remaining(&sv) > 0) { -		char c = sv_consume_c(&sv); +	if (st->rs_skip) +		return (B_TRUE); -		switch (c) { -		/* -		 * The upper-case hex digits (A-F) are excluded as valid -		 * hash values for several reasons: -		 * -		 * 1. It would result in two different possible names for -		 * the same function, leading to ambiguity in linking (among -		 * other things). -		 * -		 * 2. It would cause potential ambiguity in parsing -- is a -		 * trailing 'E' part of the hash, or the terminating character -		 * in the mangled name? -		 * -		 * 3. No examples were able to be found in the wild where -		 * uppercase digits are used, and other rust demanglers all -		 * seem to assume the hash must contain lower-case hex digits. 
-		 */ -		case '0': case '1': case '2': case '3': -		case '4': case '5': case '6': case '7': -		case '8': case '9': case 'a': case 'b': -		case 'c': case 'd': case 'e': case 'f': -			if (!rustdem_append_c(st, c)) -				return (B_FALSE); -			break; -		default: -			return (B_FALSE); -		} -	} +	va_start(ap, fmt); +	ret = custr_append_vprintf(st->rs_demangled, fmt, ap); +	va_end(ap); -	sv_init_sv(svp, &sv); -	return (B_TRUE); +	if (ret == 0) +		return (B_TRUE); +	SET_ERROR(st); +	return (B_FALSE);  } -/* - * We have to pick an arbitrary limit here; 999,999,999 fits comfortably - * within an int32_t, so let's go with that, as it seems unlikely we'd - * ever see a larger value in context. - */ -#define	MAX_DIGITS 9 - -static boolean_t -rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp, +boolean_t +rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv,      uint64_t *restrict valp)  { -	strview_t snum;  	uint64_t v = 0; -	size_t ndigits = 0;  	char c; -	if (st->rds_error != 0) +	if (HAS_ERROR(st) || sv_remaining(sv) == 0)  		return (B_FALSE); -	sv_init_sv(&snum, svp); - -	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum)); - -	c = sv_peek(&snum, 0); -	if (!ISDIGIT(c)) { -		DEMDEBUG("%s: ERROR no digits in str\n", __func__); -		st->rds_error = EINVAL; -		return (B_FALSE); -	} +	c = sv_peek(sv, 0);  	/* -	 * Since there is currently no official specification on rust name -	 * mangling, only that it has been stated that rust follows what -	 * C++ mangling does.  In the Itanium C++ ABI (what practically -	 * every non-Windows C++ implementation uses these days), it -	 * explicitly disallows leading 0s in numeric values (except for -	 * substition and template indexes, which aren't relevant here). 
-	 * We enforce the same restriction -- if a rust implementation allowed -	 * leading zeros in numbers (basically segment lengths) it'd -	 * cause all sorts of ambiguity problems with names that likely lead -	 * to much bigger problems with linking and such, so this seems -	 * reasonable. +	 * Since the legacy rust encoding states that it follows the +	 * Itanium C++ mangling format, we match the behavior of the +	 * Itanium C++ ABI in disallowing leading 0s in decimal numbers. +	 * +	 * For Rust encoding v0, RFC2603 currently has omitted the +	 * actual definition of <decimal-number>. However examination of +	 * other implementations written in tandem with the mangling +	 * implementation suggest that <decimal-number> can be expressed +	 * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and +	 * terminates the token, while any other leading digit allows +	 * parsing to continue until a non-digit is encountered, the +	 * end of the string is encountered, or overflow is encountered.  	 
*/  	if (c == '0') { -		DEMDEBUG("%s: ERROR number starts with leading 0\n", __func__); -		st->rds_error = EINVAL; +		if (st->rs_encver == RUSTENC_V0) { +			sv_consume_n(sv, 1); +			*valp = 0; +			return (B_TRUE); +		} + +		DEMDEBUG("%s: ERROR number starts with leading 0\n", +		    __func__); +		st->rs_error = EINVAL; +		return (B_FALSE); +	} else if (!ISDIGIT(c)) {  		return (B_FALSE);  	} -	while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) { -		c = sv_consume_c(&snum); +	while (sv_remaining(sv) > 0) { +		uint64_t cval; +		c = sv_peek(sv, 0);  		if (!ISDIGIT(c))  			break; +		sv_consume_n(sv, 1); -		v *= 10; -		v += c - '0'; -		ndigits++; -	} +		cval = c - '0'; -	if (ndigits > MAX_DIGITS) { -		DEMDEBUG("%s: value %llu is too large\n", __func__, v); -		st->rds_error = ERANGE; -		return (B_FALSE); -	} +		if (mul_overflow(v, 10, &v)) { +			DEMDEBUG("%s: multiplication overflowed\n", __func__); +			st->rs_error = EOVERFLOW; +			return (B_FALSE); +		} -	DEMDEBUG("%s: num=%llu", __func__, v); +		if (add_overflow(v, cval, &v)) { +			DEMDEBUG("%s: addition overflowed\n", __func__); +			st->rs_error = EOVERFLOW; +			return (B_FALSE); +		} +	}  	*valp = v; -	sv_consume_n(svp, ndigits);  	return (B_TRUE);  }  static boolean_t -rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp) +rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv)  { -	if (st->rds_error != 0) +	DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv)); + +	if (HAS_ERROR(st))  		return (B_FALSE); -	if (sv_peek(svp, 0) != '$') +	if (!sv_consume_if_c(sv, '_'))  		return (B_FALSE); -	for (size_t i = 0; i < rust_charmap_sz; i++) { -		if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) { -			if (!rustdem_append_c(st, rust_charmap[i].ruc_ch)) -				return (B_FALSE); -			return (B_TRUE); +	/* +	 * MacOS prepends an additional '_' -- allow that in case +	 * we're given symbols from a MacOS object. 
+	 */ +	(void) sv_consume_if_c(sv, '_'); + +	if (sv_consume_if_c(sv, 'Z')) { +		/* +		 * Legacy names must start with '[_]_Z' +		 */ +		st->rs_encver = RUSTENC_LEGACY; +		DEMDEBUG("name is encoded using the rust legacy mangling " +		    "scheme"); +	} else if (sv_consume_if_c(sv, 'R')) { +		uint64_t ver = 0; + +		/* +		 * The non-legacy encoding is versioned. After the initial +		 * 'R' is the version. This isn't spelled out clearly in the +		 * RFC, but many numeric values encoded take an approach of +		 * a value of 0 is omitted, and any digits represent the +		 * value - 1. In other words, in this case, no digits means +		 * version 0, '_R0...' would be version 1, '_R1...' would +		 * be version 2, etc. Currently only version 0 is defined, +		 * but we try to provide a (hopefully) useful message +		 * when debugging, even if we can't use the version value +		 * beyond that. +		 */ +		if (rust_parse_base10(st, sv, &ver)) { +			DEMDEBUG("%s: ERROR: an unsupported encoding version " +			    "(%" PRIu64 ") was encountered", ver + 1); +			st->rs_error = ENOTSUP; +			return (B_FALSE);  		} + +		st->rs_encver = RUSTENC_V0; +		DEMDEBUG("name is encoded using the v0 mangling scheme"); +	} else { +		DEMDEBUG("did not find a valid rust prefix"); +		return (B_FALSE);  	} -	return (B_FALSE); + +	sv_init_sv(&st->rs_orig, sv); +	return (B_TRUE); +} + +static void +rust_fini_state(rust_state_t *st) +{ +	custr_free(st->rs_demangled); +	custr_alloc_fini(&st->rs_cualloc);  }  static boolean_t -rustdem_add_sep(rustdem_state_t *st) +rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops)  { -	if (st->rds_error != 0) +	const char *codeset; + +	(void) memset(st, 0, sizeof (*st)); + +	st->rs_str = s; +	st->rs_ops = ops; + +	st->rs_cualloc.cua_version = CUSTR_VERSION; +	if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0)  		return (B_FALSE); +	st->rs_cualloc.cua_arg = st; -	if (!rustdem_append_c(st, ':') || -	    !rustdem_append_c(st, ':')) +	if 
(custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) { +		custr_alloc_fini(&st->rs_cualloc);  		return (B_FALSE); +	} + +	codeset = nl_langinfo(CODESET); +	if (codeset != NULL && strcmp(codeset, "UTF-8") == 0) +		st->rs_isutf8 = B_TRUE;  	return (B_TRUE);  } -static boolean_t -rustdem_append_c(rustdem_state_t *st, char c) +char * +rust_demangle(const char *s, size_t len, sysdem_ops_t *ops)  { -	if (st->rds_error != 0) -		return (B_FALSE); +	rust_state_t st; +	strview_t sv = { 0 }; +	boolean_t success = B_FALSE; +	int e = 0; +	char *out = NULL; -	if (custr_appendc(st->rds_demangled, c) == 0) -		return (B_TRUE); +	if (!rust_init_state(&st, s, ops)) +		return (NULL); -	st->rds_error = errno; -	return (B_FALSE); -} +	sv_init_str(&sv, s, s + len); -static boolean_t -rustdem_all_ascii(const strview_t *svp) -{ -	strview_t p; +	if (!rust_parse_prefix(&st, &sv)) { +		if (st.rs_error == 0) +			st.rs_error = EINVAL; +		goto done; +	} -	sv_init_sv(&p, svp); +	DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv)); -	while (sv_remaining(&p) > 0) { -		char c = sv_consume_c(&p); +	switch (st.rs_encver) { +	case RUSTENC_LEGACY: +		success = rust_demangle_legacy(&st, &sv); +		break; +	case RUSTENC_V0: +		success = rust_demangle_v0(&st, &sv); +		break; +	} -		/* -		 * #including <sys/ctype.h> conflicts with <ctype.h>.  Since -		 * we want the C locale macros (ISDIGIT, etc), it also means -		 * we can't use isascii(3C). 
-		 */ -		if ((c & 0x80) != 0) { -			DEMDEBUG("%s: found non-ascii character 0x%02hhx at " -			    "offset %tu", __func__, c, -			    (ptrdiff_t)(p.sv_first - svp->sv_first)); -			return (B_FALSE); -		} +done: +	if (success) { +		out = xstrdup(ops, custr_cstr(st.rs_demangled)); +		if (out == NULL) +			SET_ERROR(&st); +	} else { +		DEMDEBUG("%s: failed, str='%s'", __func__, +		    custr_cstr(st.rs_demangled)); + +		st.rs_error = EINVAL;  	} -	return (B_TRUE); -} -static void * -rustdem_alloc(custr_alloc_t *cao, size_t len) -{ -	rustdem_state_t *st = cao->cua_arg; -	return (zalloc(st->rds_ops, len)); -} +	e = st.rs_error; +	rust_fini_state(&st); +	if (e > 0) +		errno = e; -static void -rustdem_free(custr_alloc_t *cao, void *p, size_t len) -{ -	rustdem_state_t *st = cao->cua_arg; -	xfree(st->rds_ops, p, len); +	return (out);  } diff --git a/usr/src/lib/libdemangle/common/rust.h b/usr/src/lib/libdemangle/common/rust.h new file mode 100644 index 0000000000..fbe609ab9d --- /dev/null +++ b/usr/src/lib/libdemangle/common/rust.h @@ -0,0 +1,87 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. 
+ * Copyright 2021 Jason King + */ + +#ifndef _RUST_H +#define	_RUST_H + +#include <errno.h> +#include <sys/types.h> +#include "demangle-sys.h" +#include "demangle_int.h" +#include "strview.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum rustenc_version { +	RUSTENC_LEGACY = -1, +	RUSTENC_V0 = 0 +} rustenc_version_t; + +typedef struct rust_state { +	const char	*rs_str; /* The original string */ +	custr_t		*rs_demangled; +	sysdem_ops_t	*rs_ops; +	custr_alloc_t	rs_cualloc; +	strview_t	rs_orig; /* strview of original string, sans prefix */ +	int		rs_error; +	rustenc_version_t rs_encver; +	uint64_t	rs_lt_depth; +	boolean_t	rs_skip; +	boolean_t	rs_args_stay_open; +	boolean_t	rs_args_is_open; +	boolean_t	rs_verbose; +	boolean_t	rs_show_const_type; +	boolean_t	rs_isutf8; +} rust_state_t; +#define	HAS_ERROR(_st) ((_st)->rs_error != 0) +#define	SET_ERROR(_st) ((_st)->rs_error = errno) + +/* + * In certain circumstances, we need to parse an item, but not emit any + * output. These macros assist in that. To use: + * + * rust_state_t *st; + * boolean_t saved_state; + * ... + * SKIP_BEGIN(st, saved_state); + * ... stuff to not emit + * SKIP_END(st, saved_state); + */ +#define	SKIP_BEGIN(_st, _save)		\ +	(_save) = (_st)->rs_skip,	\ +	(_st)->rs_skip = B_TRUE +#define	SKIP_END(_st, _n) (_st)->rs_skip = (_n) + +boolean_t rust_appendc(rust_state_t *, char); +boolean_t rust_append(rust_state_t *, const char *); +boolean_t rust_append_printf(rust_state_t *, const char *, ...) 
__PRINTFLIKE(2); +boolean_t rust_append_sv(rust_state_t *restrict, uint64_t, strview_t *restrict); +boolean_t rust_append_utf8_c(rust_state_t *, uint32_t); +boolean_t rust_parse_base10(rust_state_t *restrict, strview_t *restrict, +    uint64_t *restrict); +boolean_t rust_demangle_legacy(rust_state_t *restrict, strview_t *restrict); +boolean_t rust_demangle_v0(rust_state_t *restrict, strview_t *restrict); + +boolean_t rustv0_puny_decode(rust_state_t *restrict, strview_t *restrict, +    boolean_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _RUST_H */ diff --git a/usr/src/lib/libdemangle/common/str.c b/usr/src/lib/libdemangle/common/str.c index 014ce8a737..8608a17b5f 100644 --- a/usr/src/lib/libdemangle/common/str.c +++ b/usr/src/lib/libdemangle/common/str.c @@ -12,8 +12,6 @@  /*   * Copyright 2017 Jason King   */ -#include <sys/debug.h> -#include <sys/sysmacros.h>  #include <string.h>  #include "str.h"  #include "demangle_int.h" diff --git a/usr/src/lib/libdemangle/common/strview.c b/usr/src/lib/libdemangle/common/strview.c index e4576ee17a..1653484172 100644 --- a/usr/src/lib/libdemangle/common/strview.c +++ b/usr/src/lib/libdemangle/common/strview.c @@ -10,10 +10,11 @@   */  /* - * Copyright 2019, Joyent, Inc. + * Copyright 2019 Joyent, Inc.   */  #include <string.h> +#include <sys/types.h>  #include <sys/debug.h>  #include "strview.h" diff --git a/usr/src/lib/libdemangle/common/util.c b/usr/src/lib/libdemangle/common/util.c index 739c554826..17eefe82d7 100644 --- a/usr/src/lib/libdemangle/common/util.c +++ b/usr/src/lib/libdemangle/common/util.c @@ -14,7 +14,8 @@   * Copyright 2019, Joyent, Inc.   
*/ -#include <sys/debug.h> +#include <errno.h> +#include <limits.h>  #include <stdlib.h>  #include <string.h>  #include "demangle-sys.h" @@ -42,6 +43,25 @@ zalloc(sysdem_ops_t *ops, size_t len)  	return (p);  } +void * +xcalloc(sysdem_ops_t *ops, size_t n, size_t elsize) +{ +	uint64_t sz; + +	if (mul_overflow(n, elsize, &sz)) { +		errno = ENOMEM; +		return (NULL); +	} + +#ifndef _LP64 +	if (sz > SIZE_MAX) { +		errno = ENOMEM; +		return (NULL); +	} +#endif + +	return (zalloc(ops, sz)); +}  void  xfree(sysdem_ops_t *ops, void *p, size_t len)  { diff --git a/usr/src/test/util-tests/tests/demangle/rust.c b/usr/src/test/util-tests/tests/demangle/rust.c index db2fae28e4..051bf2456b 100644 --- a/usr/src/test/util-tests/tests/demangle/rust.c +++ b/usr/src/test/util-tests/tests/demangle/rust.c @@ -26,7 +26,7 @@   * DEALINGS IN THE SOFTWARE.   */  /* - * Copyright 2019, Joyent, Inc. + * Copyright 2019 Joyent, Inc.   * Copyright 2021 Jason King   */ @@ -34,12 +34,16 @@   * Test cases taken from rustc-demangle 0.1.9   */  #include <errno.h> +#include <err.h> +#include <locale.h>  #include <stdio.h>  #include <stdlib.h>  #include <string.h>  #include <sys/sysmacros.h>  #include <demangle-sys.h> +#define	TEST_LOCALE "C.UTF-8" +  typedef struct rust_test_case {  	const char *mangled;  	const char *demangled; @@ -51,6 +55,7 @@ typedef struct rust_test_grp {  	const char		*name;  	rust_test_case_t	tests[];  } rust_test_grp_t; +  #define	GROUP(_n, ...)			
\  	static rust_test_grp_t _n = {	\  		.name = #_n,		\ @@ -60,6 +65,8 @@ typedef struct rust_test_grp {  		}			\  	} +/* BEGIN CSTYLED */ +  GROUP(demangle,      T_ERR("test"),      T("_ZN4testE", "test"), @@ -77,7 +84,6 @@ GROUP(demangle_many_dollars,      T("_ZN13test$u20$test4foobE", "test test::foob"),      T("_ZN12test$BP$test4foobE", "test*test::foob")); -/* BEGIN CSTYLED */  GROUP(demangle_osx,      T("__ZN5alloc9allocator6Layout9for_value17h02a996811f781011E",      "alloc::allocator::Layout::for_value::h02a996811f781011"), @@ -104,6 +110,143 @@ GROUP(handle_assoc_types,  /* C++ mangled names that aren't valid rust names */  GROUP(cplusplus_as_rust, T_ERR("_ZN7mozilla3dom13BrowserParent22RecvUpdateContentCacheERKNS_12ContentCacheE")); +GROUP(v0_crate_with_leading_digit, +    T("_RNvC6_123foo3bar", "123foo::bar")); + +GROUP(v0_utf8_idents, +    T("_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y", +    "utf8_idents::საჭმელად_გემრიელი_სადილი")); + +GROUP(v0_closure, +    T("_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_", +    "cc::spawn::{closure#0}::{closure#0}"), +    T("_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_", +    "<core::slice::Iter<u8> as core::iter::iterator::Iterator>::rposition::<core::slice::memchr::memrchr::{closure#1}>::{closure#0}")); + +GROUP(v0_dyn_trait, +    T("_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std", +    "alloc::alloc::box_free::<dyn alloc::boxed::FnBox<(), Output = ()>>")); + +GROUP(v0_const_generics, +    T("_RMC0INtC8arrayvec8ArrayVechKj7b_E", "<arrayvec::ArrayVec<u8, 123>>"), +    T("_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E", "<const_generic::Unsigned<11>>"), +    T("_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E", "<const_generic::Signed<152>>"), +    T("_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E", "<const_generic::Signed<-11>>"), +    
T("_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E", "<const_generic::Bool<false>>"), +    T("_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E", "<const_generic::Bool<true>>"), +    T("_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E", "<const_generic::Char<'v'>>"), +    T("_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E", "<const_generic::Char<'\\n'>>"), +    T("_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E", "<const_generic::Char<'∂'>>")); + +GROUP(v0_exponential_explosion, +    T("_RMC0TTTTTTpB8_EB7_EB6_EB5_EB4_EB3_E", +    "<((((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _)))), " +    "((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _))))), " +    "(((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _)))), " +    "((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _))))))>")); + +GROUP(v0_thinlto, +    T("_RC3foo.llvm.9D1C9369", "foo"), +    T("_RC3foo.llvm.9D1C9369@@16", "foo"), +    T("_RNvC9backtrace3foo.llvm.A5310EB9", "backtrace::foo")); + +GROUP(v0_demangle_extra_suffix, +    T("_RNvNtNtNtNtCs92dm3009vxr_4rand4rngs7adapter9reseeding4fork23FORK_HANDLER_REGISTERED.0.0", +    "rand::rngs::adapter::reseeding::fork::FORK_HANDLER_REGISTERED.0.0")); + +/* + * From Rust RFC2603 + */ +GROUP(v0_generic_func, +    T("_RINvNtC3std3mem8align_ofdE", "std::mem::align_of::<f64>"), +    T("_RINvNtC3std3mem8align_ofNtNtC3std3mem12DiscriminantE", +    "std::mem::align_of::<std::mem::Discriminant>"), +    T("_RINvNtC3std3mem8align_ofQTReuEE", +    "std::mem::align_of::<&mut (&str, ())>")); + +GROUP(v0_eddyb, +    T("_RNvXsa_NtNtCs7hxHya3g3Sg_4core3ptr6uniqueINtB5_6UniqueNtNtNtCshRVCqTKO4VO_5cargo4util4toml10TomlTargetEINtNtB9_7convert4FromINtNtB7_8non_null7NonNullBQ_EE4fromBW_", +      "<core::ptr::unique::Unique<cargo::util::toml::TomlTarget> as core::convert::From<core::ptr::non_null::NonNull<cargo::util::toml::TomlTarget>>>::from"), +    
T("_RNvXsG_NtNtCs2ZCqZGLqlfc_3std3ffi6os_strNtB5_5OsStrINtNtCs7hxHya3g3Sg_4core7convert5AsRefBC_E6as_ref", +      "<std::ffi::os_str::OsStr as core::convert::AsRef<std::ffi::os_str::OsStr>>::as_ref"), +    T("_RNvMs_NtCs7hxHya3g3Sg_4core6resultINtB4_6ResultNtNtB6_5alloc6LayoutNtBL_9LayoutErrE6unwrapCsdJWFNQ9j01_12aho_corasick", +      "<core::result::Result<core::alloc::Layout, core::alloc::LayoutErr>>::unwrap"), +    T("_RINvNtCs7hxHya3g3Sg_4core3mem7size_ofFUKCEPaECs2ZCqZGLqlfc_3std", +      "core::mem::size_of::<unsafe extern \"C\" fn() -> *const i8>"), +    T("_RINvCsc1o8JKpgQAm_4test28___rust_begin_short_backtraceFEuEB2_", +      "test::__rust_begin_short_backtrace::<fn()>"), +    T("_ZN4core5array104_$LT$impl$u20$core..iter..traits..collect..IntoIterator$u20$for$u20$$RF$$u5b$$RF$str$u3b$$u20$_$u5d$$GT$9into_iter17hc066f1a15f41761dE", +      "core::array::<impl core::iter::traits::collect::IntoIterator for &[&str; _]>::into_iter::hc066f1a15f41761d")); + +GROUP(v0_afl_fast, +    T_ERR("_RMC0TTTTTTPB8_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTpB8_yB7_eB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTpB4_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTpB4_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTTB8_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTSB8_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTRB8_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTQB8_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTOB8_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTpB8_yB7_hB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTpB8_yB7_llvmEB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTpB1_yB7_eB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTTTpB1_tB7_fB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTC0TTTTTPpB0_SB7_llvmTPpB8_SB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC3TTTTTtpB_yB7_EB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTRLpB8_llvB_vmEB_EB5FEB4EB5FEB4_EB3_E"), +    T_ERR("_RMC0TTTTQLp.B_llvmEB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TRMC0TTTTQLp.B_YBTTTQLp.B_YB7_EBd_EB5_EB4_EB3_E"), +    
T_ERR("_RMC0TTTTQLp.B_llvmEB6_EB5_EB4_E!3_E"), +    T_ERR("_RMC0TRMC0TTTTQLp.B_bB7_EB6_EB5_EB4_E"), +    T_ERR("_RMC0TTTTRLp.B_llvmEB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTQLpC0TTTfQLp.B_B_EB84_EB3_E"), +    T_ERR("_RMC0TTTTQLp.TfQLp.B_jC0TTTfQLp.B_llvT_EB3_E"), +    T_ERR("_RMC0TTTTQLpB8_TTTTTQLp_B_llvmEB6_E3_E"), +    T_ERR("_RIC0TRLpB8B8_B8_llvmEB6_EB5_llvmEB6_EB5_EB4_EL3_E"), +    T_ERR("_RNCINkXs25NNNNNNNNNNNNNNNNNNNNNNNN_INyB9_4IterhENuNgNoBb_4iter8iteraionNCN1_6hr7m0E0Bb_"), +    T_ERR("_RNCXNkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB4_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6hr7m0E0Bb_"), +    T_ERR("_RNCXNkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBZ_4iter8iterator8Iterator9rpositionNCNgNpB2_6hr7m0E0Bb_"), +    T_ERR("_RYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYyYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYyYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYNfYB_YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYNfYB_"), +    T_ERR("_RNCXNkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB2_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpo25_NgCsbmNqQUJIY6D_4core5sliceIN4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6NqQUJIY6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpositionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0EsitionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0E0Bb_"), +    
T_ERR("_RNCXNkXs25_NCCsbmNqQUJIY6D_4core5sliceINyB2_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpo25_NgCsbmNqQUJIY6D_4core5sliceIN4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6NqQUJIY6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpositionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0EsitionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0E0Bb_"), +    T_ERR("_RMC0TTTTRL_B4_llvmEB6_EB5_EB4_EB3_E"), +    T_ERR("_RMC0TTTTRRMC0TB7_llvmEB6_EB5_EB4_EB3_EL_B7_llvmEB6_EB5_EB4_EB3_E"), +    T_ERR("_RIC0TTTTQIC0L_B7_llvmEB6_E75_EB4_EB3_E"), +    T_ERR("_RNCINkXs25_NSCsbmNqQUJIY6D_4core53liceINyBK_4IterhDNCINkXs25_NSCsbmD_4core5sRNCINkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB9_4IterhDNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhENuNgNoBN_4iter8iterator8Iterato29rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8IliceINyB1_4IterhENuNgNoBN_4iter8iterator8Iterator9rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8Iter9rposillvmtionNCNgNpB1_Bb_"), +    T_ERR("_RNCINkXs25_NSCsbmNqQUJIY6D_4core93liceINyBK_4IterhDNCINkXs25_NSCsbmD_4core5sRNCINkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB9_4IterhDNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhENuNgNoBN_4iter8iterator8Iterato29rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8IliceINyB1_4IterhENuNgNoBN_4iter8iterator8Iterator9rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8Iter9rposillvmtionNCNgNpB1_Bb_"), +    
T_ERR("_RNCINkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB9_4IterhDNCINkXs25_NSCsbmD_4core5sRNCINkXs25_NSCsbmNqQUJIY6D_4core5sliceINyB9_4IterhDNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhENuNgNoBN_4iter8iterator8Iterato29rposillvmtiB_NCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8IliceINyB1_4IterhENuNgNoBN_4iter8iterator8Iterator9rposillvmtionNCXs25_NSCsbUJIY6D_4core5sliceINyB9_4IterhDNuNgNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iterator8Iter9rposillvmtionNCNgNpB1_Bb_"), +    T_ERR("_RNCINkXs25_NSCsbmNqQUJIY6D_4coreu425_NSNgNoBN_4iter8iteratotliceINyB9_4IterhDNCINkXs25_NSCsbmD_4core5sRNCINkXs25_NSCsbeNqQUJIY6D_4core5sliceINyB9_4IterhLNCINkXs25_NSCsbmJIY6D_4core5sliceINyB9_48888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888eeeeeeeeeeeeeee88888888888888888888888888888888888888G88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888NSCsbmJIY6D_4core5sliceINyB9_4IterhDNuNgNoBN_4iter8iteravmtionNCNgNpB1_Bb_"), +    T_ERR("_ZN9EB0_EB3_E"), +    T_ERR("_ZN9INTTB7_$B6_SB5_E"), +    T_ERR("_ZN9INTTB7 E.6_SBEEEEEEEEEEEEEEEEB7_EB6_EB5_EB0EB6_EB5_EB0_EB3_EEB0_EB3_E"), +    T_ERR("_ZN9I3TTB7_$B8_C0TTT9I3TTB7_$B8_$$5$B_E"), +    T_ERR("_ZN9$C$TB7_$B8_C0TTT9I3TB7_$B8_$$5$B_E"), +    T_ERR("_ZN9......=E"), +    T_ERR("_RMC0TTTTQLpfQNp.B_aaaaaTOTfQL_aaaaaB_"), +    T_ERR("_RMC0TTTTRLpB8_lRMC0B_aaB5_EB4_B5_EEB3_E"), +    T_ERR("_RMC0TTTTRLp_aalRMC0B_aaB5_EB4_B5_EEB3_E"), +    T_ERR("_RMC0TTTTRLp_C0TaalRMC0B6_EB_aaB4_B5_EEB3_E"), +    T_ERR("_RMC0TTTTRL0_aalRMC0B_aaB5_EB4_B5_EEB3_E"), +    T_ERR("_RMC6aEB8_XB4_YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYAly_IYB_lYYYYYYYAly_HYB_"), +    T_ERR("_RMC6aEB8_XB4_YYYYYYYYYYYYYYYYYYYYYYYMC6aEB8ZXB4_YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYAlypHYYYYYYYYYYYYYYYYYYAlyNHYB_"), +    
T_ERR("_RMC6aEB8_NB4_YYNYYYNYYYYYYYYYYYxYYYYYYYRAC6aEB8_NBV_YYNYYYNYYYYYYYYYYYxYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYlYNHYB_YYY"), +    T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$BP$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"), +    T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$SP$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"), +    T_ERR("__ZN9?@EEEEEJE"), +    T_ERR("_RMC0TTTATjpB8_EB7_TB_aaB5_EB4_EB3_E"), +    T_ERR("_ZN949$TE7_llv4C$TE$C$7_llvm$C$TT9'3TB_$__................................................................................................................................................................................................................................................................................................................................................................................................$B$.E..........................................:.........................................................................................................................................P...............................................@..................................................................................................................................................................................................................................................................TTB7_E.6_SB_E.6_S65__ZQCI<_EB=E"), +    
T_ERR("_RMC6aEB8_XB4_YYYYYYYYYYYYYYYYYYYYYYYYNSCsbmJIY6D_4core0MC6aEB8_XB4_YYYYYYYYYYYYYYYYYYsliceINyB9_rhDNuNgNoBN_4iter8iteravmt}ore5sliceINyB9_4IterhDNuNgNCINOXs25_NSCsbmJIY6D_4core5sliceI_yB9_4IterhDNuNgNoB__4llvmwionNB9_4INkXs25_NSCrhDYYYYYYYYYYYYYYYYYYYYYYYYYYYaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa)))))_aa)))))))))))))))))))))"), +    T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$LP$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"), +    T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$LT$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"), +    T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$GT$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"), +    T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$RP$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"), +    T_ERR("_ZN9A7$TB7_$B8_$B$TT9I3TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TT9I3TB7m$__ZN9$C$TB_$BZN9A7$TB7_$B8_$B$TT6$C$$B$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$RF$B$TT9I3TB7m$__ZN9$ $TB_$B8_$A$TT9I3TB7m$8_$A$TT9I3TB7m$__ZN2UE"), +    
T_ERR("_RNCXNkXs25_NgCsbmNqQUJIY6D_4core5sliceINyBK_4IterhENuNGNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpo25_NgCsbmNqQUJIY6D_4core5sliceIN4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB2_6NqQUJIY6D_4core5sliReINyB1_4IterhENu6D_4core5sliceINyB1_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpBNgNoBb_4iter8iterr9rpositionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0EsitionNCNgNpB2_6NqQUJIY6B2_6hr7m0E7m0E0Bb_"), +    T_ERR("_RIC6kIIIIIB4_lB5_EB4NEB3_A"), +    T_ERR("_ZN9I3TTB7_$B8_$B$TT9I398C$T$B8_$B$TT9I398C$T_aa$B8_$C$TT9I3_ZN9$C$TB7_$B8_$B$TB$$B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9ITB7$LT$B$TT9I3TB7m$__ZB8T_aa$B8K$C$TT9I3_ZN9$C$TB7_$LT$B$TT9I3TB7m$__ZN9$ $TB7m$__ZN98C$T_aa$B8_$C$TT9I3_ZN:$C$TB7_$B8_$B$TT9I3TB7m$__ZN$RF$9$C$TB_$BZN9A7$TB7_8B8_$B$TT6$C$$B$%B$$__ZN98C$T_aa$B8_$K$TT9I7_ZN9$C$TB7_$B8T_aa$B8_$C$TT9I3_ZN9$C$TB7_$LT$B$TT9I3TB7m$__ZN9$ UE"), +    T_ERR("_RIC6aOB_aaB4_RIC6aOB_aaB8_gB._NaEB5_gB8_gB4_NaEB5_))))))))))))))))))))))da)))))))C6aEB8_XB4_DC6aXB4_DC6aEJ8_gB_NaEB5_gB8_gB4_NaEB5_))))))))))))))))))))))_a))))))))sitUonNCNgNpB1_6hr7m0E0Bb_)))sitionNCNgNpB1_6hr7m0E0Bb_")); +  /* END CSTYLED */  static rust_test_grp_t *rust_tests[] = { @@ -116,8 +259,18 @@ static rust_test_grp_t *rust_tests[] = {  	&invalid_no_chop,  	&handle_assoc_types,  	&cplusplus_as_rust, +	&v0_crate_with_leading_digit, +	&v0_utf8_idents, +	&v0_closure, +	&v0_dyn_trait, +	&v0_const_generics, +	&v0_exponential_explosion, +	&v0_thinlto, +	&v0_demangle_extra_suffix, +	&v0_generic_func, +	&v0_eddyb, +	&v0_afl_fast,  }; -  static const size_t n_rust_tests = ARRAY_SIZE(rust_tests);  static boolean_t @@ -195,8 +348,13 @@ run_test(rust_test_grp_t *test)  int  main(int argc, char **argv)  { +	const char *l;  	boolean_t ok = B_TRUE; +	l = setlocale(LC_CTYPE, TEST_LOCALE); +	if (l == NULL || strcmp(l, TEST_LOCALE) != 0) +		errx(EXIT_FAILURE, "failed 
to set locale to %s", TEST_LOCALE); +  	for (size_t i = 0; i < n_rust_tests; i++)  		ok &= run_test(rust_tests[i]); | 
