[illumos-gate merge]

commit 1cd083931cfd3fb8617c1178f62bce417cfa6af2
    13780 Add support for rust v0 mangling format
commit 4fe48c6ec9f06cbcce19c4cf97f662b64efde582
    13798 loader: Update the EFI timer to be called once a second
commit 9e3493cb8a0cfe96c9aef9b7da42c6c9b5c24b43
    13374 Port L2ARC Improvements from OpenZFS
author: Dan McDonald <danmcd@joyent.com> 2021-05-14 12:00:48 -0400
committer: Dan McDonald <danmcd@joyent.com> 2021-05-14 12:00:48 -0400
commit: 1a25930b922d3fede4a252f4f2e0ecb8de2656cb (patch)
tree: a2ff441f85489691a94f13a644cb7cfe10b81f9e /usr/src/lib/libdemangle/common/rust.c
parent: 3aa01401155d92a38a0d4e107043c130432e4a43 (diff)
parent: 1cd083931cfd3fb8617c1178f62bce417cfa6af2 (diff)
download: illumos-joyent-1a25930b922d3fede4a252f4f2e0ecb8de2656cb.tar.gz
1 files changed, 290 insertions, 437 deletions
diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c
index 9b145ca841..ce1fca4859 100644
--- a/usr/src/lib/libdemangle/common/rust.c
+++ b/usr/src/lib/libdemangle/common/rust.c
@@ -10,564 +10,417 @@
  */
 
 /*
- * Copyright 2019, Joyent, Inc.
  * Copyright 2021 Jason King
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include <errno.h>
+#include <langinfo.h>
 #include <libcustr.h>
 #include <limits.h>
+#include <stdarg.h>
 #include <string.h>
-#include <sys/ctype.h>	/* We want the C locale ISXXX() versions */
-#include <sys/debug.h>
-#include <stdio.h>
-#include <sys/sysmacros.h>
 
-#include "strview.h"
 #include "demangle_int.h"
+#include "rust.h"
 
-/*
- * Unfortunately, there is currently no official specification for the rust
- * name mangling.  This is an attempt to document the understanding of the
- * mangling used here.  It is based off examination of
- *     https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/
- *
- * A mangled rust name is:
- *     <prefix> <name>
- *
- * <prefix>	::=	_Z
- *			__Z
- *
- * <name>	::= N <name-segment>+ [<hash>] E
- *
- * <name-segment> ::= <len> <name-chars>{len}
- *
- * <len>	::= [1-9][0-9]+
- *
- * <name-chars>	::=	<[A-Za-z]> <[A-Za-z0-9]>*
- *			<separator>
- *			<special>
- *
- * <separator>	::=	'..'	# '::'
- *
- * <special>	::=	$SP$	# ' '
- *			$BP$	# '*'
- *			$RF$	# '&'
- *			$LT$	# '<'
- *			$GT$	# '>'
- *			$LP$	# '('
- *			$RP$	# ')'
- *			$C$	# ','
- *			$u7e$	# '~'
- *			$u20$	# ' '
- *			$u27$	# '\''
- *			$u3d$	# '='
- *			$u5b$	# '['
- *			$u5d$	# ']'
- *			$u7b$	# '{'
- *			$u7d$	# '}'
- *			$u3b$	# ';'
- *			$u2b$	# '+'
- *			$u22$	# '"'
- *
- * <hash>	:= <len> h <hex-digits>+
- *
- * <hex-digits>	:= <[0-9a-f]>
- */
-
-typedef struct rustdem_state {
-	const char	*rds_str;
-	custr_t		*rds_demangled;
-	sysdem_ops_t	*rds_ops;
-	int		rds_error;
-} rustdem_state_t;
-
-static const struct rust_charmap {
-	const char	*ruc_seq;
-	char		ruc_ch;
-} rust_charmap[] = {
-	{ "$SP$", '@' },
-	{ "$BP$", '*' },
-	{ "$RF$", '&' },
-	{ "$LT$", '<' },
-	{ "$GT$", '>' },
-	{ "$LP$", '(' },
-	{ "$RP$", ')' },
-	{ "$C$", ',' },
-	{ "$u7e$", '~' },
-	{ "$u20$", ' ' },
-	{ "$u27$", '\'' },
-	{ "$u3d$", '=' },
-	{ "$u5b$", '[' },
-	{ "$u5d$", ']' },
-	{ "$u7b$", '{' },
-	{ "$u7d$", '}' },
-	{ "$u3b$", ';' },
-	{ "$u2b$", '+' },
-	{ "$u22$", '"' }
-};
-static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap);
-
-static void *rustdem_alloc(custr_alloc_t *, size_t);
-static void rustdem_free(custr_alloc_t *, void *, size_t);
-
-static boolean_t rustdem_append_c(rustdem_state_t *, char);
-static boolean_t rustdem_all_ascii(const strview_t *);
-
-static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *);
-static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *);
-static boolean_t rustdem_add_sep(rustdem_state_t *);
-
-char *
-rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops)
+static void *
+rust_cualloc(custr_alloc_t *cua, size_t len)
 {
-	rustdem_state_t st = {
-		.rds_str = s,
-		.rds_ops = ops,
-	};
-	custr_alloc_ops_t custr_ops = {
-		.custr_ao_alloc = rustdem_alloc,
-		.custr_ao_free = rustdem_free
-	};
-	custr_alloc_t custr_alloc = {
-		.cua_version = CUSTR_VERSION
-	};
-	strview_t sv;
-	int ret;
-
-	if (custr_alloc_init(&custr_alloc, &custr_ops) != 0)
-		return (NULL);
-	custr_alloc.cua_arg = &st;
-
-	sv_init_str(&sv, s, s + slen);
-
-	if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') {
-		DEMDEBUG("ERROR: string is either too small or does not end "
-		    "with 'E'");
-		errno = EINVAL;
-		return (NULL);
-	}
-
-	if (!rustdem_parse_prefix(&st, &sv)) {
-		DEMDEBUG("ERROR: could not parse prefix");
-		errno = EINVAL;
-		return (NULL);
-	}
-	DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv));
-
-	if (!rustdem_all_ascii(&sv)) {
-		/* rustdem_all_ascii() provides debug output */
-		errno = EINVAL;
-		return (NULL);
-	}
-
-	if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0)
-		return (NULL);
-
-	if (!rustdem_parse_name(&st, &sv)) {
-		if (st.rds_error == 0)
-			st.rds_error = EINVAL;
-		goto fail;
-	}
-
-	if (sv_remaining(&sv) > 0) {
-		DEMDEBUG("ERROR: unexpected trailing characters after "
-		    "terminating 'E': '%.*s'", SV_PRINT(&sv));
-		st.rds_error = EINVAL;
-		goto fail;
-	}
-
-	char *res = xstrdup(ops, custr_cstr(st.rds_demangled));
-	if (res == NULL) {
-		st.rds_error = errno;
-		goto fail;
-	}
-
-	custr_free(st.rds_demangled);
-	DEMDEBUG("result = '%s'", res);
-	return (res);
-
-fail:
-	custr_free(st.rds_demangled);
-	errno = st.rds_error;
-	return (NULL);
+	rust_state_t *st = cua->cua_arg;
+	return (zalloc(st->rs_ops, len));
 }
 
-static boolean_t
-rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp)
+static void
+rust_cufree(custr_alloc_t *cua, void *p, size_t len)
 {
-	strview_t pfx;
+	rust_state_t *st = cua->cua_arg;
+	xfree(st->rs_ops, p, len);
+}
 
-	sv_init_sv(&pfx, svp);
+static const custr_alloc_ops_t rust_custr_ops = {
+	.custr_ao_alloc = rust_cualloc,
+	.custr_ao_free = rust_cufree
+};
 
-	DEMDEBUG("checking for '_Z' or '__Z' in '%.*s'", SV_PRINT(&pfx));
+boolean_t
+rust_appendc(rust_state_t *st, char c)
+{
+	custr_t *cus = st->rs_demangled;
 
-	if (st->rds_error != 0)
+	if (HAS_ERROR(st))
 		return (B_FALSE);
 
-	if (!sv_consume_if_c(&pfx, '_'))
-		return (B_FALSE);
+	if (st->rs_skip)
+		return (B_TRUE);
 
-	(void) sv_consume_if_c(&pfx, '_');
+	switch (c) {
+	case '\a':
+		return (rust_append(st, "\\a"));
+	case '\b':
+		return (rust_append(st, "\\b"));
+	case '\f':
+		return (rust_append(st, "\\f"));
+	case '\n':
+		return (rust_append(st, "\\n"));
+	case '\r':
+		return (rust_append(st, "\\r"));
+	case '\t':
+		return (rust_append(st, "\\t"));
+	case '\v':
+		return (rust_append(st, "\\v"));
+	case '\\':
+		return (rust_append(st, "\\\\"));
+	}
+
+	if (c < ' ')
+		return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c));
 
-	if (!sv_consume_if_c(&pfx, 'Z'))
+	if (custr_appendc(cus, c) != 0) {
+		SET_ERROR(st);
 		return (B_FALSE);
+	}
 
-	/* Update svp with new position */
-	sv_init_sv(svp, &pfx);
 	return (B_TRUE);
 }
 
-static boolean_t
-rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first)
+/*
+ * Append a Unicode code point. In a UTF-8 locale it is appended UTF-8
+ * encoded; otherwise non-ASCII values are appended as '\u<4 hex digits>'
+ * (or '\U<8 hex digits>' when above 0xffff).
+ */
+boolean_t
+rust_append_utf8_c(rust_state_t *st, uint32_t val)
 {
-	strview_t sv;
-	strview_t name;
-	uint64_t len;
-	size_t rem;
-	boolean_t last = B_FALSE;
+	custr_t *cus = st->rs_demangled;
+	uint_t n = 0;
+	uint8_t c[4] = { 0 };
 
-	if (st->rds_error != 0 || sv_remaining(svp) == 0)
+	if (HAS_ERROR(st))
 		return (B_FALSE);
 
-	sv_init_sv(&sv, svp);
-
-	if (!rustdem_parse_num(st, &sv, &len)) {
-		DEMDEBUG("ERROR: no leading length");
-		st->rds_error = EINVAL;
-		return (B_FALSE);
+	if (!st->rs_isutf8) {
+		if (val < 0x80)
+			return (rust_appendc(st, (char)val));
+		if (val < 0x10000)
+			return (rust_append_printf(st, "\\u%04" PRIx32, val));
+		return (rust_append_printf(st, "\\U%08" PRIx32, val));
 	}
 
-	rem = sv_remaining(&sv);
-
-	if (rem < len) {
-		st->rds_error = EINVAL;
+	if (val < 0x80) {
+		return (rust_appendc(st, (char)val));
+	} else if (val < 0x800) {
+		c[0] = 0xc0 | ((val >> 6) & 0x1f);
+		c[1] = 0x80 | (val & 0x3f);
+		n = 2;
+	} else if (val < 0x10000) {
+		c[0] = 0xe0 | ((val >> 12) & 0x0f);
+		c[1] = 0x80 | ((val >> 6) & 0x3f);
+		c[2] = 0x80 | (val & 0x3f);
+		n = 3;
+	} else if (val < 0x110000) {
+		c[0] = 0xf0 | ((val >> 18) & 0x7);
+		c[1] = 0x80 | ((val >> 12) & 0x3f);
+		c[2] = 0x80 | ((val >> 6) & 0x3f);
+		c[3] = 0x80 | (val & 0x3f);
+		n = 4;
+	} else {
+		DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__,
+		    val);
 		return (B_FALSE);
 	}
 
-	/* Is this the last segment before the terminating E? */
-	if (rem == len + 1) {
-		VERIFY3U(sv_peek(&sv, -1), ==, 'E');
-		last = B_TRUE;
+	for (uint_t i = 0; i < n; i++) {
+		if (custr_appendc(cus, c[i]) != 0) {
+			SET_ERROR(st);
+			return (B_FALSE);
+		}
 	}
 
-	if (!first && !rustdem_add_sep(st))
-		return (B_FALSE);
-
-	/* Reduce length of seg to the length we parsed */
-	(void) sv_init_sv_range(&name, &sv, len);
-
-	DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name));
-
-	/*
-	 * A rust hash starts with 'h', and is the last component of a name
-	 * before the terminating 'E'. It is however not always present
-	 * in every mangled symbol, and a last segment that starts with 'h'
-	 * could be confused for it, so failing to parse it just means
-	 * we don't have a trailing hash.
-	 */
-	if (sv_peek(&name, 0) == 'h' && last) {
-		if (rustdem_parse_hash(st, &name))
-			goto done;
-
-		/*
-		 * However any error other than 'not a hash' (e.g. ENOMEM)
-		 * means we should fail.
-		 */
-		if (st->rds_error != 0)
-			goto done;
-	}
+	return (B_TRUE);
+}
 
-	while (sv_remaining(&name) > 0) {
-		switch (sv_peek(&name, 0)) {
-		case '$':
-			if (rustdem_parse_special(st, &name))
-				continue;
-			break;
-		case '_':
-			if (sv_peek(&name, 1) == '$') {
-				/*
-				 * Only consume/ignore '_'.  Leave
-				 * $ for next round.
-				 */
-				sv_consume_n(&name, 1);
-				continue;
-			}
-			break;
-		case '.':
-			/* Convert '..' to '::' */
-			if (sv_peek(&name, 1) != '.')
-				break;
+boolean_t
+rust_append(rust_state_t *st, const char *s)
+{
+	custr_t *cus = st->rs_demangled;
 
-			if (!rustdem_add_sep(st))
-				return (B_FALSE);
+	if (HAS_ERROR(st))
+		return (B_FALSE);
 
-			sv_consume_n(&name, 2);
-			continue;
-		default:
-			break;
-		}
+	if (st->rs_skip)
+		return (B_TRUE);
 
-		if (custr_appendc(st->rds_demangled,
-		    sv_consume_c(&name)) != 0) {
-			st->rds_error = ENOMEM;
-			return (B_FALSE);
-		}
+	if (custr_append(cus, s) != 0) {
+		SET_ERROR(st);
+		return (B_FALSE);
 	}
 
-done:
-	sv_consume_n(&sv, len);
-	VERIFY3P(svp->sv_first, <=, sv.sv_first);
-	DEMDEBUG("%s: consumed '%.*s'", __func__,
-	    (int)(sv.sv_first - svp->sv_first), svp->sv_first);
-	sv_init_sv(svp, &sv);
 	return (B_TRUE);
 }
 
-/*
- * Parse N (<num><name>{num})+[<num>h<hex digits>]E
- */
-static boolean_t
-rustdem_parse_name(rustdem_state_t *st, strview_t *svp)
+boolean_t
+rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv)
 {
-	strview_t name;
-	boolean_t first = B_TRUE;
-
-	if (st->rds_error != 0)
+	if (HAS_ERROR(st))
 		return (B_FALSE);
 
-	sv_init_sv(&name, svp);
-
-	DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name));
+	if (st->rs_skip) {
+		sv_consume_n(sv, (size_t)n);
+		return (B_TRUE);
+	}
 
-	if (sv_remaining(&name) == 0) {
-		DEMDEBUG("%s: empty name", __func__);
+	if (n > sv_remaining(sv)) {
+		DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > "
+		    "remaining bytes (%zu)", __func__, n, sv_remaining(sv));
+		st->rs_error = ERANGE;
 		return (B_FALSE);
 	}
 
-	if (!sv_consume_if_c(&name, 'N')) {
-		DEMDEBUG("%s: does not start with 'N'", __func__);
+	if (n > INT_MAX) {
+		DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n);
+		st->rs_error = ERANGE;
 		return (B_FALSE);
 	}
 
-	while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') {
-		if (!rustdem_parse_name_segment(st, &name, first))
-			return (B_FALSE);
-		first = B_FALSE;
+	if (custr_append_printf(st->rs_demangled, "%.*s",
+	    (int)n, sv->sv_first) != 0) {
+		SET_ERROR(st);
+		return (B_FALSE);
 	}
-	VERIFY(sv_consume_if_c(&name, 'E'));
-
-	VERIFY3P(svp->sv_first, <=, name.sv_first);
-	DEMDEBUG("%s: consumed '%.*s'", __func__,
-	    (int)(name.sv_first - svp->sv_first), svp->sv_first);
+	sv_consume_n(sv, (size_t)n);
 
-	sv_init_sv(svp, &name);
 	return (B_TRUE);
 }
 
-static boolean_t
-rustdem_parse_hash(rustdem_state_t *st, strview_t *svp)
+boolean_t
+rust_append_printf(rust_state_t *st, const char *fmt, ...)
 {
-	strview_t sv;
-
-	sv_init_sv(&sv, svp);
+	va_list ap;
+	int ret;
 
-	VERIFY(sv_consume_if_c(&sv, 'h'));
-	if (!rustdem_append_c(st, 'h'))
+	if (HAS_ERROR(st))
 		return (B_FALSE);
 
-	while (sv_remaining(&sv) > 0) {
-		char c = sv_consume_c(&sv);
+	if (st->rs_skip)
+		return (B_TRUE);
 
-		switch (c) {
-		/*
-		 * The upper-case hex digits (A-F) are excluded as valid
-		 * hash values for several reasons:
-		 *
-		 * 1. It would result in two different possible names for
-		 * the same function, leading to ambiguity in linking (among
-		 * other things).
-		 *
-		 * 2. It would cause potential ambiguity in parsing -- is a
-		 * trailing 'E' part of the hash, or the terminating character
-		 * in the mangled name?
-		 *
-		 * 3. No examples were able to be found in the wild where
-		 * uppercase digits are used, and other rust demanglers all
-		 * seem to assume the hash must contain lower-case hex digits.
-		 */
-		case '0': case '1': case '2': case '3':
-		case '4': case '5': case '6': case '7':
-		case '8': case '9': case 'a': case 'b':
-		case 'c': case 'd': case 'e': case 'f':
-			if (!rustdem_append_c(st, c))
-				return (B_FALSE);
-			break;
-		default:
-			return (B_FALSE);
-		}
-	}
+	va_start(ap, fmt);
+	ret = custr_append_vprintf(st->rs_demangled, fmt, ap);
+	va_end(ap);
 
-	sv_init_sv(svp, &sv);
-	return (B_TRUE);
+	if (ret == 0)
+		return (B_TRUE);
+	SET_ERROR(st);
+	return (B_FALSE);
 }
 
-/*
- * We have to pick an arbitrary limit here; 999,999,999 fits comfortably
- * within an int32_t, so let's go with that, as it seems unlikely we'd
- * ever see a larger value in context.
- */
-#define	MAX_DIGITS 9
-
-static boolean_t
-rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp,
+boolean_t
+rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv,
     uint64_t *restrict valp)
 {
-	strview_t snum;
 	uint64_t v = 0;
-	size_t ndigits = 0;
 	char c;
 
-	if (st->rds_error != 0)
+	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
 		return (B_FALSE);
 
-	sv_init_sv(&snum, svp);
-
-	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum));
-
-	c = sv_peek(&snum, 0);
-	if (!ISDIGIT(c)) {
-		DEMDEBUG("%s: ERROR no digits in str\n", __func__);
-		st->rds_error = EINVAL;
-		return (B_FALSE);
-	}
+	c = sv_peek(sv, 0);
 
 	/*
-	 * Since there is currently no official specification on rust name
-	 * mangling, only that it has been stated that rust follows what
-	 * C++ mangling does.  In the Itanium C++ ABI (what practically
-	 * every non-Windows C++ implementation uses these days), it
-	 * explicitly disallows leading 0s in numeric values (except for
-	 * substition and template indexes, which aren't relevant here).
-	 * We enforce the same restriction -- if a rust implementation allowed
-	 * leading zeros in numbers (basically segment lengths) it'd
-	 * cause all sorts of ambiguity problems with names that likely lead
-	 * to much bigger problems with linking and such, so this seems
-	 * reasonable.
+	 * Since the legacy rust encoding states that it follows the
+	 * Itanium C++ mangling format, we match the behavior of the
+	 * Itanium C++ ABI in disallowing leading 0s in decimal numbers.
+	 *
+	 * For Rust encoding v0, RFC2603 currently has omitted the
+	 * actual definition of <decimal-number>. However examination of
+	 * other implementations written in tandem with the mangling
+	 * implementation suggests that <decimal-number> can be expressed
+	 * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and
+	 * terminates the token, while any other leading digit allows
+	 * parsing to continue until a non-digit is encountered, the
+	 * end of the string is encountered, or overflow is encountered.
 	 */
 	if (c == '0') {
-		DEMDEBUG("%s: ERROR number starts with leading 0\n", __func__);
-		st->rds_error = EINVAL;
+		if (st->rs_encver == RUSTENC_V0) {
+			sv_consume_n(sv, 1);
+			*valp = 0;
+			return (B_TRUE);
+		}
+
+		DEMDEBUG("%s: ERROR number starts with leading 0\n",
+		    __func__);
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	} else if (!ISDIGIT(c)) {
 		return (B_FALSE);
 	}
 
-	while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) {
-		c = sv_consume_c(&snum);
+	while (sv_remaining(sv) > 0) {
+		uint64_t cval;
 
+		c = sv_peek(sv, 0);
 		if (!ISDIGIT(c))
 			break;
+		sv_consume_n(sv, 1);
 
-		v *= 10;
-		v += c - '0';
-		ndigits++;
-	}
+		cval = c - '0';
 
-	if (ndigits > MAX_DIGITS) {
-		DEMDEBUG("%s: value %llu is too large\n", __func__, v);
-		st->rds_error = ERANGE;
-		return (B_FALSE);
-	}
+		if (mul_overflow(v, 10, &v)) {
+			DEMDEBUG("%s: multiplication overflowed\n", __func__);
+			st->rs_error = EOVERFLOW;
+			return (B_FALSE);
+		}
 
-	DEMDEBUG("%s: num=%llu", __func__, v);
+		if (add_overflow(v, cval, &v)) {
+			DEMDEBUG("%s: addition overflowed\n", __func__);
+			st->rs_error = EOVERFLOW;
+			return (B_FALSE);
+		}
+	}
 
 	*valp = v;
-	sv_consume_n(svp, ndigits);
 	return (B_TRUE);
 }
 
 static boolean_t
-rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp)
+rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv)
 {
-	if (st->rds_error != 0)
+	DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv));
+
+	if (HAS_ERROR(st))
 		return (B_FALSE);
 
-	if (sv_peek(svp, 0) != '$')
+	if (!sv_consume_if_c(sv, '_'))
 		return (B_FALSE);
 
-	for (size_t i = 0; i < rust_charmap_sz; i++) {
-		if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) {
-			if (!rustdem_append_c(st, rust_charmap[i].ruc_ch))
-				return (B_FALSE);
-			return (B_TRUE);
+	/*
+	 * MacOS prepends an additional '_' -- allow that in case
+	 * we're given symbols from a MacOS object.
+	 */
+	(void) sv_consume_if_c(sv, '_');
+
+	if (sv_consume_if_c(sv, 'Z')) {
+		/*
+		 * Legacy names must start with '[_]_Z'
+		 */
+		st->rs_encver = RUSTENC_LEGACY;
+		DEMDEBUG("name is encoded using the rust legacy mangling "
+		    "scheme");
+	} else if (sv_consume_if_c(sv, 'R')) {
+		uint64_t ver = 0;
+
+		/*
+		 * The non-legacy encoding is versioned. The version follows
+		 * the initial 'R'. This isn't spelled out clearly in the
+		 * RFC, but many encoded numeric values use the convention
+		 * that a value of 0 is omitted and any digits present encode
+		 * the value - 1. In other words, in this case, no digits
+		 * means version 0, '_R0...' would be version 1, '_R1...'
+		 * would be version 2, etc. Currently only version 0 is
+		 * defined, but we try to provide a (hopefully) useful message
+		 * when debugging, even if we can't use the version value
+		 * beyond that.
+		 */
+		if (rust_parse_base10(st, sv, &ver)) {
+			DEMDEBUG("%s: ERROR: an unsupported encoding version "
+			    "(%" PRIu64 ") was encountered", __func__, ver + 1);
+			st->rs_error = ENOTSUP;
+			return (B_FALSE);
 		}
+
+		st->rs_encver = RUSTENC_V0;
+		DEMDEBUG("name is encoded using the v0 mangling scheme");
+	} else {
+		DEMDEBUG("did not find a valid rust prefix");
+		return (B_FALSE);
 	}
-	return (B_FALSE);
+
+	sv_init_sv(&st->rs_orig, sv);
+	return (B_TRUE);
+}
+
+static void
+rust_fini_state(rust_state_t *st)
+{
+	custr_free(st->rs_demangled);
+	custr_alloc_fini(&st->rs_cualloc);
 }
 
 static boolean_t
-rustdem_add_sep(rustdem_state_t *st)
+rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops)
 {
-	if (st->rds_error != 0)
+	const char *codeset;
+
+	(void) memset(st, 0, sizeof (*st));
+
+	st->rs_str = s;
+	st->rs_ops = ops;
+
+	st->rs_cualloc.cua_version = CUSTR_VERSION;
+	if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0)
 		return (B_FALSE);
+	st->rs_cualloc.cua_arg = st;
 
-	if (!rustdem_append_c(st, ':') ||
-	    !rustdem_append_c(st, ':'))
+	if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) {
+		custr_alloc_fini(&st->rs_cualloc);
 		return (B_FALSE);
+	}
+
+	codeset = nl_langinfo(CODESET);
+	if (codeset != NULL && strcmp(codeset, "UTF-8") == 0)
+		st->rs_isutf8 = B_TRUE;
 
 	return (B_TRUE);
 }
 
-static boolean_t
-rustdem_append_c(rustdem_state_t *st, char c)
+char *
+rust_demangle(const char *s, size_t len, sysdem_ops_t *ops)
 {
-	if (st->rds_error != 0)
-		return (B_FALSE);
+	rust_state_t st;
+	strview_t sv = { 0 };
+	boolean_t success = B_FALSE;
+	int e = 0;
+	char *out = NULL;
 
-	if (custr_appendc(st->rds_demangled, c) == 0)
-		return (B_TRUE);
+	if (!rust_init_state(&st, s, ops))
+		return (NULL);
 
-	st->rds_error = errno;
-	return (B_FALSE);
-}
+	sv_init_str(&sv, s, s + len);
 
-static boolean_t
-rustdem_all_ascii(const strview_t *svp)
-{
-	strview_t p;
+	if (!rust_parse_prefix(&st, &sv)) {
+		if (st.rs_error == 0)
+			st.rs_error = EINVAL;
+		goto done;
+	}
 
-	sv_init_sv(&p, svp);
+	DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv));
 
-	while (sv_remaining(&p) > 0) {
-		char c = sv_consume_c(&p);
+	switch (st.rs_encver) {
+	case RUSTENC_LEGACY:
+		success = rust_demangle_legacy(&st, &sv);
+		break;
+	case RUSTENC_V0:
+		success = rust_demangle_v0(&st, &sv);
+		break;
+	}
 
-		/*
-		 * #including <sys/ctype.h> conflicts with <ctype.h>.  Since
-		 * we want the C locale macros (ISDIGIT, etc), it also means
-		 * we can't use isascii(3C).
-		 */
-		if ((c & 0x80) != 0) {
-			DEMDEBUG("%s: found non-ascii character 0x%02hhx at "
-			    "offset %tu", __func__, c,
-			    (ptrdiff_t)(p.sv_first - svp->sv_first));
-			return (B_FALSE);
-		}
+done:
+	if (success) {
+		out = xstrdup(ops, custr_cstr(st.rs_demangled));
+		if (out == NULL)
+			SET_ERROR(&st);
+	} else {
+		DEMDEBUG("%s: failed, str='%s'", __func__,
+		    custr_cstr(st.rs_demangled));
+		if (st.rs_error == 0)
+			st.rs_error = EINVAL;
 	}
-	return (B_TRUE);
-}
 
-static void *
-rustdem_alloc(custr_alloc_t *cao, size_t len)
-{
-	rustdem_state_t *st = cao->cua_arg;
-	return (zalloc(st->rds_ops, len));
-}
+	e = st.rs_error;
+	rust_fini_state(&st);
+	if (e > 0)
+		errno = e;
 
-static void
-rustdem_free(custr_alloc_t *cao, void *p, size_t len)
-{
-	rustdem_state_t *st = cao->cua_arg;
-	xfree(st->rds_ops, p, len);
+	return (out);
 }
author	Dan McDonald <danmcd@joyent.com>	2021-05-14 12:00:48 -0400
committer	Dan McDonald <danmcd@joyent.com>	2021-05-14 12:00:48 -0400
commit	1a25930b922d3fede4a252f4f2e0ecb8de2656cb (patch)
tree	a2ff441f85489691a94f13a644cb7cfe10b81f9e /usr/src/lib/libdemangle/common/rust.c
parent	3aa01401155d92a38a0d4e107043c130432e4a43 (diff)
parent	1cd083931cfd3fb8617c1178f62bce417cfa6af2 (diff)
download	illumos-joyent-1a25930b922d3fede4a252f4f2e0ecb8de2656cb.tar.gz