[illumos-gate merge]

commit 1cd083931cfd3fb8617c1178f62bce417cfa6af2 13780 Add support for rust v0 mangling format commit 4fe48c6ec9f06cbcce19c4cf97f662b64efde582 13798 loader: Update the EFI timer to be called once a second commit 9e3493cb8a0cfe96c9aef9b7da42c6c9b5c24b43 13374 Port L2ARC Improvements from OpenZFS
author: Dan McDonald <danmcd@joyent.com> 2021-05-14 12:00:48 -0400
committer: Dan McDonald <danmcd@joyent.com> 2021-05-14 12:00:48 -0400
commit: 1a25930b922d3fede4a252f4f2e0ecb8de2656cb (patch)
tree: a2ff441f85489691a94f13a644cb7cfe10b81f9e /usr/src/lib/libdemangle/common/rust-legacy.c
parent: 3aa01401155d92a38a0d4e107043c130432e4a43 (diff)
parent: 1cd083931cfd3fb8617c1178f62bce417cfa6af2 (diff)
download: illumos-joyent-1a25930b922d3fede4a252f4f2e0ecb8de2656cb.tar.gz
1 files changed, 386 insertions, 0 deletions
diff --git a/usr/src/lib/libdemangle/common/rust-legacy.c b/usr/src/lib/libdemangle/common/rust-legacy.c
new file mode 100644
index 0000000000..5b1518f619
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/rust-legacy.c
@@ -0,0 +1,386 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Jason King
+ */
+
+#include <errno.h>
+#include <libcustr.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "rust.h"
+
+/*
+ * Unfortunately, there is currently no official specification for the legacy
+ * rust name mangling.  This is an attempt to document the understanding of the
+ * mangling used here.  It is based off examination of
+ *     https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/
+ *
+ * A mangled rust name is:
+ *     <prefix> <name>
+ *
+ * <prefix>	::=	_Z
+ *			__Z
+ *
+ * <name>	::= N <name-segment>+ [<hash>] E
+ *
+ * <name-segment> ::= <len> <name-chars>{len}
+ *
+ * <len>	::= [1-9][0-9]*
+ *
+ * <name-chars>	::=	<[A-Za-z]> <[A-Za-z0-9]>*
+ *			<separator>
+ *			<special>
+ *
+ * <separator>	::=	'..'	# '::'
+ *
+ * <special>	::=	$SP$	# '@'
+ *			$BP$	# '*'
+ *			$RF$	# '&'
+ *			$LT$	# '<'
+ *			$GT$	# '>'
+ *			$LP$	# '('
+ *			$RP$	# ')'
+ *			$C$	# ','
+ *
+ * <hash>	:= <len> h <hex-digits>+
+ *
+ * <hex-digits>	:= <[0-9a-f]>
+ */
+
+/*
+ * Escape sequences that may appear inside a legacy mangled name segment,
+ * each paired with the single character emitted in its place.
+ */
+static const struct rust_charmap {
+	const char	*ruc_seq;	/* sequence as it appears when mangled */
+	char		ruc_ch;		/* character it is replaced with */
+} rust_charmap[] = {
+	{ .ruc_seq = "$SP$", .ruc_ch = '@' },
+	{ .ruc_seq = "$BP$", .ruc_ch = '*' },
+	{ .ruc_seq = "$RF$", .ruc_ch = '&' },
+	{ .ruc_seq = "$LT$", .ruc_ch = '<' },
+	{ .ruc_seq = "$GT$", .ruc_ch = '>' },
+	{ .ruc_seq = "$LP$", .ruc_ch = '(' },
+	{ .ruc_seq = "$RP$", .ruc_ch = ')' },
+	{ .ruc_seq = "$C$", .ruc_ch = ',' },
+};
+static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap);
+
+/* Helpers defined later in this file */
+static boolean_t rustleg_valid_sym(const strview_t *sv);
+static boolean_t rustleg_parse_name(rust_state_t *st, strview_t *svp);
+static boolean_t rustleg_parse_hash(rust_state_t *st, strview_t *svp);
+static boolean_t rustleg_parse_special(rust_state_t *st, strview_t *svp);
+static boolean_t rustleg_add_sep(rust_state_t *st);
+
+/*
+ * Demangle a legacy-format rust symbol held in sv, accumulating the result
+ * in st.  The symbol must contain no bytes with the high bit set, must end
+ * in 'E', and must be consumed in its entirety by the name parser; any of
+ * those checks failing sets st->rs_error to EINVAL.  Returns B_TRUE on
+ * success and B_FALSE otherwise.
+ */
+boolean_t
+rust_demangle_legacy(rust_state_t *restrict st, strview_t *restrict sv)
+{
+	/* Make sure the whole thing contains valid characters */
+	if (!rustleg_valid_sym(sv))
+		goto invalid;
+
+	if (sv_peek(sv, -1) != 'E') {
+		DEMDEBUG("ERROR: string does not end with 'E'");
+		goto invalid;
+	}
+
+	if (!rustleg_parse_name(st, sv))
+		return (B_FALSE);
+
+	/* Nothing may follow the name */
+	if (sv_remaining(sv) == 0)
+		return (B_TRUE);
+
+	DEMDEBUG("ERROR: trailing characters in name");
+
+invalid:
+	st->rs_error = EINVAL;
+	return (B_FALSE);
+}
+
+/*
+ * Parse one <len><name-chars> segment from the front of svp and append its
+ * demangled form to the output held in st.  Unless 'first' is set, a "::"
+ * separator is emitted ahead of the segment.  Within the segment, '..'
+ * becomes "::", recognized '$...$' sequences are replaced by the character
+ * they encode, and everything else is copied through unchanged.  If the
+ * segment is the last one before the terminating 'E' and starts with 'h',
+ * it is first tried as a hash.
+ *
+ * On success svp is advanced past the segment and B_TRUE is returned.
+ * B_FALSE is returned on any failure.
+ */
+static boolean_t
+rustleg_parse_name_segment(rust_state_t *st, strview_t *svp, boolean_t first)
+{
+	strview_t orig;
+	strview_t name;
+	uint64_t len;
+	size_t rem;
+	boolean_t last = B_FALSE;
+
+	if (HAS_ERROR(st) || sv_remaining(svp) == 0)
+		return (B_FALSE);
+
+	/* Remember where we started for the debug output at the end */
+	sv_init_sv(&orig, svp);
+
+	if (!rust_parse_base10(st, svp, &len)) {
+		DEMDEBUG("ERROR: no leading length");
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	rem = sv_remaining(svp);
+
+	if (rem < len) {
+		DEMDEBUG("ERROR: segment length (%" PRIu64 ") > remaining "
+		    "bytes in string (%zu)", len, rem);
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/* Is this the last segment before the terminating E? */
+	if (rem == len + 1) {
+		VERIFY3U(sv_peek(svp, -1), ==, 'E');
+		last = B_TRUE;
+	}
+
+	if (!first && !rustleg_add_sep(st))
+		return (B_FALSE);
+
+	/* Reduce length of seg to the length we parsed */
+	(void) sv_init_sv_range(&name, svp, len);
+
+	DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name));
+
+	/*
+	 * A rust hash starts with 'h', and is the last component of a name
+	 * before the terminating 'E'. It is however not always present
+	 * in every mangled symbol, and a last segment that starts with 'h'
+	 * could be confused for it, so failing to parse it just means
+	 * we don't have a trailing hash.
+	 */
+	if (sv_peek(&name, 0) == 'h' && last) {
+		strview_t hash;
+
+		/*
+		 * Try the hash on a copy of the segment so that a failed
+		 * attempt cannot eat characters that still need to be
+		 * handled as an ordinary name below.
+		 */
+		sv_init_sv(&hash, &name);
+		if (rustleg_parse_hash(st, &hash))
+			goto done;
+
+		/*
+		 * However any error other than 'not a hash' (e.g. ENOMEM)
+		 * means we should fail.
+		 */
+		if (st->rs_error != 0)
+			return (B_FALSE);
+	}
+
+	/* A '_' followed by $ is ignored at the start of a name segment */
+	if (sv_peek(&name, 0) == '_' && sv_peek(&name, 1) == '$')
+		(void) sv_consume_n(&name, 1);
+
+	while (sv_remaining(&name) > 0) {
+		switch (sv_peek(&name, 0)) {
+		case '$':
+			/* An unrecognized '$' sequence is copied literally */
+			if (rustleg_parse_special(st, &name))
+				continue;
+			break;
+		case '.':
+			/* Convert '..' to '::' */
+			if (sv_peek(&name, 1) != '.')
+				break;
+
+			if (!rustleg_add_sep(st))
+				return (B_FALSE);
+
+			sv_consume_n(&name, 2);
+			continue;
+		default:
+			break;
+		}
+
+		if (!rust_appendc(st, sv_consume_c(&name))) {
+			SET_ERROR(st);
+			return (B_FALSE);
+		}
+	}
+
+done:
+	/* 'name' was a separate view; now advance the caller's view */
+	sv_consume_n(svp, len);
+
+	VERIFY3P(orig.sv_first, <=, svp->sv_first);
+	DEMDEBUG("%s: consumed '%.*s'", __func__,
+	    (int)(uintptr_t)(svp->sv_first - orig.sv_first), orig.sv_first);
+	return (B_TRUE);
+}
+
+/*
+ * Parse N (<num><name>{num})+ [<num>h<hex digits>] E
+ *
+ * Consumes the leading 'N', every name segment up to the terminating 'E',
+ * and the 'E' itself from svp, appending the demangled text to the output
+ * in st.  Returns B_TRUE on success.  On malformed input B_FALSE is
+ * returned and st->rs_error is set to EINVAL, matching how the other
+ * parsing routines in this file report invalid symbols.
+ */
+static boolean_t
+rustleg_parse_name(rust_state_t *st, strview_t *svp)
+{
+	strview_t name;
+	boolean_t first = B_TRUE;
+
+	/* Remember where we started for the debug output at the end */
+	sv_init_sv(&name, svp);
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name));
+
+	if (sv_remaining(svp) == 0) {
+		DEMDEBUG("%s: empty name", __func__);
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	if (!sv_consume_if_c(svp, 'N')) {
+		DEMDEBUG("%s: does not start with 'N'", __func__);
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/* Only the first segment is emitted without a leading "::" */
+	while (sv_remaining(svp) > 0 && sv_peek(svp, 0) != 'E') {
+		if (!rustleg_parse_name_segment(st, svp, first))
+			return (B_FALSE);
+		first = B_FALSE;
+	}
+
+	if (!sv_consume_if_c(svp, 'E')) {
+		DEMDEBUG("%s: ERROR no terminating 'E'", __func__);
+		st->rs_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	VERIFY3P(name.sv_first, <=, svp->sv_first);
+	DEMDEBUG("%s: consumed '%.*s'", __func__,
+	    (int)(uintptr_t)(svp->sv_first - name.sv_first), name.sv_first);
+
+	return (B_TRUE);
+}
+
+/*
+ * Try to parse the contents of svp as a hash: an 'h' followed only by
+ * lower-case hex digits.  The caller must have already established that
+ * svp starts with 'h'.
+ *
+ * The whole view is validated before anything is emitted, so when the
+ * contents turn out not to be a hash, B_FALSE is returned with both svp and
+ * the output in st left untouched and the caller can go on to treat the
+ * text as an ordinary name.  On success the hash (including the 'h') is
+ * appended to the output, svp is fully consumed, and B_TRUE is returned.
+ * B_FALSE is also returned if st already has an error or appending fails.
+ */
+static boolean_t
+rustleg_parse_hash(rust_state_t *st, strview_t *svp)
+{
+	strview_t scan;
+
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	/* First pass: validate using a scratch copy of the view */
+	sv_init_sv(&scan, svp);
+	VERIFY(sv_consume_if_c(&scan, 'h'));
+
+	while (sv_remaining(&scan) > 0) {
+		switch (sv_consume_c(&scan)) {
+		/*
+		 * The upper-case hex digits (A-F) are excluded as valid
+		 * hash values for several reasons:
+		 *
+		 * 1. It would result in two different possible names for
+		 * the same function, leading to ambiguity in linking (among
+		 * other things).
+		 *
+		 * 2. It would cause potential ambiguity in parsing -- is a
+		 * trailing 'E' part of the hash, or the terminating character
+		 * in the mangled name?
+		 *
+		 * 3. No examples were able to be found in the wild where
+		 * uppercase digits are used, and other rust demanglers all
+		 * seem to assume the hash must contain lower-case hex digits.
+		 */
+		case '0': case '1': case '2': case '3':
+		case '4': case '5': case '6': case '7':
+		case '8': case '9': case 'a': case 'b':
+		case 'c': case 'd': case 'e': case 'f':
+			break;
+		default:
+			/* Not a hash; nothing has been emitted or consumed */
+			return (B_FALSE);
+		}
+	}
+
+	/* Second pass: the text is a valid hash, so copy it out verbatim */
+	while (sv_remaining(svp) > 0) {
+		if (!rust_appendc(st, sv_consume_c(svp)))
+			return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Try to parse a '$'-introduced escape at the front of svp.  Two forms are
+ * recognized: the fixed sequences listed in rust_charmap, and $uXXXX$,
+ * where XXXX is one to four lower-case hex digits giving a code point that
+ * is appended to the output as UTF-8.
+ *
+ * On success the escape is consumed from svp, its replacement is appended
+ * to the output in st, and B_TRUE is returned.  If svp does not start with
+ * a well-formed $uXXXX$ escape (no digits, too many digits, a non-hex
+ * character, or no closing '$'), B_FALSE is returned with svp unchanged so
+ * the caller can copy the '$' through literally.  B_FALSE is also returned
+ * if st already has an error or if appending to the output fails.
+ */
+static boolean_t
+rustleg_parse_special(rust_state_t *restrict st, strview_t *restrict svp)
+{
+	if (HAS_ERROR(st))
+		return (B_FALSE);
+
+	if (sv_peek(svp, 0) != '$')
+		return (B_FALSE);
+
+	for (size_t i = 0; i < rust_charmap_sz; i++) {
+		if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) {
+			if (!rust_appendc(st, rust_charmap[i].ruc_ch))
+				return (B_FALSE);
+			return (B_TRUE);
+		}
+	}
+
+	/* Handle $uXXXX$ */
+
+	strview_t sv;
+	uint32_t val = 0;
+	uint_t ndigits = 0;
+	boolean_t terminated = B_FALSE;
+
+	/* Scan a copy; svp is only advanced once the escape is accepted */
+	sv_init_sv(&sv, svp);
+
+	/* We peeked at this earlier, so it should still be there */
+	VERIFY(sv_consume_if_c(&sv, '$'));
+
+	if (!sv_consume_if_c(&sv, 'u'))
+		return (B_FALSE);
+
+	while (sv_remaining(&sv) > 0) {
+		uint32_t cval = 0;
+		char c = sv_consume_c(&sv);
+
+		/*
+		 * Look for the terminator before enforcing the digit limit,
+		 * otherwise a full four digit escape could never be closed.
+		 */
+		if (c == '$') {
+			terminated = B_TRUE;
+			break;
+		}
+
+		if (c >= '0' && c <= '9')
+			cval = c - '0';
+		else if (c >= 'a' && c <= 'f')
+			cval = c - 'a' + 10;
+		else
+			return (B_FALSE);
+
+		/* At most four hex digits are accepted */
+		if (ndigits == 4)
+			return (B_FALSE);
+
+		val <<= 4;
+		val |= cval;
+		ndigits++;
+	}
+
+	/*
+	 * Running out of input before the closing '$', or finding no digits
+	 * at all, means this was not an escape.
+	 */
+	if (!terminated || ndigits == 0)
+		return (B_FALSE);
+
+	if (!rust_append_utf8_c(st, val))
+		return (B_FALSE);
+
+	/* '$' + 'u' + digits + '$' */
+	sv_consume_n(svp, ndigits + 3);
+	return (B_TRUE);
+}
+
+/*
+ * Append the "::" path separator to the output in st.  Nothing is appended
+ * and B_FALSE is returned if st already has an error; otherwise the result
+ * of the append is returned.
+ */
+static boolean_t
+rustleg_add_sep(rust_state_t *st)
+{
+	return (HAS_ERROR(st) ? B_FALSE : rust_append(st, "::"));
+}
+
+/*
+ * Check that every byte in sv has its high bit clear.  Returns B_FALSE at
+ * the first byte that does not, and B_TRUE if no such byte is found.
+ */
+static boolean_t
+rustleg_valid_sym(const strview_t *sv)
+{
+	for (size_t idx = 0; idx < sv->sv_rem; idx++) {
+		char c = sv->sv_first[idx];
+
+		if ((c & 0x80) != 0) {
+			DEMDEBUG("%s: ERROR found 8-bit character '%c' in '%.*s' "
+			    "at index %zu", __func__, c, SV_PRINT(sv), idx);
+			return (B_FALSE);
+		}
+	}
+
+	return (B_TRUE);
+}
author	Dan McDonald <danmcd@joyent.com>	2021-05-14 12:00:48 -0400
committer	Dan McDonald <danmcd@joyent.com>	2021-05-14 12:00:48 -0400
commit	1a25930b922d3fede4a252f4f2e0ecb8de2656cb (patch)
tree	a2ff441f85489691a94f13a644cb7cfe10b81f9e /usr/src/lib/libdemangle/common/rust-legacy.c
parent	3aa01401155d92a38a0d4e107043c130432e4a43 (diff)
parent	1cd083931cfd3fb8617c1178f62bce417cfa6af2 (diff)
download	illumos-joyent-1a25930b922d3fede4a252f4f2e0ecb8de2656cb.tar.gz