summaryrefslogtreecommitdiff
path: root/usr/src/lib
diff options
context:
space:
mode:
authorJason King <jason.king@joyent.com>2019-01-03 18:11:04 +0000
committerJason King <jason.king@joyent.com>2019-03-27 17:34:29 +0000
commit6cf64ca03e24fc61dedf0e3705acd6716ce4145a (patch)
treea49834aa509a618bb7cb188fe8b0c711276656e4 /usr/src/lib
parent48514b9d3e7aeee085e365e879a73a9e4873323d (diff)
downloadillumos-joyent-6cf64ca03e24fc61dedf0e3705acd6716ce4145a.tar.gz
OS-7478 Add rust demangling supportrelease-20190328
Reviewed by: Robert Mustacchi <rm@joyent.com> Approved by: Cody Peter Mello <cody.mello@joyent.com>
Diffstat (limited to 'usr/src/lib')
-rw-r--r--usr/src/lib/Makefile2
-rw-r--r--usr/src/lib/libdemangle/Makefile.com4
-rw-r--r--usr/src/lib/libdemangle/common/cxx.c3
-rw-r--r--usr/src/lib/libdemangle/common/demangle-sys.h4
-rw-r--r--usr/src/lib/libdemangle/common/demangle.c130
-rw-r--r--usr/src/lib/libdemangle/common/demangle_int.h15
-rw-r--r--usr/src/lib/libdemangle/common/rust.c543
-rw-r--r--usr/src/lib/libdemangle/common/strview.c107
-rw-r--r--usr/src/lib/libdemangle/common/strview.h140
-rw-r--r--usr/src/lib/libdemangle/common/util.c15
10 files changed, 934 insertions, 29 deletions
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index c0c5625ad5..d271ff0211 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -22,7 +22,7 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012 by Delphix. All rights reserved.
-# Copyright 2018, Joyent, Inc.
+# Copyright 2019, Joyent, Inc.
# Copyright (c) 2013 Gary Mills
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
# Copyright (c) 2015 Gary Mills
diff --git a/usr/src/lib/libdemangle/Makefile.com b/usr/src/lib/libdemangle/Makefile.com
index 0b0d495df7..7eba05ce1c 100644
--- a/usr/src/lib/libdemangle/Makefile.com
+++ b/usr/src/lib/libdemangle/Makefile.com
@@ -16,12 +16,12 @@
LIBRARY = libdemangle-sys.a
VERS = .1
-OBJECTS = str.o util.o cxx_util.o cxx.o demangle.o
+OBJECTS = str.o strview.o util.o cxx_util.o cxx.o demangle.o rust.o
include ../../Makefile.lib
LIBS = $(DYNLIB) $(LINTLIB)
-LDLIBS += -lc
+LDLIBS += -lc -lcustr
SRCDIR = ../common
$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
diff --git a/usr/src/lib/libdemangle/common/cxx.c b/usr/src/lib/libdemangle/common/cxx.c
index e3b4c06a8a..af5f549f78 100644
--- a/usr/src/lib/libdemangle/common/cxx.c
+++ b/usr/src/lib/libdemangle/common/cxx.c
@@ -165,11 +165,10 @@ static const char *parse_vector_type(const char *, const char *, cpp_db_t *);
size_t cpp_name_max_depth = 1024; /* max depth of name stack */
char *
-cpp_demangle(const char *src, sysdem_ops_t *ops)
+cpp_demangle(const char *src, size_t srclen, sysdem_ops_t *ops)
{
char *result = NULL;
cpp_db_t db;
- size_t srclen = strlen(src);
if (!db_init(&db, ops))
goto done;
diff --git a/usr/src/lib/libdemangle/common/demangle-sys.h b/usr/src/lib/libdemangle/common/demangle-sys.h
index 02636c9521..05776ee5ee 100644
--- a/usr/src/lib/libdemangle/common/demangle-sys.h
+++ b/usr/src/lib/libdemangle/common/demangle-sys.h
@@ -11,6 +11,7 @@
/*
* Copyright 2017 Jason King
+ * Copyright 2018, Joyent, Inc.
*/
#ifndef _DEMANGLE_SYS_H
@@ -24,7 +25,8 @@ extern "C" {
typedef enum sysdem_lang_e {
SYSDEM_LANG_AUTO,
- SYSDEM_LANG_CPP
+ SYSDEM_LANG_CPP,
+ SYSDEM_LANG_RUST
} sysdem_lang_t;
typedef struct sysdem_alloc_s {
diff --git a/usr/src/lib/libdemangle/common/demangle.c b/usr/src/lib/libdemangle/common/demangle.c
index e827fd8cec..4f8e9ad678 100644
--- a/usr/src/lib/libdemangle/common/demangle.c
+++ b/usr/src/lib/libdemangle/common/demangle.c
@@ -11,13 +11,17 @@
/*
* Copyright 2018 Jason King
+ * Copyright 2019, Joyent, Inc.
*/
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
+#include <sys/ctype.h>
#include <sys/debug.h>
+#include <stdarg.h>
#include "demangle-sys.h"
#include "demangle_int.h"
@@ -25,31 +29,63 @@
static pthread_once_t debug_once = PTHREAD_ONCE_INIT;
volatile boolean_t demangle_debug;
+FILE *debugf = stderr;
+
+/*
+ * Map a sysdem_lang_t value to a short printable name for the DEMDEBUG
+ * output.  Any value that is not one of the known enumerators yields
+ * "invalid".
+ */
+static const char *
+langstr(sysdem_lang_t lang)
+{
+	const char *name = "invalid";
+
+	if (lang == SYSDEM_LANG_AUTO)
+		name = "auto";
+	else if (lang == SYSDEM_LANG_CPP)
+		name = "c++";
+	else if (lang == SYSDEM_LANG_RUST)
+		name = "rust";
+
+	return (name);
+}
static sysdem_lang_t
-detect_lang(const char *str)
+detect_lang(const char *str, size_t n)
{
- size_t n = strlen(str);
+ const char *p = str;
+ size_t len;
if (n < 3 || str[0] != '_')
return (SYSDEM_LANG_AUTO);
- switch (str[1]) {
- case 'Z':
+ /*
+ * Check for ^_Z or ^__Z
+ */
+ p = str + 1;
+ if (*p == '_') {
+ p++;
+ }
+
+ if (*p != 'Z')
+ return (SYSDEM_LANG_AUTO);
+
+ /*
+ * Sadly, rust currently uses the same prefix as C++, however
+ * demangling rust as a C++ mangled name yields less than desirable
+ * results. However rust names end with a hash. We use that to
+ * attempt to disambiguate
+ */
+
+ /* Find 'h'<hexdigit>+E$ */
+ if ((p = strrchr(p, 'h')) == NULL)
return (SYSDEM_LANG_CPP);
- case '_':
- break;
+ if ((len = strspn(p + 1, "0123456789abcdef")) == 0)
+ return (SYSDEM_LANG_CPP);
- default:
- return (SYSDEM_LANG_AUTO);
- }
+ p += len + 1;
- /* why they use ___Z sometimes is puzzling... *sigh* */
- if (str[2] == '_' && str[3] == 'Z')
+ if (p[0] != 'E' || p[1] != '\0')
return (SYSDEM_LANG_CPP);
- return (SYSDEM_LANG_AUTO);
+ return (SYSDEM_LANG_RUST);
}
static void
@@ -62,26 +98,76 @@ check_debug(void)
char *
sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops)
{
+ /*
+ * While the language specific demangler code can handle non-NUL
+ * terminated strings, we currently don't expose this to consumers.
+ * Consumers should still pass in a NUL-terminated string.
+ */
+ size_t slen;
+
VERIFY0(pthread_once(&debug_once, check_debug));
+ DEMDEBUG("name = '%s'", (str == NULL) ? "(NULL)" : str);
+ DEMDEBUG("lang = %s (%d)", langstr(lang), lang);
+
+ if (str == NULL) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ slen = strlen(str);
+
+ switch (lang) {
+ case SYSDEM_LANG_AUTO:
+ case SYSDEM_LANG_CPP:
+ case SYSDEM_LANG_RUST:
+ break;
+ default:
+ errno = EINVAL;
+ return (NULL);
+ }
+
if (ops == NULL)
ops = sysdem_ops_default;
if (lang == SYSDEM_LANG_AUTO) {
- lang = detect_lang(str);
- if (lang == SYSDEM_LANG_AUTO) {
- errno = ENOTSUP;
- return (NULL);
- }
+ lang = detect_lang(str, slen);
+ if (lang != SYSDEM_LANG_AUTO)
+ DEMDEBUG("detected language is %s", langstr(lang));
}
switch (lang) {
- case SYSDEM_LANG_AUTO:
- break;
case SYSDEM_LANG_CPP:
- return (cpp_demangle(str, ops));
+ return (cpp_demangle(str, slen, ops));
+ case SYSDEM_LANG_RUST:
+ return (rust_demangle(str, slen, ops));
+ case SYSDEM_LANG_AUTO:
+ DEMDEBUG("could not detect language");
+ errno = ENOTSUP;
+ return (NULL);
+ default:
+ /*
+ * This can't happen unless there's a bug with detect_lang,
+ * but gcc doesn't know that.
+ */
+ errno = EINVAL;
+ return (NULL);
}
+}
- errno = ENOTSUP;
- return (NULL);
+/*
+ * Emit one debug line ("LIBDEMANGLE: " + formatted message + newline) to
+ * debugf.  The stream is locked for the duration so the pieces of a line
+ * are not interleaved with other writers.  Always returns 0; the return
+ * value exists only so the call can be used in the DEMDEBUG() expression.
+ */
+int
+demdebug(const char *fmt, ...)
+{
+	va_list args;
+
+	flockfile(debugf);
+	(void) fprintf(debugf, "LIBDEMANGLE: ");
+
+	va_start(args, fmt);
+	(void) vfprintf(debugf, fmt, args);
+	va_end(args);
+
+	(void) fputc('\n', debugf);
+	(void) fflush(debugf);
+	funlockfile(debugf);
+
+	return (0);
+}
diff --git a/usr/src/lib/libdemangle/common/demangle_int.h b/usr/src/lib/libdemangle/common/demangle_int.h
index 9abb2cc295..66a34cf41d 100644
--- a/usr/src/lib/libdemangle/common/demangle_int.h
+++ b/usr/src/lib/libdemangle/common/demangle_int.h
@@ -11,6 +11,7 @@
/*
* Copyright 2017 Jason King
+ * Copyright 2019, Joyent, Inc.
*/
#ifndef _DEMANGLE_INT_H
#define _DEMANGLE_INT_H
@@ -24,14 +25,26 @@ extern "C" {
extern sysdem_ops_t *sysdem_ops_default;
-char *cpp_demangle(const char *, sysdem_ops_t *);
+char *cpp_demangle(const char *, size_t, sysdem_ops_t *);
+char *rust_demangle(const char *, size_t, sysdem_ops_t *);
void *zalloc(sysdem_ops_t *, size_t);
void *xrealloc(sysdem_ops_t *, void *, size_t, size_t);
void xfree(sysdem_ops_t *, void *, size_t);
+char *xstrdup(sysdem_ops_t *, const char *);
extern volatile boolean_t demangle_debug;
+/*
+ * gcc seems to get unhappy with the ASSERT() style definition (also borrowed
+ * for the DEMDEBUG macro) unless demdebug() returns a non-void value
+ * (despite the return value never being used).
+ */
+int demdebug(const char *, ...);
+
+#define DEMDEBUG(s, ...) \
+ ((void)(demangle_debug && demdebug(s, ## __VA_ARGS__)))
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c
new file mode 100644
index 0000000000..f99fe79a10
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/rust.c
@@ -0,0 +1,543 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+#include <errno.h>
+#include <libcustr.h>
+#include <limits.h>
+#include <string.h>
+#include <sys/ctype.h> /* We want the C locale ISXXX() versions */
+#include <sys/debug.h>
+#include <stdio.h>
+#include <sys/sysmacros.h>
+
+#include "strview.h"
+#include "demangle_int.h"
+
+/*
+ * Unfortunately, there is currently no official specification for the rust
+ * name mangling. This is an attempt to document the understanding of the
+ * mangling used here. It is based off examination of
+ * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/
+ *
+ * A mangled rust name is:
+ * <prefix> <name> <hash> E
+ *
+ * <prefix> ::= _ZN
+ * __ZN
+ *
+ * <name> ::= <name-segment>+
+ *
+ * <name-segment> ::= <len> <name-chars>{len}
+ *
+ * <len> ::= [1-9][0-9]*
+ *
+ * <name-chars> ::= <[A-Za-z]> <[A-Za-z0-9]>*
+ * <separator>
+ * <special>
+ *
+ * <separator> ::= '..' # '::'
+ *
+ * <special> ::= $SP$ # '@'
+ * $BP$ # '*'
+ * $RF$ # '&'
+ * $LT$ # '<'
+ * $GT$ # '>'
+ * $LP$ # '('
+ * $RP$ # ')'
+ * $C$ # ','
+ * $u7e$ # '~'
+ * $u20$ # ' '
+ * $u27$ # '\''
+ * $u3d$ # '='
+ * $u5b$ # '['
+ * $u5d$ # ']'
+ * $u7b$ # '{'
+ * $u7d$ # '}'
+ * $u3b$ # ';'
+ * $u2b$ # '+'
+ * $u22$ # '"'
+ *
+ * <hash> := <len> h <hex-digits>+
+ *
+ * <hex-digits> := <[0-9a-f]>
+ */
+
+typedef struct rustdem_state {
+ const char *rds_str; /* mangled input as passed to rust_demangle() */
+ custr_t *rds_demangled; /* demangled output accumulated so far */
+ sysdem_ops_t *rds_ops; /* allocator ops used by rustdem_alloc()/rustdem_free() */
+ int rds_error; /* 0, or the errno value to report on failure */
+} rustdem_state_t;
+
+static const struct rust_charmap { /* escape sequence -> replacement character */
+ const char *ruc_seq; /* "$...$" sequence as it appears in a mangled name */
+ char ruc_ch; /* character appended to the output in its place */
+} rust_charmap[] = {
+ { "$SP$", '@' },
+ { "$BP$", '*' },
+ { "$RF$", '&' },
+ { "$LT$", '<' },
+ { "$GT$", '>' },
+ { "$LP$", '(' },
+ { "$RP$", ')' },
+ { "$C$", ',' },
+ { "$u7e$", '~' },
+ { "$u20$", ' ' },
+ { "$u27$", '\'' },
+ { "$u3d$", '=' },
+ { "$u5b$", '[' },
+ { "$u5d$", ']' },
+ { "$u7b$", '{' },
+ { "$u7d$", '}' },
+ { "$u3b$", ';' },
+ { "$u2b$", '+' },
+ { "$u22$", '"' }
+};
+static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap); /* number of entries */
+
+static void *rustdem_alloc(custr_alloc_t *, size_t);
+static void rustdem_free(custr_alloc_t *, void *, size_t);
+
+static boolean_t rustdem_append_c(rustdem_state_t *, char);
+static boolean_t rustdem_all_ascii(const strview_t *);
+
+static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *);
+static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *);
+static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *);
+static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *);
+static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *);
+static boolean_t rustdem_add_sep(rustdem_state_t *);
+
+/*
+ * Demangle the rust symbol name in s[0 .. slen).
+ *
+ * On success a newly allocated (via xstrdup() and the supplied ops)
+ * NUL-terminated string is returned.  On failure NULL is returned and errno
+ * is set; malformed input is always reported as a non-zero errno (EINVAL
+ * unless a more specific error was recorded while parsing).
+ */
+char *
+rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops)
+{
+	rustdem_state_t st = {
+		.rds_str = s,
+		.rds_ops = ops,
+	};
+	custr_alloc_ops_t custr_ops = {
+		.custr_ao_alloc = rustdem_alloc,
+		.custr_ao_free = rustdem_free
+	};
+	custr_alloc_t custr_alloc = {
+		.cua_version = CUSTR_VERSION
+	};
+	strview_t sv;
+	char *res;
+
+	if (custr_alloc_init(&custr_alloc, &custr_ops) != 0)
+		return (NULL);
+	custr_alloc.cua_arg = &st;
+
+	sv_init_str(&sv, s, s + slen);
+
+	if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') {
+		DEMDEBUG("ERROR: string is either too small or does not end "
+		    "with 'E'");
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	if (!rustdem_parse_prefix(&st, &sv)) {
+		DEMDEBUG("ERROR: could not parse prefix");
+		errno = EINVAL;
+		return (NULL);
+	}
+	DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv));
+
+	if (!rustdem_all_ascii(&sv)) {
+		/* rustdem_all_ascii() provides debug output */
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	if (custr_xalloc(&st.rds_demangled, &custr_alloc) != 0)
+		return (NULL);
+
+	while (sv_remaining(&sv) > 1) {
+		size_t before = sv_remaining(&sv);
+
+		if (rustdem_parse_name(&st, &sv) &&
+		    sv_remaining(&sv) < before)
+			continue;
+
+		/*
+		 * Either parsing failed, or it "succeeded" without consuming
+		 * any input (e.g. an 'E' that is not the final character).
+		 * Retrying cannot make progress, so treat both as malformed
+		 * input instead of looping again.
+		 */
+		if (st.rds_error == 0) {
+			DEMDEBUG("ERROR: unable to parse '%.*s'",
+			    SV_PRINT(&sv));
+			st.rds_error = EINVAL;
+		}
+		goto fail;
+	}
+
+	if (st.rds_error != 0)
+		goto fail;
+
+	if (!sv_consume_if_c(&sv, 'E')) {
+		/* Make sure the caller never sees NULL with errno == 0 */
+		DEMDEBUG("ERROR: missing terminating 'E'");
+		st.rds_error = EINVAL;
+		goto fail;
+	}
+
+	res = xstrdup(ops, custr_cstr(st.rds_demangled));
+	if (res == NULL) {
+		st.rds_error = (errno != 0) ? errno : ENOMEM;
+		goto fail;
+	}
+
+	custr_free(st.rds_demangled);
+	DEMDEBUG("result = '%s'", res);
+	return (res);
+
+fail:
+	custr_free(st.rds_demangled);
+	errno = st.rds_error;
+	return (NULL);
+}
+
+/*
+ * Consume the leading '_ZN' or '__ZN' from svp.  svp is only advanced when
+ * the whole prefix matched; otherwise it is left untouched and B_FALSE is
+ * returned.
+ */
+static boolean_t
+rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp)
+{
+	strview_t cur;
+
+	/* Work on a copy so a partial match does not move svp */
+	sv_init_sv(&cur, svp);
+
+	DEMDEBUG("checking for '_ZN' or '__ZN' in '%.*s'", SV_PRINT(&cur));
+
+	if (st->rds_error != 0)
+		return (B_FALSE);
+
+	/* One underscore is mandatory, the second is optional */
+	if (!sv_consume_if_c(&cur, '_'))
+		return (B_FALSE);
+	(void) sv_consume_if_c(&cur, '_');
+
+	if (!sv_consume_if_c(&cur, 'Z'))
+		return (B_FALSE);
+	if (!sv_consume_if_c(&cur, 'N'))
+		return (B_FALSE);
+
+	/* Commit the new position */
+	sv_init_sv(svp, &cur);
+	return (B_TRUE);
+}
+
+/*
+ * Parse one '<len> <name-chars>{len}' segment starting at svp and append its
+ * demangled form to st->rds_demangled.  Unless this is the first segment, a
+ * '::' separator is emitted before it.  The final segment (the one directly
+ * followed by the terminating 'E') is handed to rustdem_parse_hash() when it
+ * starts with 'h'.
+ *
+ * On success svp is advanced past the segment and B_TRUE is returned.  On
+ * failure B_FALSE is returned and svp is not moved.
+ */
+static boolean_t
+rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first)
+{
+	strview_t sv;
+	strview_t name;
+	uint64_t len;
+	size_t rem;
+	boolean_t last = B_FALSE;
+
+	if (st->rds_error != 0 || sv_remaining(svp) == 0)
+		return (B_FALSE);
+
+	sv_init_sv(&sv, svp);
+
+	if (!rustdem_parse_num(st, &sv, &len)) {
+		DEMDEBUG("ERROR: no leading length");
+		st->rds_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	rem = sv_remaining(&sv);
+
+	if (rem < len || len > SIZE_MAX) {
+		st->rds_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/* Is this the last segment before the terminating E? */
+	if (rem == len + 1) {
+		VERIFY3U(sv_peek(&sv, -1), ==, 'E');
+		last = B_TRUE;
+	}
+
+	if (!first && !rustdem_add_sep(st))
+		return (B_FALSE);
+
+	/* Reduce length of seg to the length we parsed */
+	(void) sv_init_sv_range(&name, &sv, len);
+
+	DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name));
+
+	/*
+	 * A rust hash starts with 'h', and is the last component of a name
+	 * before the terminating 'E'
+	 */
+	if (sv_peek(&name, 0) == 'h' && last) {
+		if (!rustdem_parse_hash(st, &name))
+			return (B_FALSE);
+		goto done;
+	}
+
+	while (sv_remaining(&name) > 0) {
+		switch (sv_peek(&name, 0)) {
+		case '$':
+			if (rustdem_parse_special(st, &name))
+				continue;
+			break;
+		case '_':
+			if (sv_peek(&name, 1) == '$') {
+				/*
+				 * Only consume/ignore '_'.  Leave
+				 * $ for next round.
+				 */
+				sv_consume_n(&name, 1);
+				continue;
+			}
+			break;
+		case '.':
+			/* Convert '..' to '::' */
+			if (sv_peek(&name, 1) != '.')
+				break;
+
+			if (!rustdem_add_sep(st))
+				return (B_FALSE);
+
+			sv_consume_n(&name, 2);
+			continue;
+		default:
+			break;
+		}
+
+		/*
+		 * Anything not handled above is copied through verbatim.
+		 * Use the common append helper so the recorded error is the
+		 * one reported by custr_appendc(), as everywhere else.
+		 */
+		if (!rustdem_append_c(st, sv_consume_c(&name)))
+			return (B_FALSE);
+	}
+
+done:
+	/* sv still points at the first byte after the length digits */
+	DEMDEBUG("%s: consumed '%.*s'", __func__, (int)len, sv.sv_first);
+	sv_consume_n(&sv, len);
+	sv_init_sv(svp, &sv);
+	return (B_TRUE);
+}
+
+/*
+ * Parse consecutive name segments starting at svp until either the input is
+ * exhausted or an 'E' is the next character.  svp is advanced only if every
+ * segment parsed successfully.
+ */
+static boolean_t
+rustdem_parse_name(rustdem_state_t *st, strview_t *svp)
+{
+	strview_t cur;
+	boolean_t is_first;
+
+	if (st->rds_error != 0)
+		return (B_FALSE);
+
+	sv_init_sv(&cur, svp);
+	if (sv_remaining(&cur) == 0)
+		return (B_FALSE);
+
+	/* Only the very first segment is emitted without a '::' in front */
+	for (is_first = B_TRUE;
+	    sv_remaining(&cur) > 0 && sv_peek(&cur, 0) != 'E';
+	    is_first = B_FALSE) {
+		if (!rustdem_parse_name_segment(st, &cur, is_first))
+			return (B_FALSE);
+	}
+
+	sv_init_sv(svp, &cur);
+	return (B_TRUE);
+}
+
+/*
+ * Parse a hash segment ('h' followed by lower-case hex digits) from svp and
+ * append it verbatim to the output.  The caller must have verified that the
+ * segment starts with 'h'.
+ *
+ * Returns B_TRUE and advances svp when the whole segment was consumed.  On a
+ * non-hex character B_FALSE is returned and st->rds_error is set to EINVAL,
+ * so the failure is visible to callers that only look at rds_error.
+ */
+static boolean_t
+rustdem_parse_hash(rustdem_state_t *st, strview_t *svp)
+{
+	strview_t sv;
+
+	sv_init_sv(&sv, svp);
+
+	VERIFY(sv_consume_if_c(&sv, 'h'));
+	if (!rustdem_append_c(st, 'h'))
+		return (B_FALSE);
+
+	while (sv_remaining(&sv) > 0) {
+		char c = sv_consume_c(&sv);
+
+		switch (c) {
+		/*
+		 * The upper-case hex digits (A-F) are excluded as valid
+		 * hash values for several reasons:
+		 *
+		 * 1. It would result in two different possible names for
+		 * the same function, leading to ambiguity in linking (among
+		 * other things).
+		 *
+		 * 2. It would cause potential ambiguity in parsing -- is a
+		 * trailing 'E' part of the hash, or the terminating character
+		 * in the mangled name?
+		 *
+		 * 3. No examples were able to be found in the wild where
+		 * uppercase digits are used, and other rust demanglers all
+		 * seem to assume the hash must contain lower-case hex digits.
+		 */
+		case '0': case '1': case '2': case '3':
+		case '4': case '5': case '6': case '7':
+		case '8': case '9': case 'a': case 'b':
+		case 'c': case 'd': case 'e': case 'f':
+			if (!rustdem_append_c(st, c))
+				return (B_FALSE);
+			break;
+		default:
+			DEMDEBUG("%s: ERROR invalid character in hash",
+			    __func__);
+			if (st->rds_error == 0)
+				st->rds_error = EINVAL;
+			return (B_FALSE);
+		}
+	}
+
+	sv_init_sv(svp, &sv);
+	return (B_TRUE);
+}
+
+/*
+ * A 10 digit value would imply a name 1Gb or larger in size.  It seems
+ * unlikely to the point of absurdity any such value could ever possibly
+ * be valid (or even have compiled properly).  This also prevents the
+ * uint64_t conversion from possibly overflowing since the value must always
+ * be below 10 * UINT32_MAX.
+ */
+#define	MAX_DIGITS 10
+
+/*
+ * Parse the unsigned decimal number at the start of svp.  The number must
+ * not start with '0' and may have at most MAX_DIGITS digits.
+ *
+ * On success the value is stored in *valp, svp is advanced past the digits
+ * and B_TRUE is returned.  On failure B_FALSE is returned, st->rds_error is
+ * set (EINVAL for a missing number or leading zero, ERANGE for too many
+ * digits) and svp is not moved.
+ */
+static boolean_t
+rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp,
+    uint64_t *restrict valp)
+{
+	strview_t snum;
+	uint64_t v = 0;
+	size_t ndigits = 0;
+	char c;
+
+	if (st->rds_error != 0)
+		return (B_FALSE);
+
+	sv_init_sv(&snum, svp);
+
+	DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum));
+
+	c = sv_peek(&snum, 0);
+	if (!ISDIGIT(c)) {
+		/* demdebug() terminates the line itself; no '\n' needed */
+		DEMDEBUG("%s: ERROR no digits in str", __func__);
+		st->rds_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/*
+	 * Since there is currently no official specification on rust name
+	 * mangling, only that it has been stated that rust follows what
+	 * C++ mangling does.  In the Itanium C++ ABI (what practically
+	 * every non-Windows C++ implementation uses these days), it
+	 * explicitly disallows leading 0s in numeric values (except for
+	 * substitution and template indexes, which aren't relevant here).
+	 * We enforce the same restriction -- if a rust implementation allowed
+	 * leading zeros in numbers (basically segment lengths) it'd
+	 * cause all sorts of ambiguity problems with names that likely lead
+	 * to much bigger problems with linking and such, so this seems
+	 * reasonable.
+	 */
+	if (c == '0') {
+		DEMDEBUG("%s: ERROR number starts with leading 0", __func__);
+		st->rds_error = EINVAL;
+		return (B_FALSE);
+	}
+
+	/*
+	 * Digits are consumed from the local copy only; one digit beyond
+	 * MAX_DIGITS is accepted here so the overflow can be detected below.
+	 */
+	while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) {
+		c = sv_consume_c(&snum);
+
+		if (!ISDIGIT(c))
+			break;
+
+		v *= 10;
+		v += c - '0';
+		ndigits++;
+	}
+
+	if (ndigits > MAX_DIGITS) {
+		/* uint64_t is not necessarily unsigned long long; cast for %llu */
+		DEMDEBUG("%s: value %llu is too large", __func__,
+		    (unsigned long long)v);
+		st->rds_error = ERANGE;
+		return (B_FALSE);
+	}
+
+	DEMDEBUG("%s: num=%llu", __func__, (unsigned long long)v);
+
+	*valp = v;
+	sv_consume_n(svp, ndigits);
+	return (B_TRUE);
+}
+
+/*
+ * If svp starts with one of the '$...$' sequences listed in rust_charmap,
+ * consume it and append the mapped character to the output.  Returns B_FALSE
+ * (leaving svp untouched) when no table entry matches.
+ */
+static boolean_t
+rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp)
+{
+	size_t idx;
+
+	if (st->rds_error != 0 || sv_peek(svp, 0) != '$')
+		return (B_FALSE);
+
+	for (idx = 0; idx < rust_charmap_sz; idx++) {
+		const struct rust_charmap *ent = &rust_charmap[idx];
+
+		if (!sv_consume_if(svp, ent->ruc_seq))
+			continue;
+
+		/* First match wins; report whether the append worked */
+		return (rustdem_append_c(st, ent->ruc_ch));
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Append the path separator '::' to the output.
+ */
+static boolean_t
+rustdem_add_sep(rustdem_state_t *st)
+{
+	int i;
+
+	if (st->rds_error != 0)
+		return (B_FALSE);
+
+	for (i = 0; i < 2; i++) {
+		if (!rustdem_append_c(st, ':'))
+			return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Append a single character to the output.  Does nothing once an error has
+ * been recorded; if the append itself fails, errno is saved in
+ * st->rds_error.
+ */
+static boolean_t
+rustdem_append_c(rustdem_state_t *st, char c)
+{
+	if (st->rds_error != 0)
+		return (B_FALSE);
+
+	if (custr_appendc(st->rds_demangled, c) != 0) {
+		st->rds_error = errno;
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Return B_TRUE if every byte in svp has its high bit clear.  svp itself is
+ * not modified; the scan runs over a private copy.
+ */
+static boolean_t
+rustdem_all_ascii(const strview_t *svp)
+{
+	strview_t p;
+
+	sv_init_sv(&p, svp);
+
+	while (sv_remaining(&p) > 0) {
+		char c = sv_consume_c(&p);
+
+		/*
+		 * #including <sys/ctype.h> conflicts with <ctype.h>.  Since
+		 * we want the C locale macros (ISDIGIT, etc), it also means
+		 * we can't use isascii(3C).
+		 */
+		if ((c & 0x80) != 0) {
+			/*
+			 * c has already been consumed, so p.sv_first is one
+			 * past the offending byte; subtract 1 to report its
+			 * actual offset.
+			 */
+			DEMDEBUG("%s: found non-ascii character 0x%02hhx at "
+			    "offset %td", __func__, c,
+			    (ptrdiff_t)(p.sv_first - svp->sv_first - 1));
+			return (B_FALSE);
+		}
+	}
+	return (B_TRUE);
+}
+
+/*
+ * custr allocation callback: forward to zalloc() using the ops stored in the
+ * rustdem_state_t that rust_demangle() registered as cua_arg.
+ */
+static void *
+rustdem_alloc(custr_alloc_t *cao, size_t len)
+{
+	rustdem_state_t *state;
+
+	state = cao->cua_arg;
+	return (zalloc(state->rds_ops, len));
+}
+
+/*
+ * custr free callback: forward to xfree() using the ops stored in the
+ * rustdem_state_t that rust_demangle() registered as cua_arg.
+ */
+static void
+rustdem_free(custr_alloc_t *cao, void *p, size_t len)
+{
+	rustdem_state_t *state;
+
+	state = cao->cua_arg;
+	xfree(state->rds_ops, p, len);
+}
diff --git a/usr/src/lib/libdemangle/common/strview.c b/usr/src/lib/libdemangle/common/strview.c
new file mode 100644
index 0000000000..e4576ee17a
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/strview.c
@@ -0,0 +1,107 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+#include <string.h>
+#include <sys/debug.h>
+#include "strview.h"
+
+/* Make sv an exact copy of src (same range, same position). */
+void
+sv_init_sv(strview_t *sv, const strview_t *src)
+{
+	const strview_t copy = *src;
+
+	sv->sv_first = copy.sv_first;
+	sv->sv_last = copy.sv_last;
+	sv->sv_rem = copy.sv_rem;
+}
+
+/*
+ * Make sv a view of the first len bytes of src (starting at src's current
+ * position).  len must not exceed the bytes remaining in src.
+ */
+void
+sv_init_sv_range(strview_t *sv, const strview_t *src, size_t len)
+{
+	const char *start;
+
+	VERIFY3U(sv_remaining(src), >=, len);
+
+	start = src->sv_first;
+	sv->sv_first = start;
+	sv->sv_last = start + len;
+	sv->sv_rem = len;
+}
+
+/*
+ * Initialize sv to cover [first, last).  When last is NULL, first is taken
+ * to be NUL-terminated and the view covers the whole string.
+ */
+void
+sv_init_str(strview_t *sv, const char *first, const char *last)
+{
+	const char *end = last;
+
+	if (end == NULL)
+		end = first + strlen(first);
+
+	VERIFY3P(first, <=, end);
+
+	sv->sv_first = first;
+	sv->sv_last = end;
+	sv->sv_rem = (size_t)(uintptr_t)(end - first);
+}
+
+/* Number of bytes not yet consumed from sv. */
+size_t
+sv_remaining(const strview_t *sv)
+{
+	const size_t left = sv->sv_rem;
+
+	return (left);
+}
+
+/*
+ * Consume one byte from sv if (and only if) it equals c.  Returns B_TRUE
+ * when a byte was consumed.
+ */
+boolean_t
+sv_consume_if_c(strview_t *sv, char c)
+{
+	if (sv->sv_rem >= 1 && *sv->sv_first == c) {
+		sv->sv_first++;
+		sv->sv_rem--;
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Consume strlen(str) bytes from sv if the view starts with the
+ * NUL-terminated string str.  Returns B_TRUE when the bytes were consumed;
+ * otherwise sv is left unchanged.
+ */
+boolean_t
+sv_consume_if(strview_t *sv, const char *str)
+{
+	const size_t want = strlen(str);
+
+	/* Check the length first so strncmp never reads past the view */
+	if (sv->sv_rem >= want && strncmp(sv->sv_first, str, want) == 0) {
+		sv->sv_first += want;
+		sv->sv_rem -= want;
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Return the byte at position n without consuming anything.  n >= 0 is
+ * relative to the current position (0 is the next byte); n < 0 is relative
+ * to the end of the view (-1 is the last byte).  '\0' is returned when n is
+ * out of range.
+ *
+ * The range check is done on the index against sv_rem (which every strview
+ * routine keeps equal to sv_last - sv_first) before any pointer is formed,
+ * so an out-of-range n never produces a pointer outside the view.
+ */
+char
+sv_peek(const strview_t *sv, ssize_t n)
+{
+	size_t back;
+
+	if (n >= 0) {
+		if ((size_t)n >= sv->sv_rem)
+			return ('\0');
+		return (sv->sv_first[n]);
+	}
+
+	/* Unsigned negation; well defined even for the most negative n */
+	back = (size_t)0 - (size_t)n;
+	if (back > sv->sv_rem)
+		return ('\0');
+
+	return (*(sv->sv_last - back));
+}
+
+/*
+ * Return the next byte of sv and advance past it; returns '\0' without
+ * moving when the view is exhausted.
+ */
+char
+sv_consume_c(strview_t *sv)
+{
+	char next;
+
+	if (sv->sv_first >= sv->sv_last)
+		return ('\0');
+
+	next = *sv->sv_first;
+	sv->sv_first++;
+	sv->sv_rem--;
+	return (next);
+}
+
+/* Advance sv by n bytes; n must not exceed the bytes remaining. */
+void
+sv_consume_n(strview_t *sv, size_t n)
+{
+	VERIFY3U(sv->sv_rem, >=, n);
+
+	sv->sv_rem -= n;
+	sv->sv_first += n;
+}
diff --git a/usr/src/lib/libdemangle/common/strview.h b/usr/src/lib/libdemangle/common/strview.h
new file mode 100644
index 0000000000..ac94c67c6c
--- /dev/null
+++ b/usr/src/lib/libdemangle/common/strview.h
@@ -0,0 +1,140 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+#ifndef _STRVIEW_H
+#define _STRVIEW_H
+
+#include <inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * strview_t's represent a read-only subset of a string. It is somewhat
+ * similar to the concept of ranges found in other languages in that one can
+ * create a strview_t, and then create a smaller range for iteration.
+ *
+ * sv_first is the address of the first location (and is advanced as values
+ * are consumed) in the string.
+ *
+ * sv_last is the address one byte after the last valid value of the subset.
+ * Basically, the length of the range is equal to 'sv_last - sv_first'. For
+ * example, in the string 'abcdef' to create a view 'bcd', *sv_first would
+ * equal 'b' and *sv_last would equal 'e'.
+ *
+ * sv_rem is the number of bytes remaining in the range.
+ *
+ * A strview_t maintains references to the underlying string, so the lifetime
+ * of a strview_t should be equal to or less than the underlying string (i.e.
+ * it doesn't copy the data from the underlying string, but maintains pointers
+ * to the original data).
+ *
+ * While the underlying string does not need to be NUL-terminated, NUL is still
+ * used as a sentinel value in some instances (e.g. sv_peek()), and should not
+ * be contained within the defined range.
+ *
+ * As hinted above, the functions currently do not deal with multi-byte
+ * characters, i.e. each character is assumed to be a single byte. The
+ * current consumers do not need to handle multi-byte characters (UTF-8
+ * or otherwise), so this is sufficient at the current time.
+ */
+typedef struct strview {
+ const char *sv_first;
+ const char *sv_last;
+ size_t sv_rem;
+} strview_t;
+
+/*
+ * SV_PRINT() is used for printing strview_t values during debugging, e.g.
+ * `DEMDEBUG("%.*s", SV_PRINT(sv));`
+ */
+#define SV_PRINT(_sv) (int)(_sv)->sv_rem, (_sv)->sv_first
+
+/*
+ * Initialize a strview_t from an already initialized strview_t -- the state of
+ * the source strview_t is duplicated in the newly initialized strview_t.
+ */
+void sv_init_sv(strview_t *, const strview_t *);
+
+/*
+ * Initialize a strview_t as a subset of an already initialized strview_t.
+ * The size of the subset (size_t) must be <= sv_remaining(src).
+ */
+void sv_init_sv_range(strview_t *, const strview_t *, size_t);
+
+/*
+ * Initialize a strview_t from a string. The two const char * pointers are the
+ * sv_first and sv_last values to use (see above). If the source string is
+ * NUL-terminated, one can optionally pass NULL for the second parameter in
+ * which case, the entire NUL-terminated string (starting at sv_first) is
+ * treated as a strview_t.
+ */
+void sv_init_str(strview_t *, const char *, const char *);
+
+/*
+ * Return the number of bytes remaining to consume in the strview_t
+ */
+size_t sv_remaining(const strview_t *);
+
+/*
+ * Return the char at the given position in the strview_t (without advancing
+ * the position). Position values >=0 are relative to the current position
+ * of the strview_t (e.g. '0' will return the next character, '1' will return
+ * the character after that), while negative position values are relative to
+ * the end of the strview_t (e.g. '-1' will return the last character, '-2'
+ * will return the second to last character).
+ *
+ * If the position value is out of range, '\0' is returned.
+ */
+char sv_peek(const strview_t *, ssize_t);
+
+/*
+ * Return the next character and advance the strview_t position. If no more
+ * characters are available, '\0' is returned.
+ */
+char sv_consume_c(strview_t *);
+
+/*
+ * Advance the position of the strview_t by the given number of bytes. The
+ * amount must be <= the number of bytes remaining in the strview_t.
+ */
+void sv_consume_n(strview_t *, size_t);
+
+/*
+ * Advance the strview_t position if the bytes of the strview starting at the
+ * current position match the given NUL-terminated string. The length of the
+ * NUL-terminated string must be <= the number of bytes remaining in the
+ * strview_t.
+ *
+ * If there is a match, the position of the strview_t is advanced by the
+ * length of the NUL-terminated comparison string, and B_TRUE is returned. If
+ * there is no match, the position is not advanced and B_FALSE is returned.
+ */
+boolean_t sv_consume_if(strview_t *, const char *);
+
+/*
+ * Advance the position of the strview_t if the next char in the strview_t
+ * is equal to the given char. If there is a match, the strview_t position
+ * is advanced one byte and B_TRUE is returned. If they do not match, B_FALSE
+ * is returned and the position is not advanced.
+ */
+boolean_t sv_consume_if_c(strview_t *, char);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _STRVIEW_H */
diff --git a/usr/src/lib/libdemangle/common/util.c b/usr/src/lib/libdemangle/common/util.c
index 9ffb72c79b..739c554826 100644
--- a/usr/src/lib/libdemangle/common/util.c
+++ b/usr/src/lib/libdemangle/common/util.c
@@ -11,6 +11,7 @@
/*
* Copyright 2017 Jason King
+ * Copyright 2019, Joyent, Inc.
*/
#include <sys/debug.h>
@@ -71,6 +72,20 @@ xrealloc(sysdem_ops_t *ops, void *p, size_t oldsz, size_t newsz)
return (temp);
}
+/*
+ * Duplicate the NUL-terminated string src using the allocator in ops.
+ * Returns NULL if the allocation fails.
+ */
+char *
+xstrdup(sysdem_ops_t *ops, const char *src)
+{
+	const size_t nbytes = strlen(src);
+	char *copy;
+
+	copy = zalloc(ops, nbytes + 1);
+	if (copy != NULL) {
+		/*
+		 * Only the string bytes are copied; the extra byte from
+		 * zalloc(nbytes + 1) is what NUL-terminates the result.
+		 */
+		(void) memcpy(copy, src, nbytes);
+	}
+
+	return (copy);
+}
+
/*ARGSUSED*/
static void
def_free(void *p, size_t len)