diff options
author | Jason King <jason.king@joyent.com> | 2019-01-03 18:11:04 +0000 |
---|---|---|
committer | Jason King <jason.king@joyent.com> | 2019-03-27 17:34:29 +0000 |
commit | 6cf64ca03e24fc61dedf0e3705acd6716ce4145a (patch) | |
tree | a49834aa509a618bb7cb188fe8b0c711276656e4 /usr/src/lib | |
parent | 48514b9d3e7aeee085e365e879a73a9e4873323d (diff) | |
download | illumos-joyent-6cf64ca03e24fc61dedf0e3705acd6716ce4145a.tar.gz |
OS-7478 Add rust demangling support (release-20190328)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Cody Peter Mello <cody.mello@joyent.com>
Diffstat (limited to 'usr/src/lib')
-rw-r--r-- | usr/src/lib/Makefile | 2 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/Makefile.com | 4 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/cxx.c | 3 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/demangle-sys.h | 4 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/demangle.c | 130 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/demangle_int.h | 15 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/rust.c | 543 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/strview.c | 107 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/strview.h | 140 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/util.c | 15 |
10 files changed, 934 insertions, 29 deletions
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index c0c5625ad5..d271ff0211 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -22,7 +22,7 @@ # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2012 by Delphix. All rights reserved. -# Copyright 2018, Joyent, Inc. +# Copyright 2019, Joyent, Inc. # Copyright (c) 2013 Gary Mills # Copyright 2014 Garrett D'Amore <garrett@damore.org> # Copyright (c) 2015 Gary Mills diff --git a/usr/src/lib/libdemangle/Makefile.com b/usr/src/lib/libdemangle/Makefile.com index 0b0d495df7..7eba05ce1c 100644 --- a/usr/src/lib/libdemangle/Makefile.com +++ b/usr/src/lib/libdemangle/Makefile.com @@ -16,12 +16,12 @@ LIBRARY = libdemangle-sys.a VERS = .1 -OBJECTS = str.o util.o cxx_util.o cxx.o demangle.o +OBJECTS = str.o strview.o util.o cxx_util.o cxx.o demangle.o rust.o include ../../Makefile.lib LIBS = $(DYNLIB) $(LINTLIB) -LDLIBS += -lc +LDLIBS += -lc -lcustr SRCDIR = ../common $(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) diff --git a/usr/src/lib/libdemangle/common/cxx.c b/usr/src/lib/libdemangle/common/cxx.c index e3b4c06a8a..af5f549f78 100644 --- a/usr/src/lib/libdemangle/common/cxx.c +++ b/usr/src/lib/libdemangle/common/cxx.c @@ -165,11 +165,10 @@ static const char *parse_vector_type(const char *, const char *, cpp_db_t *); size_t cpp_name_max_depth = 1024; /* max depth of name stack */ char * -cpp_demangle(const char *src, sysdem_ops_t *ops) +cpp_demangle(const char *src, size_t srclen, sysdem_ops_t *ops) { char *result = NULL; cpp_db_t db; - size_t srclen = strlen(src); if (!db_init(&db, ops)) goto done; diff --git a/usr/src/lib/libdemangle/common/demangle-sys.h b/usr/src/lib/libdemangle/common/demangle-sys.h index 02636c9521..05776ee5ee 100644 --- a/usr/src/lib/libdemangle/common/demangle-sys.h +++ b/usr/src/lib/libdemangle/common/demangle-sys.h @@ -11,6 +11,7 @@ /* * Copyright 2017 Jason King + * Copyright 2018, Joyent, Inc. 
*/ #ifndef _DEMANGLE_SYS_H @@ -24,7 +25,8 @@ extern "C" { typedef enum sysdem_lang_e { SYSDEM_LANG_AUTO, - SYSDEM_LANG_CPP + SYSDEM_LANG_CPP, + SYSDEM_LANG_RUST } sysdem_lang_t; typedef struct sysdem_alloc_s { diff --git a/usr/src/lib/libdemangle/common/demangle.c b/usr/src/lib/libdemangle/common/demangle.c index e827fd8cec..4f8e9ad678 100644 --- a/usr/src/lib/libdemangle/common/demangle.c +++ b/usr/src/lib/libdemangle/common/demangle.c @@ -11,13 +11,17 @@ /* * Copyright 2018 Jason King + * Copyright 2019, Joyent, Inc. */ #include <stdlib.h> +#include <stdio.h> #include <string.h> #include <errno.h> #include <pthread.h> +#include <sys/ctype.h> #include <sys/debug.h> +#include <stdarg.h> #include "demangle-sys.h" #include "demangle_int.h" @@ -25,31 +29,63 @@ static pthread_once_t debug_once = PTHREAD_ONCE_INIT; volatile boolean_t demangle_debug; +FILE *debugf = stderr; + +static const char * +langstr(sysdem_lang_t lang) +{ + switch (lang) { + case SYSDEM_LANG_AUTO: + return ("auto"); + case SYSDEM_LANG_CPP: + return ("c++"); + case SYSDEM_LANG_RUST: + return ("rust"); + default: + return ("invalid"); + } +} static sysdem_lang_t -detect_lang(const char *str) +detect_lang(const char *str, size_t n) { - size_t n = strlen(str); + const char *p = str; + size_t len; if (n < 3 || str[0] != '_') return (SYSDEM_LANG_AUTO); - switch (str[1]) { - case 'Z': + /* + * Check for ^_Z or ^__Z + */ + p = str + 1; + if (*p == '_') { + p++; + } + + if (*p != 'Z') + return (SYSDEM_LANG_AUTO); + + /* + * Sadly, rust currently uses the same prefix as C++, however + * demangling rust as a C++ mangled name yields less than desirable + * results. However rust names end with a hash. 
We use that to + * attempt to disambiguate + */ + + /* Find 'h'<hexdigit>+E$ */ + if ((p = strrchr(p, 'h')) == NULL) return (SYSDEM_LANG_CPP); - case '_': - break; + if ((len = strspn(p + 1, "0123456789abcdef")) == 0) + return (SYSDEM_LANG_CPP); - default: - return (SYSDEM_LANG_AUTO); - } + p += len + 1; - /* why they use ___Z sometimes is puzzling... *sigh* */ - if (str[2] == '_' && str[3] == 'Z') + if (p[0] != 'E' || p[1] != '\0') return (SYSDEM_LANG_CPP); - return (SYSDEM_LANG_AUTO); + return (SYSDEM_LANG_RUST); } static void @@ -62,26 +98,76 @@ check_debug(void) char * sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops) { + /* + * While the language specific demangler code can handle non-NUL + * terminated strings, we currently don't expose this to consumers. + * Consumers should still pass in a NUL-terminated string. + */ + size_t slen; + VERIFY0(pthread_once(&debug_once, check_debug)); + DEMDEBUG("name = '%s'", (str == NULL) ? "(NULL)" : str); + DEMDEBUG("lang = %s (%d)", langstr(lang), lang); + + if (str == NULL) { + errno = EINVAL; + return (NULL); + } + + slen = strlen(str); + + switch (lang) { + case SYSDEM_LANG_AUTO: + case SYSDEM_LANG_CPP: + case SYSDEM_LANG_RUST: + break; + default: + errno = EINVAL; + return (NULL); + } + if (ops == NULL) ops = sysdem_ops_default; if (lang == SYSDEM_LANG_AUTO) { - lang = detect_lang(str); - if (lang == SYSDEM_LANG_AUTO) { - errno = ENOTSUP; - return (NULL); - } + lang = detect_lang(str, slen); + if (lang != SYSDEM_LANG_AUTO) + DEMDEBUG("detected language is %s", langstr(lang)); } switch (lang) { - case SYSDEM_LANG_AUTO: - break; case SYSDEM_LANG_CPP: - return (cpp_demangle(str, ops)); + return (cpp_demangle(str, slen, ops)); + case SYSDEM_LANG_RUST: + return (rust_demangle(str, slen, ops)); + case SYSDEM_LANG_AUTO: + DEMDEBUG("could not detect language"); + errno = ENOTSUP; + return (NULL); + default: + /* + * This can't happen unless there's a bug with detect_lang, + * but gcc doesn't know that. 
+ */ + errno = EINVAL; + return (NULL); } +} - errno = ENOTSUP; - return (NULL); +int +demdebug(const char *fmt, ...) +{ + va_list ap; + + flockfile(debugf); + (void) fprintf(debugf, "LIBDEMANGLE: "); + va_start(ap, fmt); + (void) vfprintf(debugf, fmt, ap); + (void) fputc('\n', debugf); + (void) fflush(debugf); + va_end(ap); + funlockfile(debugf); + + return (0); } diff --git a/usr/src/lib/libdemangle/common/demangle_int.h b/usr/src/lib/libdemangle/common/demangle_int.h index 9abb2cc295..66a34cf41d 100644 --- a/usr/src/lib/libdemangle/common/demangle_int.h +++ b/usr/src/lib/libdemangle/common/demangle_int.h @@ -11,6 +11,7 @@ /* * Copyright 2017 Jason King + * Copyright 2019, Joyent, Inc. */ #ifndef _DEMANGLE_INT_H #define _DEMANGLE_INT_H @@ -24,14 +25,26 @@ extern "C" { extern sysdem_ops_t *sysdem_ops_default; -char *cpp_demangle(const char *, sysdem_ops_t *); +char *cpp_demangle(const char *, size_t, sysdem_ops_t *); +char *rust_demangle(const char *, size_t, sysdem_ops_t *); void *zalloc(sysdem_ops_t *, size_t); void *xrealloc(sysdem_ops_t *, void *, size_t, size_t); void xfree(sysdem_ops_t *, void *, size_t); +char *xstrdup(sysdem_ops_t *, const char *); extern volatile boolean_t demangle_debug; +/* + * gcc seems to get unhappy with the ASSERT() style definition (also borrowed + * for the DEMDEBUG macro) unless demdebug() returns a non-void value + * (despite the return value never being used). + */ +int demdebug(const char *, ...); + +#define DEMDEBUG(s, ...) \ + ((void)(demangle_debug && demdebug(s, ## __VA_ARGS__))) + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c new file mode 100644 index 0000000000..f99fe79a10 --- /dev/null +++ b/usr/src/lib/libdemangle/common/rust.c @@ -0,0 +1,543 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +#include <errno.h> +#include <libcustr.h> +#include <limits.h> +#include <string.h> +#include <sys/ctype.h> /* We want the C locale ISXXX() versions */ +#include <sys/debug.h> +#include <stdio.h> +#include <sys/sysmacros.h> + +#include "strview.h" +#include "demangle_int.h" + +/* + * Unfortunately, there is currently no official specification for the rust + * name mangling. This is an attempt to document the understanding of the + * mangling used here. It is based off examination of + * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/ + * + * A mangled rust name is: + * <prefix> <name> <hash> E + * + * <prefix> ::= _ZN + * __ZN + * + * <name> ::= <name-segment>+ + * + * <name-segment> ::= <len> <name-chars>{len} + * + * <len> ::= [1-9][0-9]* + * + * <name-chars> ::= <[A-Za-z]> <[A-Za-z0-9]>* + * <separator> + * <special> + * + * <separator> ::= '..' 
# '::' + * + * <special> ::= $SP$ # '@' + * $BP$ # '*' + * $RF$ # '&' + * $LT$ # '<' + * $GT$ # '>' + * $LP$ # '(' + * $RP$ # ')' + * $C$ # ',' + * $u7e$ # '~' + * $u20$ # ' ' + * $u27$ # '\'' + * $u3d$ # '=' + * $u5b$ # '[' + * $u5d$ # ']' + * $u7b$ # '{' + * $u7d$ # '}' + * $u3b$ # ';' + * $u2b$ # '+' + * $u22$ # '"' + * + * <hash> := <len> h <hex-digits>+ + * + * <hex-digits> := <[0-9a-f]> + */ + +typedef struct rustdem_state { + const char *rds_str; + custr_t *rds_demangled; + sysdem_ops_t *rds_ops; + int rds_error; +} rustdem_state_t; + +static const struct rust_charmap { + const char *ruc_seq; + char ruc_ch; +} rust_charmap[] = { + { "$SP$", '@' }, + { "$BP$", '*' }, + { "$RF$", '&' }, + { "$LT$", '<' }, + { "$GT$", '>' }, + { "$LP$", '(' }, + { "$RP$", ')' }, + { "$C$", ',' }, + { "$u7e$", '~' }, + { "$u20$", ' ' }, + { "$u27$", '\'' }, + { "$u3d$", '=' }, + { "$u5b$", '[' }, + { "$u5d$", ']' }, + { "$u7b$", '{' }, + { "$u7d$", '}' }, + { "$u3b$", ';' }, + { "$u2b$", '+' }, + { "$u22$", '"' } +}; +static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap); + +static void *rustdem_alloc(custr_alloc_t *, size_t); +static void rustdem_free(custr_alloc_t *, void *, size_t); + +static boolean_t rustdem_append_c(rustdem_state_t *, char); +static boolean_t rustdem_all_ascii(const strview_t *); + +static boolean_t rustdem_parse_prefix(rustdem_state_t *, strview_t *); +static boolean_t rustdem_parse_name(rustdem_state_t *, strview_t *); +static boolean_t rustdem_parse_hash(rustdem_state_t *, strview_t *); +static boolean_t rustdem_parse_num(rustdem_state_t *, strview_t *, uint64_t *); +static boolean_t rustdem_parse_special(rustdem_state_t *, strview_t *); +static boolean_t rustdem_add_sep(rustdem_state_t *); + +char * +rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops) +{ + rustdem_state_t st = { + .rds_str = s, + .rds_ops = ops, + }; + custr_alloc_ops_t custr_ops = { + .custr_ao_alloc = rustdem_alloc, + .custr_ao_free = rustdem_free + }; +
custr_alloc_t custr_alloc = { + .cua_version = CUSTR_VERSION + }; + strview_t sv; + int ret; + + if (custr_alloc_init(&custr_alloc, &custr_ops) != 0) + return (NULL); + custr_alloc.cua_arg = &st; + + sv_init_str(&sv, s, s + slen); + + if (sv_remaining(&sv) < 1 || sv_peek(&sv, -1) != 'E') { + DEMDEBUG("ERROR: string is either too small or does not end " + "with 'E'"); + errno = EINVAL; + return (NULL); + } + + if (!rustdem_parse_prefix(&st, &sv)) { + DEMDEBUG("ERROR: could not parse prefix"); + errno = EINVAL; + return (NULL); + } + DEMDEBUG("parsed prefix; remaining='%.*s'", SV_PRINT(&sv)); + + if (!rustdem_all_ascii(&sv)) { + /* rustdem_all_ascii() provides debug output */ + errno = EINVAL; + return (NULL); + } + + if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0) + return (NULL); + + while (sv_remaining(&sv) > 1) { + if (rustdem_parse_name(&st, &sv)) + continue; + if (st.rds_error != 0) + goto fail; + } + + if (st.rds_error != 0 || !sv_consume_if_c(&sv, 'E')) + goto fail; + + char *res = xstrdup(ops, custr_cstr(st.rds_demangled)); + if (res == NULL) { + st.rds_error = errno; + goto fail; + } + + custr_free(st.rds_demangled); + DEMDEBUG("result = '%s'", res); + return (res); + +fail: + custr_free(st.rds_demangled); + errno = st.rds_error; + return (NULL); +} + +static boolean_t +rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp) +{ + strview_t pfx; + + sv_init_sv(&pfx, svp); + + DEMDEBUG("checking for '_ZN' or '__ZN' in '%.*s'", SV_PRINT(&pfx)); + + if (st->rds_error != 0) + return (B_FALSE); + + if (!sv_consume_if_c(&pfx, '_')) + return (B_FALSE); + + (void) sv_consume_if_c(&pfx, '_'); + + if (!sv_consume_if_c(&pfx, 'Z') || !sv_consume_if_c(&pfx, 'N')) + return (B_FALSE); + + /* Update svp with new position */ + sv_init_sv(svp, &pfx); + return (B_TRUE); +} + +static boolean_t +rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first) +{ + strview_t sv; + strview_t name; + uint64_t len; + size_t rem; + boolean_t last = 
B_FALSE; + + if (st->rds_error != 0 || sv_remaining(svp) == 0) + return (B_FALSE); + + sv_init_sv(&sv, svp); + + if (!rustdem_parse_num(st, &sv, &len)) { + DEMDEBUG("ERROR: no leading length"); + st->rds_error = EINVAL; + return (B_FALSE); + } + + rem = sv_remaining(&sv); + + if (rem < len || len > SIZE_MAX) { + st->rds_error = EINVAL; + return (B_FALSE); + } + + /* Is this the last segment before the terminating E? */ + if (rem == len + 1) { + VERIFY3U(sv_peek(&sv, -1), ==, 'E'); + last = B_TRUE; + } + + if (!first && !rustdem_add_sep(st)) + return (B_FALSE); + + /* Reduce length of seg to the length we parsed */ + (void) sv_init_sv_range(&name, &sv, len); + + DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name)); + + /* + * A rust hash starts with 'h', and is the last component of a name + * before the terminating 'E' + */ + if (sv_peek(&name, 0) == 'h' && last) { + if (!rustdem_parse_hash(st, &name)) + return (B_FALSE); + goto done; + } + + while (sv_remaining(&name) > 0) { + switch (sv_peek(&name, 0)) { + case '$': + if (rustdem_parse_special(st, &name)) + continue; + break; + case '_': + if (sv_peek(&name, 1) == '$') { + /* + * Only consume/ignore '_'. Leave + * $ for next round. + */ + sv_consume_n(&name, 1); + continue; + } + break; + case '.': + /* Convert '..' 
to '::' */ + if (sv_peek(&name, 1) != '.') + break; + + if (!rustdem_add_sep(st)) + return (B_FALSE); + + sv_consume_n(&name, 2); + continue; + default: + break; + } + + if (custr_appendc(st->rds_demangled, + sv_consume_c(&name)) != 0) { + st->rds_error = ENOMEM; + return (B_FALSE); + } + } + +done: + DEMDEBUG("%s: consumed '%.*s'", __func__, (int)len, svp->sv_first); + sv_consume_n(&sv, len); + sv_init_sv(svp, &sv); + return (B_TRUE); +} + +static boolean_t +rustdem_parse_name(rustdem_state_t *st, strview_t *svp) +{ + strview_t name; + boolean_t first = B_TRUE; + + if (st->rds_error != 0) + return (B_FALSE); + + sv_init_sv(&name, svp); + + if (sv_remaining(&name) == 0) + return (B_FALSE); + + while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') { + if (!rustdem_parse_name_segment(st, &name, first)) + return (B_FALSE); + first = B_FALSE; + } + + sv_init_sv(svp, &name); + return (B_TRUE); +} + +static boolean_t +rustdem_parse_hash(rustdem_state_t *st, strview_t *svp) +{ + strview_t sv; + + sv_init_sv(&sv, svp); + + VERIFY(sv_consume_if_c(&sv, 'h')); + if (!rustdem_append_c(st, 'h')) + return (B_FALSE); + + while (sv_remaining(&sv) > 0) { + char c = sv_consume_c(&sv); + + switch (c) { + /* + * The upper-case hex digits (A-F) are excluded as valid + * hash values for several reasons: + * + * 1. It would result in two different possible names for + * the same function, leading to ambiguity in linking (among + * other things). + * + * 2. It would cause potential ambiguity in parsing -- is a + * trailing 'E' part of the hash, or the terminating character + * in the mangled name? + * + * 3. No examples were able to be found in the wild where + * uppercase digits are used, and other rust demanglers all + * seem to assume the hash must contain lower-case hex digits. 
+ */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': case 'a': case 'b': + case 'c': case 'd': case 'e': case 'f': + if (!rustdem_append_c(st, c)) + return (B_FALSE); + break; + default: + return (B_FALSE); + } + } + + sv_init_sv(svp, &sv); + return (B_TRUE); +} + +/* + * A 10 digit value would imply a name 1Gb or larger in size. It seems + * unlikely to the point of absurdity any such value could ever possibly + * be valid (or even have compiled properly). This also prevents the + * uint64_t conversion from possibly overflowing since the value must always + * be below 10 * UINT32_MAX. + */ +#define MAX_DIGITS 10 + +static boolean_t +rustdem_parse_num(rustdem_state_t *restrict st, strview_t *restrict svp, + uint64_t *restrict valp) +{ + strview_t snum; + uint64_t v = 0; + size_t ndigits = 0; + char c; + + if (st->rds_error != 0) + return (B_FALSE); + + sv_init_sv(&snum, svp); + + DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(&snum)); + + c = sv_peek(&snum, 0); + if (!ISDIGIT(c)) { + DEMDEBUG("%s: ERROR no digits in str\n", __func__); + st->rds_error = EINVAL; + return (B_FALSE); + } + + /* + * There is currently no official specification on rust name + * mangling, only that it has been stated that rust follows what + * C++ mangling does. In the Itanium C++ ABI (what practically + * every non-Windows C++ implementation uses these days), it + * explicitly disallows leading 0s in numeric values (except for + * substitution and template indexes, which aren't relevant here). + * We enforce the same restriction -- if a rust implementation allowed + * leading zeros in numbers (basically segment lengths) it'd + * cause all sorts of ambiguity problems with names that likely lead + * to much bigger problems with linking and such, so this seems + * reasonable. 
+ */ + if (c == '0') { + DEMDEBUG("%s: ERROR number starts with leading 0\n", __func__); + st->rds_error = EINVAL; + return (B_FALSE); + } + + while (sv_remaining(&snum) > 0 && ndigits <= MAX_DIGITS) { + c = sv_consume_c(&snum); + + if (!ISDIGIT(c)) + break; + + v *= 10; + v += c - '0'; + ndigits++; + } + + if (ndigits > MAX_DIGITS) { + DEMDEBUG("%s: value %llu is too large\n", __func__, v); + st->rds_error = ERANGE; + return (B_FALSE); + } + + DEMDEBUG("%s: num=%llu", __func__, v); + + *valp = v; + sv_consume_n(svp, ndigits); + return (B_TRUE); +} + +static boolean_t +rustdem_parse_special(rustdem_state_t *restrict st, strview_t *restrict svp) +{ + if (st->rds_error != 0) + return (B_FALSE); + + if (sv_peek(svp, 0) != '$') + return (B_FALSE); + + for (size_t i = 0; i < rust_charmap_sz; i++) { + if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) { + if (!rustdem_append_c(st, rust_charmap[i].ruc_ch)) + return (B_FALSE); + return (B_TRUE); + } + } + return (B_FALSE); +} + +static boolean_t +rustdem_add_sep(rustdem_state_t *st) +{ + if (st->rds_error != 0) + return (B_FALSE); + + if (!rustdem_append_c(st, ':') || + !rustdem_append_c(st, ':')) + return (B_FALSE); + + return (B_TRUE); +} + +static boolean_t +rustdem_append_c(rustdem_state_t *st, char c) +{ + if (st->rds_error != 0) + return (B_FALSE); + + if (custr_appendc(st->rds_demangled, c) == 0) + return (B_TRUE); + + st->rds_error = errno; + return (B_FALSE); +} + +static boolean_t +rustdem_all_ascii(const strview_t *svp) +{ + strview_t p; + + sv_init_sv(&p, svp); + + while (sv_remaining(&p) > 0) { + char c = sv_consume_c(&p); + + /* + * #including <sys/ctype.h> conflicts with <ctype.h>. Since + * we want the C locale macros (ISDIGIT, etc), it also means + * we can't use isascii(3C). 
+ */ + if ((c & 0x80) != 0) { + DEMDEBUG("%s: found non-ascii character 0x%02hhx at " + "offset %tu", __func__, c, + (ptrdiff_t)(p.sv_first - svp->sv_first)); + return (B_FALSE); + } + } + return (B_TRUE); +} + +static void * +rustdem_alloc(custr_alloc_t *cao, size_t len) +{ + rustdem_state_t *st = cao->cua_arg; + return (zalloc(st->rds_ops, len)); +} + +static void +rustdem_free(custr_alloc_t *cao, void *p, size_t len) +{ + rustdem_state_t *st = cao->cua_arg; + xfree(st->rds_ops, p, len); +} diff --git a/usr/src/lib/libdemangle/common/strview.c b/usr/src/lib/libdemangle/common/strview.c new file mode 100644 index 0000000000..e4576ee17a --- /dev/null +++ b/usr/src/lib/libdemangle/common/strview.c @@ -0,0 +1,107 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. 
+ */ + +#include <string.h> +#include <sys/debug.h> +#include "strview.h" + +void +sv_init_sv(strview_t *sv, const strview_t *src) +{ + *sv = *src; +} + +void +sv_init_sv_range(strview_t *sv, const strview_t *src, size_t len) +{ + VERIFY3U(sv_remaining(src), >=, len); + + sv->sv_first = src->sv_first; + sv->sv_last = src->sv_first + len; + sv->sv_rem = len; +} + +void +sv_init_str(strview_t *sv, const char *first, const char *last) +{ + if (last == NULL) + last = first + strlen(first); + + VERIFY3P(first, <=, last); + sv->sv_first = first; + sv->sv_last = last; + sv->sv_rem = (size_t)(uintptr_t)(sv->sv_last - sv->sv_first); +} + +size_t +sv_remaining(const strview_t *sv) +{ + return (sv->sv_rem); +} + +boolean_t +sv_consume_if_c(strview_t *sv, char c) +{ + if (sv->sv_rem < 1 || *sv->sv_first != c) + return (B_FALSE); + + sv->sv_first++; + sv->sv_rem--; + return (B_TRUE); +} + +boolean_t +sv_consume_if(strview_t *sv, const char *str) +{ + size_t slen = strlen(str); + + if (sv->sv_rem < slen) + return (B_FALSE); + if (strncmp(sv->sv_first, str, slen) != 0) + return (B_FALSE); + + sv->sv_first += slen; + sv->sv_rem -= slen; + return (B_TRUE); +} + +char +sv_peek(const strview_t *sv, ssize_t n) +{ + const char *p; + + p = (n >= 0) ? sv->sv_first + n : sv->sv_last + n; + return ((p >= sv->sv_first && p < sv->sv_last) ? *p : '\0'); +} + +char +sv_consume_c(strview_t *sv) +{ + char c = '\0'; + + if (sv->sv_first < sv->sv_last) { + c = *sv->sv_first++; + sv->sv_rem--; + } + return (c); +} + +void +sv_consume_n(strview_t *sv, size_t n) +{ + VERIFY3U(sv->sv_rem, >=, n); + sv->sv_first += n; + sv->sv_rem -= n; +} diff --git a/usr/src/lib/libdemangle/common/strview.h b/usr/src/lib/libdemangle/common/strview.h new file mode 100644 index 0000000000..ac94c67c6c --- /dev/null +++ b/usr/src/lib/libdemangle/common/strview.h @@ -0,0 +1,140 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +#ifndef _STRVIEW_H +#define _STRVIEW_H + +#include <inttypes.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * strview_t's represent a read-only subset of a string. It is somewhat + * similar to the concept of ranges found in other languages in that one can + * create a strview_t, and then create a smaller range for iteration. + * + * sv_first is the address of the first location (and is advanced as values + * are consumed) in the string. + * + * sv_last is the address one byte after the last valid value of the subset. + * Basically, the length of the range is equal to 'sv_last - sv_first'. For + * example, in the string 'abcdef' to create a view 'bcd', *sv_first would + * equal 'b' and *sv_last would equal 'e'. + * + * sv_rem is the number of bytes remaining in the range. + * + * A strview_t maintains references to the underlying string, so the lifetime + * of a strview_t should be equal to or less than the underlying string (i.e. + * it doesn't copy the data from the underlying string, but maintains pointers + * to the original data). + * + * While the underlying string does not need to be NUL-terminated, NUL is still + * used as a sentinel value in some instances (e.g. sv_peek()), and should not + * be contained within the defined range. + * + * As hinted above, the functions currently do not deal with multi-byte + * characters, i.e. each character is assumed to be a single byte. The + * current consumers do not need to handle multi-byte characters (UTF-8 + * or otherwise), so this is sufficient at the current time. 
+ */ +typedef struct strview { + const char *sv_first; + const char *sv_last; + size_t sv_rem; +} strview_t; + +/* + * SV_PRINT() is used for printing strview_t values during debugging, e.g. + * `DEMDEBUG("%.*s", SV_PRINT(sv));` + */ +#define SV_PRINT(_sv) (int)(_sv)->sv_rem, (_sv)->sv_first + +/* + * Initialize a strview_t from an already initialized strview_t -- the state of + * the source strview_t is duplicated in the newly initialized strview_t. + */ +void sv_init_sv(strview_t *, const strview_t *); + +/* + * Initialize a strview_t as a subset of an already initialized strview_t. + * The size of the subset (size_t) must be <= sv_remaining(src). + */ +void sv_init_sv_range(strview_t *, const strview_t *, size_t); + +/* + * Initialize a strview_t from a string. The two const char * pointers are the + * sv_first and sv_last values to use (see above). If the source string is + * NUL-terminated, one can optionally pass NULL for the second parameter in + * which case, the entire NUL-terminated string (starting at sv_first) is + * treated as a strview_t. + */ +void sv_init_str(strview_t *, const char *, const char *); + +/* + * Return the number of bytes remaining to consume in the strview_t + */ +size_t sv_remaining(const strview_t *); + +/* + * Return the char at the given position in the strview_t (without advancing + * the position). Position values >=0 are relative to the current position + * of the strview_t (e.g. '0' will return the next character, '1' will return + * the character after that), while negative position values are relative to + * the end of the strview_t (e.g. '-1' will return the last character, '-2' + * will return the second to last character). + * + * If the position value is out of range, '\0' is returned. + */ +char sv_peek(const strview_t *, ssize_t); + +/* + * Return the next character and advance the strview_t position. If no more + * characters are available, '\0' is returned. 
+ */ +char sv_consume_c(strview_t *); + +/* + * Advance the position of the strview_t by the given number of bytes. The + * amount must be <= the number of bytes remaining in the strview_t. + */ +void sv_consume_n(strview_t *, size_t); + +/* + * Advance the strview_t position if the bytes of the strview starting at the + * current position match the given NUL-terminated string. The length of the + * NUL-terminated string must be <= the number of bytes remaining in the + * strview_t. + * + * If there is a match, the position of the strview_t is advanced by the + * length of the NUL-terminated comparison string, and B_TRUE is returned. If + * there is no match, the position is not advanced and B_FALSE is returned. + */ +boolean_t sv_consume_if(strview_t *, const char *); + +/* + * Advance the position of the strview_t if the next char in the strview_t + * is equal to the given char. If there is a match, the strview_t position + * is advanced one byte and B_TRUE is returned. If they do not match, B_FALSE + * is returned and the position is not advanced. + */ +boolean_t sv_consume_if_c(strview_t *, char); + +#ifdef __cplusplus +} +#endif + +#endif /* _STRVIEW_H */ diff --git a/usr/src/lib/libdemangle/common/util.c b/usr/src/lib/libdemangle/common/util.c index 9ffb72c79b..739c554826 100644 --- a/usr/src/lib/libdemangle/common/util.c +++ b/usr/src/lib/libdemangle/common/util.c @@ -11,6 +11,7 @@ /* * Copyright 2017 Jason King + * Copyright 2019, Joyent, Inc. */ #include <sys/debug.h> @@ -71,6 +72,20 @@ xrealloc(sysdem_ops_t *ops, void *p, size_t oldsz, size_t newsz) return (temp); } +char * +xstrdup(sysdem_ops_t *ops, const char *src) +{ + size_t len = strlen(src); + char *str = zalloc(ops, len + 1); + + if (str == NULL) + return (NULL); + + /* zalloc(len+1) guarantees this will be NUL-terminated */ + (void) memcpy(str, src, len); + return (str); +} + /*ARGSUSED*/ static void def_free(void *p, size_t len) |