summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dan McDonald <danmcd@joyent.com> 2021-04-21 11:46:07 -0400
committer Dan McDonald <danmcd@joyent.com> 2021-04-21 11:46:07 -0400
commit 043819558530b9580af0efc07bc0af452bcfcef5 (patch)
tree cef3a0fe4c57c2904841c5e7d02616294c6c1e6a
parent 7e5cd87005240f2e0f5ae527ae003c420a0c10f3 (diff)
parent f5ac85908213ce2217329b835bf3c91f1c04b793 (diff)
download illumos-joyent-release-20210422.tar.gz
[illumos-gate merge] release-20210422
commit f5ac85908213ce2217329b835bf3c91f1c04b793
    13727 rust demangler loops when characters trail terminating E
commit 64b8fdd9a26cb9749e154c721f0688932b5e0094
    13178 ::msgbuf could see more than 8KB of logs
-rw-r--r-- usr/src/lib/libdemangle/common/demangle.c 100
-rw-r--r-- usr/src/lib/libdemangle/common/rust.c 64
-rw-r--r-- usr/src/test/util-tests/tests/demangle/rust.c 12
-rw-r--r-- usr/src/uts/common/os/logsubr.c 2
-rw-r--r-- usr/src/uts/common/sys/log.h 4
5 files changed, 108 insertions, 74 deletions
diff --git a/usr/src/lib/libdemangle/common/demangle.c b/usr/src/lib/libdemangle/common/demangle.c
index b6db356416..bf7c9ab8c7 100644
--- a/usr/src/lib/libdemangle/common/demangle.c
+++ b/usr/src/lib/libdemangle/common/demangle.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2018 Jason King
+ * Copyright 2021 Jason King
* Copyright 2019, Joyent, Inc.
*/
@@ -18,6 +18,7 @@
#include <stdio.h>
#include <string.h>
#include <errno.h>
+#include <limits.h>
#include <pthread.h>
#include <sys/ctype.h>
#include <sys/debug.h>
@@ -25,6 +26,7 @@
#include <stdarg.h>
#include "demangle-sys.h"
#include "demangle_int.h"
+#include "strview.h"
#define DEMANGLE_DEBUG "DEMANGLE_DEBUG"
@@ -68,46 +70,24 @@ sysdem_parse_lang(const char *str, sysdem_lang_t *langp)
return (B_FALSE);
}
-static sysdem_lang_t
-detect_lang(const char *str, size_t n)
+/*
+ * A quick check if str can possibly be a mangled string. Currently, that
+ * means it must start with _Z or __Z.
+ */
+static boolean_t
+is_mangled(const char *str, size_t n)
{
- const char *p = str;
- size_t len;
-
- if (n < 3 || str[0] != '_')
- return (SYSDEM_LANG_AUTO);
-
- /*
- * Check for ^_Z or ^__Z
- */
- p = str + 1;
- if (*p == '_') {
- p++;
- }
+ strview_t sv;
- if (*p != 'Z')
- return (SYSDEM_LANG_AUTO);
+ sv_init_str(&sv, str, str + n);
- /*
- * Sadly, rust currently uses the same prefix as C++, however
- * demangling rust as a C++ mangled name yields less than desirable
- * results. However rust names end with a hash. We use that to
- * attempt to disambiguate
- */
-
- /* Find 'h'<hexdigit>+E$ */
- if ((p = strrchr(p, 'h')) == NULL)
- return (SYSDEM_LANG_CPP);
-
- if ((len = strspn(p + 1, "0123456789abcdef")) == 0)
- return (SYSDEM_LANG_CPP);
+ if (!sv_consume_if_c(&sv, '_'))
+ return (B_FALSE);
+ (void) sv_consume_if_c(&sv, '_');
+ if (sv_consume_if_c(&sv, 'Z'))
+ return (B_TRUE);
- p += len + 1;
-
- if (p[0] != 'E' || p[1] != '\0')
- return (SYSDEM_LANG_CPP);
-
- return (SYSDEM_LANG_RUST);
+ return (B_FALSE);
}
static void
@@ -120,6 +100,7 @@ check_debug(void)
char *
sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops)
{
+ char *res = NULL;
/*
* While the language specific demangler code can handle non-NUL
* terminated strings, we currently don't expose this to consumers.
@@ -152,29 +133,50 @@ sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops)
if (ops == NULL)
ops = sysdem_ops_default;
- if (lang == SYSDEM_LANG_AUTO) {
- lang = detect_lang(str, slen);
- if (lang != SYSDEM_LANG_AUTO)
- DEMDEBUG("detected language is %s", langstr(lang));
- }
-
+ /*
+ * If we were given an explicit language to demangle, we always
+ * use that. If not, we try to demangle as rust, then c++. Any
+ * mangled C++ symbol that manages to successfully demangle as a
+ * legacy rust symbol _should_ look the same as it can really
+ * only be a very simple C++ symbol. Otherwise, the rust demangling
+ * should fail and we can try C++.
+ */
switch (lang) {
case SYSDEM_LANG_CPP:
return (cpp_demangle(str, slen, ops));
case SYSDEM_LANG_RUST:
return (rust_demangle(str, slen, ops));
case SYSDEM_LANG_AUTO:
- DEMDEBUG("could not detect language");
- errno = ENOTSUP;
- return (NULL);
- default:
+ break;
+ }
+
+ /*
+ * To save us some potential work, if the symbol cannot
+ * possibly be a rust or C++ mangled name, we don't
+ * even attempt to demangle either.
+ */
+ if (!is_mangled(str, slen)) {
/*
- * This can't happen unless there's a bug with detect_lang,
- * but gcc doesn't know that.
+ * This does mean if we somehow get a string > 2GB
+ * the debugging output will be truncated, but that
+ * seems an acceptable tradeoff.
*/
+ int len = slen > INT_MAX ? INT_MAX : slen;
+
+ DEMDEBUG("ERROR: '%.*s' cannot be a mangled string", len, str);
errno = EINVAL;
return (NULL);
}
+
+ DEMDEBUG("trying rust");
+ res = rust_demangle(str, slen, ops);
+
+ IMPLY(res != NULL, errno == 0);
+ if (res != NULL)
+ return (res);
+
+ DEMDEBUG("trying C++");
+ return (cpp_demangle(str, slen, ops));
}
int
diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c
index b3aeb3a2a1..9b145ca841 100644
--- a/usr/src/lib/libdemangle/common/rust.c
+++ b/usr/src/lib/libdemangle/common/rust.c
@@ -11,6 +11,7 @@
/*
* Copyright 2019, Joyent, Inc.
+ * Copyright 2021 Jason King
*/
#include <errno.h>
@@ -32,12 +33,12 @@
* https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/
*
* A mangled rust name is:
- * <prefix> <name> <hash> E
+ * <prefix> <name>
*
* <prefix> ::= _Z
* __Z
*
- * <name> ::= <name-segment>+
+ * <name> ::= N <name-segment>+ [<hash>] E
*
* <name-segment> ::= <len> <name-chars>{len}
*
@@ -166,15 +167,18 @@ rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops)
if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0)
return (NULL);
- while (sv_remaining(&sv) > 1) {
- if (rustdem_parse_name(&st, &sv))
- continue;
- if (st.rds_error != 0)
- goto fail;
+ if (!rustdem_parse_name(&st, &sv)) {
+ if (st.rds_error == 0)
+ st.rds_error = EINVAL;
+ goto fail;
}
- if (st.rds_error != 0 || !sv_consume_if_c(&sv, 'E'))
+ if (sv_remaining(&sv) > 0) {
+ DEMDEBUG("ERROR: unexpected trailing characters after "
+ "terminating 'E': '%.*s'", SV_PRINT(&sv));
+ st.rds_error = EINVAL;
goto fail;
+ }
char *res = xstrdup(ops, custr_cstr(st.rds_demangled));
if (res == NULL) {
@@ -199,7 +203,7 @@ rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp)
sv_init_sv(&pfx, svp);
- DEMDEBUG("checking for '_ZN' or '__ZN' in '%.*s'", SV_PRINT(&pfx));
+ DEMDEBUG("checking for '_Z' or '__Z' in '%.*s'", SV_PRINT(&pfx));
if (st->rds_error != 0)
return (B_FALSE);
@@ -209,7 +213,7 @@ rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp)
(void) sv_consume_if_c(&pfx, '_');
- if (!sv_consume_if_c(&pfx, 'Z') || !sv_consume_if_c(&pfx, 'N'))
+ if (!sv_consume_if_c(&pfx, 'Z'))
return (B_FALSE);
/* Update svp with new position */
@@ -260,12 +264,21 @@ rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first)
/*
* A rust hash starts with 'h', and is the last component of a name
- * before the terminating 'E'
+ * before the terminating 'E'. It is however not always present
+ * in every mangled symbol, and a last segment that starts with 'h'
+ * could be confused for it, so failing to parse it just means
+ * we don't have a trailing hash.
*/
if (sv_peek(&name, 0) == 'h' && last) {
- if (!rustdem_parse_hash(st, &name))
- return (B_FALSE);
- goto done;
+ if (rustdem_parse_hash(st, &name))
+ goto done;
+
+ /*
+ * However any error other than 'not a hash' (e.g. ENOMEM)
+ * means we should fail.
+ */
+ if (st->rds_error != 0)
+ goto done;
}
while (sv_remaining(&name) > 0) {
@@ -306,12 +319,17 @@ rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first)
}
done:
- DEMDEBUG("%s: consumed '%.*s'", __func__, (int)len, svp->sv_first);
sv_consume_n(&sv, len);
+ VERIFY3P(svp->sv_first, <=, sv.sv_first);
+ DEMDEBUG("%s: consumed '%.*s'", __func__,
+ (int)(sv.sv_first - svp->sv_first), svp->sv_first);
sv_init_sv(svp, &sv);
return (B_TRUE);
}
+/*
+ * Parse N (<num><name>{num})+[<num>h<hex digits>]E
+ */
static boolean_t
rustdem_parse_name(rustdem_state_t *st, strview_t *svp)
{
@@ -323,14 +341,28 @@ rustdem_parse_name(rustdem_state_t *st, strview_t *svp)
sv_init_sv(&name, svp);
- if (sv_remaining(&name) == 0)
+ DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name));
+
+ if (sv_remaining(&name) == 0) {
+ DEMDEBUG("%s: empty name", __func__);
return (B_FALSE);
+ }
+
+ if (!sv_consume_if_c(&name, 'N')) {
+ DEMDEBUG("%s: does not start with 'N'", __func__);
+ return (B_FALSE);
+ }
while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') {
if (!rustdem_parse_name_segment(st, &name, first))
return (B_FALSE);
first = B_FALSE;
}
+ VERIFY(sv_consume_if_c(&name, 'E'));
+
+ VERIFY3P(svp->sv_first, <=, name.sv_first);
+ DEMDEBUG("%s: consumed '%.*s'", __func__,
+ (int)(name.sv_first - svp->sv_first), svp->sv_first);
sv_init_sv(svp, &name);
return (B_TRUE);
diff --git a/usr/src/test/util-tests/tests/demangle/rust.c b/usr/src/test/util-tests/tests/demangle/rust.c
index 0b13c9db7e..db2fae28e4 100644
--- a/usr/src/test/util-tests/tests/demangle/rust.c
+++ b/usr/src/test/util-tests/tests/demangle/rust.c
@@ -27,6 +27,7 @@
*/
/*
* Copyright 2019, Joyent, Inc.
+ * Copyright 2021 Jason King
*/
/*
@@ -84,25 +85,25 @@ GROUP(demangle_osx,
"<core::option::Option<T>>::unwrap::_MSG_FILE_LINE_COL::haf7cb8d5824ee659"),
T("__ZN4core5slice89_$LT$impl$u20$core..iter..traits..IntoIterator$u20$for$u20$$RF$$u27$a$u20$$u5b$T$u5d$$GT$9into_iter17h450e234d27262170E",
"core::slice::<impl core::iter::traits::IntoIterator for &'a [T]>::into_iter::h450e234d27262170"));
-/* END CSTYLED */
GROUP(demangle_elements_beginning_with_underscore,
T("_ZN13_$LT$test$GT$E", "<test>"),
T("_ZN28_$u7b$$u7b$closure$u7d$$u7d$E", "{{closure}}"),
T("_ZN15__STATIC_FMTSTRE", "__STATIC_FMTSTR"));
-/* BEGIN CSTYLED */
GROUP(demangle_trait_impls,
T("_ZN71_$LT$Test$u20$$u2b$$u20$$u27$static$u20$as$u20$foo..Bar$LT$Test$GT$$GT$3barE",
"<Test + 'static as foo::Bar<Test>>::bar"));
-/* END CSTYLED */
GROUP(invalid_no_chop, T_ERR("_ZNfooE"));
-/* BEGIN CSTYLED */
GROUP(handle_assoc_types,
T("_ZN151_$LT$alloc..boxed..Box$LT$alloc..boxed..FnBox$LT$A$C$$u20$Output$u3d$R$GT$$u20$$u2b$$u20$$u27$a$GT$$u20$as$u20$core..ops..function..FnOnce$LT$A$GT$$GT$9call_once17h69e8f44b3723e1caE",
"<alloc::boxed::Box<alloc::boxed::FnBox<A, Output=R> + 'a> as core::ops::function::FnOnce<A>>::call_once::h69e8f44b3723e1ca"));
+
+/* C++ mangled names that aren't valid rust names */
+GROUP(cplusplus_as_rust, T_ERR("_ZN7mozilla3dom13BrowserParent22RecvUpdateContentCacheERKNS_12ContentCacheE"));
+
/* END CSTYLED */
static rust_test_grp_t *rust_tests[] = {
@@ -113,7 +114,8 @@ static rust_test_grp_t *rust_tests[] = {
&demangle_elements_beginning_with_underscore,
&demangle_trait_impls,
&invalid_no_chop,
- &handle_assoc_types
+ &handle_assoc_types,
+ &cplusplus_as_rust,
};
static const size_t n_rust_tests = ARRAY_SIZE(rust_tests);
diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c
index b0a442065c..5ae6821917 100644
--- a/usr/src/uts/common/os/logsubr.c
+++ b/usr/src/uts/common/os/logsubr.c
@@ -217,7 +217,7 @@ log_init(void)
log_intrq = log_makeq(0, LOG_HIWAT, (void *)ipltospl(SPL8));
/*
- * Create a queue to hold the most recent 8K of console messages.
+ * Create a queue to hold the most recent 64K of console messages.
* Useful for debugging. Required by the "$<msgbuf" adb macro.
*/
log_recentq = log_makeq(0, LOG_RECENTSIZE, NULL);
diff --git a/usr/src/uts/common/sys/log.h b/usr/src/uts/common/sys/log.h
index 2d1fb46b73..934b4111fd 100644
--- a/usr/src/uts/common/sys/log.h
+++ b/usr/src/uts/common/sys/log.h
@@ -30,8 +30,6 @@
#ifndef _SYS_LOG_H
#define _SYS_LOG_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/strlog.h>
#include <sys/stream.h>
@@ -55,7 +53,7 @@ extern "C" {
#define LOG_HIWAT 1048576 /* threshold for tossing messages */
#define LOG_MAGIC 0xf00d4109U /* "food for log" - unsent msg magic */
-#define LOG_RECENTSIZE 8192 /* queue of most recent messages */
+#define LOG_RECENTSIZE 65536 /* queue of most recent messages */
#define LOG_MINFREE 4096 /* message cache low water mark */
#define LOG_MAXFREE 8192 /* message cache high water mark */