diff options
author | Dan McDonald <danmcd@joyent.com> | 2021-04-21 11:46:07 -0400 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2021-04-21 11:46:07 -0400 |
commit | 043819558530b9580af0efc07bc0af452bcfcef5 (patch) | |
tree | cef3a0fe4c57c2904841c5e7d02616294c6c1e6a | |
parent | 7e5cd87005240f2e0f5ae527ae003c420a0c10f3 (diff) | |
parent | f5ac85908213ce2217329b835bf3c91f1c04b793 (diff) | |
download | illumos-joyent-release-20210422.tar.gz |
[illumos-gate merge] release-20210422
commit f5ac85908213ce2217329b835bf3c91f1c04b793
13727 rust demangler loops when characters trail terminating E
commit 64b8fdd9a26cb9749e154c721f0688932b5e0094
13178 ::msgbuf could see more than 8KB of logs
-rw-r--r-- | usr/src/lib/libdemangle/common/demangle.c | 100 | ||||
-rw-r--r-- | usr/src/lib/libdemangle/common/rust.c | 64 | ||||
-rw-r--r-- | usr/src/test/util-tests/tests/demangle/rust.c | 12 | ||||
-rw-r--r-- | usr/src/uts/common/os/logsubr.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/sys/log.h | 4 |
5 files changed, 108 insertions, 74 deletions
diff --git a/usr/src/lib/libdemangle/common/demangle.c b/usr/src/lib/libdemangle/common/demangle.c index b6db356416..bf7c9ab8c7 100644 --- a/usr/src/lib/libdemangle/common/demangle.c +++ b/usr/src/lib/libdemangle/common/demangle.c @@ -10,7 +10,7 @@ */ /* - * Copyright 2018 Jason King + * Copyright 2021 Jason King * Copyright 2019, Joyent, Inc. */ @@ -18,6 +18,7 @@ #include <stdio.h> #include <string.h> #include <errno.h> +#include <limits.h> #include <pthread.h> #include <sys/ctype.h> #include <sys/debug.h> @@ -25,6 +26,7 @@ #include <stdarg.h> #include "demangle-sys.h" #include "demangle_int.h" +#include "strview.h" #define DEMANGLE_DEBUG "DEMANGLE_DEBUG" @@ -68,46 +70,24 @@ sysdem_parse_lang(const char *str, sysdem_lang_t *langp) return (B_FALSE); } -static sysdem_lang_t -detect_lang(const char *str, size_t n) +/* + * A quick check if str can possibly be a mangled string. Currently, that + * means it must start with _Z or __Z. + */ +static boolean_t +is_mangled(const char *str, size_t n) { - const char *p = str; - size_t len; - - if (n < 3 || str[0] != '_') - return (SYSDEM_LANG_AUTO); - - /* - * Check for ^_Z or ^__Z - */ - p = str + 1; - if (*p == '_') { - p++; - } + strview_t sv; - if (*p != 'Z') - return (SYSDEM_LANG_AUTO); + sv_init_str(&sv, str, str + n); - /* - * Sadly, rust currently uses the same prefix as C++, however - * demangling rust as a C++ mangled name yields less than desirable - * results. However rust names end with a hash. 
We use that to - * attempt to disambiguate - */ - - /* Find 'h'<hexdigit>+E$ */ - if ((p = strrchr(p, 'h')) == NULL) - return (SYSDEM_LANG_CPP); - - if ((len = strspn(p + 1, "0123456789abcdef")) == 0) - return (SYSDEM_LANG_CPP); + if (!sv_consume_if_c(&sv, '_')) + return (B_FALSE); + (void) sv_consume_if_c(&sv, '_'); + if (sv_consume_if_c(&sv, 'Z')) + return (B_TRUE); - p += len + 1; - - if (p[0] != 'E' || p[1] != '\0') - return (SYSDEM_LANG_CPP); - - return (SYSDEM_LANG_RUST); + return (B_FALSE); } static void @@ -120,6 +100,7 @@ check_debug(void) char * sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops) { + char *res = NULL; /* * While the language specific demangler code can handle non-NUL * terminated strings, we currently don't expose this to consumers. @@ -152,29 +133,50 @@ sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops) if (ops == NULL) ops = sysdem_ops_default; - if (lang == SYSDEM_LANG_AUTO) { - lang = detect_lang(str, slen); - if (lang != SYSDEM_LANG_AUTO) - DEMDEBUG("detected language is %s", langstr(lang)); - } - + /* + * If we were given an explicit language to demangle, we always + * use that. If not, we try to demangle as rust, then c++. Any + * mangled C++ symbol that manages to successfully demangle as a + * legacy rust symbol _should_ look the same as it can really + * only be a very simple C++ symbol. Otherwise, the rust demangling + * should fail and we can try C++. + */ switch (lang) { case SYSDEM_LANG_CPP: return (cpp_demangle(str, slen, ops)); case SYSDEM_LANG_RUST: return (rust_demangle(str, slen, ops)); case SYSDEM_LANG_AUTO: - DEMDEBUG("could not detect language"); - errno = ENOTSUP; - return (NULL); - default: + break; + } + + /* + * To save us some potential work, if the symbol cannot + * possibly be a rust or C++ mangled name, we don't + * even attempt to demangle either. 
+ */ + if (!is_mangled(str, slen)) { /* - * This can't happen unless there's a bug with detect_lang, - * but gcc doesn't know that. + * This does mean if we somehow get a string > 2GB + * the debugging output will be truncated, but that + * seems an acceptable tradeoff. */ + int len = slen > INT_MAX ? INT_MAX : slen; + + DEMDEBUG("ERROR: '%.*s' cannot be a mangled string", len, str); errno = EINVAL; return (NULL); } + + DEMDEBUG("trying rust"); + res = rust_demangle(str, slen, ops); + + IMPLY(ret != NULL, errno == 0); + if (res != NULL) + return (res); + + DEMDEBUG("trying C++"); + return (cpp_demangle(str, slen, ops)); } int diff --git a/usr/src/lib/libdemangle/common/rust.c b/usr/src/lib/libdemangle/common/rust.c index b3aeb3a2a1..9b145ca841 100644 --- a/usr/src/lib/libdemangle/common/rust.c +++ b/usr/src/lib/libdemangle/common/rust.c @@ -11,6 +11,7 @@ /* * Copyright 2019, Joyent, Inc. + * Copyright 2021 Jason King */ #include <errno.h> @@ -32,12 +33,12 @@ * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/ * * A mangled rust name is: - * <prefix> <name> <hash> E + * <prefix> <name> * * <prefix> ::= _Z * __Z * - * <name> ::= <name-segment>+ + * <name> ::= N <name-segment>+ [<hash>] E * * <name-segment> ::= <len> <name-chars>{len} * @@ -166,15 +167,18 @@ rust_demangle(const char *s, size_t slen, sysdem_ops_t *ops) if ((ret = custr_xalloc(&st.rds_demangled, &custr_alloc)) != 0) return (NULL); - while (sv_remaining(&sv) > 1) { - if (rustdem_parse_name(&st, &sv)) - continue; - if (st.rds_error != 0) - goto fail; + if (!rustdem_parse_name(&st, &sv)) { + if (st.rds_error == 0) + st.rds_error = EINVAL; + goto fail; } - if (st.rds_error != 0 || !sv_consume_if_c(&sv, 'E')) + if (sv_remaining(&sv) > 0) { + DEMDEBUG("ERROR: unexpected trailing characters after " + "terminating 'E': '%.*s'", SV_PRINT(&sv)); + st.rds_error = EINVAL; goto fail; + } char *res = xstrdup(ops, custr_cstr(st.rds_demangled)); if (res == NULL) { @@ -199,7 +203,7 @@ 
rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp) sv_init_sv(&pfx, svp); - DEMDEBUG("checking for '_ZN' or '__ZN' in '%.*s'", SV_PRINT(&pfx)); + DEMDEBUG("checking for '_Z' or '__Z' in '%.*s'", SV_PRINT(&pfx)); if (st->rds_error != 0) return (B_FALSE); @@ -209,7 +213,7 @@ rustdem_parse_prefix(rustdem_state_t *st, strview_t *svp) (void) sv_consume_if_c(&pfx, '_'); - if (!sv_consume_if_c(&pfx, 'Z') || !sv_consume_if_c(&pfx, 'N')) + if (!sv_consume_if_c(&pfx, 'Z')) return (B_FALSE); /* Update svp with new position */ @@ -260,12 +264,21 @@ rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first) /* * A rust hash starts with 'h', and is the last component of a name - * before the terminating 'E' + * before the terminating 'E'. It is however not always present + * in every mangled symbol, and a last segment that starts with 'h' + * could be confused for it, so failing to parse it just means + * we don't have a trailing hash. */ if (sv_peek(&name, 0) == 'h' && last) { - if (!rustdem_parse_hash(st, &name)) - return (B_FALSE); - goto done; + if (rustdem_parse_hash(st, &name)) + goto done; + + /* + * However any error other than 'not a hash' (e.g. ENOMEM) + * means we should fail. 
+ */ + if (st->rds_error != 0) + goto done; } while (sv_remaining(&name) > 0) { @@ -306,12 +319,17 @@ rustdem_parse_name_segment(rustdem_state_t *st, strview_t *svp, boolean_t first) } done: - DEMDEBUG("%s: consumed '%.*s'", __func__, (int)len, svp->sv_first); sv_consume_n(&sv, len); + VERIFY3P(svp->sv_first, <=, sv.sv_first); + DEMDEBUG("%s: consumed '%.*s'", __func__, + (int)(sv.sv_first - svp->sv_first), svp->sv_first); sv_init_sv(svp, &sv); return (B_TRUE); } +/* + * Parse N (<num><name>{num})+[<num>h<hex digits>]E + */ static boolean_t rustdem_parse_name(rustdem_state_t *st, strview_t *svp) { @@ -323,14 +341,28 @@ rustdem_parse_name(rustdem_state_t *st, strview_t *svp) sv_init_sv(&name, svp); - if (sv_remaining(&name) == 0) + DEMDEBUG("%s: name = '%.*s'", __func__, SV_PRINT(&name)); + + if (sv_remaining(&name) == 0) { + DEMDEBUG("%s: empty name", __func__); return (B_FALSE); + } + + if (!sv_consume_if_c(&name, 'N')) { + DEMDEBUG("%s: does not start with 'N'", __func__); + return (B_FALSE); + } while (sv_remaining(&name) > 0 && sv_peek(&name, 0) != 'E') { if (!rustdem_parse_name_segment(st, &name, first)) return (B_FALSE); first = B_FALSE; } + VERIFY(sv_consume_if_c(&name, 'E')); + + VERIFY3P(svp->sv_first, <=, name.sv_first); + DEMDEBUG("%s: consumed '%.*s'", __func__, + (int)(name.sv_first - svp->sv_first), svp->sv_first); sv_init_sv(svp, &name); return (B_TRUE); diff --git a/usr/src/test/util-tests/tests/demangle/rust.c b/usr/src/test/util-tests/tests/demangle/rust.c index 0b13c9db7e..db2fae28e4 100644 --- a/usr/src/test/util-tests/tests/demangle/rust.c +++ b/usr/src/test/util-tests/tests/demangle/rust.c @@ -27,6 +27,7 @@ */ /* * Copyright 2019, Joyent, Inc. 
+ * Copyright 2021 Jason King */ /* @@ -84,25 +85,25 @@ GROUP(demangle_osx, "<core::option::Option<T>>::unwrap::_MSG_FILE_LINE_COL::haf7cb8d5824ee659"), T("__ZN4core5slice89_$LT$impl$u20$core..iter..traits..IntoIterator$u20$for$u20$$RF$$u27$a$u20$$u5b$T$u5d$$GT$9into_iter17h450e234d27262170E", "core::slice::<impl core::iter::traits::IntoIterator for &'a [T]>::into_iter::h450e234d27262170")); -/* END CSTYLED */ GROUP(demangle_elements_beginning_with_underscore, T("_ZN13_$LT$test$GT$E", "<test>"), T("_ZN28_$u7b$$u7b$closure$u7d$$u7d$E", "{{closure}}"), T("_ZN15__STATIC_FMTSTRE", "__STATIC_FMTSTR")); -/* BEGIN CSTYLED */ GROUP(demangle_trait_impls, T("_ZN71_$LT$Test$u20$$u2b$$u20$$u27$static$u20$as$u20$foo..Bar$LT$Test$GT$$GT$3barE", "<Test + 'static as foo::Bar<Test>>::bar")); -/* END CSTYLED */ GROUP(invalid_no_chop, T_ERR("_ZNfooE")); -/* BEGIN CSTYLED */ GROUP(handle_assoc_types, T("_ZN151_$LT$alloc..boxed..Box$LT$alloc..boxed..FnBox$LT$A$C$$u20$Output$u3d$R$GT$$u20$$u2b$$u20$$u27$a$GT$$u20$as$u20$core..ops..function..FnOnce$LT$A$GT$$GT$9call_once17h69e8f44b3723e1caE", "<alloc::boxed::Box<alloc::boxed::FnBox<A, Output=R> + 'a> as core::ops::function::FnOnce<A>>::call_once::h69e8f44b3723e1ca")); + +/* C++ mangled names that aren't valid rust names */ +GROUP(cplusplus_as_rust, T_ERR("_ZN7mozilla3dom13BrowserParent22RecvUpdateContentCacheERKNS_12ContentCacheE")); + /* END CSTYLED */ static rust_test_grp_t *rust_tests[] = { @@ -113,7 +114,8 @@ static rust_test_grp_t *rust_tests[] = { &demangle_elements_beginning_with_underscore, &demangle_trait_impls, &invalid_no_chop, - &handle_assoc_types + &handle_assoc_types, + &cplusplus_as_rust, }; static const size_t n_rust_tests = ARRAY_SIZE(rust_tests); diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c index b0a442065c..5ae6821917 100644 --- a/usr/src/uts/common/os/logsubr.c +++ b/usr/src/uts/common/os/logsubr.c @@ -217,7 +217,7 @@ log_init(void) log_intrq = log_makeq(0, LOG_HIWAT, (void 
*)ipltospl(SPL8)); /* - * Create a queue to hold the most recent 8K of console messages. + * Create a queue to hold the most recent 64K of console messages. * Useful for debugging. Required by the "$<msgbuf" adb macro. */ log_recentq = log_makeq(0, LOG_RECENTSIZE, NULL); diff --git a/usr/src/uts/common/sys/log.h b/usr/src/uts/common/sys/log.h index 2d1fb46b73..934b4111fd 100644 --- a/usr/src/uts/common/sys/log.h +++ b/usr/src/uts/common/sys/log.h @@ -30,8 +30,6 @@ #ifndef _SYS_LOG_H #define _SYS_LOG_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/strlog.h> #include <sys/stream.h> @@ -55,7 +53,7 @@ extern "C" { #define LOG_HIWAT 1048576 /* threshold for tossing messages */ #define LOG_MAGIC 0xf00d4109U /* "food for log" - unsent msg magic */ -#define LOG_RECENTSIZE 8192 /* queue of most recent messages */ +#define LOG_RECENTSIZE 65536 /* queue of most recent messages */ #define LOG_MINFREE 4096 /* message cache low water mark */ #define LOG_MAXFREE 8192 /* message cache high water mark */ |