summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn Hodge <tpg@ucc.asn.au>2019-08-10 18:56:12 +0800
committerJohn Hodge <tpg@ucc.asn.au>2019-08-10 18:56:12 +0800
commite4d2fa3a40bd804da8caa01e731489ce8eea5a64 (patch)
tree248304eafadd066b46ae8886dd1f6ac187567694 /src
parent5467b92813e62a7efbce87c5082c96c6fe155de7 (diff)
downloadmrust-e4d2fa3a40bd804da8caa01e731489ce8eea5a64.tar.gz
Trans - Alternative mangling scheme with no '$' characters
Diffstat (limited to 'src')
-rw-r--r--src/trans/mangling_v2.cpp286
1 files changed, 286 insertions, 0 deletions
diff --git a/src/trans/mangling_v2.cpp b/src/trans/mangling_v2.cpp
new file mode 100644
index 00000000..887c689e
--- /dev/null
+++ b/src/trans/mangling_v2.cpp
@@ -0,0 +1,286 @@
+/*
+ * MRustC - Rust Compiler
+ * - By John Hodge (Mutabah/thePowersGang)
+ *
+ * trans/mangling_v2.cpp
+ * - Name mangling (encoding of rust paths into symbols)
+ */
+#include <debug.hpp>
+#include <string_view.hpp>
+#include <hir/hir.hpp> // ABI_RUST
+#include <hir/type.hpp>
+
+class Mangler
+{
+ ::std::ostream& m_os;
+public:
+ Mangler(::std::ostream& os):
+ m_os(os)
+ {
+ }
+
+ // Item names:
+ // - These can have a single '#' in them (either leading or in the middle)
+ // TODO: Some other invalid characters can appear:
+ // - '-' (crate names)
+ void fmt_name(const RcString& s)
+ {
+ const char* hash_pos = nullptr;
+ // - Search the string for the '#' character
+ for(const auto* p = s.c_str(); *p; p++)
+ {
+ if( p == s.c_str() ) {
+ ASSERT_BUG(Span(), !isdigit(*p), "Leading digit not valid in '" << s << "'");
+ }
+
+ if( isalnum(*p) ) {
+ }
+ else if( *p == '_' ) {
+ }
+ else if( *p == '#' || *p == '-' ) { // HACK: Treat '-' and '#' as the same in encoding
+ // Multiple hash characters? abort/error
+ ASSERT_BUG(Span(), hash_pos == nullptr, "Multiple '#' characters in '" << s << "'");
+ hash_pos = p;
+ }
+ else {
+ BUG(Span(), "Encounteded invalid character '" << *p << "' in '" << s << "'");
+ }
+ }
+
+ // If there's a hash, then prefix with a letter indicating its location?
+ // - Using a 3 character overhead currently (but a letter could work really well)
+ if( hash_pos != nullptr )
+ {
+ auto pre_hash_len = static_cast<int>(hash_pos - s.c_str());
+#if 0
+ assert(pre_hash_len < 26);
+ // <posletter> <full_len> <body1> <body2>
+ m_os << 'a' + pre_hash_len;
+ m_os << s.size() - 1;
+ m_os << ::stdx::string_view(s.c_str(), s.c_str() + pre_hash_len);
+ m_os << hash_pos + 1;;
+#else
+ // 'h' <len1> <body1> <len2> <body2>
+ m_os << "h";
+ m_os << pre_hash_len;
+ m_os << ::stdx::string_view(s.c_str(), s.c_str() + pre_hash_len);
+ m_os << s.size() - pre_hash_len - 1;
+ m_os << hash_pos + 1;;
+#endif
+ }
+ else
+ {
+ m_os << s.size();
+ m_os << s;
+ }
+ }
+ // SimplePath : <ncomp> 'c' [<RcString> ...]
+ void fmt_simple_path(const ::HIR::SimplePath& sp)
+ {
+ m_os << sp.m_components.size();
+ m_os << "c"; // Needed to separate the component count from the crate name
+ this->fmt_name(sp.m_crate_name);
+ for(const auto& c : sp.m_components)
+ {
+ this->fmt_name(c);
+ }
+ }
+
+ // PathParams : <ntys> 'g' [<TypeRef> ...]
+ void fmt_path_params(const ::HIR::PathParams& pp)
+ {
+ // Type Parameter count
+ m_os << pp.m_types.size();
+ m_os << "g";
+ for(const auto& ty : pp.m_types)
+ {
+ fmt_type(ty);
+ }
+ }
+ // GenericPath : <SimplePath> <PathParams>
+ void fmt_generic_path(const ::HIR::GenericPath& gp)
+ {
+ this->fmt_simple_path(gp.m_path);
+ this->fmt_path_params(gp.m_params);
+ }
+
+ void fmt_path(const ::HIR::Path& p)
+ {
+ // Path type
+ // - Generic: starts with `G`
+ // - Inherent: Starts with `I`
+ // - Trait: Starts with `T`
+ TU_MATCH_HDRA( (p.m_data), {)
+ TU_ARMA(Generic, e) {
+ m_os << "G";
+ this->fmt_generic_path(e);
+ }
+ TU_ARMA(UfcsInherent, e) {
+ m_os << "I";
+ this->fmt_type(*e.type);
+ this->fmt_name(e.item);
+ this->fmt_path_params(e.params);
+ }
+ TU_ARMA(UfcsKnown, e) {
+ m_os << "T";
+ this->fmt_type(*e.type);
+ this->fmt_generic_path(e.trait);
+ this->fmt_name(e.item);
+ this->fmt_path_params(e.params);
+ }
+ TU_ARMA(UfcsUnknown, e)
+ BUG(Span(), "Non-encodable path " << p);
+ }
+ }
+
+ // Type
+ // - Tuple: 'T' <nelem> [<TypeRef> ...]
+ // - Slice: 'S' <TypeRef>
+ // - Array: 'A' <size> <TypeRef>
+ // - Path: 'N' <Path>
+ // - TraitObject: 'D' <data:GenericPath> <nmarker> [markers: <GenericPath> ...] <naty> [<TypeRef> ...] TODO: Does this need to include the ATY name?
+ // - Borrow: 'B' ('s'|'u'|'o') <TypeRef>
+ // - RawPointer: 'P' ('s'|'u'|'o') <TypeRef>
+ // - Function: 'F' <abi:RcString> <nargs> [args: <TypeRef> ...] <ret:TypeRef>
+ // - Primitives::
+ // - u8 : 'C' 'a'
+ // - i8 : 'C' 'b'
+ // - u16 : 'C' 'c'
+ // - i16 : 'C' 'd'
+ // - u32 : 'C' 'e'
+ // - i32 : 'C' 'f'
+ // - u64 : 'C' 'g'
+ // - i64 : 'C' 'h'
+ // - u128: 'C' 'i'
+ // - i128: 'C' 'j'
+ // --
+ // - f32 : 'C' 'n'
+ // - f64 : 'C' 'o'
+ // --
+ // - usize: 'C' 'u'
+ // - isize: 'C' 'v'
+ // - bool : 'C' 'x'
+ // - char : 'C' 'x'
+ // - str : 'C' 'y'
+ // - Diverge: 'C' 'z'
+ void fmt_type(const ::HIR::TypeRef& ty)
+ {
+ TU_MATCH_HDRA( (ty.m_data), { )
+ case ::HIR::TypeRef::Data::TAG_Infer:
+ case ::HIR::TypeRef::Data::TAG_Generic:
+ case ::HIR::TypeRef::Data::TAG_ErasedType:
+ case ::HIR::TypeRef::Data::TAG_Closure:
+ BUG(Span(), "Non-encodable type " << ty);
+ TU_ARMA(Tuple, e) {
+ m_os << "T" << e.size();
+ for(const auto& sty : e)
+ this->fmt_type(sty);
+ }
+ TU_ARMA(Slice, e) {
+ m_os << "S";
+ this->fmt_type(*e.inner);
+ }
+ TU_ARMA(Array, e) {
+ m_os << "A" << e.size_val;
+ this->fmt_type(*e.inner);
+ }
+ TU_ARMA(Path, e) {
+ m_os << "N";
+ this->fmt_path(e.path);
+ }
+ TU_ARMA(TraitObject, e) {
+ // - TraitObject: 'D' <data:GenericPath> <naty> [<TypeRef> ...] <nmarker> [markers: <GenericPath> ...]
+ m_os << "D";
+ this->fmt_generic_path(e.m_trait.m_path);
+ m_os << e.m_trait.m_type_bounds.size();
+ // HACK: Assume all TraitObject types have the same aty set (std::map is deterministic)
+ for(const auto& aty : e.m_trait.m_type_bounds)
+ this->fmt_type(aty.second);
+ m_os << e.m_markers.size();
+ for(const auto& p : e.m_markers)
+ this->fmt_generic_path(p);
+ }
+ TU_ARMA(Function, e) {
+ // - Function: 'F' <abi:RcString> <nargs> [args: <TypeRef> ...] <ret:TypeRef>
+ m_os << "F";
+ m_os << (e.is_unsafe ? "u" : ""); // Optional allowed, next is a number
+ if( e.m_abi != ABI_RUST )
+ {
+ // TODO: Encode the ABI properly
+ //this->fmt_name(e.m_abi);
+ }
+ m_os << e.m_arg_types.size();
+ for(const auto& t : e.m_arg_types)
+ this->fmt_type(t);
+ this->fmt_type(*e.m_rettype);
+ }
+ TU_ARMA(Borrow, e) {
+ m_os << "B";
+ switch(e.type)
+ {
+ case ::HIR::BorrowType::Shared: m_os << "s"; break;
+ case ::HIR::BorrowType::Unique: m_os << "u"; break;
+ case ::HIR::BorrowType::Owned: m_os << "o"; break;
+ }
+ this->fmt_type(*e.inner);
+ }
+ TU_ARMA(Pointer, e) {
+ m_os << "P";
+ switch(e.type)
+ {
+ case ::HIR::BorrowType::Shared: m_os << "s"; break;
+ case ::HIR::BorrowType::Unique: m_os << "u"; break;
+ case ::HIR::BorrowType::Owned: m_os << "o"; break;
+ }
+ this->fmt_type(*e.inner);
+ }
+ TU_ARMA(Primitive, e) {
+ switch(e)
+ {
+ case ::HIR::CoreType::U8 : m_os << 'C' << 'a'; break;
+ case ::HIR::CoreType::I8 : m_os << 'C' << 'b'; break;
+ case ::HIR::CoreType::U16 : m_os << 'C' << 'c'; break;
+ case ::HIR::CoreType::I16 : m_os << 'C' << 'd'; break;
+ case ::HIR::CoreType::U32 : m_os << 'C' << 'e'; break;
+ case ::HIR::CoreType::I32 : m_os << 'C' << 'f'; break;
+ case ::HIR::CoreType::U64 : m_os << 'C' << 'g'; break;
+ case ::HIR::CoreType::I64 : m_os << 'C' << 'h'; break;
+ case ::HIR::CoreType::U128: m_os << 'C' << 'i'; break;
+ case ::HIR::CoreType::I128: m_os << 'C' << 'j'; break;
+ case ::HIR::CoreType::F32 : m_os << 'C' << 'n'; break;
+ case ::HIR::CoreType::F64 : m_os << 'C' << 'o'; break;
+ case ::HIR::CoreType::Usize: m_os << 'C' << 'u'; break;
+ case ::HIR::CoreType::Isize: m_os << 'C' << 'v'; break;
+ case ::HIR::CoreType::Bool: m_os << 'C' << 'w'; break;
+ case ::HIR::CoreType::Char: m_os << 'C' << 'x'; break;
+ case ::HIR::CoreType::Str : m_os << 'C' << 'y'; break;
+ }
+ }
+ TU_ARMA(Diverge, _e) {
+ m_os << 'C' << 'z';
+ }
+ }
+ }
+};
+
+::FmtLambda Trans_ManglePath(const ::HIR::Path& p)
+{
+ return FMT_CB(os, os << "ZRp"; Mangler(os).fmt_path(p));
+}
+::FmtLambda Trans_MangleSimplePath(const ::HIR::SimplePath& p)
+{
+ return FMT_CB(os, os << "ZRpG"; Mangler(os).fmt_simple_path(p); os << "0g";);
+}
+::FmtLambda Trans_MangleGenericPath(const ::HIR::GenericPath& p)
+{
+ return FMT_CB(os, os << "ZRpG"; Mangler(os).fmt_generic_path(p));
+}
+::FmtLambda Trans_MangleType(const ::HIR::TypeRef& p)
+{
+ return FMT_CB(os, os << "ZRt"; Mangler(os).fmt_type(p));
+}
+
+::FmtLambda Trans_Mangle(const ::HIR::SimplePath& path) { return Trans_MangleSimplePath(path); }
+::FmtLambda Trans_Mangle(const ::HIR::GenericPath& path) { return Trans_MangleGenericPath(path); }
+::FmtLambda Trans_Mangle(const ::HIR::Path& path) { return Trans_ManglePath(path); }
+::FmtLambda Trans_Mangle(const ::HIR::TypeRef& ty) { return Trans_MangleType(ty); }