diff options
| author | John Hodge <tpg@mutabah.net> | 2016-05-21 20:10:25 +0800 | 
|---|---|---|
| committer | John Hodge <tpg@mutabah.net> | 2016-05-21 20:10:25 +0800 | 
| commit | ad93bc7fda1988e49b4e3a0d85344d7e3dc7df10 (patch) | |
| tree | d4fee563f881b5a4ab90dfbb7b40be3486d01349 | |
| parent | be0892fb5cd1442013ee9e761e60294a374f4566 (diff) | |
| download | mrust-ad93bc7fda1988e49b4e3a0d85344d7e3dc7df10.tar.gz | |
Parse - Updates for better memory efficiency (hopefully)
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | src/ast/path.hpp | 10 | ||||
| -rw-r--r-- | src/ast/pattern.hpp | 2 | ||||
| -rw-r--r-- | src/expand/cfg.cpp | 2 | ||||
| -rw-r--r-- | src/expand/file_line.cpp | 2 | ||||
| -rw-r--r-- | src/expand/format_args.cpp | 5 | ||||
| -rw-r--r-- | src/expand/macro_rules.cpp | 2 | ||||
| -rw-r--r-- | src/include/rc_string.hpp | 104 | ||||
| -rw-r--r-- | src/include/serialise.hpp | 10 | ||||
| -rw-r--r-- | src/include/span.hpp | 11 | ||||
| -rw-r--r-- | src/include/tagged_union.hpp | 1 | ||||
| -rw-r--r-- | src/macros.cpp | 92 | ||||
| -rw-r--r-- | src/macros.hpp | 18 | ||||
| -rw-r--r-- | src/parse/common.hpp | 1 | ||||
| -rw-r--r-- | src/parse/expr.cpp | 8 | ||||
| -rw-r--r-- | src/parse/lex.cpp | 277 | ||||
| -rw-r--r-- | src/parse/lex.hpp | 94 | ||||
| -rw-r--r-- | src/parse/token.cpp | 313 | ||||
| -rw-r--r-- | src/parse/token.hpp | 121 | ||||
| -rw-r--r-- | src/parse/tokentree.hpp | 26 | 
20 files changed, 637 insertions, 464 deletions
| @@ -30,7 +30,7 @@ OBJ := main.o macros.o types.o serialise.o  OBJ += span.o  OBJ += ast/ast.o ast/crate.o ast/path.o ast/expr.o ast/pattern.o  OBJ += ast/provided_module.o -OBJ += parse/parseerror.o parse/lex.o +OBJ += parse/parseerror.o parse/lex.o parse/token.o  OBJ += parse/root.o parse/paths.o parse/types.o parse/expr.o parse/pattern.o parse/macro_rules.o  OBJ += expand/mod.o expand/macro_rules.o expand/cfg.o  OBJ +=  expand/format_args.o diff --git a/src/ast/path.hpp b/src/ast/path.hpp index 9a2f3579..7eb17aab 100644 --- a/src/ast/path.hpp +++ b/src/ast/path.hpp @@ -156,7 +156,7 @@ public:      // ABSOLUTE      Path(::std::string crate, ::std::vector<PathNode> nodes): -        m_class( Class::make_Absolute({crate: mv$(crate), nodes: mv$(nodes)}) ) +        m_class( Class::make_Absolute({ mv$(crate), mv$(nodes)}) )      {}      // UFCS @@ -170,23 +170,23 @@ public:          m_class( Class::make_Local({ mv$(name) }) )      {}      Path(::std::string name): -        m_class( Class::make_Local({name: mv$(name)}) ) +        m_class( Class::make_Local({ mv$(name) }) )      {}      // RELATIVE      struct TagRelative {};      Path(TagRelative, ::std::vector<PathNode> nodes): -        m_class( Class::make_Relative({nodes: mv$(nodes)}) ) +        m_class( Class::make_Relative({ mv$(nodes) }) )      {}      // SELF      struct TagSelf {};      Path(TagSelf, ::std::vector<PathNode> nodes): -        m_class( Class::make_Self({nodes: nodes}) ) +        m_class( Class::make_Self({ nodes }) )      {}      // SUPER      struct TagSuper {};      Path(TagSuper, unsigned int count, ::std::vector<PathNode> nodes): -        m_class( Class::make_Super({count: count, nodes: mv$(nodes)}) ) +        m_class( Class::make_Super({ count, mv$(nodes) }) )      {}      //void set_crate(::std::string crate) { diff --git a/src/ast/pattern.hpp b/src/ast/pattern.hpp index a115eaf9..199d6692 100644 --- a/src/ast/pattern.hpp +++ b/src/ast/pattern.hpp @@ -141,7 +141,7 @@ public:      static 
::std::unique_ptr<Pattern> from_deserialiser(Deserialiser& s) {          ::std::unique_ptr<Pattern> ret(new Pattern);          s.item(*ret); -        return ::std::move(ret); +        return ret;      }  }; diff --git a/src/expand/cfg.cpp b/src/expand/cfg.cpp index 1372d982..345c46ab 100644 --- a/src/expand/cfg.cpp +++ b/src/expand/cfg.cpp @@ -88,7 +88,7 @@ class CCfgExpander:              ERROR(sp, E0000, "cfg! doesn't take an identifier");          } -        auto lex = TTStreamO(tt); +        auto lex = TTStream(tt);          auto attrs = Parse_MetaItem(lex);          DEBUG("cfg!() - " << attrs); diff --git a/src/expand/file_line.cpp b/src/expand/file_line.cpp index a4117d70..2532613f 100644 --- a/src/expand/file_line.cpp +++ b/src/expand/file_line.cpp @@ -10,7 +10,7 @@ class CExpanderFile:      ::std::unique_ptr<TokenStream> expand(Span sp, const AST::Crate& crate, const ::std::string& ident, const TokenTree& tt, AST::Module& mod) override      { -        return box$( TTStreamO(TokenTree(Token(TOK_STRING, sp.filename))) ); +        return box$( TTStreamO(TokenTree(Token(TOK_STRING, sp.filename.c_str()))) );      }  }; diff --git a/src/expand/format_args.cpp b/src/expand/format_args.cpp index d12096a7..6a3665ec 100644 --- a/src/expand/format_args.cpp +++ b/src/expand/format_args.cpp @@ -45,7 +45,10 @@ class CFormatArgsExpander:          }          // TODO: Expand format_args! 
-        return box$( TTStreamO(TokenTree(::std::vector<TokenTree>{TokenTree(TOK_PAREN_OPEN), TokenTree(TOK_PAREN_CLOSE)})) ); +        ::std::vector<TokenTree>	toks; +	toks.push_back( TokenTree(TOK_PAREN_OPEN) ); +	toks.push_back( TokenTree(TOK_PAREN_CLOSE) ); +        return box$( TTStreamO(TokenTree(mv$(toks))) );      }  }; diff --git a/src/expand/macro_rules.cpp b/src/expand/macro_rules.cpp index 562b1468..968bf5f9 100644 --- a/src/expand/macro_rules.cpp +++ b/src/expand/macro_rules.cpp @@ -19,7 +19,7 @@ class CMacroRulesExpander:          auto mac = Parse_MacroRules(lex);          mod.add_macro( false, ident, mac ); -        return box$( TTStreamO(TokenTree()) ); +        return ::std::unique_ptr<TokenStream>( new TTStreamO(TokenTree()) );      }  }; diff --git a/src/include/rc_string.hpp b/src/include/rc_string.hpp new file mode 100644 index 00000000..6091d748 --- /dev/null +++ b/src/include/rc_string.hpp @@ -0,0 +1,104 @@ +/* + */ +#pragma once + +#include <cstring> +#include <ostream> + +class RcString +{ +    unsigned int*   m_ptr; +    unsigned int    m_len; +public: +    RcString(): +        m_ptr(nullptr), +        m_len(0) +    {} +    RcString(const char* s, unsigned int len): +        m_ptr( new unsigned int[1 + (len+1 + sizeof(unsigned int)-1) / sizeof(unsigned int)] ), +        m_len(len) +    { +        *m_ptr = 1; +        char* data_mut = reinterpret_cast<char*>(m_ptr + 1); +        for(unsigned int j = 0; j < len; j ++ ) +            data_mut[j] = s[j]; +        data_mut[len] = '\0'; +    } +    RcString(const char* s): +        RcString(s, ::std::strlen(s)) +    { +    } +    RcString(const ::std::string& s): +        RcString(s.data(), s.size()) +    { +    } +     +    RcString(const RcString& x): +        m_ptr(x.m_ptr), +        m_len(x.m_len) +    { +        *m_ptr += 1; +    } +    RcString(RcString&& x): +        m_ptr(x.m_ptr), +        m_len(x.m_len) +    { +        x.m_ptr = nullptr; +        x.m_len = 0; +    } +     +    
~RcString() +    { +        if(m_ptr) +        { +            *m_ptr -= 1; +            if( *m_ptr == 0 ) +            { +                delete[] m_ptr; +                m_ptr = nullptr; +            } +        } +    } +     +    RcString& operator=(const RcString& x) +    { +        if( &x != this ) +        { +            this->~RcString(); +            m_ptr = x.m_ptr; +            m_len = x.m_len; +            *m_ptr += 1; +        } +        return *this; +    } +    RcString& operator=(RcString&& x) +    { +        if( &x != this ) +        { +            this->~RcString(); +            m_ptr = x.m_ptr; +            m_len = x.m_len; +            x.m_ptr = nullptr; +            x.m_len = 0; +        } +        return *this; +    } +     +     +    const char* c_str() const { +        return reinterpret_cast<const char*>(m_ptr + 1); +    } +    bool operator==(const char* s) const { +        if( m_len == 0 ) +            return *s == '\0'; +        auto m = this->c_str(); +        do { +            if( *m != *s ) +                return false; +        } while( *m++ != '\0' && *s++ != '\0' ); +        return true; +    } +    friend ::std::ostream& operator<<(::std::ostream& os, const RcString& x) { +        return os << x.c_str(); +    } +}; diff --git a/src/include/serialise.hpp b/src/include/serialise.hpp index 5556a510..9af379e7 100644 --- a/src/include/serialise.hpp +++ b/src/include/serialise.hpp @@ -26,13 +26,13 @@ class Deserialiser;  class DeserialiseFailure:      public ::std::runtime_error  { -    const char *m_fcn; -    const char *m_message; +    //const char *m_fcn; +    //const char *m_message;  public:      DeserialiseFailure(const char *fcn, const char *message): -        ::std::runtime_error("Deserialise failure"), -        m_fcn(fcn), -        m_message(message) +        ::std::runtime_error("Deserialise failure")//, +        //m_fcn(fcn), +        //m_message(message)      {}  }; diff --git a/src/include/span.hpp b/src/include/span.hpp 
index e5dd8ccc..f57fa8a9 100644 --- a/src/include/span.hpp +++ b/src/include/span.hpp @@ -5,9 +5,10 @@   * include/span.hpp   * - Spans and error handling   */ -  #pragma once +#include <rc_string.hpp> +  enum ErrorType  {      E0000, @@ -21,22 +22,22 @@ class Position;  struct ProtoSpan  { -    ::std::string   filename; +    RcString   filename;      unsigned int start_line;      unsigned int start_ofs;  };  struct Span  { -    ::std::string   filename; +    RcString    filename;      unsigned int start_line;      unsigned int start_ofs;      unsigned int end_line;      unsigned int end_ofs; -    Span(::std::string filename, unsigned int start_line, unsigned int start_ofs,  unsigned int end_line, unsigned int end_ofs): -        filename(filename), +    Span(RcString filename, unsigned int start_line, unsigned int start_ofs,  unsigned int end_line, unsigned int end_ofs): +        filename( ::std::move(filename) ),          start_line(start_line),          start_ofs(start_ofs),          end_line(end_line), diff --git a/src/include/tagged_union.hpp b/src/include/tagged_union.hpp index 9f1becc5..2c8d4f31 100644 --- a/src/include/tagged_union.hpp +++ b/src/include/tagged_union.hpp @@ -9,6 +9,7 @@  #define INCLUDED_TAGGED_UNION_H_  //#include "cpp_unpack.h" +#include <cassert>  #define TU_CASE_ITEM(src, mod, var, name)	mod auto& name = src.as_##var(); (void)&name;  #define TU_CASE_BODY(class,var, ...)	case class::var: { __VA_ARGS__ } break; diff --git a/src/macros.cpp b/src/macros.cpp index 12f6df05..5f4216fd 100644 --- a/src/macros.cpp +++ b/src/macros.cpp @@ -17,25 +17,32 @@ class ParameterMappings      // MultiMap (layer, name) -> TokenTree
      // - Multiple values are only allowed for layer>0
      typedef ::std::pair<unsigned, unsigned> t_mapping_block;
 -    struct Mapping {
 +    struct Mapping
 +    {
          t_mapping_block block;
          ::std::vector<TokenTree>    entries;
          friend ::std::ostream& operator<<(::std::ostream& os, const Mapping& x) {
 -            os << "(" << x.block.first << ", " << x.block.second << "): '"<<x.entries<<"')";
 +            os << "(" << x.block.first << ", " << x.block.second << "): '" << x.entries << "')";
              return os;
          }
      };
      struct less_cstr {
          bool operator()(const char *a, const char *b) const { return ::std::strcmp(a,b) < 0; }
      };
 -    ::std::map<const char *, Mapping, less_cstr> m_inner;
 -    unsigned m_layer_count = 0;
 +    
 +    typedef ::std::map<const char *, Mapping, less_cstr>    t_inner_map;
 +    
 +    t_inner_map m_inner;
 +    unsigned m_layer_count;
  public:
 -    ParameterMappings()
 +    ParameterMappings():
 +        m_inner(),
 +        m_layer_count(0)
      {
      }
 +    ParameterMappings(ParameterMappings&&) = default;
 -    const ::std::map<const char *, Mapping, less_cstr>& inner_() const {
 +    const t_inner_map& inner_() const {
          return m_inner;
      }
 @@ -50,7 +57,7 @@ public:          if(v.first->second.block.first != layer) {
              throw ParseError::Generic(FMT("matching '"<<name<<"' at multiple layers"));
          }
 -        v.first->second.entries.push_back( data );
 +        v.first->second.entries.push_back( mv$(data) );
      }
      const TokenTree* get(unsigned int layer, unsigned int iteration, const char *name, unsigned int idx) const
 @@ -130,16 +137,17 @@ private:      ::std::unique_ptr<TTStream>   m_ttstream;
  public:
 -    MacroExpander(const MacroExpander& x):
 -        m_macro_name( x.m_macro_name ),
 -        m_crate_name(x.m_crate_name),
 -        m_root_contents(x.m_root_contents),
 -        m_mappings(x.m_mappings),
 -        m_offsets({ {0,0,0} }),
 -        m_cur_ents(&m_root_contents)
 -    {
 -        prep_counts();
 -    }
 +    MacroExpander(const MacroExpander& x) = delete;
 +    //MacroExpander(const MacroExpander& x):
 +    //    m_macro_name( x.m_macro_name ),
 +    //    m_crate_name(x.m_crate_name),
 +    //    m_root_contents(x.m_root_contents),
 +    //    m_mappings(x.m_mappings),
 +    //    m_offsets({ {0,0,0} }),
 +    //    m_cur_ents(&m_root_contents)
 +    //{
 +    //    prep_counts();
 +    //}
      MacroExpander(::std::string macro_name, const ::std::vector<MacroRuleEnt>& contents, ParameterMappings mappings, ::std::string crate_name):
          m_macro_name( mv$(macro_name) ),
          m_crate_name( mv$(crate_name) ),
 @@ -195,11 +203,11 @@ bool Macro_TryPattern(TTStream& lex, const MacroPatEnt& pat)      case MacroPatEnt::PAT_TOKEN: {
          Token tok = lex.getToken();
          if( tok != pat.tok ) {
 -            lex.putback(tok);
 +            PUTBACK(tok, lex);
              return false;
          }
          else {
 -            lex.putback(tok);
 +            PUTBACK(tok, lex);
              return true;
          }
          }
 @@ -294,7 +302,7 @@ bool Macro_HandlePattern(TTStream& lex, const MacroPatEnt& pat, unsigned int lay              {
                  if( GET_TOK(tok, lex) != pat.tok.type() )
                  {
 -                    lex.putback(tok);
 +                    lex.putback( mv$(tok) );
                      break;
                  }
              }
 @@ -307,7 +315,7 @@ bool Macro_HandlePattern(TTStream& lex, const MacroPatEnt& pat, unsigned int lay          if( GET_TOK(tok, lex) == TOK_EOF )
              throw ParseError::Unexpected(lex, TOK_EOF);
          else
 -            lex.putback(tok);
 +            PUTBACK(tok, lex);
          val = Parse_TT(lex, false);
          if(0)
      case MacroPatEnt::PAT_PAT:
 @@ -345,7 +353,7 @@ bool Macro_HandlePattern(TTStream& lex, const MacroPatEnt& pat, unsigned int lay      return true;
  }
 -::std::unique_ptr<TokenStream> Macro_InvokeRules(const char *name, const MacroRules& rules, TokenTree input)
 +::std::unique_ptr<TokenStream> Macro_InvokeRules(const char *name, const MacroRules& rules, const TokenTree& input)
  {
      TRACE_FUNCTION;
 @@ -387,7 +395,7 @@ bool Macro_HandlePattern(TTStream& lex, const MacroPatEnt& pat, unsigned int lay              }
              DEBUG("TODO: Obtain crate name correctly");
 -            TokenStream* ret_ptr = new MacroExpander(name, rule.m_contents, bound_tts, "");
 +            TokenStream* ret_ptr = new MacroExpander(name, rule.m_contents, mv$(bound_tts), "");
              // HACK! Disable nested macro expansion
              //ret_ptr->parse_state().no_expand_macros = true;
 @@ -403,44 +411,6 @@ bool Macro_HandlePattern(TTStream& lex, const MacroPatEnt& pat, unsigned int lay      throw ParseError::Todo(/*source_span, */"Error when macro fails to match");
  }
 -#if 0
 -::std::unique_ptr<TokenStream> Macro_Invoke(const TokenStream& olex, const ::std::string& name, TokenTree input)
 -{
 -    DEBUG("Invoke " << name << " from " << olex.getPosition());
 -    // XXX: EVIL HACK! - This should be removed when std loading is implemented
 -   
 -    if( name == "concat_idents" ) {
 -        return Macro_Invoke_Concat(input, TOK_IDENT);
 -    }
 -    else if( name == "concat_strings" || name == "concat" ) {
 -        return Macro_Invoke_Concat(input, TOK_STRING);
 -    }
 -    else if( name == "cfg" ) {
 -        return Macro_Invoke_Cfg(input);
 -    }
 -    else if( name == "stringify" ) {
 -        return ::std::unique_ptr<TokenStream>( (TokenStream*)new MacroStringify(input) );
 -    }
 -    else if( name == "file" ) {
 -        const ::std::string& pos = olex.getPosition().filename;
 -        return ::std::unique_ptr<TokenStream>( (TokenStream*)new MacroToken(Token(TOK_STRING, pos)) );
 -    }
 -    else if( name == "line" ) {
 -        auto pos = olex.getPosition().line;
 -        return ::std::unique_ptr<TokenStream>( (TokenStream*)new MacroToken(Token((uint64_t)pos, CORETYPE_U32)) );
 -    }
 -     
 -    // Look for macro in builtins
 -    t_macro_regs::iterator macro_reg = g_macro_registrations.find(name);
 -    if( macro_reg != g_macro_registrations.end() )
 -    {
 -        return Macro_InvokeRules(olex.getPosition(), macro_reg->first.c_str(), macro_reg->second, input);
 -    }
 -
 -    throw ParseError::Generic(olex, FMT("Macro '" << name << "' was not found") );
 -}
 -#endif
 -
  Position MacroExpander::getPosition() const
  {
 diff --git a/src/macros.hpp b/src/macros.hpp index 606d3278..006f384f 100644 --- a/src/macros.hpp +++ b/src/macros.hpp @@ -29,17 +29,17 @@ public:      {
      }
      MacroRuleEnt(Token tok):
 -        tok(tok),
 +        tok( mv$(tok) ),
          name("")
      {
      }
      MacroRuleEnt(::std::string name):
 -        name(name)
 +        name( mv$(name) )
      {
      }
      MacroRuleEnt(Token tok, ::std::vector<MacroRuleEnt> subpats):
 -        tok(tok),
 -        subpats(subpats)
 +        tok( mv$(tok) ),
 +        subpats( mv$(subpats) )
      {
      }
 @@ -75,13 +75,13 @@ struct MacroPatEnt:      {
      }
      MacroPatEnt(Token tok):
 -        tok(tok),
 +        tok( mv$(tok) ),
          type(PAT_TOKEN)
      {
      }
      MacroPatEnt(::std::string name, Type type):
 -        name(name),
 +        name( mv$(name) ),
          tok(),
          type(type)
      {
 @@ -89,7 +89,7 @@ struct MacroPatEnt:      MacroPatEnt(Token sep, bool need_once, ::std::vector<MacroPatEnt> ents):
          name( need_once ? "+" : "*" ),
 -        tok(sep),
 +        tok( mv$(sep) ),
          subpats( move(ents) ),
          type(PAT_LOOP)
      {
 @@ -130,7 +130,7 @@ public:      SERIALISABLE_PROTOTYPES();
  };
 -extern ::std::unique_ptr<TokenStream>   Macro_InvokeRules(const char *name, const MacroRules& rules, TokenTree input);
 -extern ::std::unique_ptr<TokenStream>   Macro_Invoke(const TokenStream& lex, const ::std::string& name, TokenTree input);
 +extern ::std::unique_ptr<TokenStream>   Macro_InvokeRules(const char *name, const MacroRules& rules, const TokenTree& input);
 +extern ::std::unique_ptr<TokenStream>   Macro_Invoke(const TokenStream& lex, const ::std::string& name, const TokenTree& input);
  #endif // MACROS_HPP_INCLUDED
 diff --git a/src/parse/common.hpp b/src/parse/common.hpp index b1927512..9ba8b8cd 100644 --- a/src/parse/common.hpp +++ b/src/parse/common.hpp @@ -11,6 +11,7 @@  #include "../ast/ast.hpp"
  #define GET_TOK(tok, lex) ((tok = lex.getToken()).type())
 +#define PUTBACK(tok, lex) lex.putback( ::std::move(tok) )
  #define LOOK_AHEAD(lex) (lex.lookahead(0))
  #define GET_CHECK_TOK(tok, lex, exp) do {\
      if((tok = lex.getToken()).type() != exp) { \
 diff --git a/src/parse/expr.cpp b/src/parse/expr.cpp index 3c12941d..e047c408 100644 --- a/src/parse/expr.cpp +++ b/src/parse/expr.cpp @@ -804,7 +804,7 @@ ExprNodeP Parse_ExprFC(TokenStream& lex)                      break;
                  default:
                      val = NEWNODE( AST::ExprNode_Field, ::std::move(val), ::std::string(path.name()) );
 -                    lex.putback(tok);
 +                    PUTBACK(tok, lex);
                      break;
                  }
                  break; }
 @@ -812,11 +812,11 @@ ExprNodeP Parse_ExprFC(TokenStream& lex)                  val = NEWNODE( AST::ExprNode_Field, ::std::move(val), FMT(tok.intval()) );
                  break;
              default:
 -                throw ParseError::Unexpected(lex, tok);
 +                throw ParseError::Unexpected(lex, mv$(tok));
              }
              break;
          default:
 -            lex.putback(tok);
 +            PUTBACK(tok, lex);
              return val;
          }
      }
 @@ -1178,7 +1178,7 @@ TokenTree Parse_TT(TokenStream& lex, bool unwrapped)      }
      if( !unwrapped )
          items.push_back(tok);
 -    return TokenTree(items);
 +    return TokenTree(mv$(items));
  }
  /// A wrapping lexer that 
 diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp index b0fbed9a..dfa8b489 100644 --- a/src/parse/lex.cpp +++ b/src/parse/lex.cpp @@ -20,6 +20,8 @@  #include <typeinfo>  #include <algorithm>	// std::count +const bool DEBUG_PRINT_TOKENS = false; +  Lexer::Lexer(::std::string filename):      m_path(filename),      m_line(1), @@ -927,270 +929,6 @@ void Lexer::ungetc()      m_last_char_valid = true;  } -Token::Token(): -    m_type(TOK_NULL), -    m_str("") -{ -} -Token::Token(enum eTokenType type): -    m_type(type), -    m_str("") -{ -} -Token::Token(enum eTokenType type, ::std::string str): -    m_type(type), -    m_str(str) -{ -} -Token::Token(uint64_t val, enum eCoreType datatype): -    m_type(TOK_INTEGER), -    m_datatype(datatype), -    m_intval(val) -{ -} -Token::Token(double val, enum eCoreType datatype): -    m_type(TOK_FLOAT), -    m_datatype(datatype), -    m_floatval(val) -{ -} - -const char* Token::typestr(enum eTokenType type) -{ -    switch(type) -    { -    #define _(t)    case t: return #t; -    #include "eTokenType.enum.h" -    #undef _ -    } -    return ">>BUGCHECK: BADTOK<<"; -} - -enum eTokenType Token::typefromstr(const ::std::string& s) -{ -    if(s == "") -        return TOK_NULL; -    #define _(t)    else if( s == #t ) return t; -    #include "eTokenType.enum.h" -    #undef _ -    else -        return TOK_NULL; -} - -struct EscapedString { -    const ::std::string& s; -    EscapedString(const ::std::string& s): s(s) {} -     -    friend ::std::ostream& operator<<(::std::ostream& os, const EscapedString& x) { -        for(auto b : x.s) { -            switch(b) -            { -            case '"': -                os << "\\\""; -                break; -            case '\\': -                os << "\\\\"; -                break; -            case '\n': -                os << "\\n"; -                break; -            default: -                if( ' ' <= b && b < 0x7F ) -                    os << b; -                else -                    
os << "\\u{" << ::std::hex << (unsigned int)b << "}"; -                break; -            } -        } -        return os; -    } -}; - -::std::string Token::to_str() const -{ -    switch(m_type) -    { -    case TOK_NULL:  return "/*null*/";  -    case TOK_EOF:   return "/*eof*/"; - -    case TOK_NEWLINE:    return "\n"; -    case TOK_WHITESPACE: return " "; -    case TOK_COMMENT:    return "/*" + m_str + "*/"; -    // Value tokens -    case TOK_IDENT:     return m_str; -    case TOK_MACRO:     return m_str + "!"; -    case TOK_LIFETIME:  return "'" + m_str; -    case TOK_INTEGER:   return FMT(m_intval);    // TODO: suffix for type -    case TOK_CHAR:      return FMT("'\\u{"<< ::std::hex << m_intval << "}"); -    case TOK_FLOAT:     return FMT(m_floatval); -    case TOK_STRING:    return FMT("\"" << EscapedString(m_str) << "\""); -    case TOK_BYTESTRING:return FMT("b\"" << m_str << "\""); -    case TOK_CATTR_OPEN:return "#!["; -    case TOK_ATTR_OPEN: return "#["; -    case TOK_UNDERSCORE:return "_"; -    // Symbols -    case TOK_PAREN_OPEN:    return "("; -    case TOK_PAREN_CLOSE:   return ")"; -    case TOK_BRACE_OPEN:    return "{"; -    case TOK_BRACE_CLOSE:   return "}"; -    case TOK_LT:    return "<"; -    case TOK_GT:    return ">"; -    case TOK_SQUARE_OPEN:   return "["; -    case TOK_SQUARE_CLOSE:  return "]"; -    case TOK_COMMA:     return ","; -    case TOK_SEMICOLON: return ";"; -    case TOK_COLON:     return ":"; -    case TOK_DOUBLE_COLON:  return ":"; -    case TOK_STAR:  return "*"; -    case TOK_AMP:   return "&"; -    case TOK_PIPE:  return "|"; - -    case TOK_FATARROW:  return "=>";       // => -    case TOK_THINARROW: return "->";      // -> - -    case TOK_PLUS:  return "+"; -    case TOK_DASH:  return "-"; -    case TOK_EXCLAM:    return "!"; -    case TOK_PERCENT:   return "%"; -    case TOK_SLASH:     return "/"; - -    case TOK_DOT:   return "."; -    case TOK_DOUBLE_DOT:    return "..."; -    case TOK_TRIPLE_DOT:    return ".."; - 
-    case TOK_EQUAL: return "="; -    case TOK_PLUS_EQUAL:    return "+="; -    case TOK_DASH_EQUAL:    return "-"; -    case TOK_PERCENT_EQUAL: return "%="; -    case TOK_SLASH_EQUAL:   return "/="; -    case TOK_STAR_EQUAL:    return "*="; -    case TOK_AMP_EQUAL:     return "&="; -    case TOK_PIPE_EQUAL:    return "|="; - -    case TOK_DOUBLE_EQUAL:  return "=="; -    case TOK_EXCLAM_EQUAL:  return "!="; -    case TOK_GTE:    return ">="; -    case TOK_LTE:    return "<="; - -    case TOK_DOUBLE_AMP:    return "&&"; -    case TOK_DOUBLE_PIPE:   return "||"; -    case TOK_DOUBLE_LT:     return "<<"; -    case TOK_DOUBLE_GT:     return ">>"; -    case TOK_DOUBLE_LT_EQUAL:   return "<="; -    case TOK_DOUBLE_GT_EQUAL:   return ">="; - -    case TOK_DOLLAR:    return "$"; - -    case TOK_QMARK: return "?"; -    case TOK_AT:    return "@"; -    case TOK_TILDE:     return "~"; -    case TOK_BACKSLASH: return "\\"; -    case TOK_CARET:     return "^"; -    case TOK_CARET_EQUAL:   return "^="; -    case TOK_BACKTICK:  return "`"; - -    // Reserved Words -    case TOK_RWORD_PUB:     return "pub"; -    case TOK_RWORD_PRIV:    return "priv"; -    case TOK_RWORD_MUT:     return "mut"; -    case TOK_RWORD_CONST:   return "const"; -    case TOK_RWORD_STATIC:  return "static"; -    case TOK_RWORD_UNSAFE:  return "unsafe"; -    case TOK_RWORD_EXTERN:  return "extern"; - -    case TOK_RWORD_CRATE:   return "crate"; -    case TOK_RWORD_MOD:     return "mod"; -    case TOK_RWORD_STRUCT:  return "struct"; -    case TOK_RWORD_ENUM:    return "enum"; -    case TOK_RWORD_TRAIT:   return "trait"; -    case TOK_RWORD_FN:      return "fn"; -    case TOK_RWORD_USE:     return "use"; -    case TOK_RWORD_IMPL:    return "impl"; -    case TOK_RWORD_TYPE:    return "type"; - -    case TOK_RWORD_WHERE:   return "where"; -    case TOK_RWORD_AS:      return "as"; - -    case TOK_RWORD_LET:     return "let"; -    case TOK_RWORD_MATCH:   return "match"; -    case TOK_RWORD_IF:      return "if"; 
-    case TOK_RWORD_ELSE:    return "else"; -    case TOK_RWORD_LOOP:    return "loop"; -    case TOK_RWORD_WHILE:   return "while"; -    case TOK_RWORD_FOR:     return "for"; -    case TOK_RWORD_IN:      return "in"; -    case TOK_RWORD_DO:      return "do"; - -    case TOK_RWORD_CONTINUE:return "continue"; -    case TOK_RWORD_BREAK:   return "break"; -    case TOK_RWORD_RETURN:  return "return"; -    case TOK_RWORD_YIELD:   return "yeild"; -    case TOK_RWORD_BOX:     return "box"; -    case TOK_RWORD_REF:     return "ref"; - -    case TOK_RWORD_FALSE:   return "false"; -    case TOK_RWORD_TRUE:    return "true"; -    case TOK_RWORD_SELF:    return "self"; -    case TOK_RWORD_SUPER:   return "super"; - -    case TOK_RWORD_PROC:    return "proc"; -    case TOK_RWORD_MOVE:    return "move"; - -    case TOK_RWORD_ABSTRACT:return "abstract"; -    case TOK_RWORD_FINAL:   return "final"; -    case TOK_RWORD_PURE:    return "pure"; -    case TOK_RWORD_OVERRIDE:return "override"; -    case TOK_RWORD_VIRTUAL: return "virtual"; - -    case TOK_RWORD_ALIGNOF: return "alignof"; -    case TOK_RWORD_OFFSETOF:return "offsetof"; -    case TOK_RWORD_SIZEOF:  return "sizeof"; -    case TOK_RWORD_TYPEOF:  return "typeof"; - -    case TOK_RWORD_BE:      return "be"; -    case TOK_RWORD_UNSIZED: return "unsized"; -    } -    throw ParseError::BugCheck("Reached end of Token::to_str"); -} - -void operator%(Serialiser& s, enum eTokenType c) { -    s << Token::typestr(c); -} -void operator%(::Deserialiser& s, enum eTokenType& c) { -    ::std::string   n; -    s.item(n); -    c = Token::typefromstr(n); -} -SERIALISE_TYPE_S(Token, { -    s % m_type; -    s.item(m_str); -}); - -::std::ostream&  operator<<(::std::ostream& os, const Token& tok) -{ -    os << Token::typestr(tok.type()); -    switch(tok.type()) -    { -    case TOK_STRING: -    case TOK_BYTESTRING: -    case TOK_IDENT: -    case TOK_MACRO: -    case TOK_LIFETIME: -        os << "\"" << EscapedString(tok.str()) << "\""; -        
break; -    case TOK_INTEGER: -        os << ":" << tok.intval(); -        break; -    default: -        break; -    } -    return os; -} -::std::ostream& operator<<(::std::ostream& os, const Position& p) -{ -    return os << p.filename << ":" << p.line; -} -  TTStream::TTStream(const TokenTree& input_tt):      m_input_tt(input_tt)  { @@ -1293,14 +1031,13 @@ TokenStream::~TokenStream()  Token TokenStream::innerGetToken()  {      Token ret = this->realGetToken(); -    if( ret.get_pos().filename.size() == 0 ) +    if( ret.get_pos().filename == "" )          ret.set_pos( this->getPosition() );      //DEBUG("ret.get_pos() = " << ret.get_pos());      return ret;  }  Token TokenStream::getToken()  { -    const bool DEBUG_PRINT_TOKENS = false;      if( m_cache_valid )      {          m_cache_valid = false; @@ -1308,10 +1045,10 @@ Token TokenStream::getToken()      }      else if( m_lookahead.size() )      { -        Token ret = m_lookahead.front(); +        Token ret = mv$( m_lookahead.front() );          m_lookahead.erase(m_lookahead.begin());          if( DEBUG_PRINT_TOKENS ) { -            ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret << ::std::endl; +            ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret.get_pos() << "-" << ret << ::std::endl;          }          return ret;      } @@ -1319,7 +1056,7 @@ Token TokenStream::getToken()      {          Token ret = this->innerGetToken();          if( DEBUG_PRINT_TOKENS ) { -            ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret << ::std::endl; +            ::std::cout << "getToken[" << typeid(*this).name() << "] - " << ret.get_pos() << "-" << ret << ::std::endl;          }          return ret;      } @@ -1334,7 +1071,7 @@ void TokenStream::putback(Token tok)      else      {          m_cache_valid = true; -        m_cache = tok; +        m_cache = mv$(tok);      }  } diff --git a/src/parse/lex.hpp b/src/parse/lex.hpp index 703f26ee..ccd0ab8a 100644 --- 
a/src/parse/lex.hpp +++ b/src/parse/lex.hpp @@ -16,99 +16,7 @@  #include "../include/span.hpp" -enum eTokenType -{ -    #define _(t)    t, -    #include "eTokenType.enum.h" -    #undef _ -}; - -struct Position -{ -    ::std::string   filename; -    unsigned int    line; -    unsigned int    ofs; -     -    Position(): -        filename(""), -        line(0), -        ofs(0) -    {} -    Position(::std::string filename, unsigned int line, unsigned int ofs): -        filename(filename), -        line(line), -        ofs(ofs) -    { -    } -}; -extern ::std::ostream& operator<<(::std::ostream& os, const Position& p); - -class Token: -    public Serialisable -{ -    enum eTokenType m_type; -    ::std::string   m_str; -    enum eCoreType  m_datatype; -    union { -        uint64_t    m_intval; -        double  m_floatval; -    }; -    Position    m_pos; -public: -    Token(); -    Token(const Token& t) = default; -    Token& operator =(const Token& t) = default; -    Token(Token&& t): -        m_type(t.m_type), -        m_str( ::std::move(t.m_str) ), -        m_datatype( t.m_datatype ), -        m_intval( t.m_intval ), -        m_pos( ::std::move(t.m_pos) ) -    { -        t.m_type = TOK_NULL; -    } -    Token(enum eTokenType type); -    Token(enum eTokenType type, ::std::string str); -    Token(uint64_t val, enum eCoreType datatype); -    Token(double val, enum eCoreType datatype); - -    enum eTokenType type() const { return m_type; } -    const ::std::string& str() const { return m_str; } -    enum eCoreType  datatype() const { return m_datatype; } -    uint64_t intval() const { return m_intval; } -    double floatval() const { return m_floatval; } -    bool operator==(const Token& r) const { -        if(type() != r.type()) -            return false; -        switch(type()) -        { -        case TOK_STRING: -        case TOK_IDENT: -        case TOK_LIFETIME: -            return str() == r.str(); -        case TOK_INTEGER: -            return intval() == 
r.intval() && datatype() == r.datatype(); -        case TOK_FLOAT: -            return floatval() == r.floatval() && datatype() == r.datatype(); -        default: -            return true; -        } -    } -    bool operator!=(const Token& r) { return !(*this == r); } - -    ::std::string to_str() const; -     -    void set_pos(Position pos) { m_pos = pos; } -    const Position& get_pos() const { return m_pos; } -     -    static const char* typestr(enum eTokenType type); -    static eTokenType typefromstr(const ::std::string& s); -     -    SERIALISABLE_PROTOTYPES(); -}; - -extern ::std::ostream&  operator<<(::std::ostream& os, const Token& tok); - +#include "token.hpp"  namespace AST {      class Module; diff --git a/src/parse/token.cpp b/src/parse/token.cpp new file mode 100644 index 00000000..e61afcb9 --- /dev/null +++ b/src/parse/token.cpp @@ -0,0 +1,313 @@ +/* + * MRustC - Rust Compiler + * - By John Hodge (Mutabah/thePowersGang) + */ +#include "token.hpp" +#include <common.hpp> +#include <parse/parseerror.hpp> + +Token::Token(): +    m_type(TOK_NULL) +{ +} +Token::Token(enum eTokenType type): +    m_type(type) +{ +} +Token::Token(enum eTokenType type, ::std::string str): +    m_type(type), +    m_data(Data::make_String(mv$(str))) +{ +} +Token::Token(uint64_t val, enum eCoreType datatype): +    m_type(TOK_INTEGER), +    m_data( Data::make_Integer({datatype, val}) ) +{ +} +Token::Token(double val, enum eCoreType datatype): +    m_type(TOK_FLOAT), +    m_data( Data::make_Float({datatype, val}) ) +{ +} + +const char* Token::typestr(enum eTokenType type) +{ +    switch(type) +    { +    #define _(t)    case t: return #t; +    #include "eTokenType.enum.h" +    #undef _ +    } +    return ">>BUGCHECK: BADTOK<<"; +} + +enum eTokenType Token::typefromstr(const ::std::string& s) +{ +    if(s == "") +        return TOK_NULL; +    #define _(t)    else if( s == #t ) return t; +    #include "eTokenType.enum.h" +    #undef _ +    else +        return TOK_NULL; +} + 
+struct EscapedString {   // Stream adaptor: writes `s` with '"', '\\', '\n' and non-printable bytes escaped
+    const ::std::string& s;
+    EscapedString(const ::std::string& s): s(s) {}
+
+    friend ::std::ostream& operator<<(::std::ostream& os, const EscapedString& x) {
+        for(auto b : x.s) {
+            switch(b)
+            {
+            case '"':
+                os << "\\\"";
+                break;
+            case '\\':
+                os << "\\\\";
+                break;
+            case '\n':
+                os << "\\n";
+                break;
+            default:
+                if( ' ' <= b && b < 0x7F )
+                    os << b;
+                else   // go through unsigned char so bytes >= 0x80 are not sign-extended, and put the stream back to decimal
+                    os << "\\u{" << ::std::hex << (unsigned int)(unsigned char)b << ::std::dec << "}";
+                break;
+            }
+        }
+        return os;
+    }
+};
+
+::std::string Token::to_str() const   // Returns the source text this token stands for; throws ParseError::BugCheck for an unlisted type
+{
+    switch(m_type)
+    {
+    case TOK_NULL:  return "/*null*/";
+    case TOK_EOF:   return "/*eof*/";
+
+    case TOK_NEWLINE:    return "\n";
+    case TOK_WHITESPACE: return " ";
+    case TOK_COMMENT:    return "/*" + m_data.as_String() + "*/";
+    // Value tokens
+    case TOK_IDENT:     return m_data.as_String();
+    case TOK_MACRO:     return m_data.as_String() + "!";
+    case TOK_LIFETIME:  return "'" + m_data.as_String();
+    case TOK_INTEGER:   return FMT(m_data.as_Integer().m_intval);    // TODO: suffix for type
+    case TOK_CHAR:      return FMT("'\\u{"<< ::std::hex << m_data.as_Integer().m_intval << "}'");   // closing quote terminates the char literal
+    case TOK_FLOAT:     return FMT(m_data.as_Float().m_floatval);
+    case TOK_STRING:    return FMT("\"" << EscapedString(m_data.as_String()) << "\"");
+    case TOK_BYTESTRING:return FMT("b\"" << m_data.as_String() << "\"");
+    case TOK_CATTR_OPEN:return "#![";
+    case TOK_ATTR_OPEN: return "#[";
+    case TOK_UNDERSCORE:return "_";
+    // Symbols
+    case TOK_PAREN_OPEN:    return "(";
+    case TOK_PAREN_CLOSE:   return ")";
+    case TOK_BRACE_OPEN:    return "{";
+    case TOK_BRACE_CLOSE:   return "}";
+    case TOK_LT:    return "<";
+    case TOK_GT:    return ">";
+    case TOK_SQUARE_OPEN:   return "[";
+    case TOK_SQUARE_CLOSE:  return "]";
+    case TOK_COMMA:     return ",";
+    case TOK_SEMICOLON: return ";";
+    case TOK_COLON:     return ":";
+    case TOK_DOUBLE_COLON:  return "::";
+    case TOK_STAR:  return "*";
+    case TOK_AMP:   return "&";
+    case TOK_PIPE:  return "|";
+
+    case TOK_FATARROW:  return "=>";       // =>
+    case TOK_THINARROW: return "->";      // ->
+
+    case TOK_PLUS:  return "+";
+    case TOK_DASH:  return "-";
+    case TOK_EXCLAM:    return "!";
+    case TOK_PERCENT:   return "%";
+    case TOK_SLASH:     return "/";
+
+    case TOK_DOT:   return ".";
+    case TOK_DOUBLE_DOT:    return "..";
+    case TOK_TRIPLE_DOT:    return "...";
+
+    case TOK_EQUAL: return "=";
+    case TOK_PLUS_EQUAL:    return "+=";
+    case TOK_DASH_EQUAL:    return "-=";
+    case TOK_PERCENT_EQUAL: return "%=";
+    case TOK_SLASH_EQUAL:   return "/=";
+    case TOK_STAR_EQUAL:    return "*=";
+    case TOK_AMP_EQUAL:     return "&=";
+    case TOK_PIPE_EQUAL:    return "|=";
+
+    case TOK_DOUBLE_EQUAL:  return "==";
+    case TOK_EXCLAM_EQUAL:  return "!=";
+    case TOK_GTE:    return ">=";
+    case TOK_LTE:    return "<=";
+
+    case TOK_DOUBLE_AMP:    return "&&";
+    case TOK_DOUBLE_PIPE:   return "||";
+    case TOK_DOUBLE_LT:     return "<<";
+    case TOK_DOUBLE_GT:     return ">>";
+    case TOK_DOUBLE_LT_EQUAL:   return "<<=";
+    case TOK_DOUBLE_GT_EQUAL:   return ">>=";
+
+    case TOK_DOLLAR:    return "$";
+
+    case TOK_QMARK: return "?";
+    case TOK_AT:    return "@";
+    case TOK_TILDE:     return "~";
+    case TOK_BACKSLASH: return "\\";
+    case TOK_CARET:     return "^";
+    case TOK_CARET_EQUAL:   return "^=";
+    case TOK_BACKTICK:  return "`";
+
+    // Reserved Words
+    case TOK_RWORD_PUB:     return "pub";
+    case TOK_RWORD_PRIV:    return "priv";
+    case TOK_RWORD_MUT:     return "mut";
+    case TOK_RWORD_CONST:   return "const";
+    case TOK_RWORD_STATIC:  return "static";
+    case TOK_RWORD_UNSAFE:  return "unsafe";
+    case TOK_RWORD_EXTERN:  return "extern";
+
+    case TOK_RWORD_CRATE:   return "crate";
+    case TOK_RWORD_MOD:     return "mod";
+    case TOK_RWORD_STRUCT:  return "struct";
+    case TOK_RWORD_ENUM:    return "enum";
+    case TOK_RWORD_TRAIT:   return "trait";
+    case TOK_RWORD_FN:      return "fn";
+    case TOK_RWORD_USE:     return "use";
+    case TOK_RWORD_IMPL:    return "impl";
+    case TOK_RWORD_TYPE:    return "type";
+
+    case TOK_RWORD_WHERE:   return "where";
+    case TOK_RWORD_AS:      return "as";
+
+    case TOK_RWORD_LET:     return "let";
+    case TOK_RWORD_MATCH:   return "match";
+    case TOK_RWORD_IF:      return "if";
+    case TOK_RWORD_ELSE:    return "else";
+    case TOK_RWORD_LOOP:    return "loop";
+    case TOK_RWORD_WHILE:   return "while";
+    case TOK_RWORD_FOR:     return "for";
+    case TOK_RWORD_IN:      return "in";
+    case TOK_RWORD_DO:      return "do";
+
+    case TOK_RWORD_CONTINUE:return "continue";
+    case TOK_RWORD_BREAK:   return "break";
+    case TOK_RWORD_RETURN:  return "return";
+    case TOK_RWORD_YIELD:   return "yield";
+    case TOK_RWORD_BOX:     return "box";
+    case TOK_RWORD_REF:     return "ref";
+
+    case TOK_RWORD_FALSE:   return "false";
+    case TOK_RWORD_TRUE:    return "true";
+    case TOK_RWORD_SELF:    return "self";
+    case TOK_RWORD_SUPER:   return "super";
+
+    case TOK_RWORD_PROC:    return "proc";
+    case TOK_RWORD_MOVE:    return "move";
+
+    case TOK_RWORD_ABSTRACT:return "abstract";
+    case TOK_RWORD_FINAL:   return "final";
+    case TOK_RWORD_PURE:    return "pure";
+    case TOK_RWORD_OVERRIDE:return "override";
+    case TOK_RWORD_VIRTUAL: return "virtual";
+
+    case TOK_RWORD_ALIGNOF: return "alignof";
+    case TOK_RWORD_OFFSETOF:return "offsetof";
+    case TOK_RWORD_SIZEOF:  return "sizeof";
+    case TOK_RWORD_TYPEOF:  return "typeof";
+
+    case TOK_RWORD_BE:      return "be";
+    case TOK_RWORD_UNSIZED: return "unsized";
+    }
+    throw ParseError::BugCheck("Reached end of Token::to_str");
+}
+
+void operator%(::Serialiser& s, enum eTokenType c) {   // token types are serialised by enumerator name
+    s << Token::typestr(c);
+}
+void operator%(::Deserialiser& s, enum eTokenType& c) {   // inverse of the above; typefromstr yields TOK_NULL for an unknown name
+    ::std::string   n;
+    s.item(n);
+    c = Token::typefromstr(n);
+}
+SERIALISE_TYPE(Token::, "Token", {
+    s % m_type;
+    s << Token::Data::tag_to_str(m_data.tag());
+    TU_MATCH(Token::Data, (m_data), (e),
+    (None, ),
+    (String,
+        s << e;
+        ),
+    (Integer,
+        s % e.m_datatype;
+        s.item( e.m_intval );
+        ),
+    (Float,
+        s % e.m_datatype;
+        s.item( e.m_floatval );
+        )
+    )
+},{
+    s % m_type;
+    Token::Data::Tag    tag;
+    {
+        ::std::string   tag_str;
+        s.item( tag_str );
+        tag = Token::Data::tag_from_str(tag_str);
+    }
+    switch(tag)
+    {
+    case Token::Data::TAGDEAD:  break;
+    case Token::Data::TAG_None: break;
+    case Token::Data::TAG_String:{
+        ::std::string str;
+        s.item( str );
+        m_data = Token::Data::make_String(mv$(str));
+        break; }
+    case Token::Data::TAG_Integer:{
+        enum eCoreType  dt;
+        uint64_t    v;
+        s % dt;
+        s.item( v );
+        m_data = Token::Data::make_Integer({dt, v});
+        break; }
+    case Token::Data::TAG_Float:{
+        enum eCoreType  dt;
+        double   v;
+        s % dt;
+        s.item( v );
+        m_data = Token::Data::make_Float({dt, v});
+        break; }
+    }
+});
+
+::std::ostream&  operator<<(::std::ostream& os, const Token& tok)   // debug form: type name, then the payload of string-like and integer tokens
+{
+    os << Token::typestr(tok.type());
+    switch(tok.type())
+    {
+    case TOK_STRING:
+    case TOK_BYTESTRING:
+    case TOK_IDENT:
+    case TOK_MACRO:
+    case TOK_LIFETIME:
+        os << "\"" << EscapedString(tok.str()) << "\"";
+        break;
+    case TOK_INTEGER:
+        os << ":" << tok.intval();
+        break;
+    default:
+        break;
+    }
+    return os;
+}
+::std::ostream& operator<<(::std::ostream& os, const Position& p)   // prints "filename:line"; ofs is not shown
+{
+    return os << p.filename << ":" << p.line;
+}
diff --git a/src/parse/token.hpp b/src/parse/token.hpp
new file mode 100644
index 00000000..e2e1f16f
--- /dev/null
+++ b/src/parse/token.hpp
@@ -0,0 +1,121 @@
+/* Lexer token types: eTokenType, Position and Token
+ */
+#pragma once
+
+#include <rc_string.hpp>
+#include <tagged_union.hpp>
+#include <serialise.hpp>
+#include "../coretypes.hpp"
+
+enum eTokenType
+{
+    #define _(t)    t,
+    #include "eTokenType.enum.h"
+    #undef _
+};
+
+class Position
+{
+public:
+    RcString    filename;
+    unsigned int    line;
+    unsigned int    ofs;
+
+    Position():
+        filename(""),
+        line(0),
+        ofs(0)
+    {}
+    Position(::std::string filename, unsigned int line, unsigned int ofs):
+        filename(filename),
+        line(line),
+        ofs(ofs)
+    {
+    }
+};
+extern ::std::ostream& operator<<(::std::ostream& os, const Position& p);
+
+class Token:
+    public Serialisable
+{
+    TAGGED_UNION(Data, None,
+    (None, struct {}),
+    (String, ::std::string),
+    (Integer, struct {
+        enum eCoreType  m_datatype;
+        uint64_t    m_intval;
+        }),
+    (Float, struct {
+        enum eCoreType  m_datatype;
+        double  m_floatval;
+        })
+    );
+
+    enum eTokenType m_type;
+    Data    m_data;
+    Position    m_pos;
+public:
+    Token();
+    Token& operator=(Token&& t)
+    {
+        m_type = t.m_type;  t.m_type = TOK_NULL;   // the moved-from token is reset to TOK_NULL
+        m_data = ::std::move(t.m_data);
+        m_pos = ::std::move(t.m_pos);
+        return *this;
+    }
+    Token(Token&& t):
+        m_type(t.m_type),
+        m_data( ::std::move(t.m_data) ),
+        m_pos( ::std::move(t.m_pos) )
+    {
+        t.m_type = TOK_NULL;
+    }
+    Token(const Token& t):   // copies whichever Data variant t currently holds
+        m_type(t.m_type),
+        m_data( Data::make_None({}) ),
+        m_pos( t.m_pos )
+    {
+        assert( t.m_data.tag() != Data::TAGDEAD );
+        TU_MATCH(Data, (t.m_data), (e),
+        (None,  ),
+        (String,    m_data = Data::make_String(e); ),
+        (Integer,   m_data = Data::make_Integer(e);),
+        (Float, m_data = Data::make_Float(e);)
+        )
+    }
+
+    Token(enum eTokenType type);
+    Token(enum eTokenType type, ::std::string str);
+    Token(uint64_t val, enum eCoreType datatype);
+    Token(double val, enum eCoreType datatype);
+
+    enum eTokenType type() const { return m_type; }
+    const ::std::string& str() const { return m_data.as_String(); }
+    enum eCoreType  datatype() const { TU_MATCH_DEF(Data, (m_data), (e), (assert(!"Getting datatype of invalid token type");), (Integer, return e.m_datatype;), (Float, return e.m_datatype;)) }   // NOTE(review): the default arm only asserts; with assertions disabled nothing is returned - confirm
+    uint64_t intval() const { return m_data.as_Integer().m_intval; }
+    double floatval() const { return m_data.as_Float().m_floatval; }
+    bool operator==(const Token& r) const {   // equal when the types match and, for tokens carrying data, the data matches too
+        if(type() != r.type())
+            return false;
+        TU_MATCH(Data, (m_data, r.m_data), (e, re),
+        (None, return true;),
+        (String, return e == re;),
+        (Integer, return e.m_datatype == re.m_datatype && e.m_intval == re.m_intval;),
+        (Float, return e.m_datatype == re.m_datatype && e.m_floatval == re.m_floatval;)
+        )
+        throw "";
+    }
+    bool operator!=(const Token& r) const { return !(*this == r); }   // const, so const tokens can be compared as well
+
+    ::std::string to_str() const;
+
+    void set_pos(Position pos) { m_pos = ::std::move(pos); }   // pos is a by-value parameter; move it into place
+    const Position& get_pos() const { return m_pos; }
+
+    static const char* typestr(enum eTokenType type);
+    static eTokenType typefromstr(const ::std::string& s);
+
+    SERIALISABLE_PROTOTYPES();
+};
+extern ::std::ostream&  operator<<(::std::ostream& os, const Token& tok);
+
diff --git a/src/parse/tokentree.hpp b/src/parse/tokentree.hpp
index fddefdd1..89537a64 100644
--- a/src/parse/tokentree.hpp
+++ b/src/parse/tokentree.hpp
@@ -11,14 +11,16 @@ class TokenTree:
::std::vector<TokenTree>    m_subtrees;
  public:
      TokenTree() {}
 +    TokenTree(TokenTree&&) = default;   // NOTE(review): a user-declared move constructor suppresses the implicit copy constructor - confirm TokenTree is meant to be move-only
      TokenTree(Token tok):
 -        m_tok(tok)
 +        m_tok( ::std::move(tok) )   // tok is received by value, so it is moved into the member rather than copied
      {
      }
      TokenTree(::std::vector<TokenTree> subtrees):
 -        m_subtrees(subtrees)
 +        m_subtrees( ::std::move(subtrees) )   // likewise for the by-value subtree vector
      {
      }
 +    TokenTree& operator=(TokenTree&&) = default;
      bool is_token() const {
          return m_tok.type() != TOK_NULL;
 @@ -36,8 +38,18 @@ public:      friend ::std::ostream& operator<<(::std::ostream& os, const TokenTree& tt) {
          if( tt.m_subtrees.size() == 0 )
              return os << tt.m_tok;
 -        else
 -            return os << "TT([" << tt.m_subtrees << "])";
 +        else {   // write each subtree explicitly instead of streaming the vector as a whole
 +            os << "TT([";
 +            bool first = true;   // no ", " separator before the first subtree
 +            for(const auto& i : tt.m_subtrees) {
 +                if(!first)
 +                    os << ", ";
 +                os << i;   // i is a TokenTree, so this re-enters this operator
 +                first = false;
 +            }
 +            os << "])";
 +            return os;
 +        }
      }
      SERIALISABLE_PROTOTYPES();
 @@ -64,13 +76,15 @@ class TTStreamO:      public TokenStream
  {
      Position    m_last_pos;
 -    const TokenTree m_input_tt;
 +    TokenTree	m_input_tt;
      ::std::vector< ::std::pair<unsigned int, const TokenTree*> > m_stack;
  public:
 -    TTStreamO(const TokenTree input_tt);
 +    TTStreamO(TokenTree input_tt);
 +    TTStreamO(TTStreamO&& x) = default;
      ~TTStreamO();
      TTStreamO& operator=(const TTStreamO& x) { m_stack = x.m_stack; return *this; }   // NOTE(review): copies only m_stack (raw TokenTree pointers); m_input_tt and m_last_pos are left untouched - confirm those pointers stay valid for the destination
 +    TTStreamO& operator=(TTStreamO&& x) = default;
      virtual Position getPosition() const override;
 | 
