summaryrefslogtreecommitdiff
path: root/src/parse/lex.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse/lex.hpp')
-rw-r--r--src/parse/lex.hpp413
1 files changed, 214 insertions, 199 deletions
diff --git a/src/parse/lex.hpp b/src/parse/lex.hpp
index 710f3a6c..38f730c5 100644
--- a/src/parse/lex.hpp
+++ b/src/parse/lex.hpp
@@ -1,201 +1,216 @@
-#ifndef LEX_HPP_INCLUDED
-#define LEX_HPP_INCLUDED
-
-#include "../types.hpp"
+#ifndef LEX_HPP_INCLUDED
+#define LEX_HPP_INCLUDED
+
+#include "../types.hpp"
#include <string>
-#include <fstream>
-
-enum eTokenType
-{
- TOK_NULL,
- TOK_EOF,
-
- TOK_WHITESPACE,
- TOK_COMMENT,
-
- // Value tokens
- TOK_IDENT,
- TOK_MACRO,
- TOK_LIFETIME,
- TOK_INTEGER,
- TOK_CHAR,
- TOK_FLOAT,
+#include <fstream>
+
+enum eTokenType
+{
+ TOK_NULL,
+ TOK_EOF,
+
+ TOK_NEWLINE,
+ TOK_WHITESPACE,
+ TOK_COMMENT,
+
+ // Value tokens
+ TOK_IDENT,
+ TOK_MACRO,
+ TOK_LIFETIME,
+ TOK_INTEGER,
+ TOK_CHAR,
+ TOK_FLOAT,
TOK_STRING,
-
- TOK_CATTR_OPEN,
- TOK_ATTR_OPEN,
-
- // Symbols
- TOK_PAREN_OPEN, TOK_PAREN_CLOSE,
- TOK_BRACE_OPEN, TOK_BRACE_CLOSE,
- TOK_LT, TOK_GT,
- TOK_SQUARE_OPEN,TOK_SQUARE_CLOSE,
- TOK_COMMA,
- TOK_SEMICOLON,
- TOK_COLON,
- TOK_DOUBLE_COLON,
- TOK_STAR, TOK_AMP,
- TOK_PIPE,
-
- TOK_FATARROW, // =>
- TOK_THINARROW, // ->
-
- TOK_PLUS, TOK_DASH,
- TOK_EXCLAM,
- TOK_PERCENT,
- TOK_SLASH,
-
- TOK_DOT,
- TOK_DOUBLE_DOT,
- TOK_TRIPLE_DOT,
-
- TOK_EQUAL,
- TOK_PLUS_EQUAL,
- TOK_DASH_EQUAL,
- TOK_PERCENT_EQUAL,
- TOK_SLASH_EQUAL,
- TOK_STAR_EQUAL,
- TOK_AMP_EQUAL,
- TOK_PIPE_EQUAL,
-
- TOK_DOUBLE_EQUAL,
- TOK_EXCLAM_EQUAL,
- TOK_GTE,
- TOK_LTE,
-
- TOK_DOUBLE_AMP,
- TOK_DOUBLE_PIPE,
- TOK_DOUBLE_LT,
- TOK_DOUBLE_GT,
-
- TOK_QMARK,
- TOK_AT,
- TOK_TILDE,
- TOK_BACKSLASH,
- TOK_CARET,
- TOK_BACKTICK,
-
- // Reserved Words
- TOK_RWORD_PUB,
- TOK_RWORD_PRIV,
- TOK_RWORD_MUT,
- TOK_RWORD_CONST,
- TOK_RWORD_STATIC,
- TOK_RWORD_UNSAFE,
- TOK_RWORD_EXTERN,
-
- TOK_RWORD_CRATE,
- TOK_RWORD_MOD,
- TOK_RWORD_STRUCT,
- TOK_RWORD_ENUM,
- TOK_RWORD_TRAIT,
- TOK_RWORD_FN,
- TOK_RWORD_USE,
- TOK_RWORD_IMPL,
- TOK_RWORD_TYPE,
-
- TOK_RWORD_WHERE,
- TOK_RWORD_AS,
-
- TOK_RWORD_LET,
- TOK_RWORD_MATCH,
- TOK_RWORD_IF,
- TOK_RWORD_ELSE,
- TOK_RWORD_LOOP,
- TOK_RWORD_WHILE,
- TOK_RWORD_FOR,
- TOK_RWORD_IN,
- TOK_RWORD_DO,
-
- TOK_RWORD_CONTINUE,
- TOK_RWORD_BREAK,
- TOK_RWORD_RETURN,
- TOK_RWORD_YIELD,
- TOK_RWORD_BOX,
- TOK_RWORD_REF,
-
- TOK_RWORD_FALSE,
- TOK_RWORD_TRUE,
- TOK_RWORD_SELF,
- TOK_RWORD_SUPER,
-
- TOK_RWORD_PROC,
- TOK_RWORD_MOVE,
- TOK_RWORD_ONCE,
-
- TOK_RWORD_ABSTRACT,
- TOK_RWORD_FINAL,
- TOK_RWORD_PURE,
- TOK_RWORD_OVERRIDE,
- TOK_RWORD_VIRTUAL,
-
- TOK_RWORD_ALIGNOF,
- TOK_RWORD_OFFSETOF,
- TOK_RWORD_SIZEOF,
- TOK_RWORD_TYPEOF,
-
- TOK_RWORD_BE,
- TOK_RWORD_UNSIZED,
-};
-
-class Token
-{
- enum eTokenType m_type;
- ::std::string m_str;
- enum eCoreType m_datatype;
- union {
- uint64_t m_intval;
- double m_floatval;
- };
-public:
- Token();
- Token(enum eTokenType type);
- Token(enum eTokenType type, ::std::string str);
- Token(uint64_t val, enum eCoreType datatype);
- Token(double val, enum eCoreType datatype);
-
- enum eTokenType type() const { return m_type; }
- const ::std::string& str() const { return m_str; }
- enum eCoreType datatype() const { return m_datatype; }
- uint64_t intval() const { return m_intval; }
- double floatval() const { return m_floatval; }
-
- static const char* typestr(enum eTokenType type);
-};
-
-extern ::std::ostream& operator<<(::std::ostream& os, Token& tok);
-
-class TokenStream
-{
- bool m_cache_valid;
- Token m_cache;
-public:
- TokenStream();
- virtual ~TokenStream();
- Token getToken();
- void putback(Token tok);
-protected:
- virtual Token realGetToken() = 0;
-};
-
-class Lexer
-{
- ::std::ifstream m_istream;
- bool m_last_char_valid;
- char m_last_char;
-public:
- Lexer(::std::string filename);
-
- Token getToken();
-
-private:
- signed int getSymbol();
- uint32_t parseEscape(char enclosing);
-
- char getc();
- void putback();
-
- class EndOfFile {};
-};
-
-#endif // LEX_HPP_INCLUDED
+
+ TOK_CATTR_OPEN,
+ TOK_ATTR_OPEN,
+
+ // Symbols
+ TOK_PAREN_OPEN, TOK_PAREN_CLOSE,
+ TOK_BRACE_OPEN, TOK_BRACE_CLOSE,
+ TOK_LT, TOK_GT,
+ TOK_SQUARE_OPEN,TOK_SQUARE_CLOSE,
+ TOK_COMMA,
+ TOK_SEMICOLON,
+ TOK_COLON,
+ TOK_DOUBLE_COLON,
+ TOK_STAR, TOK_AMP,
+ TOK_PIPE,
+
+ TOK_FATARROW, // =>
+ TOK_THINARROW, // ->
+
+ TOK_PLUS, TOK_DASH,
+ TOK_EXCLAM,
+ TOK_PERCENT,
+ TOK_SLASH,
+
+ TOK_DOT,
+ TOK_DOUBLE_DOT,
+ TOK_TRIPLE_DOT,
+
+ TOK_EQUAL,
+ TOK_PLUS_EQUAL,
+ TOK_DASH_EQUAL,
+ TOK_PERCENT_EQUAL,
+ TOK_SLASH_EQUAL,
+ TOK_STAR_EQUAL,
+ TOK_AMP_EQUAL,
+ TOK_PIPE_EQUAL,
+
+ TOK_DOUBLE_EQUAL,
+ TOK_EXCLAM_EQUAL,
+ TOK_GTE,
+ TOK_LTE,
+
+ TOK_DOUBLE_AMP,
+ TOK_DOUBLE_PIPE,
+ TOK_DOUBLE_LT,
+ TOK_DOUBLE_GT,
+
+ TOK_QMARK,
+ TOK_AT,
+ TOK_TILDE,
+ TOK_BACKSLASH,
+ TOK_CARET,
+ TOK_BACKTICK,
+
+ // Reserved Words
+ TOK_RWORD_PUB,
+ TOK_RWORD_PRIV,
+ TOK_RWORD_MUT,
+ TOK_RWORD_CONST,
+ TOK_RWORD_STATIC,
+ TOK_RWORD_UNSAFE,
+ TOK_RWORD_EXTERN,
+
+ TOK_RWORD_CRATE,
+ TOK_RWORD_MOD,
+ TOK_RWORD_STRUCT,
+ TOK_RWORD_ENUM,
+ TOK_RWORD_TRAIT,
+ TOK_RWORD_FN,
+ TOK_RWORD_USE,
+ TOK_RWORD_IMPL,
+ TOK_RWORD_TYPE,
+
+ TOK_RWORD_WHERE,
+ TOK_RWORD_AS,
+
+ TOK_RWORD_LET,
+ TOK_RWORD_MATCH,
+ TOK_RWORD_IF,
+ TOK_RWORD_ELSE,
+ TOK_RWORD_LOOP,
+ TOK_RWORD_WHILE,
+ TOK_RWORD_FOR,
+ TOK_RWORD_IN,
+ TOK_RWORD_DO,
+
+ TOK_RWORD_CONTINUE,
+ TOK_RWORD_BREAK,
+ TOK_RWORD_RETURN,
+ TOK_RWORD_YIELD,
+ TOK_RWORD_BOX,
+ TOK_RWORD_REF,
+
+ TOK_RWORD_FALSE,
+ TOK_RWORD_TRUE,
+ TOK_RWORD_SELF,
+ TOK_RWORD_SUPER,
+
+ TOK_RWORD_PROC,
+ TOK_RWORD_MOVE,
+ TOK_RWORD_ONCE,
+
+ TOK_RWORD_ABSTRACT,
+ TOK_RWORD_FINAL,
+ TOK_RWORD_PURE,
+ TOK_RWORD_OVERRIDE,
+ TOK_RWORD_VIRTUAL,
+
+ TOK_RWORD_ALIGNOF,
+ TOK_RWORD_OFFSETOF,
+ TOK_RWORD_SIZEOF,
+ TOK_RWORD_TYPEOF,
+
+ TOK_RWORD_BE,
+ TOK_RWORD_UNSIZED,
+};
+
+class Token
+{
+ enum eTokenType m_type;
+ ::std::string m_str;
+ enum eCoreType m_datatype;
+ union {
+ uint64_t m_intval;
+ double m_floatval;
+ };
+public:
+ Token();
+ Token(enum eTokenType type);
+ Token(enum eTokenType type, ::std::string str);
+ Token(uint64_t val, enum eCoreType datatype);
+ Token(double val, enum eCoreType datatype);
+
+ enum eTokenType type() const { return m_type; }
+ const ::std::string& str() const { return m_str; }
+ enum eCoreType datatype() const { return m_datatype; }
+ uint64_t intval() const { return m_intval; }
+ double floatval() const { return m_floatval; }
+
+ static const char* typestr(enum eTokenType type);
+};
+
+extern ::std::ostream& operator<<(::std::ostream& os, const Token& tok);
+
+struct Position
+{
+ ::std::string filename;
+ unsigned int line;
+
+ Position(::std::string filename, unsigned int line):
+ filename(filename),
+ line(line)
+ {
+ }
+};
+extern ::std::ostream& operator<<(::std::ostream& os, const Position& p);
+
+class TokenStream
+{
+ bool m_cache_valid;
+ Token m_cache;
+public:
+ TokenStream();
+ virtual ~TokenStream();
+ Token getToken();
+ void putback(Token tok);
+ virtual Position getPosition() const = 0;
+protected:
+ virtual Token realGetToken() = 0;
+};
+
+class Lexer
+{
+ ::std::ifstream m_istream;
+ bool m_last_char_valid;
+ char m_last_char;
+public:
+ Lexer(::std::string filename);
+
+ Token getToken();
+
+private:
+ signed int getSymbol();
+ uint32_t parseEscape(char enclosing);
+
+ char getc();
+ void putback();
+
+ class EndOfFile {};
+};
+
+#endif // LEX_HPP_INCLUDED