summaryrefslogtreecommitdiff
path: root/parse
diff options
context:
space:
mode:
author John Hodge (bugs) <tpg@mutabah.net> 2014-11-22 23:36:20 +0800
committer John Hodge (bugs) <tpg@mutabah.net> 2014-11-22 23:36:20 +0800
commit 7ad1a8b4b40784e0a10bd453c75dd0dcf123d5f1 (patch)
tree 0172ed546bdb00e86297d8d36d7cb0a0a1d111f3 /parse
download mrust-7ad1a8b4b40784e0a10bd453c75dd0dcf123d5f1.tar.gz
Initial commit, lexer structurally complete, parsing hacking up
Diffstat (limited to 'parse')
-rw-r--r-- parse/lex.cpp 454
-rw-r--r-- parse/lex.hpp 157
-rw-r--r-- parse/parseerror.cpp 39
-rw-r--r-- parse/parseerror.hpp 49
-rw-r--r-- parse/preproc.cpp 50
-rw-r--r-- parse/preproc.hpp 24
-rw-r--r-- parse/root.cpp 77
7 files changed, 850 insertions, 0 deletions
diff --git a/parse/lex.cpp b/parse/lex.cpp
new file mode 100644
index 00000000..cbd753f8
--- /dev/null
+++ b/parse/lex.cpp
@@ -0,0 +1,454 @@
+/*
+ * "MRustC" - Primitive rust compiler in C++
+ */
+/**
+ * \file parse/lex.cpp
+ * \brief Low-level lexer
+ */
+#include "lex.hpp"
+#include "parseerror.hpp"
+#include <cassert>
+#include <iostream>
+#include <cstdlib> // strtol
+
+/// Open `filename` for lexing; throws ::std::runtime_error if it cannot be opened.
+Lexer::Lexer(::std::string filename):
+    m_istream(filename.c_str()),
+    m_last_char_valid(false),
+    m_last_char(0) // defined value even though nothing has been read yet
+{
+    if( !m_istream.is_open() )
+        throw ::std::runtime_error("Unable to open file");
+}
+
+#define LINECOMMENT -1 // pseudo-symbol: start of a line comment
+#define BLOCKCOMMENT -2 // pseudo-symbol: start of a block comment
+#define SINGLEQUOTE -3 // pseudo-symbol: single quote (char constant or lifetime)
+#define DOUBLEQUOTE -4 // pseudo-symbol: double quote (string literal)
+
+// NOTE: Entries must stay sorted in ascending byte (strcmp) order - getSymbol() walks the table once and depends on it
+#define TOKENT(str, sym) {sizeof(str)-1, str, sym}
+const struct {
+    unsigned char len;
+    const char* chars;
+    signed int type;
+} TOKENMAP[] = {
+    TOKENT("!" , TOK_EXLAM),
+    TOKENT("!=", TOK_EXLAM_EQUAL),
+    TOKENT("\"", DOUBLEQUOTE),
+    TOKENT("#", 0),
+    TOKENT("#![",TOK_CATTR_OPEN),
+    TOKENT("#[", TOK_ATTR_OPEN),
+    //TOKENT("$", 0),
+    TOKENT("%" , TOK_PERCENT),
+    TOKENT("%=", TOK_PERCENT_EQUAL),
+    TOKENT("&" , TOK_AMP),
+    TOKENT("&&", TOK_DOUBLE_AMP),
+    TOKENT("&=", TOK_AMP_EQUAL),
+    TOKENT("'" , SINGLEQUOTE),
+    TOKENT("(" , TOK_PAREN_OPEN),
+    TOKENT(")" , TOK_PAREN_CLOSE),
+    TOKENT("*" , TOK_STAR),
+    TOKENT("*=", TOK_STAR_EQUAL),
+    TOKENT("+" , TOK_PLUS),
+    TOKENT("+=", TOK_PLUS_EQUAL),
+    TOKENT("," , TOK_COMMA),
+    TOKENT("-" , TOK_DASH),
+    TOKENT("-=", TOK_DASH_EQUAL),
+    TOKENT("->", TOK_THINARROW),
+    TOKENT(".", TOK_DOT),
+    TOKENT("..", TOK_DOUBLE_DOT),
+    TOKENT("...",TOK_TRIPLE_DOT),
+    TOKENT("/" , TOK_SLASH),
+    TOKENT("/*", BLOCKCOMMENT),
+    TOKENT("//", LINECOMMENT),
+    TOKENT("/=", TOK_SLASH_EQUAL),
+    // 0-9 :: Elsewhere
+    TOKENT(":", TOK_COLON),
+    TOKENT("::", TOK_DOUBLE_COLON),
+    TOKENT(";", TOK_SEMICOLON),
+    TOKENT("<", TOK_LT),
+    TOKENT("<<", TOK_DOUBLE_LT),
+    TOKENT("<=", TOK_LTE),
+    TOKENT("=" , TOK_EQUAL),
+    TOKENT("==", TOK_DOUBLE_EQUAL),
+    TOKENT("=>", TOK_FATARROW),
+    TOKENT(">", TOK_GT),
+    TOKENT(">=", TOK_GTE), // '=' (0x3D) sorts before '>' (0x3E); listed the other way round, TOK_GTE was never matched
+    TOKENT(">>", TOK_DOUBLE_GT),
+    TOKENT("?", TOK_QMARK),
+    TOKENT("@", TOK_AT),
+    // A-Z :: Elsewhere
+    TOKENT("[", TOK_SQUARE_OPEN),
+    TOKENT("\\", TOK_BACKSLASH),
+    TOKENT("]", TOK_SQUARE_CLOSE),
+    TOKENT("^", TOK_CARET),
+    TOKENT("`", TOK_BACKTICK),
+
+    TOKENT("as", TOK_RWORD_AS),
+    TOKENT("const", TOK_RWORD_CONST),
+    TOKENT("fn", TOK_RWORD_FN),
+    TOKENT("for", TOK_RWORD_FOR),
+    TOKENT("static",TOK_RWORD_STATIC),
+    TOKENT("use", TOK_RWORD_USE),
+
+    TOKENT("{", TOK_BRACE_OPEN),
+    TOKENT("|", TOK_PIPE),
+    TOKENT("|=", TOK_PIPE_EQUAL),
+    TOKENT("||", TOK_DOUBLE_PIPE),
+    TOKENT("}", TOK_BRACE_CLOSE),
+    TOKENT("~", TOK_TILDE),
+};
+#define LEN(arr) (sizeof(arr)/sizeof(arr[0]))
+
+signed int Lexer::getSymbol() // Longest-match lookup of the upcoming input in TOKENMAP; returns the entry's type, or 0 if none matched
+{
+ char ch = this->getc();
+ // Single front-to-back pass over TOKENMAP (relies on the table being sorted in ascending character order):
+ // - `ofs` counts the input characters matched and consumed so far
+ // - `best` holds the type of the longest entry fully matched so far (0 = nothing yet)
+ unsigned ofs = 0;
+ signed int best = 0;
+ for(unsigned i = 0; i < LEN(TOKENMAP); i ++)
+ {
+ const char* const chars = TOKENMAP[i].chars;
+ const size_t len = TOKENMAP[i].len;
+
+ //::std::cout << "ofs=" << ofs << ", chars[ofs] = " << chars[ofs] << ", ch = " << ch << ", len = " << len << ::std::endl;
+
+ if( ofs >= len || chars[ofs] > ch ) { // entry is too short to extend the match, or its next character is greater than the input: stop here
+ this->putback(); // NOTE(review): only `ch` is handed back; characters consumed by a partial match with best == 0 appear to be lost - confirm
+ return best;
+ }
+
+ while( chars[ofs] && chars[ofs] == ch ) // consume input for as long as it agrees with this entry
+ {
+ ch = this->getc();
+ ofs ++;
+ }
+ if( chars[ofs] == 0 ) // reached the entry's terminator: the whole entry matched
+ {
+ best = TOKENMAP[i].type;
+ }
+ }
+
+ this->putback(); // table exhausted: hand back the one character read past the match
+ return best;
+}
+
+/// True for characters that may appear in an identifier: alphanumerics, '_' and '$'.
+bool issym(char ch)
+{
+    // The <cctype> classifiers are undefined for negative values, so go through unsigned char
+    if( ::std::isalnum(static_cast<unsigned char>(ch)) )
+        return true;
+    if( ch == '_' || ch == '$' )
+        return true;
+    return false;
+}
+
+/// Read the next raw token from the input (whitespace and comments included).
+/// Returns TOK_EOF once the stream is exhausted; throws ParseError::Todo / BadChar on unsupported input.
+Token Lexer::getToken()
+{
+    try
+    {
+        char ch = this->getc();
+
+        if( isspace(static_cast<unsigned char>(ch)) ) // cast: the classifiers are undefined for negative char values
+        {
+            while( isspace(static_cast<unsigned char>(this->getc())) )
+                ;
+            this->putback();
+            return Token(TOK_WHITESPACE);
+        }
+        this->putback();
+
+        const signed int sym = this->getSymbol();
+        if( sym == 0 )
+        {
+            // No match at all, check for symbol
+            char ch = this->getc();
+            if( isdigit(static_cast<unsigned char>(ch)) )
+            {
+                // Tested before issym(): issym() accepts digits too, which made this branch unreachable
+                // TODO: handle integers/floats
+                throw ParseError::Todo("Lex Numbers");
+            }
+            else if( issym(ch) )
+            {
+                ::std::string str;
+                while( issym(ch) )
+                {
+                    str.push_back(ch);
+                    ch = this->getc();
+                }
+                this->putback();
+
+                // A '!' right after the name marks a macro invocation (the '!' itself is left unread)
+                if( ch == '!' )
+                    return Token(TOK_MACRO, str);
+                return Token(TOK_IDENT, str);
+            }
+            else
+            {
+                throw ParseError::BadChar(ch);
+            }
+        }
+        else if( sym > 0 )
+        {
+            return Token((enum eTokenType)sym);
+        }
+        else
+        {
+            switch(sym)
+            {
+            case LINECOMMENT: {
+                // Line comment: everything up to (and consuming) the line terminator
+                ::std::string str;
+                char ch = this->getc();
+                while(ch != '\n' && ch != '\r')
+                {
+                    str.push_back(ch);
+                    ch = this->getc();
+                }
+                return Token(TOK_COMMENT, str); }
+            case BLOCKCOMMENT: {
+                ::std::string str;
+                ch = this->getc(); // start at the comment body; `ch` still held the character read at the top
+                while(true)
+                {
+                    if( ch == '*' ) {
+                        if( this->getc() == '/' ) break;
+                        this->putback(); // not the terminator: keep the '*' and re-read that character
+                    }
+                    str.push_back(ch);
+                    ch = this->getc();
+                }
+                return Token(TOK_COMMENT, str); }
+            case SINGLEQUOTE: {
+                char firstchar = this->getc();
+                if( firstchar != '\\' ) {
+                    ch = this->getc();
+                    if( ch == '\'' ) {
+                        // Character constant: its value is the quoted character, not the closing quote
+                        return Token((uint64_t)(unsigned char)firstchar, CORETYPE_CHAR);
+                    }
+                    else {
+                        // Lifetime name
+                        ::std::string str;
+                        str.push_back(firstchar);
+                        while( issym(ch) )
+                        {
+                            str.push_back(ch);
+                            ch = this->getc();
+                        }
+                        this->putback();
+                        return Token(TOK_LIFETIME, str);
+                    }
+                }
+                else {
+                    // Character constant with an escape code
+                    uint32_t val = this->parseEscape('\'');
+                    if(this->getc() != '\'') {
+                        throw ParseError::Todo("Proper error for lex failures");
+                    }
+                    return Token((uint64_t)val, CORETYPE_CHAR);
+                }
+                break; }
+            case DOUBLEQUOTE:
+                throw ParseError::Todo("Strings");
+                break;
+            default:
+                assert(!"bugcheck");
+            }
+        }
+    }
+    catch(const Lexer::EndOfFile& e)
+    {
+        return Token(TOK_EOF);
+    }
+    // Only reachable past the assert above in NDEBUG builds; never fall off the end of a value-returning function
+    throw ParseError::Todo("Proper error for lex failures");
+}
+
+/// Decode the escape sequence that follows an already-consumed backslash.
+/// Supports 'u' + hex digits and an escaped backslash; anything else throws ParseError::Todo. `enclosing` is not used yet.
+uint32_t Lexer::parseEscape(char enclosing)
+{
+    char ch = this->getc();
+    switch(ch)
+    {
+    case 'u': {
+        // Unicode: at most six hex digits are consumed, which also keeps `val` from overflowing
+        uint32_t val = 0;
+        ch = this->getc();
+        if( !isxdigit(static_cast<unsigned char>(ch)) )
+            throw ParseError::Todo("Proper lex error for escape sequences");
+        for( unsigned ndigits = 0; ndigits < 6 && isxdigit(static_cast<unsigned char>(ch)); ndigits ++ )
+        {
+            char tmp[2] = {ch, 0};
+            val = val * 16 + ::std::strtol(tmp, NULL, 16);
+            ch = this->getc();
+        }
+        this->putback();
+        return val; }
+    case '\\': return '\\';
+    default:
+        throw ParseError::Todo("Proper lex error for escape sequences");
+    }
+}
+
+/// Return the next input character (a pending putback() first); throws EndOfFile when none is left.
+char Lexer::getc()
+{
+    if( m_last_char_valid )
+    {
+        m_last_char_valid = false;
+        return m_last_char;
+    }
+    // get() returns eof() at end of input *and* on a read error; testing the value covers both cases
+    const ::std::ifstream::int_type got = m_istream.get();
+    if( got == ::std::ifstream::traits_type::eof() )
+        throw Lexer::EndOfFile();
+    m_last_char = ::std::ifstream::traits_type::to_char_type(got);
+    return m_last_char;
+}
+
+void Lexer::putback() // Hand the last character read back, so that the next getc() returns it again
+{
+// ::std::cout << "putback(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl;
+ assert(!m_last_char_valid); // only one character of pushback is available
+ m_last_char_valid = true;
+}
+
+Token::Token():
+ m_type(TOK_NULL),
+ m_str("")
+{
+}
+Token::Token(enum eTokenType type):
+ m_type(type),
+ m_str("")
+{
+}
+Token::Token(enum eTokenType type, ::std::string str):
+ m_type(type),
+ m_str(str)
+{
+}
+Token::Token(uint64_t val, enum eCoreType datatype):
+ m_type(TOK_INTEGER),
+ m_datatype(datatype),
+ m_intval(val)
+{
+}
+
+const char* Token::typestr(enum eTokenType type)
+{
+ switch(type)
+ {
+ case TOK_NULL: return "TOK_NULL";
+ case TOK_EOF: return "TOK_EOF";
+
+ case TOK_WHITESPACE: return "TOK_WHITESPACE";
+ case TOK_COMMENT: return "TOK_COMMENT";
+
+ // Value tokens
+ case TOK_IDENT: return "TOK_IDENT";
+ case TOK_MACRO: return "TOK_MACRO";
+ case TOK_LIFETIME: return "TOK_LIFETIME";
+ case TOK_INTEGER: return "TOK_INTEGER";
+ case TOK_CHAR: return "TOK_CHAR";
+ case TOK_FLOAT: return "TOK_FLOAT";
+ case TOK_UNDERSCORE: return "TOK_UNDERSCORE";
+
+ case TOK_CATTR_OPEN: return "TOK_CATTR_OPEN";
+ case TOK_ATTR_OPEN: return "TOK_ATTR_OPEN";
+
+ // Symbols
+ case TOK_PAREN_OPEN: return "TOK_PAREN_OPEN"; case TOK_PAREN_CLOSE: return "TOK_PAREN_CLOSE";
+ case TOK_BRACE_OPEN: return "TOK_BRACE_OPEN"; case TOK_BRACE_CLOSE: return "TOK_BRACE_CLOSE";
+ case TOK_LT: return "TOK_LT"; case TOK_GT: return "TOK_GT";
+ case TOK_SQUARE_OPEN: return "TOK_SQUARE_OPEN";case TOK_SQUARE_CLOSE: return "TOK_SQUARE_CLOSE";
+ case TOK_COMMA: return "TOK_COMMA";
+ case TOK_SEMICOLON: return "TOK_SEMICOLON";
+ case TOK_COLON: return "TOK_COLON";
+ case TOK_DOUBLE_COLON: return "TOK_DOUBLE_COLON";
+ case TOK_STAR: return "TOK_STAR"; case TOK_AMP: return "TOK_AMP";
+ case TOK_PIPE: return "TOK_PIPE";
+
+ case TOK_FATARROW: return "TOK_FATARROW"; // =>
+ case TOK_THINARROW: return "TOK_THINARROW"; // ->
+
+ case TOK_PLUS: return "TOK_PLUS"; case TOK_DASH: return "TOK_DASH";
+ case TOK_EXLAM: return "TOK_EXLAM";
+ case TOK_PERCENT: return "TOK_PERCENT";
+ case TOK_SLASH: return "TOK_SLASH";
+
+ case TOK_DOT: return "TOK_DOT";
+ case TOK_DOUBLE_DOT: return "TOK_DOUBLE_DOT";
+ case TOK_TRIPLE_DOT: return "TOK_TRIPLE_DOT";
+
+ case TOK_EQUAL: return "TOK_EQUAL";
+ case TOK_PLUS_EQUAL: return "TOK_PLUS_EQUAL";
+ case TOK_DASH_EQUAL: return "TOK_DASH_EQUAL";
+ case TOK_PERCENT_EQUAL: return "TOK_PERCENT_EQUAL";
+ case TOK_SLASH_EQUAL: return "TOK_SLASH_EQUAL";
+ case TOK_STAR_EQUAL: return "TOK_STAR_EQUAL";
+ case TOK_AMP_EQUAL: return "TOK_AMP_EQUAL";
+ case TOK_PIPE_EQUAL: return "TOK_PIPE_EQUAL";
+
+ case TOK_DOUBLE_EQUAL: return "TOK_DOUBLE_EQUAL";
+ case TOK_EXLAM_EQUAL: return "TOK_EXLAM_EQUAL";
+ case TOK_GTE: return "TOK_GTE";
+ case TOK_LTE: return "TOK_LTE";
+
+ case TOK_DOUBLE_AMP: return "TOK_DOUBLE_AMP";
+ case TOK_DOUBLE_PIPE: return "TOK_DOUBLE_PIPE";
+ case TOK_DOUBLE_LT: return "TOK_DOUBLE_LT";
+ case TOK_DOUBLE_GT: return "TOK_DOUBLE_GT";
+
+ case TOK_QMARK: return "TOK_QMARK";
+ case TOK_AT: return "TOK_AT";
+ case TOK_TILDE: return "TOK_TILDE";
+ case TOK_BACKSLASH: return "TOK_BACKSLASH";
+ case TOK_CARET: return "TOK_CARET";
+ case TOK_BACKTICK: return "TOK_BACKTICK";
+
+ // Reserved Words
+ case TOK_RWORD_PUB: return "TOK_RWORD_PUB";
+ case TOK_RWORD_MUT: return "TOK_RWORD_MUT";
+ case TOK_RWORD_CONST: return "TOK_RWORD_CONST";
+ case TOK_RWORD_STATIC: return "TOK_RWORD_STATIC";
+ case TOK_RWORD_UNSAFE: return "TOK_RWORD_UNSAFE";
+
+ case TOK_RWORD_STRUCT: return "TOK_RWORD_STRUCT";
+ case TOK_RWORD_ENUM: return "TOK_RWORD_ENUM";
+ case TOK_RWORD_TRAIT: return "TOK_RWORD_TRAIT";
+ case TOK_RWORD_FN: return "TOK_RWORD_FN";
+ case TOK_RWORD_USE: return "TOK_RWORD_USE";
+
+ case TOK_RWORD_SELF: return "TOK_RWORD_SELF";
+ case TOK_RWORD_AS: return "TOK_RWORD_AS";
+
+ case TOK_RWORD_LET: return "TOK_RWORD_LET";
+ case TOK_RWORD_MATCH: return "TOK_RWORD_MATCH";
+ case TOK_RWORD_IF: return "TOK_RWORD_IF";
+ case TOK_RWORD_ELSE: return "TOK_RWORD_ELSE";
+ case TOK_RWORD_WHILE: return "TOK_RWORD_WHILE";
+ case TOK_RWORD_FOR: return "TOK_RWORD_FOR";
+
+ case TOK_RWORD_CONTINUE: return "TOK_RWORD_CONTINUE";
+ case TOK_RWORD_BREAK: return "TOK_RWORD_BREAK";
+ case TOK_RWORD_RETURN: return "TOK_RWORD_RETURN";
+ }
+ return ">>BUGCHECK: BADTOK<<";
+}
+
+/// Debug-print a token: its type name immediately followed by its string payload in double quotes.
+::std::ostream& operator<<(::std::ostream& os, Token& tok)
+{
+    return os << Token::typestr(tok.type()) << "\"" << tok.str() << "\"";
+}
+
diff --git a/parse/lex.hpp b/parse/lex.hpp
new file mode 100644
index 00000000..5e3d2dd4
--- /dev/null
+++ b/parse/lex.hpp
@@ -0,0 +1,157 @@
+#ifndef LEX_HPP_INCLUDED
+#define LEX_HPP_INCLUDED
+
+#include "../types.hpp"
+#include <string>
+#include <fstream>
+
+enum eTokenType // Token kinds; Token::typestr() maps each enumerator to its name
+{
+ TOK_NULL, // type of a default-constructed Token
+ TOK_EOF, // returned by Lexer::getToken() once the input is exhausted
+
+ TOK_WHITESPACE, // filtered out by Preproc::getTokenInt()
+ TOK_COMMENT, // filtered out by Preproc::getTokenInt()
+
+ // Value tokens
+ TOK_IDENT,
+ TOK_MACRO,
+ TOK_LIFETIME,
+ TOK_INTEGER, // also the type given to tokens built by Token(uint64_t, eCoreType)
+ TOK_CHAR,
+ TOK_FLOAT,
+ TOK_UNDERSCORE,
+
+ TOK_CATTR_OPEN,
+ TOK_ATTR_OPEN,
+
+ // Symbols
+ TOK_PAREN_OPEN, TOK_PAREN_CLOSE,
+ TOK_BRACE_OPEN, TOK_BRACE_CLOSE,
+ TOK_LT, TOK_GT,
+ TOK_SQUARE_OPEN,TOK_SQUARE_CLOSE,
+ TOK_COMMA,
+ TOK_SEMICOLON,
+ TOK_COLON,
+ TOK_DOUBLE_COLON,
+ TOK_STAR, TOK_AMP,
+ TOK_PIPE,
+
+ TOK_FATARROW, // =>
+ TOK_THINARROW, // ->
+
+ TOK_PLUS, TOK_DASH,
+ TOK_EXLAM,
+ TOK_PERCENT,
+ TOK_SLASH,
+
+ TOK_DOT,
+ TOK_DOUBLE_DOT,
+ TOK_TRIPLE_DOT,
+
+ TOK_EQUAL,
+ TOK_PLUS_EQUAL,
+ TOK_DASH_EQUAL,
+ TOK_PERCENT_EQUAL,
+ TOK_SLASH_EQUAL,
+ TOK_STAR_EQUAL,
+ TOK_AMP_EQUAL,
+ TOK_PIPE_EQUAL,
+
+ TOK_DOUBLE_EQUAL,
+ TOK_EXLAM_EQUAL,
+ TOK_GTE,
+ TOK_LTE,
+
+ TOK_DOUBLE_AMP,
+ TOK_DOUBLE_PIPE,
+ TOK_DOUBLE_LT,
+ TOK_DOUBLE_GT,
+
+ TOK_QMARK,
+ TOK_AT,
+ TOK_TILDE,
+ TOK_BACKSLASH,
+ TOK_CARET,
+ TOK_BACKTICK,
+
+ // Reserved Words
+ TOK_RWORD_PUB,
+ TOK_RWORD_MUT,
+ TOK_RWORD_CONST,
+ TOK_RWORD_STATIC,
+ TOK_RWORD_UNSAFE,
+
+ TOK_RWORD_STRUCT,
+ TOK_RWORD_ENUM,
+ TOK_RWORD_TRAIT,
+ TOK_RWORD_FN,
+ TOK_RWORD_USE,
+
+ TOK_RWORD_SELF,
+ TOK_RWORD_AS,
+
+ TOK_RWORD_LET,
+ TOK_RWORD_MATCH,
+ TOK_RWORD_IF,
+ TOK_RWORD_ELSE,
+ TOK_RWORD_WHILE,
+ TOK_RWORD_FOR,
+
+ TOK_RWORD_CONTINUE,
+ TOK_RWORD_BREAK,
+ TOK_RWORD_RETURN,
+};
+
+/// A single lexed token: its type plus an optional string or numeric payload.
+class Token
+{
+    enum eTokenType m_type;
+    ::std::string m_str;
+    enum eCoreType m_datatype;
+    union {
+        uint64_t m_intval;
+        double m_floatval;
+    };
+public:
+    Token();
+    Token(enum eTokenType type);
+    Token(enum eTokenType type, ::std::string str);
+    Token(uint64_t val, enum eCoreType datatype);
+    Token(double val, enum eCoreType datatype);
+    // Accessors are const so that a token can also be inspected through a const reference
+    enum eTokenType type() const { return m_type; }
+    const ::std::string& str() const { return m_str; }
+    static const char* typestr(enum eTokenType type);
+};
+
+extern ::std::ostream& operator<<(::std::ostream& os, Token& tok); // debug formatting: type name, then the string payload in quotes
+
+class TokenStream { // Abstract source of tokens with single-token putback
+public:
+    virtual ~TokenStream() {} // polymorphic base: destroying through a TokenStream* must reach the derived destructor
+    virtual Token getToken() = 0;
+    virtual void putback(Token tok) = 0;
+};
+
+class Lexer // Character-level lexer over a file; getToken() yields one raw token per call
+{
+ ::std::ifstream m_istream;
+ bool m_last_char_valid; // true while m_last_char is waiting to be returned again by getc()
+ char m_last_char; // most recent character returned by getc()
+public:
+ Lexer(::std::string filename);
+
+ Token getToken();
+
+private:
+ signed int getSymbol();
+ uint32_t parseEscape(char enclosing);
+
+ char getc();
+ void putback();
+
+ class EndOfFile {}; // thrown by getc() at end of input; caught inside getToken()
+};
+
+#endif // LEX_HPP_INCLUDED
diff --git a/parse/parseerror.cpp b/parse/parseerror.cpp
new file mode 100644
index 00000000..044847e7
--- /dev/null
+++ b/parse/parseerror.cpp
@@ -0,0 +1,39 @@
+/*
+ */
+#include "parseerror.hpp"
+#include <iostream>
+
+ParseError::Base::~Base() throw() // out-of-line definition of the destructor declared in parseerror.hpp
+{
+}
+
+/// Records `message` and immediately echoes it to stdout as "Todo(...)".
+ParseError::Todo::Todo(::std::string message):
+    m_message(message)
+{
+    ::std::cout << "Todo(" << m_message << ")" << ::std::endl;
+}
+ParseError::Todo::~Todo() throw()
+{ }
+
+/// Records the offending character and immediately echoes it to stdout as "BadChar(...)".
+ParseError::BadChar::BadChar(char character):
+    m_char(character)
+{
+    ::std::cout << "BadChar(" << m_char << ")" << ::std::endl;
+}
+ParseError::BadChar::~BadChar() throw()
+{ }
+
+ParseError::Unexpected::Unexpected(Token tok):
+ m_tok(tok)
+{
+ ::std::cout << "Unexpected(" << tok << ")" << ::std::endl;
+}
+ParseError::Unexpected::Unexpected(Token tok, Token exp)
+{
+ ::std::cout << "Unexpected(" << tok << ", " << exp << ")" << ::std::endl;
+}
+ParseError::Unexpected::~Unexpected() throw()
+{
+}
diff --git a/parse/parseerror.hpp b/parse/parseerror.hpp
new file mode 100644
index 00000000..487c1a6e
--- /dev/null
+++ b/parse/parseerror.hpp
@@ -0,0 +1,49 @@
+#ifndef PARSEERROR_HPP_INCLUDED
+#define PARSEERROR_HPP_INCLUDED
+
+#include <stdexcept>
+#include "lex.hpp"
+
+namespace ParseError {
+// Exception types raised while lexing/parsing; all derive from Base (and so from ::std::exception)
+class Base:
+    public ::std::exception
+{
+public:
+    ~Base() throw();
+};
+
+class Todo:
+    public Base
+{
+    ::std::string m_message;
+public:
+    Todo(::std::string message);
+    ~Todo() throw ();
+    virtual const char* what() const throw() { return m_message.c_str(); } // expose the stored message instead of the generic text
+};
+
+class BadChar:
+    public Base
+{
+    char m_char;
+public:
+    BadChar(char character);
+    ~BadChar() throw ();
+
+};
+
+class Unexpected:
+    public Base
+{
+    Token m_tok;
+public:
+    Unexpected(Token tok);
+    Unexpected(Token tok, Token exp);
+    ~Unexpected() throw ();
+
+};
+
+}
+
+#endif // PARSEERROR_HPP_INCLUDED
diff --git a/parse/preproc.cpp b/parse/preproc.cpp
new file mode 100644
index 00000000..ec64d731
--- /dev/null
+++ b/parse/preproc.cpp
@@ -0,0 +1,50 @@
+#include "preproc.hpp"
+#include <iostream>
+
+Preproc::Preproc(::std::string path):
+    // Listed in declaration order (m_lex, then m_cache_valid): members are constructed in that order regardless
+    m_lex(path),
+    m_cache_valid(false)
+{
+}
+
+Preproc::~Preproc() // empty body: no explicit cleanup is performed here
+{
+ //dtor
+}
+
+/// Pull tokens from the lexer until one that is neither whitespace nor a comment turns up.
+/// Every token read (skipped ones included) is traced to stdout.
+Token Preproc::getTokenInt()
+{
+    for(;;)
+    {
+        Token candidate = m_lex.getToken();
+        ::std::cout << "getTokenInt: tok = " << candidate << ::std::endl;
+
+        const enum eTokenType kind = candidate.type();
+        const bool is_filler = (kind == TOK_WHITESPACE || kind == TOK_COMMENT);
+        if( !is_filler )
+        {
+            return candidate;
+        }
+    }
+}
+
+/// Return the token handed back via putback() if there is one, otherwise the next significant token.
+Token Preproc::getToken()
+{
+    if( !m_cache_valid )
+    {
+        return this->getTokenInt();
+    }
+
+    // Consume the cached token: the slot is free again afterwards
+    m_cache_valid = false;
+    return m_cache;
+}
+void Preproc::putback(Token tok) // Store `tok` so that the next getToken() returns it
+{
+ m_cache_valid = true;
+ m_cache = tok; // single slot: a token already cached here is overwritten
+}
diff --git a/parse/preproc.hpp b/parse/preproc.hpp
new file mode 100644
index 00000000..827ce249
--- /dev/null
+++ b/parse/preproc.hpp
@@ -0,0 +1,24 @@
+#ifndef PREPROC_H
+#define PREPROC_H
+
+#include "lex.hpp"
+
+class Preproc: // Token stream over a Lexer that drops whitespace and comment tokens
+ public TokenStream
+{
+ Lexer m_lex;
+
+ bool m_cache_valid; // true while m_cache holds a token given back via putback()
+ Token m_cache;
+
+public:
+ Preproc(::std::string path);
+ ~Preproc();
+
+ virtual Token getToken();
+ virtual void putback(Token tok);
+private:
+ Token getTokenInt(); // reads from m_lex, skipping TOK_WHITESPACE / TOK_COMMENT
+};
+
+#endif // PREPROC_H
diff --git a/parse/root.cpp b/parse/root.cpp
new file mode 100644
index 00000000..620a1fce
--- /dev/null
+++ b/parse/root.cpp
@@ -0,0 +1,77 @@
+/*
+ */
+#include "preproc.hpp"
+#include "../ast/ast.hpp"
+#include "parseerror.hpp"
+#include <cassert>
+
+/// Parse identifiers separated by TOK_DOUBLE_COLON into an AST::Path; the first non-separator token is put back.
+AST::Path Parse_Path(TokenStream& lex)
+{
+    AST::Path result;
+    for(;;)
+    {
+        Token component = lex.getToken();
+        if( component.type() != TOK_IDENT )
+            throw ParseError::Unexpected(component);
+        result.append( component.str() );
+        Token separator = lex.getToken();
+        if( separator.type() == TOK_DOUBLE_COLON )
+            continue;
+        lex.putback(separator);
+        return result;
+    }
+}
+
+/// Parse module items until the module ends: TOK_EOF when `is_own_file`, TOK_BRACE_CLOSE otherwise.
+AST::Module Parse_ModRoot(bool is_own_file, Preproc& lex)
+{
+    AST::Module mod;
+    // Visibility of the item being parsed; kept across iterations because `pub` arrives as its own token
+    bool is_public = false;
+    for(;;)
+    {
+        Token tok = lex.getToken();
+        switch(tok.type())
+        {
+        case TOK_BRACE_CLOSE:
+            if( is_own_file )
+                throw ParseError::Unexpected(tok);
+            return mod;
+        case TOK_EOF:
+            if( !is_own_file )
+                throw ParseError::Unexpected(tok);
+            return mod;
+        case TOK_RWORD_PUB:
+            assert(!is_public);
+            is_public = true;
+            continue; // keep the flag set for the item that follows
+        case TOK_RWORD_USE:
+            mod.add_alias( Parse_Path(lex) );
+            tok = lex.getToken();
+            if( tok.type() != TOK_SEMICOLON )
+                throw ParseError::Unexpected(tok, Token(TOK_SEMICOLON));
+            break;
+
+        case TOK_RWORD_CONST:
+            //mod.add_constant(is_public, name, type, value);
+            throw ParseError::Todo("modroot const");
+        case TOK_RWORD_STATIC:
+            //mod.add_global(is_public, is_mut, name, type, value);
+            throw ParseError::Todo("modroot static");
+        case TOK_RWORD_FN:
+            throw ParseError::Todo("modroot fn");
+        case TOK_RWORD_STRUCT:
+            throw ParseError::Todo("modroot struct");
+        default:
+            throw ParseError::Unexpected(tok);
+        }
+        is_public = false; // item consumed: the next one starts out private again
+    }
+}
+
+void Parse_Crate(::std::string mainfile) // Parse the root module of a crate from the file `mainfile`
+{
+ Preproc lex(mainfile);
+ AST::Module rootmodule = Parse_ModRoot(true, lex); // is_own_file = true; the parsed module is not used further here
+}