diff options
author | John Hodge <tpg@mutabah.net> | 2015-03-17 21:49:49 +0800 |
---|---|---|
committer | John Hodge <tpg@mutabah.net> | 2015-03-17 21:49:49 +0800 |
commit | 3ebea363b19e933df06faf7c1d1ad5d4251c2ca3 (patch) | |
tree | 73e47917296dd7411cc49ae8a959d25c3838619b /src/parse/lex.cpp | |
parent | 0c2d1d043d71450ea5087937c97af4c12c6d33fc (diff) | |
download | mrust-3ebea363b19e933df06faf7c1d1ad5d4251c2ca3.tar.gz |
Rework handling of blocks, add sub-modules
Diffstat (limited to 'src/parse/lex.cpp')
-rw-r--r-- | src/parse/lex.cpp | 88 |
1 files changed, 69 insertions, 19 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp index 4237b48f..a2459e14 100644 --- a/src/parse/lex.cpp +++ b/src/parse/lex.cpp @@ -12,8 +12,11 @@ #include <iostream> #include <cstdlib> // strtol #include <typeinfo> +#include <algorithm> // std::count Lexer::Lexer(::std::string filename): + m_path(filename), + m_line(1), m_istream(filename.c_str()), m_last_char_valid(false) { @@ -170,7 +173,7 @@ signed int Lexer::getSymbol() //::std::cout << "ofs=" << ofs << ", chars[ofs] = " << chars[ofs] << ", ch = " << ch << ", len = " << len << ::std::endl; if( ofs >= len || chars[ofs] > ch ) { - this->putback(); + this->ungetc(); return best; } @@ -185,7 +188,7 @@ signed int Lexer::getSymbol() } } - this->putback(); + this->ungetc(); return best; } @@ -198,7 +201,37 @@ bool issym(char ch) return false; } -Token Lexer::getToken() +Position Lexer::getPosition() const +{ + return Position(m_path, m_line); +} +Token Lexer::realGetToken() +{ + while(true) + { + Token tok = getTokenInt(); + //::std::cout << "getTokenInt: tok = " << tok << ::std::endl; + switch(tok.type()) + { + case TOK_NEWLINE: + m_line ++; + //DEBUG("m_line = " << m_line << " (NL)"); + continue; + case TOK_WHITESPACE: + continue; + case TOK_COMMENT: { + ::std::string comment = tok.str(); + unsigned int c = ::std::count(comment.begin(), comment.end(), '\n'); + m_line += c; + //DEBUG("m_line = " << m_line << " (comment w/ "<<c<<")"); + continue; } + default: + return tok; + } + } +} + +Token Lexer::getTokenInt() { if( this->m_next_token.type() != TOK_NULL ) { @@ -214,10 +247,10 @@ Token Lexer::getToken() { while( isspace(ch = this->getc()) && ch != '\n' ) ; - this->putback(); + this->ungetc(); return Token(TOK_WHITESPACE); } - this->putback(); + this->ungetc(); const signed int sym = this->getSymbol(); if( sym == 0 ) @@ -297,7 +330,7 @@ Token Lexer::getToken() this->m_next_token = Token(TOK_TRIPLE_DOT); } else { - this->putback(); + this->ungetc(); this->m_next_token = Token(TOK_DOUBLE_DOT); } return Token(val, CORETYPE_ANY); @@ -309,7 +342,7 @@ Token Lexer::getToken() return Token(val, CORETYPE_ANY); } - this->putback(); + this->ungetc(); double fval = this->parseFloat(val); if( (ch = this->getc()) == 'f' ) { @@ -319,7 +352,7 @@ Token Lexer::getToken() suffix.push_back(ch); ch = this->getc(); } - this->putback(); + this->ungetc(); if( suffix == "f32" ) { num_type = CORETYPE_F32; } @@ -332,13 +365,13 @@ Token Lexer::getToken() } else { - this->putback(); + this->ungetc(); } return Token( fval, num_type); } else { - this->putback(); + this->ungetc(); return Token(val, num_type); } } @@ -357,7 +390,7 @@ Token Lexer::getToken() } else { - this->putback(); + this->ungetc(); for( unsigned int i = 0; i < LEN(RWORDS); i ++ ) { if( str < RWORDS[i].chars ) break; @@ -388,7 +421,7 @@ Token Lexer::getToken() str.push_back(ch); ch = this->getc(); } - this->putback(); + this->ungetc(); return Token(TOK_COMMENT, str); } case BLOCKCOMMENT: { ::std::string str; @@ -397,7 +430,7 @@ Token Lexer::getToken() if( ch == '*' ) { ch = this->getc(); if( ch == '/' ) break; - this->putback(); + this->ungetc(); } str.push_back(ch); ch = this->getc(); @@ -420,7 +453,7 @@ Token Lexer::getToken() str.push_back(ch); ch = this->getc(); } - this->putback(); + this->ungetc(); return Token(TOK_LIFETIME, str); } } @@ -485,7 +518,7 @@ double Lexer::parseFloat(uint64_t whole) ch = this->getc_num(); } while( isdigit(ch) ); } - this->putback(); + this->ungetc(); buf[ofs] = 0; return ::std::strtod(buf, NULL); @@ -501,8 +534,13 @@ uint32_t Lexer::parseEscape(char enclosing) // Unicode (up to six hex digits) uint32_t val = 0; ch = this->getc(); + bool req_close_brace = false; + if( ch == '{' ) { + req_close_brace = true; + ch = this->getc(); + } if( !isxdigit(ch) ) - throw ParseError::Todo( FMT("Found invalid character '\\x" << ::std::hex << (int)ch << "' in \\u sequence" ) ); + throw ParseError::Generic(*this, FMT("Found invalid character '\\x" << ::std::hex << (int)ch << "' in \\u sequence" ) ); while( isxdigit(ch) ) { char tmp[2] = {ch, 0}; @@ -510,13 +548,25 @@ uint32_t Lexer::parseEscape(char enclosing) val += ::std::strtol(tmp, NULL, 16); ch = this->getc(); } - this->putback(); + if( !req_close_brace ) + this->ungetc(); + else if( ch != '}' ) + throw ParseError::Generic(*this, "Expected terminating } in \\u sequence"); + else + ; return val; } case '\\': return '\\'; + case '\'': + return '\''; + case 'r': + return '\r'; case 'n': return '\n'; + case 't': + return '\t'; case '\n': + m_line ++; while( isspace(ch) ) ch = this->getc(); return ch; @@ -550,9 +600,9 @@ char Lexer::getc_num() return ch; } -void Lexer::putback() +void Lexer::ungetc() { -// ::std::cout << "putback(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; +// ::std::cout << "ungetc(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; assert(!m_last_char_valid); m_last_char_valid = true; } |