diff options
author | John Hodge <tpg@mutabah.net> | 2015-03-12 12:40:36 +0800 |
---|---|---|
committer | John Hodge <tpg@mutabah.net> | 2015-03-12 12:40:36 +0800 |
commit | 06e7e28cd4de3871f55255150b66821b12365881 (patch) | |
tree | 97d1e22f999ee53845455932a411cc25ca2d9de2 /src/parse/lex.cpp | |
parent | e71fccd80610bc05b0a90338b2b3beb9a0b94c22 (diff) | |
download | mrust-06e7e28cd4de3871f55255150b66821b12365881.tar.gz |
Float parsing, module-level macro expansion
Diffstat (limited to 'src/parse/lex.cpp')
-rw-r--r-- | src/parse/lex.cpp | 132 |
1 files changed, 128 insertions, 4 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp index 678da4d0..fc2c7401 100644 --- a/src/parse/lex.cpp +++ b/src/parse/lex.cpp @@ -200,6 +200,10 @@ bool issym(char ch) Token Lexer::getToken() { + if( this->m_next_token.type() != TOK_NULL ) + { + return ::std::move(this->m_next_token); + } try { char ch = this->getc(); @@ -222,12 +226,20 @@ Token Lexer::getToken() char ch = this->getc(); if( isdigit(ch) ) { + enum eCoreType num_type = CORETYPE_ANY; + enum { + BIN, + OCT, + DEC, + HEX, + } num_mode = DEC; // TODO: handle integers/floats uint64_t val = 0; if( ch == '0' ) { // Octal/hex handling ch = this->getc(); if( ch == 'x' ) { + num_mode = HEX; while( isxdigit(ch = this->getc()) ) { val *= 16; @@ -239,10 +251,16 @@ Token Lexer::getToken() val += ch - 'a' + 10; } } + else if( ch == 'b' ) { + num_mode = BIN; + throw ParseError::Todo("Lex binary numbers"); + } else if( isdigit(ch) ) { + num_mode = OCT; throw ParseError::Todo("Lex octal numbers"); } else { + num_mode = DEC; val = 0; } } @@ -259,11 +277,60 @@ Token Lexer::getToken() throw ParseError::Todo("Lex number suffixes"); } else if( ch == '.' ) { - throw ParseError::Todo("Lex floats"); + if( num_mode != DEC ) + throw ParseError::Todo("Non-decimal floats"); + + ch = this->getc(); + + // Double/Triple Dot + if( ch == '.' ) { + if( this->getc() == '.') { + this->m_next_token = Token(TOK_TRIPLE_DOT); + } + else { + this->putback(); + this->m_next_token = Token(TOK_DOUBLE_DOT); + } + return Token(val, CORETYPE_ANY); + } + // Single dot + else if( !isdigit(ch) ) + { + this->m_next_token = Token(TOK_DOT); + return Token(val, CORETYPE_ANY); + } + + this->putback(); + double fval = this->parseFloat(val); + if( (ch = this->getc()) == 'f' ) + { + ::std::string suffix; + while( issym(ch) ) + { + suffix.push_back(ch); + ch = this->getc(); + } + this->putback(); + if( suffix == "f32" ) { + num_type = CORETYPE_F32; + } + else if( suffix == "f64" ) { + num_type = CORETYPE_F64; + } + else { + throw ParseError::Generic( FMT("Unknown number suffix " << suffix) ); + } + } + else + { + this->putback(); + } + return Token( fval, num_type); + } else { this->putback(); - return Token(val, CORETYPE_ANY); + return Token(val, num_type); } } else if( issym(ch) ) @@ -379,6 +446,42 @@ Token Lexer::getToken() //assert(!"bugcheck"); } +// Takes the VERY lazy way of reading the float into a string then passing to strtod +double Lexer::parseFloat(uint64_t whole) +{ + const int MAX_LEN = 63; + const int MAX_SIG = MAX_LEN - 1 - 4; + char buf[MAX_LEN+1]; + int ofs = snprintf(buf, MAX_LEN+1, "%llu", (unsigned long long)whole); + + char ch = this->getc_num(); + #define PUTC(ch) do { if( ofs < MAX_SIG ) { buf[ofs] = ch; ofs ++; } else { throw ParseError::Generic("Oversized float"); } } while(0) + while( isdigit(ch) ) + { + PUTC(ch); + ch = this->getc_num(); + } + if( ch == 'e' || ch == 'E' ) + { + PUTC(ch); + ch = this->getc_num(); + if( ch == '-' || ch == '+' ) { + PUTC(ch); + ch = this->getc_num(); + } + if( !isdigit(ch) ) + throw ParseError::Generic( FMT("Non-numeric '"<<ch<<"' in float exponent") ); + do { + PUTC(ch); + ch = this->getc_num(); + } while( isdigit(ch) ); + } + this->putback(); + buf[ofs] = 0; + + return ::std::strtod(buf, NULL); +} + uint32_t Lexer::parseEscape(char enclosing) { char ch = this->getc(); @@ -389,7 +492,7 @@ uint32_t Lexer::parseEscape(char enclosing) uint32_t val = 0; ch = this->getc(); if( !isxdigit(ch) ) - throw ParseError::Todo("Proper lex error for escape sequences"); + throw ParseError::Todo( FMT("Found invalid character '\\x" << ::std::hex << (int)ch << "' in \\u sequence" ) ); while( isxdigit(ch) ) { char tmp[2] = {ch, 0}; @@ -401,8 +504,14 @@ uint32_t Lexer::parseEscape(char enclosing) return val; } case '\\': return '\\'; + case 'n': + return '\n'; + case '\n': + while( isspace(ch) ) + ch = this->getc(); + return ch; default: - throw ParseError::Todo("Proper lex error for escape sequences"); + throw ParseError::Todo( FMT("Unknown escape sequence \\" << ch) ); } } @@ -422,6 +531,15 @@ char Lexer::getc() return m_last_char; } +char Lexer::getc_num() +{ + char ch; + do { + ch = this->getc(); + } while( ch == '_' ); + return ch; +} + void Lexer::putback() { // ::std::cout << "putback(): " << m_last_char_valid << " '" << m_last_char << "'" << ::std::endl; @@ -450,6 +568,12 @@ Token::Token(uint64_t val, enum eCoreType datatype): m_intval(val) { } +Token::Token(double val, enum eCoreType datatype): + m_type(TOK_FLOAT), + m_datatype(datatype), + m_floatval(val) +{ +} const char* Token::typestr(enum eTokenType type) { |