diff options
-rw-r--r-- | src/parse/lex.cpp | 21 | ||||
-rw-r--r-- | src/parse/lex.hpp | 7 |
2 files changed, 24 insertions, 4 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp
index 1e0e0712..b6463b21 100644
--- a/src/parse/lex.cpp
+++ b/src/parse/lex.cpp
@@ -440,9 +440,17 @@ Token Lexer::getTokenInt()
                 ::std::string str;
                 while( (ch = this->getc()) != '"' )
                 {
-                    if( ch == '\\' )
-                        ch = this->parseEscape('"');
-                    str.push_back(ch);
+                    if( ch == '\\' ) {
+                        auto v = this->parseEscape('"');
+                        if( v != ~0u ) {
+                            if( v > 256 )
+                                throw ParseError::Generic(*this, "Value out of range for byte literal");
+                            str += (char)v;
+                        }
+                    }
+                    else {
+                        str.push_back(ch);
+                    }
                 }
                 return Token(TOK_BYTESTRING, str);
             }
@@ -745,7 +753,11 @@ uint32_t Lexer::parseEscape(char enclosing)
         m_line ++;
         while( isspace(ch) )
             ch = this->getc();
-        return ch;
+        this->ungetc();
+        if( ch == enclosing )
+            return ~0;
+        else
+            return ch;
     default:
         throw ParseError::Todo( FMT("Unknown escape sequence \\" << ch) );
     }
@@ -1056,6 +1068,7 @@ SERIALISE_TYPE_S(Token, {
     switch(tok.type())
     {
     case TOK_STRING:
+    case TOK_BYTESTRING:
     case TOK_IDENT:
     case TOK_MACRO:
     case TOK_LIFETIME:
diff --git a/src/parse/lex.hpp b/src/parse/lex.hpp
index 3607fef0..3081e255 100644
--- a/src/parse/lex.hpp
+++ b/src/parse/lex.hpp
@@ -174,7 +174,14 @@ public:
 
 struct Codepoint {
     uint32_t v;
+    Codepoint(uint32_t v): v(v) { }
     friend ::std::string& operator+=(::std::string& s, const Codepoint& cp) {
+        if( cp.v < 128 ) {
+            s += (char)cp.v;
+        }
+        else {
+            throw ::std::runtime_error("TODO: Encode UTF-8 codepoint");
+        }
         return s;
     }
 };