summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/parse/lex.cpp 21
-rw-r--r-- src/parse/lex.hpp 7
2 files changed, 24 insertions, 4 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp
index 1e0e0712..b6463b21 100644
--- a/src/parse/lex.cpp
+++ b/src/parse/lex.cpp
@@ -440,9 +440,17 @@ Token Lexer::getTokenInt()
::std::string str;
while( (ch = this->getc()) != '"' )
{
- if( ch == '\\' )
- ch = this->parseEscape('"');
- str.push_back(ch);
+ if( ch == '\\' ) {
+ auto v = this->parseEscape('"');
+ if( v != ~0u ) {
+ if( v > 256 )
+ throw ParseError::Generic(*this, "Value out of range for byte literal");
+ str += (char)v;
+ }
+ }
+ else {
+ str.push_back(ch);
+ }
}
return Token(TOK_BYTESTRING, str);
}
@@ -745,7 +753,11 @@ uint32_t Lexer::parseEscape(char enclosing)
m_line ++;
while( isspace(ch) )
ch = this->getc();
- return ch;
+ this->ungetc();
+ if( ch == enclosing )
+ return ~0;
+ else
+ return ch;
default:
throw ParseError::Todo( FMT("Unknown escape sequence \\" << ch) );
}
@@ -1056,6 +1068,7 @@ SERIALISE_TYPE_S(Token, {
switch(tok.type())
{
case TOK_STRING:
+ case TOK_BYTESTRING:
case TOK_IDENT:
case TOK_MACRO:
case TOK_LIFETIME:
diff --git a/src/parse/lex.hpp b/src/parse/lex.hpp
index 3607fef0..3081e255 100644
--- a/src/parse/lex.hpp
+++ b/src/parse/lex.hpp
@@ -174,7 +174,14 @@ public:
/// Wrapper around a 32-bit code point value.
///
/// `s += Codepoint(x)` appends the UTF-8 encoding of `x` to a `std::string`
/// and returns the string so appends can be chained.
struct Codepoint {
    uint32_t v;

    /// Intentionally non-explicit: a plain `uint32_t` converts to a Codepoint.
    Codepoint(uint32_t v): v(v) { }

    /// Append the UTF-8 encoding (1 to 4 bytes) of `cp` to `s`.
    ///
    /// @throws ::std::runtime_error if `cp.v` is a UTF-16 surrogate
    ///         (U+D800..U+DFFF) or lies above U+10FFFF; neither has a valid
    ///         UTF-8 encoding. `s` is left unmodified in that case.
    friend ::std::string& operator+=(::std::string& s, const Codepoint& cp) {
        const uint32_t c = cp.v;
        if( c < 0x80 ) {
            // 0xxxxxxx
            s += static_cast<char>(c);
        }
        else if( c < 0x800 ) {
            // 110xxxxx 10xxxxxx
            s += static_cast<char>(0xC0 | (c >> 6));
            s += static_cast<char>(0x80 | (c & 0x3F));
        }
        else if( c < 0x10000 ) {
            // Surrogates are reserved for UTF-16 and must not be encoded.
            if( 0xD800 <= c && c <= 0xDFFF ) {
                throw ::std::runtime_error("Cannot encode surrogate codepoint as UTF-8");
            }
            // 1110xxxx 10xxxxxx 10xxxxxx
            s += static_cast<char>(0xE0 | (c >> 12));
            s += static_cast<char>(0x80 | ((c >> 6) & 0x3F));
            s += static_cast<char>(0x80 | (c & 0x3F));
        }
        else if( c <= 0x10FFFF ) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            s += static_cast<char>(0xF0 | (c >> 18));
            s += static_cast<char>(0x80 | ((c >> 12) & 0x3F));
            s += static_cast<char>(0x80 | ((c >> 6) & 0x3F));
            s += static_cast<char>(0x80 | (c & 0x3F));
        }
        else {
            throw ::std::runtime_error("Codepoint out of range for UTF-8 encoding");
        }
        return s;
    }
};