summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Hodge <tpg@mutabah.net> 2016-02-27 08:21:47 +0800
committer John Hodge <tpg@mutabah.net> 2016-02-27 08:21:47 +0800
commit 11dfb163601c8cc71f7b0ee9f9103b5b9cf72706 (patch)
tree 6d437ba716dc405f8b7bde3f4092631dc71ec0b7
parent ba563a6e9a5d698990e1bddaeeecc512fd670bcc (diff)
download mrust-11dfb163601c8cc71f7b0ee9f9103b5b9cf72706.tar.gz
Parse/lex - Fix handling of escape codes
-rw-r--r-- src/parse/lex.cpp 40
-rw-r--r-- src/parse/lex.hpp 10
2 files changed, 39 insertions, 11 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp
index b6463b21..77d12f7f 100644
--- a/src/parse/lex.cpp
+++ b/src/parse/lex.cpp
@@ -557,8 +557,17 @@ Token Lexer::getTokenInt()
while( (ch = this->getc()) != '"' )
{
if( ch == '\\' )
- ch = this->parseEscape('"');
- str.push_back(ch);
+ {
+ auto v = this->parseEscape('"');
+ if( v != ~0u )
+ {
+ str += Codepoint(v);
+ }
+ }
+ else
+ {
+ str.push_back(ch);
+ }
}
return Token(TOK_STRING, str);
}
@@ -1231,3 +1240,30 @@ SERIALISE_TYPE_A(TokenTree::, "TokenTree", {
s.item(m_subtrees);
})
+/// Appends the UTF-8 encoding of the codepoint `cp` to `s` and returns `s`.
+///
+/// Encoded length by value range:
+///   - below 0x80      : 1 byte
+///   - below 0x800     : 2 bytes
+///   - below 0x10000   : 3 bytes
+///   - up to 0x10FFFF  : 4 bytes
+/// Surrogate values (0xD800-0xDFFF) are not rejected here; they are emitted
+/// as ordinary 3-byte sequences.
+/// Throws ::std::runtime_error for values above 0x10FFFF, which UTF-8 cannot
+/// represent.
+::std::string& operator+=(::std::string& s, const Codepoint& cp)
+{
+    if( cp.v < 0x80u ) {
+        // 0xxxxxxx
+        s += static_cast<char>(cp.v);
+    }
+    else if( cp.v < 0x800u ) {
+        // 110xxxxx 10xxxxxx  (5 + 6 payload bits)
+        s += static_cast<char>(0xC0 | ((cp.v >> 6) & 0x1F));
+        s += static_cast<char>(0x80 | ((cp.v >> 0) & 0x3F));
+    }
+    else if( cp.v < 0x10000u ) {
+        // 1110xxxx 10xxxxxx 10xxxxxx  (4 + 6 + 6 payload bits)
+        // The bound is strict: 0x10000 needs 17 bits and must not be
+        // truncated into a 3-byte sequence.
+        s += static_cast<char>(0xE0 | ((cp.v >> 12) & 0x0F));
+        s += static_cast<char>(0x80 | ((cp.v >>  6) & 0x3F));
+        s += static_cast<char>(0x80 | ((cp.v >>  0) & 0x3F));
+    }
+    else if( cp.v < 0x110000u ) {
+        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx  (3 + 6 + 6 + 6 payload bits)
+        // Capped at U+10FFFF, the last codepoint UTF-8 may encode.
+        s += static_cast<char>(0xF0 | ((cp.v >> 18) & 0x07));
+        s += static_cast<char>(0x80 | ((cp.v >> 12) & 0x3F));
+        s += static_cast<char>(0x80 | ((cp.v >>  6) & 0x3F));
+        s += static_cast<char>(0x80 | ((cp.v >>  0) & 0x3F));
+    }
+    else {
+        throw ::std::runtime_error("BUGCHECK: Bad unicode codepoint encountered");
+    }
+    return s;
+}
+
diff --git a/src/parse/lex.hpp b/src/parse/lex.hpp
index 3081e255..937be516 100644
--- a/src/parse/lex.hpp
+++ b/src/parse/lex.hpp
@@ -175,16 +175,8 @@ public:
// Thin wrapper around a 32-bit codepoint value `v`. The lexer appends one to a
// string with `str += Codepoint(v)` (see the lex.cpp hunk of this commit).
struct Codepoint {
uint32_t v;
Codepoint(uint32_t v): v(v) { }
// The inline operator removed below only handled values under 128 and threw
// a "TODO" runtime_error for anything larger.
- friend ::std::string& operator+=(::std::string& s, const Codepoint& cp) {
- if( cp.v < 128 ) {
- s += (char)cp.v;
- }
- else {
- throw ::std::runtime_error("TODO: Encode UTF-8 codepoint");
- }
- return s;
- }
};
// Declaration only; this commit adds the definition (with multi-byte UTF-8
// encoding) to src/parse/lex.cpp.
+extern ::std::string& operator+=(::std::string& s, const Codepoint& cp);
class Lexer:
public TokenStream