1 files changed, 144 insertions, 61 deletions
diff --git a/src/parse/lex.cpp b/src/parse/lex.cpp
index 2338adcd..dc206ad8 100644
--- a/src/parse/lex.cpp
+++ b/src/parse/lex.cpp
@@ -200,12 +200,14 @@ signed int Lexer::getSymbol()
     return best;
 }
 
-bool issym(char ch)
+// Whether `ch` can be part of an identifier: ASCII alphanumerics, '_', or any value outside the ASCII range.
+bool issym(int ch)
 {
+    if( ch >= 128 || ch < 0 )   return true;    // tested before isalnum(), whose behaviour is undefined for values not representable as unsigned char (other than EOF)
     if( ::std::isalnum(ch) )
         return true;
     if( ch == '_' )
         return true;
     return false;
 }
 
@@ -279,7 +281,7 @@ Token Lexer::getTokenInt()
                 uint64_t    val = 0;
                 if( ch == '0' ) {
                     // Octal/hex handling
-                    ch = this->getc();
+                    ch = this->getc_num();
                     if( ch == 'x' ) {
                         num_mode = HEX;
                         while( isxdigit(ch = this->getc_num()) )
@@ -323,7 +325,7 @@ Token Lexer::getTokenInt()
                     }
                 }
 
-                if(ch == 'u' || ch == 'i') {
+                if(issym(ch)) {
                     // Unsigned
                     ::std::string   suffix;
                     while( issym(ch) )
@@ -344,6 +346,8 @@ Token Lexer::getTokenInt()
                     else if(suffix == "u32") num_type = CORETYPE_U32;
                     else if(suffix == "u64") num_type = CORETYPE_U64;
                     else if(suffix == "usize") num_type = CORETYPE_UINT;
+                    else if(suffix == "f32") num_type = CORETYPE_F32;
+                    else if(suffix == "f64") num_type = CORETYPE_F64;
                     else
                         throw ParseError::Generic(*this, FMT("Unknown integer suffix '" << suffix << "'"));
                     return Token(val, num_type);
@@ -375,7 +379,7 @@ Token Lexer::getTokenInt()
                     
                     this->ungetc();
                     double fval = this->parseFloat(val);
-                    if( (ch = this->getc()) == 'f' )
+                    if( issym(ch = this->getc()) )
                     {
                         ::std::string   suffix;
                         while( issym(ch) )
@@ -403,62 +407,60 @@ Token Lexer::getTokenInt()
                     return Token(val, num_type);
                 }
             }
-            // Symbols
-            else if( issym(ch) )
+            // Byte/Raw strings
+            else if( ch == 'b' || ch == 'r' )
             {
-                ::std::string   str;
-                while( issym(ch) )
-                {
-                    str.push_back(ch);
+                bool is_byte = false;
+                if(ch == 'b') {
+                    is_byte = true;
                     ch = this->getc();
                 }
-
-                if( ch == '!' )
-                {
-                    return Token(TOK_MACRO, str);
+                
+                if(ch == 'r') {
+                    return this->getTokenInt_RawString(is_byte);
                 }
-                else
-                {
-                    if( str == "b" )
-                    {
-                        if( ch == '\'' ) {
-                            // Byte constant
-                            ch = this->getc();
-                            if( ch == '\\' ) {
-                                uint32_t val = this->parseEscape('\'');
-                                if( this->getc() != '\'' )
-                                    throw ParseError::Generic(*this, "Multi-byte character literal");
-                                return Token((uint64_t)val, CORETYPE_U8);
-                            }
-                            else {
-                                if( this->getc() != '\'' )
-                                    throw ParseError::Generic(*this, "Multi-byte character literal");
-                                return Token((uint64_t)ch, CORETYPE_U8);
-                            }
+                else {
+                    assert(is_byte);
+                    
+                    // Byte string
+                    if( ch == '"' ) {
+                        ::std::string str;
+                        while( (ch = this->getc()) != '"' )
+                        {
+                            if( ch == '\\' )
+                                ch = this->parseEscape('"');
+                            str.push_back(ch);
                         }
-                        else if( ch == '"') {
-                            ::std::string str;
-                            while( (ch = this->getc()) != '"' )
-                            {
-                                if( ch == '\\' )
-                                    ch = this->parseEscape('"');
-                                str.push_back(ch);
-                            }
-                            return Token(TOK_BYTESTRING, str);
+                        return Token(TOK_BYTESTRING, str);
+                    }
+                    // Byte constant
+                    else if( ch == '\'' ) {
+                        // Byte constant
+                        ch = this->getc();
+                        if( ch == '\\' ) {
+                            uint32_t val = this->parseEscape('\'');
+                            if( this->getc() != '\'' )
+                                throw ParseError::Generic(*this, "Multi-byte character literal");
+                            return Token((uint64_t)val, CORETYPE_U8);
                         }
                         else {
+                            if( this->getc() != '\'' )
+                                throw ParseError::Generic(*this, "Multi-byte character literal");
+                            return Token((uint64_t)ch, CORETYPE_U8);
                         }
                     }
-                
-                    this->ungetc();
-                    for( unsigned int i = 0; i < LEN(RWORDS); i ++ )
-                    {
-                        if( str < RWORDS[i].chars ) break;
-                        if( str == RWORDS[i].chars )    return Token((enum eTokenType)RWORDS[i].type);
+                    else {
+                        assert(is_byte);
+                        this->ungetc();
+                        return this->getTokenInt_Identifier('b');
                     }
-                    return Token(TOK_IDENT, str);
                 }
             }
+            // Symbols
+            else if( issym(ch) )
+            {
+                return this->getTokenInt_Identifier(ch);
+            }
             else
             {
                 throw ParseError::BadChar(ch);
@@ -497,17 +499,25 @@ Token Lexer::getTokenInt()
                 }
                 return Token(TOK_COMMENT, str); }
             case SINGLEQUOTE: {
-                char firstchar = this->getc();
-                if( firstchar != '\\' ) {
+                auto firstchar = this->getc_codepoint();
+                if( firstchar.v == '\\' ) {
+                    // Character constant with an escape code
+                    uint32_t val = this->parseEscape('\'');
+                    if(this->getc() != '\'') {
+                        throw ParseError::Todo("Proper error for lex failures");
+                    }
+                    return Token((uint64_t)val, CORETYPE_CHAR);
+                }
+                else {
                     ch = this->getc();
                     if( ch == '\'' ) {
                         // Character constant
-                        return Token((uint64_t)firstchar, CORETYPE_CHAR);
+                        return Token((uint64_t)firstchar.v, CORETYPE_CHAR);
                     }
-                    else {
+                    else if( issym(firstchar.v) ) {
                         // Lifetime name
                         ::std::string   str;
-                        str.push_back(firstchar);
+                        str += firstchar;
                         while( issym(ch) )
                         {
                             str.push_back(ch);
@@ -516,14 +526,9 @@ Token Lexer::getTokenInt()
                         this->ungetc();
                         return Token(TOK_LIFETIME, str);
                     }
-                }
-                else {
-                    // Character constant with an escape code
-                    uint32_t val = this->parseEscape('\'');
-                    if(this->getc() != '\'') {
-                        throw ParseError::Todo("Proper error for lex failures");
+                    else {
+                        throw ParseError::Todo("Lex Fail - Expected ' after character constant");
                     }
-                    return Token((uint64_t)val, CORETYPE_CHAR);
                 }
                 break; }
             case DOUBLEQUOTE: {
@@ -548,6 +553,72 @@ Token Lexer::getTokenInt()
     //assert(!"bugcheck");
 }
 
+// Lexes the remainder of a raw string literal; the caller has already consumed the leading `r`.
+// `is_byte` selects TOK_BYTESTRING over TOK_STRING for the result. If the next character is
+// neither '#' nor '"', the input is lexed as an identifier starting with 'r' instead.
+// NOTE(review): that fallback only passes 'r' on, so a `b` already consumed by the caller appears to be dropped - confirm.
+Token Lexer::getTokenInt_RawString(bool is_byte)
+{
+    char ch = this->getc();
+    // Count the '#'s before the opening delimiter; the closing one must be followed by as many
+    unsigned int hashes = 0;
+    for( ; ch == '#'; ch = this->getc() )
+        hashes ++;
+    if( hashes == 0 && ch != '"' ) {
+        this->ungetc();
+        return this->getTokenInt_Identifier('r');
+    }
+    // NOTE(review): with hashes present, whatever character follows them is used as the delimiter (not checked to be '"')
+    const char terminator = ch;
+    ::std::string   val;
+
+    // Accumulate the literal's content up to the closing delimiter
+    for(;;)
+    {
+        ch = this->getc();
+        if( ch != terminator ) {
+            val += ch;
+            continue ;
+        }
+        // Candidate end of the literal - only genuine if `hashes` '#'s follow immediately
+        unsigned int matched = 0;
+        while( matched < hashes && (ch = this->getc()) == '#' )
+            matched ++;
+        if( matched == hashes )
+            return Token(is_byte ? TOK_BYTESTRING : TOK_STRING, val);
+        // False ending: what was consumed is content. Push the mismatching character back so
+        // the next pass re-examines it (it may be plain content, or another delimiter)
+        val += terminator;
+        val.append(matched, '#');
+        this->ungetc();
+    }
+}
+// Lexes an identifier, reserved word or macro name whose first character is `leader`.
+// Following characters are taken from the input for as long as issym() accepts them.
+Token Lexer::getTokenInt_Identifier(char leader)
+{
+    ::std::string   name;
+    char next = leader;
+    for( ; issym(next); next = this->getc() )
+        name.push_back(next);
+
+    // A '!' straight after the name yields a macro token (the '!' stays consumed)
+    if( next == '!' )
+        return Token(TOK_MACRO, name);
+    // Otherwise hand the terminating character back for the next token
+    this->ungetc();
+    // Reserved word lookup; gives up at the first entry that compares greater than the name
+    for( unsigned int idx = 0; idx < LEN(RWORDS); idx ++ )
+    {
+        const auto& entry = RWORDS[idx];
+        if( name < entry.chars )
+            break;
+        if( name == entry.chars )
+            return Token((enum eTokenType)entry.type);
+    }
+    return Token(TOK_IDENT, name);
+}
+
 // Takes the VERY lazy way of reading the float into a string then passing to strtod
 double Lexer::parseFloat(uint64_t whole)
 {
@@ -615,6 +686,8 @@ uint32_t Lexer::parseEscape(char enclosing)
         else
             ;
         return val; }
+    case '0':
+        return '\0';
     case '\\':
         return '\\';
     case '\'':
@@ -628,7 +701,7 @@ uint32_t Lexer::parseEscape(char enclosing)
     case 't':
         return '\t';
     case '\n':
-	    m_line ++;
+        m_line ++;
         while( isspace(ch) )
             ch = this->getc();
         return ch;
@@ -662,6 +735,16 @@ char Lexer::getc_num()
     } while( ch == '_' );
     return ch;
 }
+// Reads the next character from the input as a Codepoint.
+// Only single-byte values (below 128) are supported so far; anything else raises ParseError::Todo.
+Codepoint Lexer::getc_codepoint()
+{
+    const uint8_t lead = this->getc();
+    if( lead >= 128 ) {
+        throw ParseError::Todo("getc_codepoint");
+    }
+    return {lead};
+}
 
 void Lexer::ungetc()
 {