diff options
author | John Hodge <tpg@mutabah.net> | 2015-09-26 22:39:25 +0800 |
---|---|---|
committer | John Hodge <tpg@mutabah.net> | 2015-09-26 22:39:25 +0800 |
commit | be427e449c1d492050279311ccecc3de8b17e838 (patch) | |
tree | a2b6f2e3fbdef2ae003700578c339da8559ea2ca | |
parent | 0121cefaec3f14a5f35ac84321787e01849585e0 (diff) | |
download | mrust-be427e449c1d492050279311ccecc3de8b17e838.tar.gz |
BNF - Improvements, partially parsing std now
-rw-r--r-- | bnf/Makefile | 4 | ||||
-rw-r--r-- | bnf/ast_types.hpp | 15 | ||||
-rw-r--r-- | bnf/rust.lex | 59 | ||||
-rw-r--r-- | bnf/rust.y | 23 | ||||
-rw-r--r-- | bnf/rust_expr.y_tree.h | 2 | ||||
-rw-r--r-- | bnf/rust_tts.y.h | 4 |
6 files changed, 96 insertions, 11 deletions
diff --git a/bnf/Makefile b/bnf/Makefile index ad8aaf7d..cbfffef4 100644 --- a/bnf/Makefile +++ b/bnf/Makefile @@ -4,8 +4,10 @@ OBJS := main.o rust.tab.o rust.lex.o +RUSTSRC := ../../rust_os/rustc_src/ + TSTFILES := ../samples/1.rs -TSTFILES += ../../rust_os/rustc_src/libcore/lib.rs +TSTFILES += $(addprefix $(RUSTSRC), libcore/lib.rs libstd/lib.rs) diff --git a/bnf/ast_types.hpp b/bnf/ast_types.hpp index daa71b89..09343c71 100644 --- a/bnf/ast_types.hpp +++ b/bnf/ast_types.hpp @@ -212,6 +212,21 @@ public: {} }; +class ExternCrate: + public Item +{ + ::std::string m_name; + ::std::string m_alias; +public: + ExternCrate(::std::string name): + m_name(name), + m_alias(name) + {} + ExternCrate(::std::string name, ::std::string alias): + m_name(name), + m_alias(alias) + {} +}; class UseItem { diff --git a/bnf/rust.lex b/bnf/rust.lex index 1151aa32..21080235 100644 --- a/bnf/rust.lex +++ b/bnf/rust.lex @@ -27,6 +27,7 @@ int yylex(YYSTYPE* lvalp, ParserContext& context) { void handle_block_comment(); ::std::string parse_escaped_string(const char* s); +::std::string handle_raw_string(const char* s); %} @@ -62,9 +63,11 @@ int_suffix ([ui](size|8|16|32|64))? "pub" { return RWD_pub; } "where" { return RWD_where; } "extern" { return RWD_extern; } +"crate" { return RWD_crate; } "let" { return RWD_let; } "ref" { return RWD_ref; } +"box" { return RWD_box; } "self" { return RWD_self; } "super" { return RWD_super; } @@ -142,14 +145,16 @@ int_suffix ([ui](size|8|16|32|64))? } } [0-9]{dec_digit}*"."{dec_digit}+(e[+\-]?{dec_digit}+)?(f32|f64)? { lvalp->FLOAT = strtod(yytext, NULL); return FLOAT; } +[0-9]{dec_digit}*(f32|f64) { lvalp->FLOAT = strtod(yytext, NULL); return FLOAT; } [0-9]{dec_digit}*{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } 0x[0-9a-fA-F_]+{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } 0b[01_]+{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } {ident_c}({ident_c}|[0-9])*"!" { lvalp->MACRO = new ::std::string(yytext, 0, strlen(yytext)-1); return MACRO; } '{ident_c}{ident_c}* { lvalp->LIFETIME = new ::std::string(yytext, 1); return LIFETIME; } -b?'(.|\\'|\\[^']+)' { lvalp->CHARLIT = yytext[0]; return CHARLIT; } -b?\"([^"]|\\\")*\" { lvalp->STRING = new ::std::string( parse_escaped_string(yytext) ); return STRING; } +b?'(.|\\'|\\[^']+|[\x80-\xFF]*)' { lvalp->CHARLIT = yytext[0]; return CHARLIT; } +b?\"(\\.|[^\\"]|\\\n)*\" { lvalp->STRING = new ::std::string( parse_escaped_string(yytext) ); return STRING; } +b?r#*\" { auto rs = handle_raw_string( (*yytext=='b' ? yytext+2 : yytext+1) ); lvalp->STRING = new ::std::string(rs); return STRING; } . { fprintf(stderr, "\x1b[31m" "ERROR: %s:%d: Invalid character '%c'\x1b[0m\n", context.filename.c_str(), yylineno, *yytext); exit(1); } @@ -194,6 +199,7 @@ uint32_t parse_char_literal(const char *_s) { } ::std::string parse_escaped_string(const char* s) { + printf("parse_escaped_string(%s)\n", s); if( *s == 'b' ) { s ++; } @@ -211,7 +217,20 @@ uint32_t parse_char_literal(const char *_s) { case 'n': rv += '\n'; break; case 'r': rv += '\r'; break; case '"': rv += '"'; break; + case '0': rv += '\0'; break; + case '\\': rv += '\\'; break; case '\n': break; + case 'x': + rv += (char)strtoul((const char*)(s+1), NULL, 16); + s += 2; + break; + case 'u': { + char *out; + assert(s[1] == '{'); + rv += (char)strtoul((const char*)(s+2), &out, 16); + s = out; + assert(*s == '}'); + break; } default: fprintf(stderr, "Unknown escape code '\\%c' in string\n", *s); exit(1); @@ -248,3 +267,39 @@ loop: //if (c != 0) // putchar(c1); } + +::std::string handle_raw_string(const char* s) { + int num_hash = 0; + for(; *s == '#'; s++) + num_hash ++; + assert(*s == '"'); + + ::std::string rv; + + for(;;) + { + char c; + if( (c = yyinput()) == '"' ) { + if( num_hash == 0 ) + break; + int i; + for(i = 0; i < num_hash; i ++) { + if( (c = yyinput()) != '#' ) + break; + } + // Found `num_hash` '#' characters in a row, break out + if( i == num_hash ) { + break; + } + // Didn't find enough, append to output + rv += '"'; + while(i--) rv += '#'; + + } + else { + rv += c; + } + } + + return rv; +} @@ -31,11 +31,11 @@ %token DOUBLELT DOUBLEGT DOUBLELTEQUAL DOUBLEGTEQUAL %token RWD_mod RWD_fn RWD_const RWD_static RWD_use RWD_struct RWD_enum RWD_trait RWD_impl RWD_type %token RWD_as RWD_in RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe -%token RWD_let +%token RWD_let RWD_box %token RWD_self RWD_super %token RWD_match RWD_if RWD_while RWD_loop RWD_for RWD_else %token RWD_return RWD_break RWD_continue -%token RWD_extern +%token RWD_extern RWD_crate %type <Module*> module_root %type <int> tt_tok @@ -58,6 +58,7 @@ %type <Enum*> enum_def %type <Trait*> trait_def %type <Fn*> fn_def fn_def_hdr fn_def_hdr_PROTO +%type <ExternCrate*> extern_crate %type <ExternBlock*> extern_block %type <Impl*> impl_def @@ -171,6 +172,8 @@ item | RWD_unsafe unsafe_item { $$ = $2; } | RWD_impl impl_def { $$ = $2; } | RWD_extern extern_block { $$ = $2; } + | RWD_extern RWD_crate extern_crate { $$ = $3; } + | RWD_pub RWD_extern RWD_crate extern_crate { $$ = $4; $$->set_pub(); } | MACRO IDENT tt_brace { $$ = new Macro(consume($1), consume($2), consume($3)); } | MACRO tt_brace { $$ = new Macro(consume($1), consume($2)); } | MACRO tt_paren ';' { $$ = new Macro(consume($1), consume($2)); } @@ -196,6 +199,12 @@ unsafe_item | RWD_impl impl_def { $$ = $2; } ; + +extern_crate + : IDENT ';' { $$ = new ExternCrate( consume($1) ); } + | IDENT RWD_as IDENT ';' { $$ = new ExternCrate( consume($1), consume($3)); } + ; + extern_block: extern_abi '{' extern_items '}' { $$ = new ExternBlock( consume($1), consume($3) ); }; extern_abi: { $$ = new ::std::string("C"); } | STRING; extern_items @@ -400,7 +409,7 @@ type_args ; expr_path - : ufcs_path DOUBLECOLON IDENT + : ufcs_path DOUBLECOLON expr_path_segs | DOUBLECOLON expr_path_segs | RWD_self DOUBLECOLON expr_path_segs | RWD_super DOUBLECOLON expr_path_segs @@ -430,7 +439,8 @@ type_path ; ufcs_path: '<' ufcs_path_tail; ufcs_path_tail - : type RWD_as trait_path '>' + : type '>' + | type RWD_as trait_path '>' | type RWD_as trait_path DOUBLEGT { context.pushback('>'); } ; type_path_segs @@ -457,6 +467,7 @@ type type_ele : type_path | RWD_fn '(' type_list ')' fn_def_ret + | RWD_extern extern_abi RWD_fn '(' type_list ')' fn_def_ret | '_' | '&' opt_lifetime type_ele | DOUBLEAMP opt_lifetime type_ele @@ -483,11 +494,11 @@ type_list: type_list ',' type | type; Patterns ========================================= */ -tuple_pattern: '(' pattern_list ')' | '(' pattern_list ',' ')'; +tuple_pattern: '(' ')' | '(' pattern_list ')' | '(' pattern_list ',' ')'; struct_pattern : expr_path '{' struct_pattern_items '}' - | expr_path tuple_pattern + | expr_path '(' pattern_list ')' ; struct_pattern_item: IDENT | IDENT ':' pattern; struct_pattern_items: struct_pattern_items ',' struct_pattern_item | struct_pattern_item; diff --git a/bnf/rust_expr.y_tree.h b/bnf/rust_expr.y_tree.h index 0687790d..82707da9 100644 --- a/bnf/rust_expr.y_tree.h +++ b/bnf/rust_expr.y_tree.h @@ -12,6 +12,7 @@ _(expr_range) | _(expr_range_n) DOUBLEDOT | DOUBLEDOT _(expr_range_n) | _(expr_range_n) DOUBLEDOT _(expr_range_n) + | DOUBLEDOT ; _(expr_range_n): _(expr_bor); @@ -82,6 +83,7 @@ _(expr_12) | '&' RWD_mut _(expr_12) | DOUBLEAMP _(expr_12) { } | DOUBLEAMP RWD_mut _(expr_12) { } + | RWD_box _(expr) ; _(expr_fc) diff --git a/bnf/rust_tts.y.h b/bnf/rust_tts.y.h index 1cd84318..366ffa3d 100644 --- a/bnf/rust_tts.y.h +++ b/bnf/rust_tts.y.h @@ -7,10 +7,10 @@ tt_tok | _T(FLOAT) | _C(',') | _C(';') | _C('_') | _T(RWD_self) | _T(RWD_super) | _T(RWD_mut) | _T(RWD_ref) | _T(RWD_let) | _T(RWD_where) | _T(RWD_pub) | _T(RWD_in) | _T(RWD_as) - | _T(RWD_for ) | _T(RWD_while) | _T(RWD_loop) | _T(RWD_if) | _T(RWD_else) | _T(RWD_match) + | _T(RWD_for ) | _T(RWD_while) | _T(RWD_loop) | _T(RWD_if) | _T(RWD_else) | _T(RWD_match) | _T(RWD_box) | _T(RWD_return) | _T(RWD_continue) | _T(RWD_break) | _T(RWD_impl) | _T(RWD_struct) | _T(RWD_enum) | _T(RWD_fn) | _T(RWD_type) | _T(RWD_static) | _T(RWD_const) | _T(RWD_trait) | _T(RWD_use) - | _T(RWD_extern) | _T(RWD_unsafe) + | _T(RWD_extern) | _T(RWD_crate) | _T(RWD_unsafe) | _C('/') | _T(SLASHEQUAL) | _C('%') | _T(PERCENTEQUAL) | _C('*') | _T(STAREQUAL) |