diff options
author | John Hodge <tpg@mutabah.net> | 2015-09-23 13:23:40 +0800 |
---|---|---|
committer | John Hodge <tpg@mutabah.net> | 2015-09-23 13:23:40 +0800 |
commit | fd0c758bcc492f5b25550e7d4a5d67dc90eeba6a (patch) | |
tree | 85063399453a8a04647724fab4a764c92c85542d | |
parent | 558ac2ad0ae0e9585f551525f98896cbe3a2a9b8 (diff) | |
download | mrust-fd0c758bcc492f5b25550e7d4a5d67dc90eeba6a.tar.gz |
BNF - Extended grammar with struct literal hack
-rw-r--r-- | .gitignore | 13 | ||||
-rw-r--r-- | bnf/Makefile | 5 | ||||
-rw-r--r-- | bnf/rust.lex | 15 | ||||
-rw-r--r-- | bnf/rust.y | 218 | ||||
-rw-r--r-- | bnf/rust_expr.y.h | 14 | ||||
-rw-r--r-- | bnf/rust_expr.y_tree.h | 112 |
6 files changed, 226 insertions, 151 deletions
@@ -3,9 +3,14 @@ /mrustc.depend /mrustc.layout /.obj -*.swp +*.gch +*.gch.dep +*.sw[po] /output -/bnf/*.c -/bnf/*.h -/bnf/*.output +/bnf/.rust.y +/bnf/rust.lex.c +/bnf/rust.tab.c +/bnf/rust.tab.h +/bnf/rust.output +/bnf/test.bin diff --git a/bnf/Makefile b/bnf/Makefile index 02ddaa41..806ba272 100644 --- a/bnf/Makefile +++ b/bnf/Makefile @@ -13,8 +13,11 @@ test: test.bin $(TSTFILE) test.bin: rust.tab.c rust.lex.c gcc rust.tab.c rust.lex.c -o $@ -rust.tab.c: rust.y +rust.tab.c: .rust.y yacc -o $@ $< -d --verbose +.rust.y: rust.y rust_expr.y.h rust_expr.y_tree.h + cat rust.y > $@ + cpp -P rust_expr.y.h >> $@ rust.lex.c: rust.lex lex -o $@ $< diff --git a/bnf/rust.lex b/bnf/rust.lex index 083ed0dd..658fb8ee 100644 --- a/bnf/rust.lex +++ b/bnf/rust.lex @@ -1,3 +1,6 @@ + +%option yylineno + %{ #include "rust.tab.h" #include <stdio.h> @@ -26,10 +29,11 @@ ident_c [a-zA-Z_] %% -"//"[^/].*\n { yylineno += 1; } -"///".*\n { yylineno += 1; } // TODO: Handle /// by desugaring -"/*"[^*]([^(\*/)])*"*/" { const char *c; for(c = yytext; *c; c ++) if( *c == '\n') yylineno += 1; } -\n { yylineno += 1; } +"//"[^/].*\n { } +"///".*\n { /* TODO: Handle /// by desugaring */ } +"/*"[^*]([^(\*/)])*"*/" { } +"/**"([^(\*/)])*"*/" { /* TODO: handle / ** by desugaring */ } +\n /* */ \r /* */ [ \t] /* */ @@ -46,6 +50,7 @@ ident_c [a-zA-Z_] "fn" { return RWD_fn; } "as" { return RWD_as; } +"in" { return RWD_in; } "mut" { return RWD_mut; } "pub" { return RWD_pub; } "where" { return RWD_where; } @@ -115,7 +120,7 @@ ident_c [a-zA-Z_] [0-9]{dec_digit}* { yylval.integer = strtoull(yytext, NULL, 0); return INTEGER; } 0x[0-9a-fA-F]* { yylval.integer = strtoull(yytext, NULL, 0); return INTEGER; } -[b]'(.|\\['rn])' { yylval.text = strdup(yytext); return CHARLIT; } +b?'(.|\\['rn])' { yylval.text = strdup(yytext); return CHARLIT; } \"([^"])+\" { yylval.text = strdup(yytext); return STRING; } . { fprintf(stderr, "\x1b[31m" "ERROR: Invalid character '%c' on line %i\x1b[0m\n", *yytext, yylineno); exit(1); } @@ -1,14 +1,14 @@ %token <text> IDENT LIFETIME STRING MACRO %token <integer> INTEGER CHARLIT %token <realnum> FLOAT -%token SUPER_ATTR SUB_ATTR DOC_COMMENT SUPER_DOC_COMMENT +%token DOC_COMMENT SUPER_DOC_COMMENT %token DOUBLECOLON THINARROW FATARROW DOUBLEDOT TRIPLEDOT %token DOUBLEEQUAL EXCLAMEQUAL DOUBLEPIPE DOUBLEAMP %token GTEQUAL LTEQUAL %token PLUSEQUAL MINUSEQUAL STAREQUAL SLASHEQUAL %token DOUBLELT DOUBLEGT %token RWD_mod RWD_fn RWD_const RWD_static RWD_use RWD_struct RWD_enum RWD_trait RWD_impl RWD_type -%token RWD_as RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe +%token RWD_as RWD_in RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe %token RWD_let %token RWD_self RWD_super %token RWD_match RWD_if RWD_while RWD_loop RWD_for RWD_else @@ -65,6 +65,28 @@ Root */ crate : super_attrs module_body; +tt_list: | tt_list tt_item; +tt_item: tt_paren | tt_brace | tt_square | tt_tok; +tt_tok + : IDENT | STRING | CHARLIT | LIFETIME | INTEGER | MACRO + | '+' | '*' | '/' | '!' | ',' | ';' | '#' + | RWD_self | RWD_super | RWD_mut | RWD_ref | RWD_let | RWD_where + | RWD_for | RWD_while | RWD_loop | RWD_if | RWD_else | RWD_match | RWD_return + | RWD_impl | RWD_pub | RWD_struct | RWD_enum | RWD_fn | RWD_type | RWD_static | RWD_const + | '-' | THINARROW + | '&' | DOUBLEAMP + | ':' | DOUBLECOLON + | '|' | DOUBLEPIPE + | '=' | DOUBLEEQUAL | FATARROW + | '<' | DOUBLELT | LTEQUAL + | '>' | DOUBLEGT | GTEQUAL + | '.' | DOUBLEDOT | TRIPLEDOT + | '$' + ; +tt_paren: '(' tt_list ')'; +tt_brace: '{' tt_list '}'; +tt_square: '[' tt_list ']'; + super_attrs : | super_attrs super_attr; opt_pub @@ -130,14 +152,16 @@ module_def fn_def: fn_def_hdr code { bnf_trace("function defined"); }; fn_def_hdr: IDENT generic_def '(' fn_def_args ')' fn_def_ret where_clause { bnf_trace("function '%s'", $1); }; -fn_def_ret: /* -> () */ | THINARROW type; +fn_def_ret: /* -> () */ | THINARROW type | THINARROW '!'; fn_def_args: /* empty */ | fn_def_self | fn_def_self ',' fn_def_arg_list | fn_def_arg_list; fn_def_self : RWD_self | RWD_mut RWD_self | '&' RWD_self + | '&' LIFETIME RWD_self | '&' RWD_mut RWD_self + | '&' LIFETIME RWD_mut RWD_self ; fn_def_arg_list: fn_def_arg | fn_def_arg_list ',' fn_def_arg; fn_def_arg : pattern ':' type; @@ -268,6 +292,8 @@ use_path expr_path : ufcs_path DOUBLECOLON IDENT | DOUBLECOLON expr_path_segs + | RWD_self DOUBLECOLON expr_path_segs + | RWD_super DOUBLECOLON expr_path_segs | expr_path_segs ; expr_path_segs @@ -350,17 +376,19 @@ pattern nonbind_pattern : '_' { } | DOUBLEDOT { } - | STRING { } - | INTEGER { } - | CHARLIT { } | struct_pattern | tuple_pattern -/* | expr_path '(' refutable_pattern_list opt_comma ')' - | expr_path '{' refutable_struct_patern '}' */ - | expr_path + | value_pattern + | value_pattern TRIPLEDOT value_pattern | '&' pattern | '&' RWD_mut pattern ; +value_pattern + : expr_path + | INTEGER + | CHARLIT + | STRING + ; pattern_list : pattern_list ',' pattern @@ -378,7 +406,17 @@ code: '{' block_contents '}' { bnf_trace("code parsed"); }; block_contents : | block_lines - | block_lines expr + | block_lines tail_expr + ; +tail_expr + : expr + | flow_control + ; +flow_control + : RWD_return opt_semicolon {} + | RWD_return expr_0 opt_semicolon {} + | RWD_break opt_lifetime opt_semicolon {} + | RWD_continue opt_lifetime opt_semicolon {} ; block_lines: | block_lines block_line; block_line @@ -398,30 +436,35 @@ stmt : expr ';' ; -expr: expr_assign; +expr_list: expr_list ',' expr | expr | /* mt */; -expr_assign - : expr_0 assign_op expr_0 - | expr_0 +struct_literal_ent: IDENT | IDENT ':' expr; +struct_literal_list + : struct_literal_list ',' struct_literal_ent + | struct_literal_ent ; -assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL; expr_blocks - : RWD_match expr_path '{' match_arms opt_comma '}' { } - | RWD_match expr '{' match_arms opt_comma '}' { } + : RWD_match expr_NOSTRLIT '{' match_arms opt_comma '}' { } | RWD_if if_block | RWD_unsafe '{' block_contents '}' { } | RWD_loop '{' block_contents '}' { } - | RWD_while expr '{' block_contents '}' { } - | RWD_return {} - | RWD_return expr_0 {} + | RWD_while expr_NOSTRLIT '{' block_contents '}' { } + | RWD_for pattern RWD_in expr_NOSTRLIT '{' block_contents '}' { } + | flow_control | '{' block_contents '}' ; +opt_lifetime: | LIFETIME; +opt_semicolon: | ';'; if_block - : expr code { } - | expr code RWD_else code { } - | expr code RWD_else RWD_if if_block { } + : if_block_head {} + | if_block_head RWD_else code { } + | if_block_head RWD_else RWD_if if_block { } + ; +if_block_head + : expr_NOSTRLIT code {} + | RWD_let pattern '=' expr_NOSTRLIT code {} ; match_arms : match_arm ',' match_arms @@ -429,126 +472,19 @@ match_arms | match_arm | match_arm ',' ; -match_pattern: pattern | pattern RWD_if expr_0; -match_arm - : match_pattern FATARROW expr { bnf_trace("match_arm"); } - | match_arm_brace +match_pattern + : pattern + | pattern RWD_if expr_0 ; -match_arm_brace : match_pattern FATARROW '{' block_contents '}'; - -expr_0: expr_range; - -expr_range - : expr_range_n - | expr_range_n DOUBLEDOT - | DOUBLEDOT expr_range_n - | expr_range_n DOUBLEDOT expr_range_n - ; -expr_range_n: expr_bor; - -expr_bor: expr_band | expr_bor DOUBLEPIPE expr_band { } ; -expr_band: expr_equ | expr_band DOUBLEAMP expr_equ { } ; -expr_equ - : expr_cmp - | expr_equ DOUBLEEQUAL expr_cmp - | expr_equ EXCLAMEQUAL expr_cmp - ; -expr_cmp - : expr_cmp_n - | expr_cmp '<' expr_cmp_n {} - | expr_cmp '>' expr_cmp_n {} - | expr_cmp GTEQUAL expr_cmp_n {} - | expr_cmp LTEQUAL expr_cmp_n {} - ; - -expr_cmp_n: expr_or; - -expr_or: expr_and | expr_or '|' expr_and { }; -expr_and: expr_xor | expr_and '&' expr_xor { }; -expr_xor: expr_8 | expr_xor '^' expr_8 { }; -expr_8 - : expr_9 - | expr_8 DOUBLELT expr_9 {} - | expr_8 DOUBLEGT expr_9 {} - ; -expr_9 - : expr_cast - | expr_9 '+' expr_cast {} - | expr_9 '-' expr_cast {} - ; -/* 10: Cast */ -expr_cast: expr_11 | expr_cast RWD_as type { bnf_trace("expr:cast"); }; -/* 11: Times/Div/Modulo */ -expr_11 - : expr_11n - | expr_11 '*' expr_11n {} - | expr_11 '/' expr_11n {} - | expr_11 '%' expr_11n {} - ; -expr_11n: expr_12; -expr_12 - : expr_fc - | '-' expr_12 - | '!' expr_12 - | '*' expr_12 -/* | RWD_box expr_12 */ - | '&' expr_12 - | '&' RWD_mut expr_12 - | DOUBLEAMP expr_12 { } - | DOUBLEAMP RWD_mut expr_12 { } - ; - -expr_fc - : expr_value - | expr_fc '(' expr_list ')' - | expr_fc '[' expr ']' - | expr_fc '.' INTEGER - | expr_fc '.' expr_path_seg '(' expr_list ')' - | expr_fc '.' expr_path_seg - -expr_value - : CHARLIT | INTEGER | FLOAT | STRING - | expr_blocks - | expr_path '(' expr_list ')' { bnf_trace("function call"); } - | expr_path '{' struct_literal_list '}' - | expr_path '{' struct_literal_list ',' DOUBLEDOT expr_0 '}' - | expr_path - | RWD_self - | '(' expr ')' - | '(' ')' - | '(' expr ',' expr_list ')' - | MACRO tt_paren { bnf_trace("Expr macro invocation"); } - | '|' pattern_list '|' expr - | DOUBLEPIPE expr +match_patterns + : match_patterns '|' match_pattern + | match_pattern ; - -expr_list: expr_list ',' expr | expr | /* mt */; - -struct_literal_ent: IDENT | IDENT ':' expr; -struct_literal_list - : struct_literal_list ',' struct_literal_ent - | struct_literal_ent +match_arm + : match_patterns FATARROW expr { bnf_trace("match_arm"); } + | match_arm_brace ; +match_arm_brace : match_patterns FATARROW '{' block_contents '}'; -tt_list: | tt_list tt_item; -tt_item: tt_paren | tt_brace | tt_square | tt_tok; -tt_tok - : IDENT | STRING | CHARLIT | LIFETIME | INTEGER | MACRO - | '+' | '*' | '/' | '!' | ',' | ';' | '#' - | RWD_self | RWD_super | RWD_mut | RWD_ref | RWD_let | RWD_where - | RWD_for | RWD_while | RWD_loop | RWD_if | RWD_else | RWD_match | RWD_return - | RWD_impl | RWD_pub | RWD_struct | RWD_enum | RWD_fn | RWD_type | RWD_static | RWD_const - | '-' | THINARROW - | '&' | DOUBLEAMP - | ':' | DOUBLECOLON - | '|' | DOUBLEPIPE - | '=' | DOUBLEEQUAL | FATARROW - | '<' | DOUBLELT - | '>' | DOUBLEGT - | '.' | DOUBLEDOT | TRIPLEDOT - | '$' - ; -tt_paren: '(' tt_list ')'; -tt_brace: '{' tt_list '}'; -tt_square: '[' tt_list ']'; +/* rust_expr.y.h inserted */ diff --git a/bnf/rust_expr.y.h b/bnf/rust_expr.y.h new file mode 100644 index 00000000..7ad43703 --- /dev/null +++ b/bnf/rust_expr.y.h @@ -0,0 +1,14 @@ + +assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL; + +#define SUFFIX_is_ +#define _(v) v +#include "rust_expr.y_tree.h" +#undef SUFFIX_is_ +#undef _ + +#define SUFFIX_is__NOSTRLIT +#define _(v) v##_NOSTRLIT +#include "rust_expr.y_tree.h" +#undef SUFFIX +#undef _ diff --git a/bnf/rust_expr.y_tree.h b/bnf/rust_expr.y_tree.h new file mode 100644 index 00000000..489cc08b --- /dev/null +++ b/bnf/rust_expr.y_tree.h @@ -0,0 +1,112 @@ +_(expr): _(expr_assign); + +_(expr_assign) + : _(expr_0) assign_op _(expr_0) + | _(expr_0) + ; + +_(expr_0): _(expr_range); + +_(expr_range) + : _(expr_range_n) + | _(expr_range_n) DOUBLEDOT + | DOUBLEDOT _(expr_range_n) + | _(expr_range_n) DOUBLEDOT _(expr_range_n) + ; +_(expr_range_n): _(expr_bor); + +_(expr_bor) + : _(expr_band) + | _(expr_bor) DOUBLEPIPE _(expr_band) { } + ; +_(expr_band) + : _(expr_equ) + | _(expr_band) DOUBLEAMP _(expr_equ) { } + ; +_(expr_equ) + : _(expr_cmp) + | _(expr_equ) DOUBLEEQUAL _(expr_cmp) + | _(expr_equ) EXCLAMEQUAL _(expr_cmp) + ; +_(expr_cmp) + : _(expr_cmp_n) + | _(expr_cmp) '<' _(expr_cmp_n) {} + | _(expr_cmp) '>' _(expr_cmp_n) {} + | _(expr_cmp) GTEQUAL _(expr_cmp_n) {} + | _(expr_cmp) LTEQUAL _(expr_cmp_n) {} + ; +_(expr_cmp_n): _(expr_or); + +_(expr_or) + : _(expr_and) + | _(expr_or) '|' _(expr_and) { } + ; +_(expr_and) + : _(expr_xor) + | _(expr_and) '&' _(expr_xor) { } + ; +_(expr_xor) + : _(expr_8) + | _(expr_xor) '^' _(expr_8) { } + ; +_(expr_8) + : _(expr_9) + | _(expr_8) DOUBLELT _(expr_9) {} + | _(expr_8) DOUBLEGT _(expr_9) {} + ; +_(expr_9) + : _(expr_cast) + | _(expr_9) '+' _(expr_cast) {} + | _(expr_9) '-' _(expr_cast) {} + ; +/* 10: Cast */ +_(expr_cast) + : _(expr_11) + | _(expr_cast) RWD_as type { bnf_trace("expr:cast"); } + ; +/* 11: Times/Div/Modulo */ +_(expr_11) + : _(expr_11n) + | _(expr_11) '*' _(expr_11n) {} + | _(expr_11) '/' _(expr_11n) {} + | _(expr_11) '%' _(expr_11n) {} + ; +_(expr_11n): _(expr_12); +_(expr_12) + : _(expr_fc) + | '-' _(expr_12) + | '!' _(expr_12) + | '*' _(expr_12) +/* | RWD_box expr_12 */ + | '&' _(expr_12) + | '&' RWD_mut _(expr_12) + | DOUBLEAMP _(expr_12) { } + | DOUBLEAMP RWD_mut _(expr_12) { } + ; + +_(expr_fc) + : _(expr_value) + | _(expr_fc) '(' expr_list ')' + | _(expr_fc) '[' expr ']' + | _(expr_fc) '.' INTEGER + | _(expr_fc) '.' expr_path_seg '(' expr_list ')' + | _(expr_fc) '.' expr_path_seg + +_(expr_value) + : CHARLIT | INTEGER | FLOAT | STRING + | expr_blocks + | expr_path '(' expr_list ')' { bnf_trace("function call"); } +#ifndef SUFFIX_is__NOSTRLIT + | expr_path '{' struct_literal_list '}' + | expr_path '{' struct_literal_list ',' '}' + | expr_path '{' struct_literal_list ',' DOUBLEDOT expr_0 '}' +#endif + | expr_path + | RWD_self + | '(' expr ')' + | '(' ')' + | '(' expr ',' expr_list ')' + | MACRO tt_paren { bnf_trace("Expr macro invocation"); } + | '|' pattern_list '|' expr + | DOUBLEPIPE expr + ; |