summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Hodge <tpg@mutabah.net> 2015-09-23 13:23:40 +0800
committer John Hodge <tpg@mutabah.net> 2015-09-23 13:23:40 +0800
commit fd0c758bcc492f5b25550e7d4a5d67dc90eeba6a (patch)
tree 85063399453a8a04647724fab4a764c92c85542d
parent 558ac2ad0ae0e9585f551525f98896cbe3a2a9b8 (diff)
download mrust-fd0c758bcc492f5b25550e7d4a5d67dc90eeba6a.tar.gz
BNF - Extended grammar with struct literal hack
-rw-r--r-- .gitignore 13
-rw-r--r-- bnf/Makefile 5
-rw-r--r-- bnf/rust.lex 15
-rw-r--r-- bnf/rust.y 218
-rw-r--r-- bnf/rust_expr.y.h 14
-rw-r--r-- bnf/rust_expr.y_tree.h 112
6 files changed, 226 insertions, 151 deletions
diff --git a/.gitignore b/.gitignore
index 3c731915..8e7f5349 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,9 +3,14 @@
/mrustc.depend
/mrustc.layout
/.obj
-*.swp
+*.gch
+*.gch.dep
+*.sw[po]
/output
-/bnf/*.c
-/bnf/*.h
-/bnf/*.output
+/bnf/.rust.y
+/bnf/rust.lex.c
+/bnf/rust.tab.c
+/bnf/rust.tab.h
+/bnf/rust.output
+/bnf/test.bin
diff --git a/bnf/Makefile b/bnf/Makefile
index 02ddaa41..806ba272 100644
--- a/bnf/Makefile
+++ b/bnf/Makefile
@@ -13,8 +13,11 @@ test: test.bin $(TSTFILE)
test.bin: rust.tab.c rust.lex.c
gcc rust.tab.c rust.lex.c -o $@
-rust.tab.c: rust.y
+rust.tab.c: .rust.y
yacc -o $@ $< -d --verbose
+.rust.y: rust.y rust_expr.y.h rust_expr.y_tree.h
+ cat rust.y > $@
+ cpp -P rust_expr.y.h >> $@
rust.lex.c: rust.lex
lex -o $@ $<
diff --git a/bnf/rust.lex b/bnf/rust.lex
index 083ed0dd..658fb8ee 100644
--- a/bnf/rust.lex
+++ b/bnf/rust.lex
@@ -1,3 +1,6 @@
+
+%option yylineno
+
%{
#include "rust.tab.h"
#include <stdio.h>
@@ -26,10 +29,11 @@ ident_c [a-zA-Z_]
%%
-"//"[^/].*\n { yylineno += 1; }
-"///".*\n { yylineno += 1; } // TODO: Handle /// by desugaring
-"/*"[^*]([^(\*/)])*"*/" { const char *c; for(c = yytext; *c; c ++) if( *c == '\n') yylineno += 1; }
-\n { yylineno += 1; }
+"//"([^/\n].*)?\n { /* plain line comment; the first body char may not be a newline, so an empty "//" no longer eats the following line */ }
+"///".*\n { /* TODO: Handle /// by desugaring */ }
+"/*"[^*]([^*]|\*+[^*/])*\*+"/" { /* plain block comment: runs to the first star-slash and may contain parens, stars and slashes */ }
+"/**"([^*]|\*+[^*/])*\*+"/" { /* TODO: handle / ** by desugaring */ }
+\n /* */
\r /* */
[ \t] /* */
@@ -46,6 +50,7 @@ ident_c [a-zA-Z_]
"fn" { return RWD_fn; }
"as" { return RWD_as; }
+"in" { return RWD_in; }
"mut" { return RWD_mut; }
"pub" { return RWD_pub; }
"where" { return RWD_where; }
@@ -115,7 +120,7 @@ ident_c [a-zA-Z_]
[0-9]{dec_digit}* { yylval.integer = strtoull(yytext, NULL, 0); return INTEGER; }
0x[0-9a-fA-F]* { yylval.integer = strtoull(yytext, NULL, 0); return INTEGER; }
-[b]'(.|\\['rn])' { yylval.text = strdup(yytext); return CHARLIT; }
+b?'(.|\\['rn])' { yylval.text = strdup(yytext); return CHARLIT; }
\"([^"])+\" { yylval.text = strdup(yytext); return STRING; }
. { fprintf(stderr, "\x1b[31m" "ERROR: Invalid character '%c' on line %i\x1b[0m\n", *yytext, yylineno); exit(1); }
diff --git a/bnf/rust.y b/bnf/rust.y
index 9c675b21..046db6b4 100644
--- a/bnf/rust.y
+++ b/bnf/rust.y
@@ -1,14 +1,14 @@
%token <text> IDENT LIFETIME STRING MACRO
%token <integer> INTEGER CHARLIT
%token <realnum> FLOAT
-%token SUPER_ATTR SUB_ATTR DOC_COMMENT SUPER_DOC_COMMENT
+%token DOC_COMMENT SUPER_DOC_COMMENT
%token DOUBLECOLON THINARROW FATARROW DOUBLEDOT TRIPLEDOT
%token DOUBLEEQUAL EXCLAMEQUAL DOUBLEPIPE DOUBLEAMP
%token GTEQUAL LTEQUAL
%token PLUSEQUAL MINUSEQUAL STAREQUAL SLASHEQUAL
%token DOUBLELT DOUBLEGT
%token RWD_mod RWD_fn RWD_const RWD_static RWD_use RWD_struct RWD_enum RWD_trait RWD_impl RWD_type
-%token RWD_as RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe
+%token RWD_as RWD_in RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe
%token RWD_let
%token RWD_self RWD_super
%token RWD_match RWD_if RWD_while RWD_loop RWD_for RWD_else
@@ -65,6 +65,28 @@ Root
*/
crate : super_attrs module_body;
+tt_list: | tt_list tt_item;
+tt_item: tt_paren | tt_brace | tt_square | tt_tok;
+tt_tok /* any single token that may appear inside a macro token tree; brackets are handled by tt_paren/tt_brace/tt_square */
+ : IDENT | STRING | CHARLIT | LIFETIME | INTEGER | MACRO
+ | '+' | '*' | '/' | '!' | ',' | ';' | '#'
+ | RWD_self | RWD_super | RWD_mut | RWD_ref | RWD_let | RWD_where | RWD_as | RWD_in | RWD_unsafe /* "in" is now lexed as RWD_in, not IDENT, so it must be listed here */
+ | RWD_for | RWD_while | RWD_loop | RWD_if | RWD_else | RWD_match | RWD_return
+ | RWD_impl | RWD_pub | RWD_struct | RWD_enum | RWD_fn | RWD_type | RWD_static | RWD_const
+ | '-' | THINARROW
+ | '&' | DOUBLEAMP
+ | ':' | DOUBLECOLON
+ | '|' | DOUBLEPIPE
+ | '=' | DOUBLEEQUAL | FATARROW
+ | '<' | DOUBLELT | LTEQUAL
+ | '>' | DOUBLEGT | GTEQUAL
+ | '.' | DOUBLEDOT | TRIPLEDOT
+ | '$'
+ ;
+tt_paren: '(' tt_list ')';
+tt_brace: '{' tt_list '}';
+tt_square: '[' tt_list ']';
+
super_attrs : | super_attrs super_attr;
opt_pub
@@ -130,14 +152,16 @@ module_def
fn_def: fn_def_hdr code { bnf_trace("function defined"); };
fn_def_hdr: IDENT generic_def '(' fn_def_args ')' fn_def_ret where_clause { bnf_trace("function '%s'", $1); };
-fn_def_ret: /* -> () */ | THINARROW type;
+fn_def_ret: /* -> () */ | THINARROW type | THINARROW '!';
fn_def_args: /* empty */ | fn_def_self | fn_def_self ',' fn_def_arg_list | fn_def_arg_list;
fn_def_self
: RWD_self
| RWD_mut RWD_self
| '&' RWD_self
+ | '&' LIFETIME RWD_self
| '&' RWD_mut RWD_self
+ | '&' LIFETIME RWD_mut RWD_self
;
fn_def_arg_list: fn_def_arg | fn_def_arg_list ',' fn_def_arg;
fn_def_arg : pattern ':' type;
@@ -268,6 +292,8 @@ use_path
expr_path
: ufcs_path DOUBLECOLON IDENT
| DOUBLECOLON expr_path_segs
+ | RWD_self DOUBLECOLON expr_path_segs
+ | RWD_super DOUBLECOLON expr_path_segs
| expr_path_segs
;
expr_path_segs
@@ -350,17 +376,19 @@ pattern
nonbind_pattern
: '_' { }
| DOUBLEDOT { }
- | STRING { }
- | INTEGER { }
- | CHARLIT { }
| struct_pattern
| tuple_pattern
-/* | expr_path '(' refutable_pattern_list opt_comma ')'
- | expr_path '{' refutable_struct_patern '}' */
- | expr_path
+ | value_pattern
+ | value_pattern TRIPLEDOT value_pattern
| '&' pattern
| '&' RWD_mut pattern
;
+value_pattern
+ : expr_path
+ | INTEGER
+ | CHARLIT
+ | STRING
+ ;
pattern_list
: pattern_list ',' pattern
@@ -378,7 +406,17 @@ code: '{' block_contents '}' { bnf_trace("code parsed"); };
block_contents
:
| block_lines
- | block_lines expr
+ | block_lines tail_expr
+ ;
+tail_expr
+ : expr
+ | flow_control
+ ;
+flow_control /* return / break / continue; every alternative accepts an optional trailing ';' */
+ : RWD_return opt_semicolon {}
+ | RWD_return expr_0 opt_semicolon {} /* return carrying a value */
+ | RWD_break opt_lifetime opt_semicolon {} /* opt_lifetime: optional LIFETIME operand */
+ | RWD_continue opt_lifetime opt_semicolon {}
+ ;
block_lines: | block_lines block_line;
block_line
@@ -398,30 +436,35 @@ stmt
: expr ';'
;
-expr: expr_assign;
+expr_list: expr_list ',' expr | expr | /* mt */;
-expr_assign
- : expr_0 assign_op expr_0
- | expr_0
+struct_literal_ent: IDENT | IDENT ':' expr;
+struct_literal_list
+ : struct_literal_list ',' struct_literal_ent
+ | struct_literal_ent
;
-assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL;
expr_blocks
- : RWD_match expr_path '{' match_arms opt_comma '}' { }
- | RWD_match expr '{' match_arms opt_comma '}' { }
+ : RWD_match expr_NOSTRLIT '{' match_arms opt_comma '}' { }
| RWD_if if_block
| RWD_unsafe '{' block_contents '}' { }
| RWD_loop '{' block_contents '}' { }
- | RWD_while expr '{' block_contents '}' { }
- | RWD_return {}
- | RWD_return expr_0 {}
+ | RWD_while expr_NOSTRLIT '{' block_contents '}' { }
+ | RWD_for pattern RWD_in expr_NOSTRLIT '{' block_contents '}' { }
+ | flow_control
| '{' block_contents '}'
;
+opt_lifetime: | LIFETIME;
+opt_semicolon: | ';';
if_block
- : expr code { }
- | expr code RWD_else code { }
- | expr code RWD_else RWD_if if_block { }
+ : if_block_head {}
+ | if_block_head RWD_else code { }
+ | if_block_head RWD_else RWD_if if_block { }
+ ;
+if_block_head /* condition plus body of one "if"; the condition uses the _NOSTRLIT expression variant, which omits the struct-literal alternatives */
+ : expr_NOSTRLIT code {}
+ | RWD_let pattern '=' expr_NOSTRLIT code {} /* "if let PATTERN = EXPR" form */
+ ;
match_arms
: match_arm ',' match_arms
@@ -429,126 +472,19 @@ match_arms
| match_arm
| match_arm ','
;
-match_pattern: pattern | pattern RWD_if expr_0;
-match_arm
- : match_pattern FATARROW expr { bnf_trace("match_arm"); }
- | match_arm_brace
+match_pattern
+ : pattern
+ | pattern RWD_if expr_0
;
-match_arm_brace : match_pattern FATARROW '{' block_contents '}';
-
-expr_0: expr_range;
-
-expr_range
- : expr_range_n
- | expr_range_n DOUBLEDOT
- | DOUBLEDOT expr_range_n
- | expr_range_n DOUBLEDOT expr_range_n
- ;
-expr_range_n: expr_bor;
-
-expr_bor: expr_band | expr_bor DOUBLEPIPE expr_band { } ;
-expr_band: expr_equ | expr_band DOUBLEAMP expr_equ { } ;
-expr_equ
- : expr_cmp
- | expr_equ DOUBLEEQUAL expr_cmp
- | expr_equ EXCLAMEQUAL expr_cmp
- ;
-expr_cmp
- : expr_cmp_n
- | expr_cmp '<' expr_cmp_n {}
- | expr_cmp '>' expr_cmp_n {}
- | expr_cmp GTEQUAL expr_cmp_n {}
- | expr_cmp LTEQUAL expr_cmp_n {}
- ;
-
-expr_cmp_n: expr_or;
-
-expr_or: expr_and | expr_or '|' expr_and { };
-expr_and: expr_xor | expr_and '&' expr_xor { };
-expr_xor: expr_8 | expr_xor '^' expr_8 { };
-expr_8
- : expr_9
- | expr_8 DOUBLELT expr_9 {}
- | expr_8 DOUBLEGT expr_9 {}
- ;
-expr_9
- : expr_cast
- | expr_9 '+' expr_cast {}
- | expr_9 '-' expr_cast {}
- ;
-/* 10: Cast */
-expr_cast: expr_11 | expr_cast RWD_as type { bnf_trace("expr:cast"); };
-/* 11: Times/Div/Modulo */
-expr_11
- : expr_11n
- | expr_11 '*' expr_11n {}
- | expr_11 '/' expr_11n {}
- | expr_11 '%' expr_11n {}
- ;
-expr_11n: expr_12;
-expr_12
- : expr_fc
- | '-' expr_12
- | '!' expr_12
- | '*' expr_12
-/* | RWD_box expr_12 */
- | '&' expr_12
- | '&' RWD_mut expr_12
- | DOUBLEAMP expr_12 { }
- | DOUBLEAMP RWD_mut expr_12 { }
- ;
-
-expr_fc
- : expr_value
- | expr_fc '(' expr_list ')'
- | expr_fc '[' expr ']'
- | expr_fc '.' INTEGER
- | expr_fc '.' expr_path_seg '(' expr_list ')'
- | expr_fc '.' expr_path_seg
-
-expr_value
- : CHARLIT | INTEGER | FLOAT | STRING
- | expr_blocks
- | expr_path '(' expr_list ')' { bnf_trace("function call"); }
- | expr_path '{' struct_literal_list '}'
- | expr_path '{' struct_literal_list ',' DOUBLEDOT expr_0 '}'
- | expr_path
- | RWD_self
- | '(' expr ')'
- | '(' ')'
- | '(' expr ',' expr_list ')'
- | MACRO tt_paren { bnf_trace("Expr macro invocation"); }
- | '|' pattern_list '|' expr
- | DOUBLEPIPE expr
+match_patterns
+ : match_patterns '|' match_pattern
+ | match_pattern
;
-
-expr_list: expr_list ',' expr | expr | /* mt */;
-
-struct_literal_ent: IDENT | IDENT ':' expr;
-struct_literal_list
- : struct_literal_list ',' struct_literal_ent
- | struct_literal_ent
+match_arm
+ : match_patterns FATARROW expr { bnf_trace("match_arm"); }
+ | match_arm_brace
;
+match_arm_brace : match_patterns FATARROW '{' block_contents '}';
-tt_list: | tt_list tt_item;
-tt_item: tt_paren | tt_brace | tt_square | tt_tok;
-tt_tok
- : IDENT | STRING | CHARLIT | LIFETIME | INTEGER | MACRO
- | '+' | '*' | '/' | '!' | ',' | ';' | '#'
- | RWD_self | RWD_super | RWD_mut | RWD_ref | RWD_let | RWD_where
- | RWD_for | RWD_while | RWD_loop | RWD_if | RWD_else | RWD_match | RWD_return
- | RWD_impl | RWD_pub | RWD_struct | RWD_enum | RWD_fn | RWD_type | RWD_static | RWD_const
- | '-' | THINARROW
- | '&' | DOUBLEAMP
- | ':' | DOUBLECOLON
- | '|' | DOUBLEPIPE
- | '=' | DOUBLEEQUAL | FATARROW
- | '<' | DOUBLELT
- | '>' | DOUBLEGT
- | '.' | DOUBLEDOT | TRIPLEDOT
- | '$'
- ;
-tt_paren: '(' tt_list ')';
-tt_brace: '{' tt_list '}';
-tt_square: '[' tt_list ']';
+/* rust_expr.y.h inserted */
diff --git a/bnf/rust_expr.y.h b/bnf/rust_expr.y.h
new file mode 100644
index 00000000..7ad43703
--- /dev/null
+++ b/bnf/rust_expr.y.h
@@ -0,0 +1,14 @@
+
+assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL;
+
+#define SUFFIX_is_
+#define _(v) v
+#include "rust_expr.y_tree.h"
+#undef SUFFIX_is_
+#undef _
+
+#define SUFFIX_is__NOSTRLIT /* second pass: every rule name gets a _NOSTRLIT suffix and the #ifndef in the tree file drops struct literals */
+#define _(v) v##_NOSTRLIT
+#include "rust_expr.y_tree.h"
+#undef SUFFIX_is__NOSTRLIT /* was "#undef SUFFIX", a name never defined, which left this guard macro set */
+#undef _
diff --git a/bnf/rust_expr.y_tree.h b/bnf/rust_expr.y_tree.h
new file mode 100644
index 00000000..489cc08b
--- /dev/null
+++ b/bnf/rust_expr.y_tree.h
@@ -0,0 +1,112 @@
+_(expr): _(expr_assign);
+
+_(expr_assign)
+ : _(expr_0) assign_op _(expr_0)
+ | _(expr_0)
+ ;
+
+_(expr_0): _(expr_range);
+
+_(expr_range)
+ : _(expr_range_n)
+ | _(expr_range_n) DOUBLEDOT
+ | DOUBLEDOT _(expr_range_n)
+ | _(expr_range_n) DOUBLEDOT _(expr_range_n)
+ ;
+_(expr_range_n): _(expr_bor);
+
+_(expr_bor)
+ : _(expr_band)
+ | _(expr_bor) DOUBLEPIPE _(expr_band) { }
+ ;
+_(expr_band)
+ : _(expr_equ)
+ | _(expr_band) DOUBLEAMP _(expr_equ) { }
+ ;
+_(expr_equ)
+ : _(expr_cmp)
+ | _(expr_equ) DOUBLEEQUAL _(expr_cmp)
+ | _(expr_equ) EXCLAMEQUAL _(expr_cmp)
+ ;
+_(expr_cmp)
+ : _(expr_cmp_n)
+ | _(expr_cmp) '<' _(expr_cmp_n) {}
+ | _(expr_cmp) '>' _(expr_cmp_n) {}
+ | _(expr_cmp) GTEQUAL _(expr_cmp_n) {}
+ | _(expr_cmp) LTEQUAL _(expr_cmp_n) {}
+ ;
+_(expr_cmp_n): _(expr_or);
+
+_(expr_or)
+ : _(expr_and)
+ | _(expr_or) '|' _(expr_and) { }
+ ;
+_(expr_and)
+ : _(expr_xor)
+ | _(expr_and) '&' _(expr_xor) { }
+ ;
+_(expr_xor)
+ : _(expr_8)
+ | _(expr_xor) '^' _(expr_8) { }
+ ;
+_(expr_8)
+ : _(expr_9)
+ | _(expr_8) DOUBLELT _(expr_9) {}
+ | _(expr_8) DOUBLEGT _(expr_9) {}
+ ;
+_(expr_9)
+ : _(expr_cast)
+ | _(expr_9) '+' _(expr_cast) {}
+ | _(expr_9) '-' _(expr_cast) {}
+ ;
+/* 10: Cast */
+_(expr_cast)
+ : _(expr_11)
+ | _(expr_cast) RWD_as type { bnf_trace("expr:cast"); }
+ ;
+/* 11: Times/Div/Modulo */
+_(expr_11)
+ : _(expr_11n)
+ | _(expr_11) '*' _(expr_11n) {}
+ | _(expr_11) '/' _(expr_11n) {}
+ | _(expr_11) '%' _(expr_11n) {}
+ ;
+_(expr_11n): _(expr_12);
+_(expr_12)
+ : _(expr_fc)
+ | '-' _(expr_12)
+ | '!' _(expr_12)
+ | '*' _(expr_12)
+/* | RWD_box expr_12 */
+ | '&' _(expr_12)
+ | '&' RWD_mut _(expr_12)
+ | DOUBLEAMP _(expr_12) { }
+ | DOUBLEAMP RWD_mut _(expr_12) { }
+ ;
+
+_(expr_fc) /* postfix forms: call, index, tuple-field access, method call, field access */
+ : _(expr_value)
+ | _(expr_fc) '(' expr_list ')'
+ | _(expr_fc) '[' expr ']'
+ | _(expr_fc) '.' INTEGER
+ | _(expr_fc) '.' expr_path_seg '(' expr_list ')'
+ | _(expr_fc) '.' expr_path_seg ; /* explicit terminator, matching every other rule in this file */
+
+_(expr_value)
+ : CHARLIT | INTEGER | FLOAT | STRING
+ | expr_blocks
+ | expr_path '(' expr_list ')' { bnf_trace("function call"); }
+#ifndef SUFFIX_is__NOSTRLIT
+ | expr_path '{' struct_literal_list '}'
+ | expr_path '{' struct_literal_list ',' '}'
+ | expr_path '{' struct_literal_list ',' DOUBLEDOT expr_0 '}'
+#endif
+ | expr_path
+ | RWD_self
+ | '(' expr ')'
+ | '(' ')'
+ | '(' expr ',' expr_list ')'
+ | MACRO tt_paren { bnf_trace("Expr macro invocation"); }
+ | '|' pattern_list '|' expr
+ | DOUBLEPIPE expr
+ ;