summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Hodge <tpg@mutabah.net>2015-09-22 21:10:08 +0800
committerJohn Hodge <tpg@mutabah.net>2015-09-22 21:10:08 +0800
commit558ac2ad0ae0e9585f551525f98896cbe3a2a9b8 (patch)
tree10f5d677a7a17b16275f02aef10720acc61bed41
parent3663599d6d0748c3d79ac91dcc89982faaa820ed (diff)
downloadmrust-558ac2ad0ae0e9585f551525f98896cbe3a2a9b8.tar.gz
Heavy improviements to the BNF parser
-rw-r--r--bnf/Makefile9
-rw-r--r--bnf/rust.lex43
-rw-r--r--bnf/rust.y254
3 files changed, 225 insertions, 81 deletions
diff --git a/bnf/Makefile b/bnf/Makefile
index 1bd962d4..02ddaa41 100644
--- a/bnf/Makefile
+++ b/bnf/Makefile
@@ -3,9 +3,12 @@
all: test.bin
-test: test.bin ../samples/1.rs
- ./test.bin < ../samples/1.rs
-# ./test.bin < ../../../RustPorts/OS/libcore/lib.rs
+TSTFILE := ../samples/1.rs
+TSTFILE := ../../rust_os/libcore/lib.rs
+TSTFILE := ../../rust_os/libcore/str/mod.rs
+
+test: test.bin $(TSTFILE)
+ ./test.bin < $(TSTFILE)
test.bin: rust.tab.c rust.lex.c
gcc rust.tab.c rust.lex.c -o $@
diff --git a/bnf/rust.lex b/bnf/rust.lex
index 21de6d92..083ed0dd 100644
--- a/bnf/rust.lex
+++ b/bnf/rust.lex
@@ -4,9 +4,20 @@
void yyerror(const char *s);
extern int yydebug;
-#define YY_DECL int yylex(int force_ret)
+int rustbnf_forcetoken = 0;
+#define YY_DECL int yylex_inner()
+
+int yylex() {
+ if(rustbnf_forcetoken>0) {
+ int rv = rustbnf_forcetoken;
+ rustbnf_forcetoken = 0;
+ return rv;
+ }
+ else {
+ return yylex_inner();
+ }
+}
-#define YY_USER_ACTION if(force_ret>0) return force_ret;
%}
@@ -17,6 +28,7 @@ ident_c [a-zA-Z_]
"//"[^/].*\n { yylineno += 1; }
"///".*\n { yylineno += 1; } // TODO: Handle /// by desugaring
+"/*"[^*]([^(\*/)])*"*/" { const char *c; for(c = yytext; *c; c ++) if( *c == '\n') yylineno += 1; }
\n { yylineno += 1; }
\r /* */
[ \t] /* */
@@ -36,6 +48,8 @@ ident_c [a-zA-Z_]
"as" { return RWD_as; }
"mut" { return RWD_mut; }
"pub" { return RWD_pub; }
+"where" { return RWD_where; }
+"extern" { return RWD_extern; }
"let" { return RWD_let; }
"ref" { return RWD_ref; }
@@ -49,21 +63,33 @@ ident_c [a-zA-Z_]
"loop" { return RWD_loop; }
"while" { return RWD_while; }
"for" { return RWD_for; }
+"unsafe" { return RWD_unsafe; }
+"return" { return RWD_return; }
+"break" { return RWD_break; }
+"continue" { return RWD_continue; }
"::" { return DOUBLECOLON; }
"->" { return THINARROW; }
"=>" { return FATARROW; }
-"#![" { return SUPER_ATTR; }
-"#[" { return SUB_ATTR; }
"==" { return DOUBLEEQUAL; }
"!=" { return EXCLAMEQUAL; }
+">=" { return GTEQUAL; }
+"<=" { return LTEQUAL; }
+"+=" { return PLUSEQUAL; }
+"-=" { return MINUSEQUAL; }
+"*=" { return STAREQUAL; }
+"/=" { return SLASHEQUAL; }
"&&" { return DOUBLEAMP; }
"||" { return DOUBLEPIPE; }
"<<" { return DOUBLELT; }
">>" { return DOUBLEGT; }
+".." { return DOUBLEDOT; }
+"..." { return TRIPLEDOT; }
+"#" { return *yytext; }
+"$" { return *yytext; }
"&" { return *yytext; }
"|" { return *yytext; }
"!" { return *yytext; }
@@ -77,19 +103,26 @@ ident_c [a-zA-Z_]
"<" { return *yytext; }
">" { return *yytext; }
"," { return *yytext; }
+"/" { return *yytext; }
+"*" { return *yytext; }
+"+" { return *yytext; }
+"-" { return *yytext; }
{ident_c}({ident_c}|[0-9])* { yylval.text = strdup(yytext); return IDENT; }
{ident_c}({ident_c}|[0-9])*"!" { yylval.text = strdup(yytext); return MACRO; }
+'{ident_c}{ident_c}* { yylval.text = strdup(yytext+1); return LIFETIME; }
[0-9]{dec_digit}*"."{dec_digit}+ { yylval.realnum = strtod(yytext, NULL); return FLOAT; }
[0-9]{dec_digit}* { yylval.integer = strtoull(yytext, NULL, 0); return INTEGER; }
0x[0-9a-fA-F]* { yylval.integer = strtoull(yytext, NULL, 0); return INTEGER; }
-'(\\.|[^\\'])+' { return CHARLIT; }
+[b]'(.|\\['rn])' { yylval.text = strdup(yytext); return CHARLIT; }
+\"([^"])+\" { yylval.text = strdup(yytext); return STRING; }
. { fprintf(stderr, "\x1b[31m" "ERROR: Invalid character '%c' on line %i\x1b[0m\n", *yytext, yylineno); exit(1); }
%%
int main() {
+ yylineno = 1;
yydebug = (getenv("BNFDEBUG") != NULL);
yyparse();
return 0;
diff --git a/bnf/rust.y b/bnf/rust.y
index 81b7aa40..9c675b21 100644
--- a/bnf/rust.y
+++ b/bnf/rust.y
@@ -4,12 +4,16 @@
%token SUPER_ATTR SUB_ATTR DOC_COMMENT SUPER_DOC_COMMENT
%token DOUBLECOLON THINARROW FATARROW DOUBLEDOT TRIPLEDOT
%token DOUBLEEQUAL EXCLAMEQUAL DOUBLEPIPE DOUBLEAMP
+%token GTEQUAL LTEQUAL
+%token PLUSEQUAL MINUSEQUAL STAREQUAL SLASHEQUAL
%token DOUBLELT DOUBLEGT
%token RWD_mod RWD_fn RWD_const RWD_static RWD_use RWD_struct RWD_enum RWD_trait RWD_impl RWD_type
-%token RWD_as RWD_mut RWD_ref RWD_pub RWD_where
+%token RWD_as RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe
%token RWD_let
%token RWD_self RWD_super
%token RWD_match RWD_if RWD_while RWD_loop RWD_for RWD_else
+%token RWD_return RWD_break RWD_continue
+%token RWD_extern
%start crate
%union {
@@ -27,7 +31,7 @@
#include <assert.h>
extern int yylineno;
-static inline void bnf_trace(const char* fmt, ...) {
+/*static inline*/ void bnf_trace(const char* fmt, ...) {
fprintf(stderr, "\x1b[32m""TRACE: ");
va_list args;
va_start(args, fmt);
@@ -42,18 +46,14 @@ static void yyprint(FILE *outstream, int type, YYSTYPE value)
{
case IDENT: fprintf(outstream, "%s", value.text); break;
case MACRO: fprintf(outstream, "%s!", value.text); break;
+ case STRING: fprintf(outstream, "\"%s\"", value.text); break;
+ case LIFETIME: fprintf(outstream, "'%s", value.text); break;
default:
break;
}
}
-int rustbnf_forcetoken = 0;
-static inline int read_and_clear(int* ptr) {
- int rv = *ptr;
- *ptr = 0;
- return rv;
-}
-#define YYLEX_PARAM read_and_clear(&rustbnf_forcetoken)
+extern int rustbnf_forcetoken;
%}
%%
@@ -80,16 +80,14 @@ module_body
attrs: attrs attr | ;
super_attr
- : SUPER_ATTR meta_items ']'
+ : '#' '!' '[' meta_items ']'
| SUPER_DOC_COMMENT
;
attr
- : SUB_ATTR meta_items ']'
+ : '#' '[' meta_items ']'
| DOC_COMMENT
;
-meta_items
- : meta_item
- | meta_items ',' meta_item;
+meta_items: meta_item | meta_items ',' meta_item;
meta_item
: IDENT '(' meta_items ')'
| IDENT '=' STRING
@@ -104,19 +102,28 @@ Root Items
*/
item
: opt_pub RWD_mod module_def
- | opt_pub RWD_fn fn_def
+ | opt_pub fn_qualifiers RWD_fn fn_def
| opt_pub RWD_use use_def
| opt_pub RWD_static static_def
| opt_pub RWD_const const_def
| opt_pub RWD_struct struct_def
| opt_pub RWD_enum enum_def
| opt_pub RWD_trait trait_def
+ | RWD_extern extern_block
| RWD_impl impl_def
+ | MACRO IDENT tt_brace
+ | MACRO tt_brace
+ | MACRO tt_paren ';'
;
+extern_block: extern_abi | '{' extern_items '}';
+extern_abi: | STRING;
+extern_items: | extern_items extern_item;
+extern_item: opt_pub RWD_fn fn_def_hdr ';';
+
module_def
: IDENT '{' module_body '}'
- | IDENT ';'
+ | IDENT ';' { bnf_trace("mod %s;", $1); }
;
/* --- Function --- */
@@ -133,14 +140,31 @@ fn_def_self
| '&' RWD_mut RWD_self
;
fn_def_arg_list: fn_def_arg | fn_def_arg_list ',' fn_def_arg;
-fn_def_arg : irrefutable_pattern ':' type;
+fn_def_arg : pattern ':' type;
+
+fn_qualifiers
+ :
+ | RWD_extern extern_abi
+ | RWD_unsafe
+ | RWD_const
+ | RWD_unsafe RWD_const
+ ;
/* --- Use --- */
use_def
- : use_path RWD_as IDENT ';'
- | use_path '*' ';'
- | use_path '{' use_picks '}' ';'
- | use_path ';'
+ : RWD_self use_def_tail
+ | RWD_self DOUBLECOLON use_path use_def_tail
+ | RWD_super use_def_tail
+ | RWD_super DOUBLECOLON use_path use_def_tail
+ | DOUBLECOLON use_path use_def_tail
+ | use_path use_def_tail
+ | '{' use_picks '}' ';'
+ ;
+use_def_tail
+ : RWD_as IDENT ';'
+ | DOUBLECOLON '*' ';'
+ | DOUBLECOLON '{' use_picks '}' ';'
+ | ';'
/* | RWD_use error ';' */
;
use_picks
@@ -166,7 +190,7 @@ const_value
/* --- Struct --- */
struct_def
: IDENT generic_def ';' { bnf_trace("unit-like struct"); }
- | IDENT generic_def '(' tuple_struct_def_items opt_comma ')' { bnf_trace("tuple struct"); }
+ | IDENT generic_def '(' tuple_struct_def_items opt_comma ')' ';' { bnf_trace("tuple struct"); }
| IDENT generic_def '{' struct_def_items opt_comma '}' { bnf_trace("normal struct"); }
;
@@ -177,8 +201,8 @@ tuple_struct_def_items
tuple_struct_def_item: attrs opt_pub type;
struct_def_items
- : /*struct_def_items ',' struct_def_item
- |*/ struct_def_item { bnf_trace("struct_def_item"); }
+ : struct_def_items ',' struct_def_item
+ | struct_def_item { bnf_trace("struct_def_item"); }
;
struct_def_item: attrs opt_pub IDENT ':' type;
@@ -186,18 +210,27 @@ struct_def_item: attrs opt_pub IDENT ':' type;
enum_def:
;
/* --- Trait --- */
-trait_def:
+trait_def: IDENT generic_def trait_bounds '{' trait_items '}';
+trait_bounds: ':' type_path | ;
+trait_bound_list: trait_bound_list '+' trait_bound | trait_bound;
+trait_bound: type_path | LIFETIME;
+trait_items: | trait_items attrs trait_item;
+trait_item
+ : RWD_type IDENT ';'
+ | RWD_type IDENT ':' trait_bound_list ';'
+ | fn_qualifiers RWD_fn fn_def_hdr ';'
;
+
/* --- Impl --- */
impl_def: impl_def_line '{' impl_items '}'
impl_def_line
- : generic_def type RWD_for type { bnf_trace("trait impl"); }
- | generic_def type RWD_for DOUBLEDOT { bnf_trace("wildcard impl"); }
- | generic_def type { bnf_trace("inherent impl"); }
+ : generic_def type RWD_for type where_clause { bnf_trace("trait impl"); }
+ | generic_def type RWD_for DOUBLEDOT where_clause { bnf_trace("wildcard impl"); }
+ | generic_def type where_clause { bnf_trace("inherent impl"); }
;
impl_items: | impl_items attrs impl_item;
impl_item
- : opt_pub RWD_fn fn_def
+ : opt_pub fn_qualifiers RWD_fn fn_def
| opt_pub RWD_type generic_def IDENT '=' type ';'
;
@@ -209,7 +242,10 @@ generic_def_one
: IDENT '=' type ':' bounds
| IDENT '=' type
| IDENT ':' bounds { bnf_trace("bounded ident"); }
- | IDENT ;
+ | IDENT
+ | LIFETIME
+ | LIFETIME ':' LIFETIME
+ ;
where_clause: | RWD_where where_clauses;
where_clauses
@@ -260,7 +296,8 @@ type_path_seg
| IDENT '<' type_exprs '>'
| IDENT '<' type_exprs DOUBLEGT { bnf_trace("Double-gt terminated type expr"); rustbnf_forcetoken = '>'; }
;
-type_exprs: type_exprs ',' type | type;
+type_exprs: type_exprs ',' type_arg | type_arg;
+type_arg: type | LIFETIME | IDENT '=' type;
/*
=========================================
@@ -270,55 +307,65 @@ Types
type
: type_path
| '&' type
+ | '&' LIFETIME type
| '&' RWD_mut type
+ | '&' LIFETIME RWD_mut type
| '*' RWD_const type
| '*' RWD_mut type
| '[' type ']'
| '[' type ';' expr ']'
+ | '(' ')'
+ | '(' type ')'
+ | '(' type ',' ')'
+ | '(' type ',' type_list ')'
;
+type_list: type_list ',' type | type;
/*
=========================================
Patterns
=========================================
*/
-irrefutable_pattern
- : tuple_pattern
- | bind_pattern
- /*| struct_pattern */
- ;
-
-tuple_pattern: '(' ')';
-
-bind_pattern: IDENT;
+tuple_pattern: '(' pattern_list ')' | '(' pattern_list ',' ')';
struct_pattern
- : expr_path '{' '}'
+ : expr_path '{' struct_pattern_items '}'
| expr_path tuple_pattern
;
+struct_pattern_item: IDENT | IDENT ':' pattern;
+struct_pattern_items: struct_pattern_items ',' struct_pattern_item | struct_pattern_item;
-refutable_pattern
- : IDENT { /* maybe bind */ }
- | IDENT '@' nonbind_pattern
+pattern
+ : /*IDENT { /* maybe bind * / }
+ */| IDENT '@' nonbind_pattern
| RWD_ref IDENT
| RWD_ref IDENT '@' nonbind_pattern
+ | RWD_mut IDENT
+ | RWD_mut IDENT '@' nonbind_pattern
| RWD_ref RWD_mut IDENT
| RWD_ref RWD_mut IDENT '@' nonbind_pattern
- | '&' refutable_pattern
- | '&' RWD_mut refutable_pattern
- | nonbind_pattern;
+ | nonbind_pattern
+ ;
nonbind_pattern
: '_' { }
| DOUBLEDOT { }
- | expr_path '(' refutable_pattern_list opt_comma ')'
- | expr_path '{' refutable_struct_patern '}'
+ | STRING { }
+ | INTEGER { }
+ | CHARLIT { }
+ | struct_pattern
+ | tuple_pattern
+/* | expr_path '(' refutable_pattern_list opt_comma ')'
+ | expr_path '{' refutable_struct_patern '}' */
| expr_path
-/* | '&' nonbind_pattern */
+ | '&' pattern
+ | '&' RWD_mut pattern
;
-refutable_pattern_list: refutable_pattern_list ',' refutable_pattern | refutable_pattern;
-refutable_struct_patern: /* TODO */;
+pattern_list
+ : pattern_list ',' pattern
+ | pattern
+ ;
/*
@@ -328,41 +375,76 @@ Expressions!
*/
code: '{' block_contents '}' { bnf_trace("code parsed"); };
-block_contents: block_contents block_line | ;
+block_contents
+ :
+ | block_lines
+ | block_lines expr
+ ;
+block_lines: | block_lines block_line;
block_line
: RWD_let let_binding ';'
+ | MACRO IDENT tt_brace
+ | attr item
+ | expr_blocks
| stmt
- | expr
;
opt_type_annotation: | ':' type;
let_binding
- : irrefutable_pattern opt_type_annotation '=' expr
- | irrefutable_pattern opt_type_annotation
+ : pattern opt_type_annotation '=' expr
+ | pattern opt_type_annotation
stmt
: expr ';'
;
-expr: expr_blocks;
+expr: expr_assign;
+
+expr_assign
+ : expr_0 assign_op expr_0
+ | expr_0
+ ;
+assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL;
expr_blocks
- : RWD_match expr '{' match_arms opt_comma '}' { }
+ : RWD_match expr_path '{' match_arms opt_comma '}' { }
+ | RWD_match expr '{' match_arms opt_comma '}' { }
| RWD_if if_block
- | expr_0
+ | RWD_unsafe '{' block_contents '}' { }
+ | RWD_loop '{' block_contents '}' { }
+ | RWD_while expr '{' block_contents '}' { }
+ | RWD_return {}
+ | RWD_return expr_0 {}
+ | '{' block_contents '}'
;
+
if_block
: expr code { }
| expr code RWD_else code { }
| expr code RWD_else RWD_if if_block { }
;
match_arms
- : match_arms ',' match_arm
+ : match_arm ',' match_arms
+ | match_arm_brace match_arms
| match_arm
+ | match_arm ','
;
-match_arm: refutable_pattern FATARROW expr { bnf_trace("match_arm"); };
+match_pattern: pattern | pattern RWD_if expr_0;
+match_arm
+ : match_pattern FATARROW expr { bnf_trace("match_arm"); }
+ | match_arm_brace
+ ;
+match_arm_brace : match_pattern FATARROW '{' block_contents '}';
+
+expr_0: expr_range;
-expr_0: expr_bor;
+expr_range
+ : expr_range_n
+ | expr_range_n DOUBLEDOT
+ | DOUBLEDOT expr_range_n
+ | expr_range_n DOUBLEDOT expr_range_n
+ ;
+expr_range_n: expr_bor;
expr_bor: expr_band | expr_bor DOUBLEPIPE expr_band { } ;
expr_band: expr_equ | expr_band DOUBLEAMP expr_equ { } ;
@@ -372,10 +454,15 @@ expr_equ
| expr_equ EXCLAMEQUAL expr_cmp
;
expr_cmp
- : expr_or
- | expr_cmp '<' expr_or {}
- | expr_cmp '>' expr_or {}
+ : expr_cmp_n
+ | expr_cmp '<' expr_cmp_n {}
+ | expr_cmp '>' expr_cmp_n {}
+ | expr_cmp GTEQUAL expr_cmp_n {}
+ | expr_cmp LTEQUAL expr_cmp_n {}
;
+
+expr_cmp_n: expr_or;
+
expr_or: expr_and | expr_or '|' expr_and { };
expr_and: expr_xor | expr_and '&' expr_xor { };
expr_xor: expr_8 | expr_xor '^' expr_8 { };
@@ -402,7 +489,7 @@ expr_11n: expr_12;
expr_12
: expr_fc
| '-' expr_12
- | '-' expr_12
+ | '!' expr_12
| '*' expr_12
/* | RWD_box expr_12 */
| '&' expr_12
@@ -413,33 +500,54 @@ expr_12
expr_fc
: expr_value
- | expr_fc '(' ')'
+ | expr_fc '(' expr_list ')'
| expr_fc '[' expr ']'
| expr_fc '.' INTEGER
- | expr_fc '.' expr_path_seg '(' ')'
+ | expr_fc '.' expr_path_seg '(' expr_list ')'
| expr_fc '.' expr_path_seg
expr_value
: CHARLIT | INTEGER | FLOAT | STRING
+ | expr_blocks
| expr_path '(' expr_list ')' { bnf_trace("function call"); }
| expr_path '{' struct_literal_list '}'
+ | expr_path '{' struct_literal_list ',' DOUBLEDOT expr_0 '}'
| expr_path
| RWD_self
| '(' expr ')'
+ | '(' ')'
+ | '(' expr ',' expr_list ')'
| MACRO tt_paren { bnf_trace("Expr macro invocation"); }
+ | '|' pattern_list '|' expr
+ | DOUBLEPIPE expr
;
expr_list: expr_list ',' expr | expr | /* mt */;
+struct_literal_ent: IDENT | IDENT ':' expr;
struct_literal_list
- : struct_literal_list IDENT ':' expr
- | struct_literal_list IDENT ':' expr ','
- | ;
-
+ : struct_literal_list ',' struct_literal_ent
+ | struct_literal_ent
+ ;
tt_list: | tt_list tt_item;
tt_item: tt_paren | tt_brace | tt_square | tt_tok;
-tt_tok: IDENT | '+' | '-' | '.' | '!' | RWD_self;
+tt_tok
+ : IDENT | STRING | CHARLIT | LIFETIME | INTEGER | MACRO
+ | '+' | '*' | '/' | '!' | ',' | ';' | '#'
+ | RWD_self | RWD_super | RWD_mut | RWD_ref | RWD_let | RWD_where
+ | RWD_for | RWD_while | RWD_loop | RWD_if | RWD_else | RWD_match | RWD_return
+ | RWD_impl | RWD_pub | RWD_struct | RWD_enum | RWD_fn | RWD_type | RWD_static | RWD_const
+ | '-' | THINARROW
+ | '&' | DOUBLEAMP
+ | ':' | DOUBLECOLON
+ | '|' | DOUBLEPIPE
+ | '=' | DOUBLEEQUAL | FATARROW
+ | '<' | DOUBLELT
+ | '>' | DOUBLEGT
+ | '.' | DOUBLEDOT | TRIPLEDOT
+ | '$'
+ ;
tt_paren: '(' tt_list ')';
tt_brace: '{' tt_list '}';
tt_square: '[' tt_list ']';