diff options
-rw-r--r-- | bnf/Makefile | 6 | ||||
-rw-r--r-- | bnf/lex.hpp | 22 | ||||
-rw-r--r-- | bnf/rust.lex | 76 | ||||
-rw-r--r-- | bnf/rust.y | 44 | ||||
-rw-r--r-- | bnf/rust_expr.y.h | 11 | ||||
-rw-r--r-- | bnf/rust_expr.y_tree.h | 4 | ||||
-rw-r--r-- | bnf/rust_tts.y.h | 11 |
7 files changed, 132 insertions, 42 deletions
diff --git a/bnf/Makefile b/bnf/Makefile index 90f726dc..ad8aaf7d 100644 --- a/bnf/Makefile +++ b/bnf/Makefile @@ -17,9 +17,9 @@ test: test.bin $(TSTFILES) test.bin: $(OBJS) g++ -std=c++11 $(OBJS) -o $@ -%.o: %.cpp ast_types.hpp +%.o: %.cpp ast_types.hpp lex.hpp g++ -x c++ -std=c++11 $< -c -o $@ -I . -%.o: .gen/%.cpp ast_types.hpp +%.o: .gen/%.cpp ast_types.hpp lex.hpp g++ -x c++ -std=c++11 $< -c -o $@ -I . .gen/rust.tab.cpp: .gen/.rust.y @@ -31,7 +31,7 @@ test.bin: $(OBJS) @awk '{ if($$0 ~ /yacc.c:1909/) { ignore = 1; print $$0; } else if( ignore == 1 ) { trigger = $$0; ignore = 2; } else if( ignore == 2 ) { if($$0 == trigger) { ignore = 0 } } if( ignore > 0 ) { } else { print $$0; } }' < ${@:%.cpp=%.hpp.OLD} > $(@:%.cpp=%.hpp) .gen/rust.tab.hpp: .gen/rust.tab.cpp -.gen/.rust.y: Makefile rust.y rust_expr.y.h rust_tts.y.h +.gen/.rust.y: Makefile rust.y rust_expr.y.h rust_expr.y_tree.h rust_tts.y.h @mkdir -p $(dir $@) cat rust.y > $@ cpp -P rust_expr.y.h >> $@ diff --git a/bnf/lex.hpp b/bnf/lex.hpp index b9865552..4c4c6576 100644 --- a/bnf/lex.hpp +++ b/bnf/lex.hpp @@ -10,18 +10,30 @@ struct ParserContext ::std::unique_ptr<Module> output_module; // semi-evil hack used to break '>>' apart into '>' '>' - int next_token; + ::std::vector<int> next_token; ParserContext(::std::string filename): filename(filename), output_module(), next_token(0) - {} - + { + //next_token.reserve(2); + } + + int popback() { + if( next_token.size() > 0 ) { + int rv = next_token.back(); + next_token.pop_back(); + return rv; + } + else { + return 0; + } + } void pushback(int tok) { - assert(next_token == 0); - next_token = tok; + assert(next_token.size() < 2); + next_token.push_back( tok ); } }; #include ".gen/rust.tab.hpp" diff --git a/bnf/rust.lex b/bnf/rust.lex index f3214d0b..6d744485 100644 --- a/bnf/rust.lex +++ b/bnf/rust.lex @@ -5,7 +5,7 @@ %} %option yylineno -%option noyywrap batch debug +%option noyywrap batch %{ int rustbnf_forcetoken = 0; @@ -15,9 +15,9 @@ int rustbnf_forcetoken = 0; YY_DECL; // Wrap the real yylex with one that can yeild a pushbacked token int yylex(YYSTYPE* lvalp, ParserContext& context) { - if(context.next_token > 0) { - int rv = context.next_token; - context.next_token = 0; + int rv = context.popback(); + if(rv > 0) { + printf("--return %i\n", rv); return rv; } else { @@ -32,6 +32,7 @@ void handle_block_comment(); dec_digit [0-9_] ident_c [a-zA-Z_] +int_suffix ([ui](size|8|16|32))? %% @@ -93,10 +94,15 @@ ident_c [a-zA-Z_] "/=" { return SLASHEQUAL; } "%=" { return PERCENTEQUAL; } +"|=" { return PIPEEQUAL; } +"&=" { return AMPEQUAL; } + "&&" { return DOUBLEAMP; } "||" { return DOUBLEPIPE; } "<<" { return DOUBLELT; } ">>" { return DOUBLEGT; } +"<<=" { return DOUBLELTEQUAL; } +">>=" { return DOUBLEGTEQUAL; } ".." { return DOUBLEDOT; } "..." { return TRIPLEDOT; } @@ -104,6 +110,7 @@ ident_c [a-zA-Z_] "?" { return *yytext; } "#" { return *yytext; } +"@" { return *yytext; } "$" { return *yytext; } "&" { return *yytext; } "|" { return *yytext; } @@ -134,25 +141,66 @@ ident_c [a-zA-Z_] } } [0-9]{dec_digit}*"."{dec_digit}+(e[+\-]?{dec_digit}+)?(f32|f64)? { lvalp->FLOAT = strtod(yytext, NULL); return FLOAT; } -[0-9]{dec_digit}* { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } -0x[0-9a-fA-F_]+ { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } -0b[01_]+ { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } +[0-9]{dec_digit}*{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } +0x[0-9a-fA-F_]+{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } +0b[01_]+{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; } {ident_c}({ident_c}|[0-9])*"!" { lvalp->MACRO = new ::std::string(yytext, 0, strlen(yytext)-1); return MACRO; } '{ident_c}{ident_c}* { lvalp->LIFETIME = new ::std::string(yytext, 1); return LIFETIME; } b?'(.|\\'|\\[^']+)' { lvalp->CHARLIT = yytext[0]; return CHARLIT; } -\"([^"])*\" { lvalp->STRING = new ::std::string( parse_escaped_string(yytext) ); return STRING; } +b?\"([^"]|\\\")*\" { lvalp->STRING = new ::std::string( parse_escaped_string(yytext) ); return STRING; } . { fprintf(stderr, "\x1b[31m" "ERROR: %s:%d: Invalid character '%c'\x1b[0m\n", context.filename.c_str(), yylineno, *yytext); exit(1); } %% +uint32_t parse_char_literal(const char *_s) { + const uint8_t* s = (const uint8_t*)_s; + + assert(*s++ == '\''); + uint32_t rv = 0; + + if( *s == '\\' ) { + s ++; + switch(*s) + { + case 'n': rv = '\0'; break; + case 'r': rv = '\0'; break; + case 'x': + rv = strtoul((const char*)(s+1), NULL, 16); + s += 2; + break; + //case 'u': + // rv = strtoul((const char*)(s+1), NULL, 16); + // s += 2; + // break; + default: + return 0; + } + } + else if( *s < 0x80 ) { + rv = *s; + } + else { + fprintf(stderr, "TODO: UTF-8 char literals"); + exit(1); + } + s ++; + if( *s != '\'' ) { + exit(1); + } + assert(*s == '\0'); + return rv; +} ::std::string parse_escaped_string(const char* s) { + if( *s == 'b' ) { + s ++; + } assert(*s++ == '"'); ::std::string rv; - for( ; *s != '\0'; s ++ ) + for( ; *s != '"'; s ++ ) { if( *s == '\\' ) { @@ -161,22 +209,22 @@ b?'(.|\\'|\\[^']+)' { lvalp->CHARLIT = yytext[0]; return CHARLIT; } { case 'n': rv += '\n'; break; case 'r': rv += '\r'; break; + case '"': rv += '"'; break; case '\n': break; default: fprintf(stderr, "Unknown escape code '\\%c' in string\n", *s); exit(1); } } - else if( *s == '"') { - break ; + else if( *s == '\0' ) { + // wut? + fprintf(stderr, "Unexpected EOS\n"); + exit(1); } else { rv += *s; } } - if( *s == '\0' ) { - // wut? - } assert(*s++ == '"'); assert(*s == '\0'); return rv; @@ -25,9 +25,10 @@ %token HASHBANG %token DOUBLECOLON THINARROW FATARROW DOUBLEDOT TRIPLEDOT %token DOUBLEEQUAL EXCLAMEQUAL DOUBLEPIPE DOUBLEAMP +%token PIPEEQUAL AMPEQUAL %token GTEQUAL LTEQUAL %token PLUSEQUAL MINUSEQUAL STAREQUAL SLASHEQUAL PERCENTEQUAL -%token DOUBLELT DOUBLEGT +%token DOUBLELT DOUBLEGT DOUBLELTEQUAL DOUBLEGTEQUAL %token RWD_mod RWD_fn RWD_const RWD_static RWD_use RWD_struct RWD_enum RWD_trait RWD_impl RWD_type %token RWD_as RWD_in RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe %token RWD_let @@ -241,6 +242,7 @@ fn_def_arg: pattern ':' type; fn_def_arg_list_PROTO: fn_def_arg_PROTO | fn_def_arg_list_PROTO ',' fn_def_arg_PROTO; fn_def_arg_PROTO : IDENT ':' type + | RWD_mut IDENT ':' type | type ; @@ -367,10 +369,15 @@ generic_def_one where_clause: | RWD_where where_clauses; where_clauses - : where_clause_ent ',' where_clauses - | where_clause_ent; + : where_clause_ent ',' where_clauses + | where_clause_ent ',' + | where_clause_ent + ; where_clause_ent - : type ':' bounds; + : hrlb_def type ':' bounds + ; +hrlb_def: | RWD_for '<' lifetime_list '>'; +lifetime_list: LIFETIME | lifetime_list ',' LIFETIME bounds: bounds '+' bound | bound; bound: LIFETIME | '?' trait_path | trait_path; @@ -383,6 +390,14 @@ use_path : use_path DOUBLECOLON IDENT | IDENT; +dlt: DOUBLELT { context.pushback('<'); context.pushback('<'); } + +type_args + : '<' type_exprs '>' + | '<' type_exprs DOUBLEGT { bnf_trace("Double-gt terminated type expr"); context.pushback('>'); } + | dlt type_args + ; + expr_path : ufcs_path DOUBLECOLON IDENT | DOUBLECOLON expr_path_segs @@ -391,13 +406,13 @@ expr_path | expr_path_segs ; expr_path_segs - : IDENT DOUBLECOLON '<' type_exprs '>' - | IDENT DOUBLECOLON '<' type_exprs '>' DOUBLECOLON expr_path_segs + : IDENT DOUBLECOLON type_args + | IDENT DOUBLECOLON type_args DOUBLECOLON expr_path_segs | IDENT DOUBLECOLON expr_path_segs | IDENT ; expr_path_seg - : IDENT DOUBLECOLON '<' type_exprs '>' + : IDENT DOUBLECOLON type_args | IDENT ; @@ -412,15 +427,19 @@ type_path : ufcs_path DOUBLECOLON IDENT | trait_path ; -ufcs_path: '<' type RWD_as trait_path '>'; +ufcs_path: '<' ufcs_path_tail; +ufcs_path_tail + : type RWD_as trait_path '>' + | type RWD_as trait_path DOUBLEGT { context.pushback('>'); } + ; type_path_segs : type_path_segs DOUBLECOLON type_path_seg | type_path_seg ; type_path_seg : IDENT - | IDENT '<' type_exprs '>' - | IDENT '<' type_exprs DOUBLEGT { bnf_trace("Double-gt terminated type expr"); context.pushback('>'); } + | IDENT type_args + | IDENT type_args ; type_exprs: type_exprs ',' type_arg | type_arg; type_arg: type | LIFETIME | IDENT '=' type; @@ -436,7 +455,7 @@ type ; type_ele : type_path - | RWD_fn '(' type_list ')' + | RWD_fn '(' type_list ')' fn_def_ret | '_' | '&' opt_lifetime type_ele | DOUBLEAMP opt_lifetime type_ele @@ -454,7 +473,8 @@ type_ele | '(' type ',' type_list ')' ; trait_list: type_path '+' trait_list_inner; -trait_list_inner: type_path | trait_list_inner '+' type_path; +trait_list_inner: trait_list_ent | trait_list_inner '+' trait_list_ent; +trait_list_ent: trait_path | LIFETIME; type_list: type_list ',' type | type; /* diff --git a/bnf/rust_expr.y.h b/bnf/rust_expr.y.h index 7ad43703..78121070 100644 --- a/bnf/rust_expr.y.h +++ b/bnf/rust_expr.y.h @@ -1,5 +1,14 @@ -assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL; +assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL | DOUBLELTEQUAL | DOUBLEGTEQUAL | PIPEEQUAL | AMPEQUAL; + +closure_arg_list: | closure_arg_list_; +closure_arg_list_ + : closure_arg + | closure_arg_list ',' closure_arg +closure_arg + : pattern + | pattern ':' type + ; #define SUFFIX_is_ #define _(v) v diff --git a/bnf/rust_expr.y_tree.h b/bnf/rust_expr.y_tree.h index e7358e2f..26beb77c 100644 --- a/bnf/rust_expr.y_tree.h +++ b/bnf/rust_expr.y_tree.h @@ -62,7 +62,7 @@ _(expr_9) /* 10: Cast */ _(expr_cast) : _(expr_11) - | _(expr_cast) RWD_as type { bnf_trace("expr:cast"); } + | _(expr_cast) RWD_as type_ele { bnf_trace("expr:cast"); } ; /* 11: Times/Div/Modulo */ _(expr_11) @@ -109,6 +109,6 @@ _(expr_value) | '[' expr_list opt_comma ']' | '[' expr ';' expr ']' | MACRO tt_paren { bnf_trace("Expr macro invocation"); } - | '|' pattern_list '|' expr + | '|' closure_arg_list '|' expr | DOUBLEPIPE expr ; diff --git a/bnf/rust_tts.y.h b/bnf/rust_tts.y.h index 9a1d1d26..dc930120 100644 --- a/bnf/rust_tts.y.h +++ b/bnf/rust_tts.y.h @@ -17,14 +17,15 @@ tt_tok | _C('+') | _T(PLUSEQUAL) | _C('-') | _T(MINUSEQUAL) | _T(THINARROW) | _C('!') | _T(EXCLAMEQUAL) - | _C('&') | _T(DOUBLEAMP) + | _C('&') | _T(DOUBLEAMP) | _T(AMPEQUAL) + | _C('|') | _T(DOUBLEPIPE) | _T(PIPEEQUAL) | _C(':') | _T(DOUBLECOLON) - | _C('|') | _T(DOUBLEPIPE) | _C('^') | _C('=') | _T(DOUBLEEQUAL) | _T(FATARROW) - | _C('<') | _T(DOUBLELT) | _T(LTEQUAL) - | _C('>') | _T(DOUBLEGT) | _T(GTEQUAL) + | _C('<') | _T(DOUBLELT) | _T(LTEQUAL) | _T(DOUBLELTEQUAL) + | _C('>') | _T(DOUBLEGT) | _T(GTEQUAL) | _T(DOUBLEGTEQUAL) | _C('.') | _T(DOUBLEDOT) | _T(TRIPLEDOT) - | _C('$') | _C('#') | _C('@') | _C('?') + | _C('$') | _C('@') | _C('?') + | _C('#') | _T(HASHBANG) ; #undef _ |