summary refs log tree commit diff
path: root/bnf
diff options
context:
space:
mode:
author John Hodge <tpg@mutabah.net> 2015-09-26 15:15:08 +0800
committer John Hodge <tpg@mutabah.net> 2015-09-26 15:15:08 +0800
commit fd3d1113c91764d4044b83d8502e04a9726d58e2 (patch)
tree f654240136a9e9f6e8d0750df83e7005e34fd152 /bnf
parent 3828791d2c2d94b7751efb5ec9ec4d9a31b15b71 (diff)
download mrust-fd3d1113c91764d4044b83d8502e04a9726d58e2.tar.gz
BNF - Top layer of libcore is parsing (all direct child modules)
Diffstat (limited to 'bnf')
-rw-r--r-- bnf/Makefile 6
-rw-r--r-- bnf/lex.hpp 22
-rw-r--r-- bnf/rust.lex 76
-rw-r--r-- bnf/rust.y 44
-rw-r--r-- bnf/rust_expr.y.h 11
-rw-r--r-- bnf/rust_expr.y_tree.h 4
-rw-r--r-- bnf/rust_tts.y.h 11
7 files changed, 132 insertions, 42 deletions
diff --git a/bnf/Makefile b/bnf/Makefile
index 90f726dc..ad8aaf7d 100644
--- a/bnf/Makefile
+++ b/bnf/Makefile
@@ -17,9 +17,9 @@ test: test.bin $(TSTFILES)
test.bin: $(OBJS)
g++ -std=c++11 $(OBJS) -o $@
-%.o: %.cpp ast_types.hpp
+%.o: %.cpp ast_types.hpp lex.hpp
g++ -x c++ -std=c++11 $< -c -o $@ -I .
-%.o: .gen/%.cpp ast_types.hpp
+%.o: .gen/%.cpp ast_types.hpp lex.hpp
g++ -x c++ -std=c++11 $< -c -o $@ -I .
.gen/rust.tab.cpp: .gen/.rust.y
@@ -31,7 +31,7 @@ test.bin: $(OBJS)
@awk '{ if($$0 ~ /yacc.c:1909/) { ignore = 1; print $$0; } else if( ignore == 1 ) { trigger = $$0; ignore = 2; } else if( ignore == 2 ) { if($$0 == trigger) { ignore = 0 } } if( ignore > 0 ) { } else { print $$0; } }' < ${@:%.cpp=%.hpp.OLD} > $(@:%.cpp=%.hpp)
.gen/rust.tab.hpp: .gen/rust.tab.cpp
-.gen/.rust.y: Makefile rust.y rust_expr.y.h rust_tts.y.h
+.gen/.rust.y: Makefile rust.y rust_expr.y.h rust_expr.y_tree.h rust_tts.y.h
@mkdir -p $(dir $@)
cat rust.y > $@
cpp -P rust_expr.y.h >> $@
diff --git a/bnf/lex.hpp b/bnf/lex.hpp
index b9865552..4c4c6576 100644
--- a/bnf/lex.hpp
+++ b/bnf/lex.hpp
@@ -10,18 +10,30 @@ struct ParserContext
::std::unique_ptr<Module> output_module;
// semi-evil hack used to break '>>' apart into '>' '>'
- int next_token;
+ ::std::vector<int> next_token;
ParserContext(::std::string filename):
filename(filename),
output_module(),
next_token(0)
- {}
-
+ {
+ //next_token.reserve(2);
+ }
+
+ /// Take the most recently pushed-back token off the stack.
+ /// Returns 0 when no pushed-back token is pending.
+ int popback() {
+     if( next_token.empty() ) {
+         return 0;
+     }
+     const int tok = next_token.back();
+     next_token.pop_back();
+     return tok;
+ }
void pushback(int tok) {
- assert(next_token == 0);
- next_token = tok;
+ assert(next_token.size() < 2);
+ next_token.push_back( tok );
}
};
#include ".gen/rust.tab.hpp"
diff --git a/bnf/rust.lex b/bnf/rust.lex
index f3214d0b..6d744485 100644
--- a/bnf/rust.lex
+++ b/bnf/rust.lex
@@ -5,7 +5,7 @@
%}
%option yylineno
-%option noyywrap batch debug
+%option noyywrap batch
%{
int rustbnf_forcetoken = 0;
@@ -15,9 +15,9 @@ int rustbnf_forcetoken = 0;
YY_DECL;
// Wrap the real yylex with one that can yeild a pushbacked token
int yylex(YYSTYPE* lvalp, ParserContext& context) {
- if(context.next_token > 0) {
- int rv = context.next_token;
- context.next_token = 0;
+ int rv = context.popback();
+ if(rv > 0) {
+ printf("--return %i\n", rv);
return rv;
}
else {
@@ -32,6 +32,7 @@ void handle_block_comment();
dec_digit [0-9_]
ident_c [a-zA-Z_]
+/* Optional integer literal type suffix: u8/u16/u32/u64/usize and the signed i* forms */
+int_suffix ([ui](size|8|16|32|64))?
%%
@@ -93,10 +94,15 @@ ident_c [a-zA-Z_]
"/=" { return SLASHEQUAL; }
"%=" { return PERCENTEQUAL; }
+"|=" { return PIPEEQUAL; }
+"&=" { return AMPEQUAL; }
+
"&&" { return DOUBLEAMP; }
"||" { return DOUBLEPIPE; }
"<<" { return DOUBLELT; }
">>" { return DOUBLEGT; }
+"<<=" { return DOUBLELTEQUAL; }
+">>=" { return DOUBLEGTEQUAL; }
".." { return DOUBLEDOT; }
"..." { return TRIPLEDOT; }
@@ -104,6 +110,7 @@ ident_c [a-zA-Z_]
"?" { return *yytext; }
"#" { return *yytext; }
+"@" { return *yytext; }
"$" { return *yytext; }
"&" { return *yytext; }
"|" { return *yytext; }
@@ -134,25 +141,66 @@ ident_c [a-zA-Z_]
}
}
[0-9]{dec_digit}*"."{dec_digit}+(e[+\-]?{dec_digit}+)?(f32|f64)? { lvalp->FLOAT = strtod(yytext, NULL); return FLOAT; }
-[0-9]{dec_digit}* { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
-0x[0-9a-fA-F_]+ { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
-0b[01_]+ { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
+[0-9]{dec_digit}*{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
+0x[0-9a-fA-F_]+{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
+0b[01_]+{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
{ident_c}({ident_c}|[0-9])*"!" { lvalp->MACRO = new ::std::string(yytext, 0, strlen(yytext)-1); return MACRO; }
'{ident_c}{ident_c}* { lvalp->LIFETIME = new ::std::string(yytext, 1); return LIFETIME; }
b?'(.|\\'|\\[^']+)' { lvalp->CHARLIT = yytext[0]; return CHARLIT; }
-\"([^"])*\" { lvalp->STRING = new ::std::string( parse_escaped_string(yytext) ); return STRING; }
+b?\"([^"]|\\\")*\" { lvalp->STRING = new ::std::string( parse_escaped_string(yytext) ); return STRING; }
. { fprintf(stderr, "\x1b[31m" "ERROR: %s:%d: Invalid character '%c'\x1b[0m\n", context.filename.c_str(), yylineno, *yytext); exit(1); }
%%
+/// Value of a single hexadecimal digit, or -1 if `c` is not a hex digit.
+static int charlit_hex_digit(uint8_t c) {
+    if( '0' <= c && c <= '9' )  return c - '0';
+    if( 'a' <= c && c <= 'f' )  return c - 'a' + 10;
+    if( 'A' <= c && c <= 'F' )  return c - 'A' + 10;
+    return -1;
+}
+/**
+ * Decode the text of a character (or byte) literal into its numeric value.
+ *
+ * \param _s  NUL-terminated literal text including its quotes and the
+ *            optional `b` prefix (the CHARLIT lexer rule accepts b?'...')
+ * \return The decoded character value
+ *
+ * Handles plain ASCII characters, the simple escapes (n r t 0 backslash and
+ * both quote kinds), two-digit \xNN escapes and \u{...} escapes with one to
+ * six hex digits. Malformed literals print a diagnostic and terminate via
+ * exit(1); unescaped non-ASCII (UTF-8) characters are still unsupported.
+ */
+uint32_t parse_char_literal(const char *_s) {
+    const uint8_t* s = (const uint8_t*)_s;
+
+    // Byte literals carry a leading 'b', same as in parse_escaped_string
+    if( *s == 'b' ) {
+        s ++;
+    }
+    // Keep the increment outside assert() so NDEBUG builds still advance
+    assert(*s == '\'');
+    s ++;
+
+    uint32_t rv = 0;
+    if( *s == '\\' ) {
+        s ++;
+        switch(*s)
+        {
+        case 'n': rv = '\n'; break;
+        case 'r': rv = '\r'; break;
+        case 't': rv = '\t'; break;
+        case '0': rv = '\0'; break;
+        case '\\': rv = '\\'; break;
+        case '\'': rv = '\''; break;
+        case '"': rv = '"'; break;
+        case 'x':
+            // Exactly two hex digits; `s` is left on the last digit
+            for( int i = 0; i < 2; i ++ ) {
+                s ++;
+                int digit = charlit_hex_digit(*s);
+                if( digit < 0 ) {
+                    fprintf(stderr, "Invalid \\x escape in character literal %s\n", _s);
+                    exit(1);
+                }
+                rv = rv * 16 + (uint32_t)digit;
+            }
+            break;
+        case 'u': {
+            // \u{H...} with 1-6 hex digits; `s` is left on the closing brace
+            s ++;
+            if( *s != '{' ) {
+                fprintf(stderr, "Invalid \\u escape in character literal %s\n", _s);
+                exit(1);
+            }
+            unsigned int ndigits = 0;
+            for( s ++; *s != '}'; s ++ ) {
+                int digit = charlit_hex_digit(*s);
+                if( digit < 0 || ++ndigits > 6 ) {
+                    fprintf(stderr, "Invalid \\u escape in character literal %s\n", _s);
+                    exit(1);
+                }
+                rv = rv * 16 + (uint32_t)digit;
+            }
+            if( ndigits == 0 ) {
+                fprintf(stderr, "Invalid \\u escape in character literal %s\n", _s);
+                exit(1);
+            }
+            break; }
+        default:
+            // Fail loudly instead of silently yielding NUL for an unknown escape
+            fprintf(stderr, "Unknown escape code '\\%c' in character literal\n", *s);
+            exit(1);
+        }
+    }
+    else if( *s == '\0' ) {
+        // Nothing after the opening quote: stop before stepping past the terminator
+        fprintf(stderr, "Unexpected end of character literal %s\n", _s);
+        exit(1);
+    }
+    else if( *s < 0x80 ) {
+        rv = *s;
+    }
+    else {
+        fprintf(stderr, "TODO: UTF-8 char literals");
+        exit(1);
+    }
+    s ++;
+    if( *s != '\'' ) {
+        fprintf(stderr, "Missing closing quote in character literal %s\n", _s);
+        exit(1);
+    }
+    // Step over the closing quote before checking that the text ends here
+    s ++;
+    assert(*s == '\0');
+    return rv;
+}
::std::string parse_escaped_string(const char* s) {
+ if( *s == 'b' ) {
+ s ++;
+ }
assert(*s++ == '"');
::std::string rv;
- for( ; *s != '\0'; s ++ )
+ for( ; *s != '"'; s ++ )
{
if( *s == '\\' )
{
@@ -161,22 +209,22 @@ b?'(.|\\'|\\[^']+)' { lvalp->CHARLIT = yytext[0]; return CHARLIT; }
{
case 'n': rv += '\n'; break;
case 'r': rv += '\r'; break;
+ case '"': rv += '"'; break;
case '\n': break;
default:
fprintf(stderr, "Unknown escape code '\\%c' in string\n", *s);
exit(1);
}
}
- else if( *s == '"') {
- break ;
+ else if( *s == '\0' ) {
+ // wut?
+ fprintf(stderr, "Unexpected EOS\n");
+ exit(1);
}
else {
rv += *s;
}
}
- if( *s == '\0' ) {
- // wut?
- }
assert(*s++ == '"');
assert(*s == '\0');
return rv;
diff --git a/bnf/rust.y b/bnf/rust.y
index 4ee1d427..7e460fd0 100644
--- a/bnf/rust.y
+++ b/bnf/rust.y
@@ -25,9 +25,10 @@
%token HASHBANG
%token DOUBLECOLON THINARROW FATARROW DOUBLEDOT TRIPLEDOT
%token DOUBLEEQUAL EXCLAMEQUAL DOUBLEPIPE DOUBLEAMP
+%token PIPEEQUAL AMPEQUAL
%token GTEQUAL LTEQUAL
%token PLUSEQUAL MINUSEQUAL STAREQUAL SLASHEQUAL PERCENTEQUAL
-%token DOUBLELT DOUBLEGT
+%token DOUBLELT DOUBLEGT DOUBLELTEQUAL DOUBLEGTEQUAL
%token RWD_mod RWD_fn RWD_const RWD_static RWD_use RWD_struct RWD_enum RWD_trait RWD_impl RWD_type
%token RWD_as RWD_in RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe
%token RWD_let
@@ -241,6 +242,7 @@ fn_def_arg: pattern ':' type;
fn_def_arg_list_PROTO: fn_def_arg_PROTO | fn_def_arg_list_PROTO ',' fn_def_arg_PROTO;
fn_def_arg_PROTO
: IDENT ':' type
+ | RWD_mut IDENT ':' type
| type
;
@@ -367,10 +369,15 @@ generic_def_one
where_clause: | RWD_where where_clauses;
where_clauses
- : where_clause_ent ',' where_clauses
- | where_clause_ent;
+ : where_clause_ent ',' where_clauses
+ | where_clause_ent ','
+ | where_clause_ent
+ ;
where_clause_ent
- : type ':' bounds;
+ : hrlb_def type ':' bounds
+ ;
+hrlb_def: | RWD_for '<' lifetime_list '>';
+lifetime_list: LIFETIME | lifetime_list ',' LIFETIME
bounds: bounds '+' bound | bound;
bound: LIFETIME | '?' trait_path | trait_path;
@@ -383,6 +390,14 @@ use_path
: use_path DOUBLECOLON IDENT
| IDENT;
+dlt: DOUBLELT { context.pushback('<'); context.pushback('<'); }
+
+type_args
+ : '<' type_exprs '>'
+ | '<' type_exprs DOUBLEGT { bnf_trace("Double-gt terminated type expr"); context.pushback('>'); }
+ | dlt type_args
+ ;
+
expr_path
: ufcs_path DOUBLECOLON IDENT
| DOUBLECOLON expr_path_segs
@@ -391,13 +406,13 @@ expr_path
| expr_path_segs
;
expr_path_segs
- : IDENT DOUBLECOLON '<' type_exprs '>'
- | IDENT DOUBLECOLON '<' type_exprs '>' DOUBLECOLON expr_path_segs
+ : IDENT DOUBLECOLON type_args
+ | IDENT DOUBLECOLON type_args DOUBLECOLON expr_path_segs
| IDENT DOUBLECOLON expr_path_segs
| IDENT
;
expr_path_seg
- : IDENT DOUBLECOLON '<' type_exprs '>'
+ : IDENT DOUBLECOLON type_args
| IDENT
;
@@ -412,15 +427,19 @@ type_path
: ufcs_path DOUBLECOLON IDENT
| trait_path
;
-ufcs_path: '<' type RWD_as trait_path '>';
+ufcs_path: '<' ufcs_path_tail;
+ufcs_path_tail
+ : type RWD_as trait_path '>'
+ | type RWD_as trait_path DOUBLEGT { context.pushback('>'); }
+ ;
type_path_segs
: type_path_segs DOUBLECOLON type_path_seg
| type_path_seg
;
type_path_seg
: IDENT
- | IDENT '<' type_exprs '>'
- | IDENT '<' type_exprs DOUBLEGT { bnf_trace("Double-gt terminated type expr"); context.pushback('>'); }
+ /* Both the '>' and '>>' terminated forms are covered by type_args, so one alternative suffices */
+ | IDENT type_args
;
type_exprs: type_exprs ',' type_arg | type_arg;
type_arg: type | LIFETIME | IDENT '=' type;
@@ -436,7 +455,7 @@ type
;
type_ele
: type_path
- | RWD_fn '(' type_list ')'
+ | RWD_fn '(' type_list ')' fn_def_ret
| '_'
| '&' opt_lifetime type_ele
| DOUBLEAMP opt_lifetime type_ele
@@ -454,7 +473,8 @@ type_ele
| '(' type ',' type_list ')'
;
trait_list: type_path '+' trait_list_inner;
-trait_list_inner: type_path | trait_list_inner '+' type_path;
+trait_list_inner: trait_list_ent | trait_list_inner '+' trait_list_ent;
+trait_list_ent: trait_path | LIFETIME;
type_list: type_list ',' type | type;
/*
diff --git a/bnf/rust_expr.y.h b/bnf/rust_expr.y.h
index 7ad43703..78121070 100644
--- a/bnf/rust_expr.y.h
+++ b/bnf/rust_expr.y.h
@@ -1,5 +1,14 @@
-assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL;
+assign_op: '=' | PLUSEQUAL | MINUSEQUAL | STAREQUAL | SLASHEQUAL | DOUBLELTEQUAL | DOUBLEGTEQUAL | PIPEEQUAL | AMPEQUAL;
+
+/* Closure parameters: an optional comma-separated list of patterns, each with an optional type */
+closure_arg_list: | closure_arg_list_;
+/* Non-empty list; recurses on itself so that a leading comma is not accepted */
+closure_arg_list_
+ : closure_arg
+ | closure_arg_list_ ',' closure_arg
+ ;
+closure_arg
+ : pattern
+ | pattern ':' type
+ ;
#define SUFFIX_is_
#define _(v) v
diff --git a/bnf/rust_expr.y_tree.h b/bnf/rust_expr.y_tree.h
index e7358e2f..26beb77c 100644
--- a/bnf/rust_expr.y_tree.h
+++ b/bnf/rust_expr.y_tree.h
@@ -62,7 +62,7 @@ _(expr_9)
/* 10: Cast */
_(expr_cast)
: _(expr_11)
- | _(expr_cast) RWD_as type { bnf_trace("expr:cast"); }
+ | _(expr_cast) RWD_as type_ele { bnf_trace("expr:cast"); }
;
/* 11: Times/Div/Modulo */
_(expr_11)
@@ -109,6 +109,6 @@ _(expr_value)
| '[' expr_list opt_comma ']'
| '[' expr ';' expr ']'
| MACRO tt_paren { bnf_trace("Expr macro invocation"); }
- | '|' pattern_list '|' expr
+ | '|' closure_arg_list '|' expr
| DOUBLEPIPE expr
;
diff --git a/bnf/rust_tts.y.h b/bnf/rust_tts.y.h
index 9a1d1d26..dc930120 100644
--- a/bnf/rust_tts.y.h
+++ b/bnf/rust_tts.y.h
@@ -17,14 +17,15 @@ tt_tok
| _C('+') | _T(PLUSEQUAL)
| _C('-') | _T(MINUSEQUAL) | _T(THINARROW)
| _C('!') | _T(EXCLAMEQUAL)
- | _C('&') | _T(DOUBLEAMP)
+ | _C('&') | _T(DOUBLEAMP) | _T(AMPEQUAL)
+ | _C('|') | _T(DOUBLEPIPE) | _T(PIPEEQUAL)
| _C(':') | _T(DOUBLECOLON)
- | _C('|') | _T(DOUBLEPIPE)
| _C('^')
| _C('=') | _T(DOUBLEEQUAL) | _T(FATARROW)
- | _C('<') | _T(DOUBLELT) | _T(LTEQUAL)
- | _C('>') | _T(DOUBLEGT) | _T(GTEQUAL)
+ | _C('<') | _T(DOUBLELT) | _T(LTEQUAL) | _T(DOUBLELTEQUAL)
+ | _C('>') | _T(DOUBLEGT) | _T(GTEQUAL) | _T(DOUBLEGTEQUAL)
| _C('.') | _T(DOUBLEDOT) | _T(TRIPLEDOT)
- | _C('$') | _C('#') | _C('@') | _C('?')
+ | _C('$') | _C('@') | _C('?')
+ | _C('#') | _T(HASHBANG)
;
#undef _