summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Hodge <tpg@mutabah.net>2015-09-26 22:39:25 +0800
committerJohn Hodge <tpg@mutabah.net>2015-09-26 22:39:25 +0800
commitbe427e449c1d492050279311ccecc3de8b17e838 (patch)
treea2b6f2e3fbdef2ae003700578c339da8559ea2ca
parent0121cefaec3f14a5f35ac84321787e01849585e0 (diff)
downloadmrust-be427e449c1d492050279311ccecc3de8b17e838.tar.gz
BNF - Improvements, partially parsing std now
-rw-r--r--bnf/Makefile4
-rw-r--r--bnf/ast_types.hpp15
-rw-r--r--bnf/rust.lex59
-rw-r--r--bnf/rust.y23
-rw-r--r--bnf/rust_expr.y_tree.h2
-rw-r--r--bnf/rust_tts.y.h4
6 files changed, 96 insertions, 11 deletions
diff --git a/bnf/Makefile b/bnf/Makefile
index ad8aaf7d..cbfffef4 100644
--- a/bnf/Makefile
+++ b/bnf/Makefile
@@ -4,8 +4,10 @@
OBJS := main.o rust.tab.o rust.lex.o
+RUSTSRC := ../../rust_os/rustc_src/
+
TSTFILES := ../samples/1.rs
-TSTFILES += ../../rust_os/rustc_src/libcore/lib.rs
+TSTFILES += $(addprefix $(RUSTSRC), libcore/lib.rs libstd/lib.rs)
diff --git a/bnf/ast_types.hpp b/bnf/ast_types.hpp
index daa71b89..09343c71 100644
--- a/bnf/ast_types.hpp
+++ b/bnf/ast_types.hpp
@@ -212,6 +212,21 @@ public:
{}
};
+/// Item for `extern crate NAME;` or `extern crate NAME as ALIAS;`
+class ExternCrate: public Item
+{
+	::std::string	m_name;	// crate name as written after `extern crate`
+	::std::string	m_alias;	// name after `as`; same as m_name when no alias is given
+public:
+	/// Un-aliased form: the alias defaults to the crate's own name
+	ExternCrate(::std::string name):
+		ExternCrate(name, name)
+	{}
+	ExternCrate(::std::string name, ::std::string alias):
+		m_name(name),
+		m_alias(alias)
+	{}
+};
class UseItem
{
diff --git a/bnf/rust.lex b/bnf/rust.lex
index 1151aa32..21080235 100644
--- a/bnf/rust.lex
+++ b/bnf/rust.lex
@@ -27,6 +27,7 @@ int yylex(YYSTYPE* lvalp, ParserContext& context) {
void handle_block_comment();
::std::string parse_escaped_string(const char* s);
+::std::string handle_raw_string(const char* s);
%}
@@ -62,9 +63,11 @@ int_suffix ([ui](size|8|16|32|64))?
"pub" { return RWD_pub; }
"where" { return RWD_where; }
"extern" { return RWD_extern; }
+"crate" { return RWD_crate; }
"let" { return RWD_let; }
"ref" { return RWD_ref; }
+"box" { return RWD_box; }
"self" { return RWD_self; }
"super" { return RWD_super; }
@@ -142,14 +145,16 @@ int_suffix ([ui](size|8|16|32|64))?
}
}
[0-9]{dec_digit}*"."{dec_digit}+(e[+\-]?{dec_digit}+)?(f32|f64)? { lvalp->FLOAT = strtod(yytext, NULL); return FLOAT; }
+[0-9]{dec_digit}*(f32|f64) { lvalp->FLOAT = strtod(yytext, NULL); return FLOAT; }
[0-9]{dec_digit}*{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
0x[0-9a-fA-F_]+{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
0b[01_]+{int_suffix} { lvalp->INTEGER = strtoull(yytext, NULL, 0); return INTEGER; }
{ident_c}({ident_c}|[0-9])*"!" { lvalp->MACRO = new ::std::string(yytext, 0, strlen(yytext)-1); return MACRO; }
'{ident_c}{ident_c}* { lvalp->LIFETIME = new ::std::string(yytext, 1); return LIFETIME; }
-b?'(.|\\'|\\[^']+)' { lvalp->CHARLIT = yytext[0]; return CHARLIT; }
-b?\"([^"]|\\\")*\" { lvalp->STRING = new ::std::string( parse_escaped_string(yytext) ); return STRING; }
+b?'(.|\\'|\\[^']+|[\x80-\xFF]*)' { lvalp->CHARLIT = yytext[0]; return CHARLIT; }
+b?\"(\\.|[^\\"]|\\\n)*\" { lvalp->STRING = new ::std::string( parse_escaped_string(yytext) ); return STRING; }
+b?r#*\" { auto rs = handle_raw_string( (*yytext=='b' ? yytext+2 : yytext+1) ); lvalp->STRING = new ::std::string(rs); return STRING; }
. { fprintf(stderr, "\x1b[31m" "ERROR: %s:%d: Invalid character '%c'\x1b[0m\n", context.filename.c_str(), yylineno, *yytext); exit(1); }
@@ -194,6 +199,7 @@ uint32_t parse_char_literal(const char *_s) {
}
::std::string parse_escaped_string(const char* s) {
+ printf("parse_escaped_string(%s)\n", s);
if( *s == 'b' ) {
s ++;
}
@@ -211,7 +217,20 @@ uint32_t parse_char_literal(const char *_s) {
case 'n': rv += '\n'; break;
case 'r': rv += '\r'; break;
case '"': rv += '"'; break;
+ case '0': rv += '\0'; break;
+ case '\\': rv += '\\'; break;
case '\n': break;
+ case 'x':
+ rv += (char)strtoul((const char*)(s+1), NULL, 16);
+ s += 2;
+ break;
+ case 'u': {
+ char *out;
+ assert(s[1] == '{');
+ rv += (char)strtoul((const char*)(s+2), &out, 16);
+ s = out;
+ assert(*s == '}');
+ break; }
default:
fprintf(stderr, "Unknown escape code '\\%c' in string\n", *s);
exit(1);
@@ -248,3 +267,39 @@ loop:
//if (c != 0)
// putchar(c1);
}
+
+// Reads the body of a raw string literal (r"...", r#"..."#, ...) straight from the
+// lexer input. `s` points at the run of '#' followed by the opening quote; returns
+// the text up to (excluding) the closing quote + same number of '#'.
+::std::string handle_raw_string(const char* s) {
+	int num_hash = 0;
+	for(; *s == '#'; s++)
+		num_hash ++;
+	assert(*s == '"');
+
+	::std::string rv;
+	// `int`, not `char`: yyinput() signals end of input with EOF (or 0 in newer flex)
+	int c = yyinput();
+	for(;;)
+	{
+		if( c <= 0 ) {
+			fprintf(stderr, "Unterminated raw string literal\n");
+			exit(1);
+		}
+		if( c != '"' ) {
+			rv += (char)c;
+			c = yyinput();
+			continue;
+		}
+		// Saw a quote - it only closes the literal if `num_hash` '#' follow it
+		int i = 0;
+		while( i < num_hash && (c = yyinput()) == '#' )
+			i ++;
+		if( i == num_hash )
+			break;
+		// Not a terminator: keep the text, then re-examine `c` (may be another quote)
+		rv += '"';
+		rv.append(i, '#');
+	}
+	return rv;
+}
diff --git a/bnf/rust.y b/bnf/rust.y
index f0fe11e4..27625a3a 100644
--- a/bnf/rust.y
+++ b/bnf/rust.y
@@ -31,11 +31,11 @@
%token DOUBLELT DOUBLEGT DOUBLELTEQUAL DOUBLEGTEQUAL
%token RWD_mod RWD_fn RWD_const RWD_static RWD_use RWD_struct RWD_enum RWD_trait RWD_impl RWD_type
%token RWD_as RWD_in RWD_mut RWD_ref RWD_pub RWD_where RWD_unsafe
-%token RWD_let
+%token RWD_let RWD_box
%token RWD_self RWD_super
%token RWD_match RWD_if RWD_while RWD_loop RWD_for RWD_else
%token RWD_return RWD_break RWD_continue
-%token RWD_extern
+%token RWD_extern RWD_crate
%type <Module*> module_root
%type <int> tt_tok
@@ -58,6 +58,7 @@
%type <Enum*> enum_def
%type <Trait*> trait_def
%type <Fn*> fn_def fn_def_hdr fn_def_hdr_PROTO
+%type <ExternCrate*> extern_crate
%type <ExternBlock*> extern_block
%type <Impl*> impl_def
@@ -171,6 +172,8 @@ item
| RWD_unsafe unsafe_item { $$ = $2; }
| RWD_impl impl_def { $$ = $2; }
| RWD_extern extern_block { $$ = $2; }
+ | RWD_extern RWD_crate extern_crate { $$ = $3; }
+ | RWD_pub RWD_extern RWD_crate extern_crate { $$ = $4; $$->set_pub(); }
| MACRO IDENT tt_brace { $$ = new Macro(consume($1), consume($2), consume($3)); }
| MACRO tt_brace { $$ = new Macro(consume($1), consume($2)); }
| MACRO tt_paren ';' { $$ = new Macro(consume($1), consume($2)); }
@@ -196,6 +199,12 @@ unsafe_item
| RWD_impl impl_def { $$ = $2; }
;
+
+extern_crate /* tail of an `extern crate` item - `item` has already matched RWD_extern RWD_crate */
+ : IDENT ';' { $$ = new ExternCrate( consume($1) ); } /* `NAME;` */
+ | IDENT RWD_as IDENT ';' { $$ = new ExternCrate( consume($1), consume($3)); } /* `NAME as ALIAS;` */
+ ;
+
extern_block: extern_abi '{' extern_items '}' { $$ = new ExternBlock( consume($1), consume($3) ); };
extern_abi: { $$ = new ::std::string("C"); } | STRING;
extern_items
@@ -400,7 +409,7 @@ type_args
;
expr_path
- : ufcs_path DOUBLECOLON IDENT
+ : ufcs_path DOUBLECOLON expr_path_segs
| DOUBLECOLON expr_path_segs
| RWD_self DOUBLECOLON expr_path_segs
| RWD_super DOUBLECOLON expr_path_segs
@@ -430,7 +439,8 @@ type_path
;
ufcs_path: '<' ufcs_path_tail;
ufcs_path_tail
- : type RWD_as trait_path '>'
+ : type '>'
+ | type RWD_as trait_path '>'
| type RWD_as trait_path DOUBLEGT { context.pushback('>'); }
;
type_path_segs
@@ -457,6 +467,7 @@ type
type_ele
: type_path
| RWD_fn '(' type_list ')' fn_def_ret
+ | RWD_extern extern_abi RWD_fn '(' type_list ')' fn_def_ret
| '_'
| '&' opt_lifetime type_ele
| DOUBLEAMP opt_lifetime type_ele
@@ -483,11 +494,11 @@ type_list: type_list ',' type | type;
Patterns
=========================================
*/
-tuple_pattern: '(' pattern_list ')' | '(' pattern_list ',' ')';
+tuple_pattern: '(' ')' | '(' pattern_list ')' | '(' pattern_list ',' ')';
struct_pattern
: expr_path '{' struct_pattern_items '}'
- | expr_path tuple_pattern
+ | expr_path '(' pattern_list ')'
;
struct_pattern_item: IDENT | IDENT ':' pattern;
struct_pattern_items: struct_pattern_items ',' struct_pattern_item | struct_pattern_item;
diff --git a/bnf/rust_expr.y_tree.h b/bnf/rust_expr.y_tree.h
index 0687790d..82707da9 100644
--- a/bnf/rust_expr.y_tree.h
+++ b/bnf/rust_expr.y_tree.h
@@ -12,6 +12,7 @@ _(expr_range)
| _(expr_range_n) DOUBLEDOT
| DOUBLEDOT _(expr_range_n)
| _(expr_range_n) DOUBLEDOT _(expr_range_n)
+ | DOUBLEDOT
;
_(expr_range_n): _(expr_bor);
@@ -82,6 +83,7 @@ _(expr_12)
| '&' RWD_mut _(expr_12)
| DOUBLEAMP _(expr_12) { }
| DOUBLEAMP RWD_mut _(expr_12) { }
+ | RWD_box _(expr)
;
_(expr_fc)
diff --git a/bnf/rust_tts.y.h b/bnf/rust_tts.y.h
index 1cd84318..366ffa3d 100644
--- a/bnf/rust_tts.y.h
+++ b/bnf/rust_tts.y.h
@@ -7,10 +7,10 @@ tt_tok
| _T(FLOAT)
| _C(',') | _C(';') | _C('_')
| _T(RWD_self) | _T(RWD_super) | _T(RWD_mut) | _T(RWD_ref) | _T(RWD_let) | _T(RWD_where) | _T(RWD_pub) | _T(RWD_in) | _T(RWD_as)
- | _T(RWD_for ) | _T(RWD_while) | _T(RWD_loop) | _T(RWD_if) | _T(RWD_else) | _T(RWD_match)
+ | _T(RWD_for ) | _T(RWD_while) | _T(RWD_loop) | _T(RWD_if) | _T(RWD_else) | _T(RWD_match) | _T(RWD_box)
| _T(RWD_return) | _T(RWD_continue) | _T(RWD_break)
| _T(RWD_impl) | _T(RWD_struct) | _T(RWD_enum) | _T(RWD_fn) | _T(RWD_type) | _T(RWD_static) | _T(RWD_const) | _T(RWD_trait) | _T(RWD_use)
- | _T(RWD_extern) | _T(RWD_unsafe)
+ | _T(RWD_extern) | _T(RWD_crate) | _T(RWD_unsafe)
| _C('/') | _T(SLASHEQUAL)
| _C('%') | _T(PERCENTEQUAL)
| _C('*') | _T(STAREQUAL)