summaryrefslogtreecommitdiff
path: root/src/pkg/datafmt/parser.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/datafmt/parser.go')
-rw-r--r--src/pkg/datafmt/parser.go447
1 files changed, 447 insertions, 0 deletions
diff --git a/src/pkg/datafmt/parser.go b/src/pkg/datafmt/parser.go
new file mode 100644
index 000000000..0d597dcb5
--- /dev/null
+++ b/src/pkg/datafmt/parser.go
@@ -0,0 +1,447 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package datafmt
+
+import (
+ "container/vector";
+ "datafmt";
+ "fmt";
+ "go/scanner";
+ "go/token";
+ "io";
+ "os";
+ "strconv";
+ "strings";
+)
+
+// ----------------------------------------------------------------------------
+// Error handling
+
+// Error describes an individual error. The position Pos, if valid,
+// indicates the format source position the error relates to. The
+// error is specified with the Msg string.
+//
+type Error struct {
+ Pos token.Position;
+ Msg string;
+}
+
+
+func (e *Error) String() string {
+ pos := "";
+ if e.Pos.IsValid() {
+ pos = fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column);
+ }
+ return pos + e.Msg;
+}
+
+
+// An ErrorList is a list of errors encountered during parsing.
+type ErrorList []*Error
+
+
+// ErrorList implements SortInterface and the os.Error interface.
+
+func (p ErrorList) Len() int { return len(p); }
+func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; }
+func (p ErrorList) Less(i, j int) bool { return p[i].Pos.Offset < p[j].Pos.Offset; }
+
+
+func (p ErrorList) String() string {
+ switch len(p) {
+ case 0: return "unspecified error";
+ case 1: return p[0].String();
+ }
+ return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1);
+}
+
+
+// ----------------------------------------------------------------------------
+// Parsing
+
+type parser struct {
+ errors vector.Vector;
+ scanner scanner.Scanner;
+ pos token.Position; // token position
+ tok token.Token; // one token look-ahead
+ lit []byte; // token literal
+
+ packs map [string] string; // PackageName -> ImportPath
+ rules map [string] expr; // RuleName -> Expression
+}
+
+
+func (p *parser) next() {
+ p.pos, p.tok, p.lit = p.scanner.Scan();
+ switch p.tok {
+ case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT:
+ // Go keywords for composite types are type names
+ // returned by reflect. Accept them as identifiers.
+ p.tok = token.IDENT; // p.lit is already set correctly
+ }
+}
+
+
+func (p *parser) init(src []byte) {
+ p.errors.Init(0);
+ p.scanner.Init(src, p, scanner.AllowIllegalChars); // return '@' as token.ILLEGAL w/o error message
+ p.next(); // initializes pos, tok, lit
+ p.packs = make(map [string] string);
+ p.rules = make(map [string] expr);
+}
+
+
+// The parser implements scanner.Error.
+func (p *parser) Error(pos token.Position, msg string) {
+ // Don't collect errors that are on the same line as the previous error
+ // in the hope to reduce the number of spurious errors due to incorrect
+ // parser synchronization.
+ if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line {
+ p.errors.Push(&Error{pos, msg});
+ }
+}
+
+
+func (p *parser) errorExpected(pos token.Position, msg string) {
+ msg = "expected " + msg;
+ if pos.Offset == p.pos.Offset {
+ // the error happened at the current position;
+ // make the error message more specific
+ msg += ", found '" + p.tok.String() + "'";
+ if p.tok.IsLiteral() {
+ msg += " " + string(p.lit);
+ }
+ }
+ p.Error(pos, msg);
+}
+
+
+func (p *parser) expect(tok token.Token) token.Position {
+ pos := p.pos;
+ if p.tok != tok {
+ p.errorExpected(pos, "'" + tok.String() + "'");
+ }
+ p.next(); // make progress in any case
+ return pos;
+}
+
+
+func (p *parser) parseIdentifier() string {
+ name := string(p.lit);
+ p.expect(token.IDENT);
+ return name;
+}
+
+
+func (p *parser) parseTypeName() (string, bool) {
+ pos := p.pos;
+ name, isIdent := p.parseIdentifier(), true;
+ if p.tok == token.PERIOD {
+ // got a package name, lookup package
+ if importPath, found := p.packs[name]; found {
+ name = importPath;
+ } else {
+ p.Error(pos, "package not declared: " + name);
+ }
+ p.next();
+ name, isIdent = name + "." + p.parseIdentifier(), false;
+ }
+ return name, isIdent;
+}
+
+
+// Parses a rule name and returns it. If the rule name is
+// a package-qualified type name, the package name is resolved.
+// The 2nd result value is true iff the rule name consists of a
+// single identifier only (and thus could be a package name).
+//
+func (p *parser) parseRuleName() (string, bool) {
+ name, isIdent := "", false;
+ switch p.tok {
+ case token.IDENT:
+ name, isIdent = p.parseTypeName();
+ case token.DEFAULT:
+ name = "default";
+ p.next();
+ case token.QUO:
+ name = "/";
+ p.next();
+ default:
+ p.errorExpected(p.pos, "rule name");
+ p.next(); // make progress in any case
+ }
+ return name, isIdent;
+}
+
+
+func (p *parser) parseString() string {
+ s := "";
+ if p.tok == token.STRING {
+ var err os.Error;
+ s, err = strconv.Unquote(string(p.lit));
+ // Unquote may fail with an error, but only if the scanner found
+ // an illegal string in the first place. In this case the error
+ // has already been reported.
+ p.next();
+ return s;
+ } else {
+ p.expect(token.STRING);
+ }
+ return s;
+}
+
+
+func (p *parser) parseLiteral() literal {
+ s := io.StringBytes(p.parseString());
+
+ // A string literal may contain %-format specifiers. To simplify
+ // and speed up printing of the literal, split it into segments
+ // that start with "%" possibly followed by a last segment that
+ // starts with some other character.
+ var list vector.Vector;
+ list.Init(0);
+ i0 := 0;
+ for i := 0; i < len(s); i++ {
+ if s[i] == '%' && i+1 < len(s) {
+ // the next segment starts with a % format
+ if i0 < i {
+ // the current segment is not empty, split it off
+ list.Push(s[i0 : i]);
+ i0 = i;
+ }
+ i++; // skip %; let loop skip over char after %
+ }
+ }
+ // the final segment may start with any character
+ // (it is empty iff the string is empty)
+ list.Push(s[i0 : len(s)]);
+
+ // convert list into a literal
+ lit := make(literal, list.Len());
+ for i := 0; i < list.Len(); i++ {
+ lit[i] = list.At(i).([]byte);
+ }
+
+ return lit;
+}
+
+
+func (p *parser) parseField() expr {
+ var fname string;
+ switch p.tok {
+ case token.ILLEGAL:
+ if string(p.lit) != "@" {
+ return nil;
+ }
+ fname = "@";
+ p.next();
+ case token.MUL:
+ fname = "*";
+ p.next();
+ case token.IDENT:
+ fname = p.parseIdentifier();
+ default:
+ return nil;
+ }
+
+ var ruleName string;
+ if p.tok == token.COLON {
+ p.next();
+ var _ bool;
+ ruleName, _ = p.parseRuleName();
+ }
+
+ return &field{fname, ruleName};
+}
+
+
+func (p *parser) parseExpression() expr
+
+func (p *parser) parseOperand() (x expr) {
+ switch p.tok {
+ case token.STRING:
+ x = p.parseLiteral();
+
+ case token.LPAREN:
+ p.next();
+ x = p.parseExpression();
+ if p.tok == token.SHR {
+ p.next();
+ x = &group{x, p.parseExpression()};
+ }
+ p.expect(token.RPAREN);
+
+ case token.LBRACK:
+ p.next();
+ x = &option{p.parseExpression()};
+ p.expect(token.RBRACK);
+
+ case token.LBRACE:
+ p.next();
+ x = p.parseExpression();
+ var div expr;
+ if p.tok == token.QUO {
+ p.next();
+ div = p.parseExpression();
+ }
+ x = &repetition{x, div};
+ p.expect(token.RBRACE);
+
+ default:
+ x = p.parseField(); // may be nil
+ }
+
+ return x;
+}
+
+
+func (p *parser) parseSequence() expr {
+ var list vector.Vector;
+ list.Init(0);
+
+ for x := p.parseOperand(); x != nil; x = p.parseOperand() {
+ list.Push(x);
+ }
+
+ // no need for a sequence if list.Len() < 2
+ switch list.Len() {
+ case 0: return nil;
+ case 1: return list.At(0).(expr);
+ }
+
+ // convert list into a sequence
+ seq := make(sequence, list.Len());
+ for i := 0; i < list.Len(); i++ {
+ seq[i] = list.At(i).(expr);
+ }
+ return seq;
+}
+
+
+func (p *parser) parseExpression() expr {
+ var list vector.Vector;
+ list.Init(0);
+
+ for {
+ x := p.parseSequence();
+ if x != nil {
+ list.Push(x);
+ }
+ if p.tok != token.OR {
+ break;
+ }
+ p.next();
+ }
+
+ // no need for an alternatives if list.Len() < 2
+ switch list.Len() {
+ case 0: return nil;
+ case 1: return list.At(0).(expr);
+ }
+
+ // convert list into a alternatives
+ alt := make(alternatives, list.Len());
+ for i := 0; i < list.Len(); i++ {
+ alt[i] = list.At(i).(expr);
+ }
+ return alt;
+}
+
+
+func (p *parser) parseFormat() {
+ for p.tok != token.EOF {
+ pos := p.pos;
+
+ name, isIdent := p.parseRuleName();
+ switch p.tok {
+ case token.STRING:
+ // package declaration
+ importPath := p.parseString();
+
+ // add package declaration
+ if !isIdent {
+ p.Error(pos, "illegal package name: " + name);
+ } else if _, found := p.packs[name]; !found {
+ p.packs[name] = importPath;
+ } else {
+ p.Error(pos, "package already declared: " + name);
+ }
+
+ case token.ASSIGN:
+ // format rule
+ p.next();
+ x := p.parseExpression();
+
+ // add rule
+ if _, found := p.rules[name]; !found {
+ p.rules[name] = x;
+ } else {
+ p.Error(pos, "format rule already declared: " + name);
+ }
+
+ default:
+ p.errorExpected(p.pos, "package declaration or format rule");
+ p.next(); // make progress in any case
+ }
+
+ if p.tok == token.SEMICOLON {
+ p.next();
+ } else {
+ break;
+ }
+ }
+ p.expect(token.EOF);
+}
+
+
+func remap(p *parser, name string) string {
+ i := strings.Index(name, ".");
+ if i >= 0 {
+ packageName, suffix := name[0 : i], name[i : len(name)];
+ // lookup package
+ if importPath, found := p.packs[packageName]; found {
+ name = importPath + suffix;
+ } else {
+ var invalidPos token.Position;
+ p.Error(invalidPos, "package not declared: " + packageName);
+ }
+ }
+ return name;
+}
+
+
+// Parse parses a set of format productions from source src. Custom
+// formatters may be provided via a map of formatter functions. If
+// there are no errors, the result is a Format and the error is nil.
+// Otherwise the format is nil and a non-empty ErrorList is returned.
+//
+func Parse(src []byte, fmap FormatterMap) (Format, os.Error) {
+ // parse source
+ var p parser;
+ p.init(src);
+ p.parseFormat();
+
+ // add custom formatters, if any
+ for name, form := range fmap {
+ name = remap(&p, name);
+ if t, found := p.rules[name]; !found {
+ p.rules[name] = &custom{name, form};
+ } else {
+ var invalidPos token.Position;
+ p.Error(invalidPos, "formatter already declared: " + name);
+ }
+ }
+
+ // convert errors list, if any
+ if p.errors.Len() > 0 {
+ errors := make(ErrorList, p.errors.Len());
+ for i := 0; i < p.errors.Len(); i++ {
+ errors[i] = p.errors.At(i).(*Error);
+ }
+ return nil, errors;
+ }
+
+ return p.rules, nil;
+}