diff options
Diffstat (limited to 'src/pkg/datafmt/parser.go')
-rw-r--r-- | src/pkg/datafmt/parser.go | 447 |
1 files changed, 447 insertions, 0 deletions
diff --git a/src/pkg/datafmt/parser.go b/src/pkg/datafmt/parser.go new file mode 100644 index 000000000..0d597dcb5 --- /dev/null +++ b/src/pkg/datafmt/parser.go @@ -0,0 +1,447 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package datafmt + +import ( + "container/vector"; + "datafmt"; + "fmt"; + "go/scanner"; + "go/token"; + "io"; + "os"; + "strconv"; + "strings"; +) + +// ---------------------------------------------------------------------------- +// Error handling + +// Error describes an individual error. The position Pos, if valid, +// indicates the format source position the error relates to. The +// error is specified with the Msg string. +// +type Error struct { + Pos token.Position; + Msg string; +} + + +func (e *Error) String() string { + pos := ""; + if e.Pos.IsValid() { + pos = fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column); + } + return pos + e.Msg; +} + + +// An ErrorList is a list of errors encountered during parsing. +type ErrorList []*Error + + +// ErrorList implements SortInterface and the os.Error interface. + +func (p ErrorList) Len() int { return len(p); } +func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } +func (p ErrorList) Less(i, j int) bool { return p[i].Pos.Offset < p[j].Pos.Offset; } + + +func (p ErrorList) String() string { + switch len(p) { + case 0: return "unspecified error"; + case 1: return p[0].String(); + } + return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1); +} + + +// ---------------------------------------------------------------------------- +// Parsing + +type parser struct { + errors vector.Vector; + scanner scanner.Scanner; + pos token.Position; // token position + tok token.Token; // one token look-ahead + lit []byte; // token literal + + packs map [string] string; // PackageName -> ImportPath + rules map [string] expr; // RuleName -> Expression +} + + +func (p *parser) next() { + p.pos, p.tok, p.lit = p.scanner.Scan(); + switch p.tok { + case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT: + // Go keywords for composite types are type names + // returned by reflect. Accept them as identifiers. + p.tok = token.IDENT; // p.lit is already set correctly + } +} + + +func (p *parser) init(src []byte) { + p.errors.Init(0); + p.scanner.Init(src, p, scanner.AllowIllegalChars); // return '@' as token.ILLEGAL w/o error message + p.next(); // initializes pos, tok, lit + p.packs = make(map [string] string); + p.rules = make(map [string] expr); +} + + +// The parser implements scanner.Error. +func (p *parser) Error(pos token.Position, msg string) { + // Don't collect errors that are on the same line as the previous error + // in the hope to reduce the number of spurious errors due to incorrect + // parser synchronization. + if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line { + p.errors.Push(&Error{pos, msg}); + } +} + + +func (p *parser) errorExpected(pos token.Position, msg string) { + msg = "expected " + msg; + if pos.Offset == p.pos.Offset { + // the error happened at the current position; + // make the error message more specific + msg += ", found '" + p.tok.String() + "'"; + if p.tok.IsLiteral() { + msg += " " + string(p.lit); + } + } + p.Error(pos, msg); +} + + +func (p *parser) expect(tok token.Token) token.Position { + pos := p.pos; + if p.tok != tok { + p.errorExpected(pos, "'" + tok.String() + "'"); + } + p.next(); // make progress in any case + return pos; +} + + +func (p *parser) parseIdentifier() string { + name := string(p.lit); + p.expect(token.IDENT); + return name; +} + + +func (p *parser) parseTypeName() (string, bool) { + pos := p.pos; + name, isIdent := p.parseIdentifier(), true; + if p.tok == token.PERIOD { + // got a package name, lookup package + if importPath, found := p.packs[name]; found { + name = importPath; + } else { + p.Error(pos, "package not declared: " + name); + } + p.next(); + name, isIdent = name + "." + p.parseIdentifier(), false; + } + return name, isIdent; +} + + +// Parses a rule name and returns it. If the rule name is +// a package-qualified type name, the package name is resolved. +// The 2nd result value is true iff the rule name consists of a +// single identifier only (and thus could be a package name). +// +func (p *parser) parseRuleName() (string, bool) { + name, isIdent := "", false; + switch p.tok { + case token.IDENT: + name, isIdent = p.parseTypeName(); + case token.DEFAULT: + name = "default"; + p.next(); + case token.QUO: + name = "/"; + p.next(); + default: + p.errorExpected(p.pos, "rule name"); + p.next(); // make progress in any case + } + return name, isIdent; +} + + +func (p *parser) parseString() string { + s := ""; + if p.tok == token.STRING { + var err os.Error; + s, err = strconv.Unquote(string(p.lit)); + // Unquote may fail with an error, but only if the scanner found + // an illegal string in the first place. In this case the error + // has already been reported. + p.next(); + return s; + } else { + p.expect(token.STRING); + } + return s; +} + + +func (p *parser) parseLiteral() literal { + s := io.StringBytes(p.parseString()); + + // A string literal may contain %-format specifiers. To simplify + // and speed up printing of the literal, split it into segments + // that start with "%" possibly followed by a last segment that + // starts with some other character. + var list vector.Vector; + list.Init(0); + i0 := 0; + for i := 0; i < len(s); i++ { + if s[i] == '%' && i+1 < len(s) { + // the next segment starts with a % format + if i0 < i { + // the current segment is not empty, split it off + list.Push(s[i0 : i]); + i0 = i; + } + i++; // skip %; let loop skip over char after % + } + } + // the final segment may start with any character + // (it is empty iff the string is empty) + list.Push(s[i0 : len(s)]); + + // convert list into a literal + lit := make(literal, list.Len()); + for i := 0; i < list.Len(); i++ { + lit[i] = list.At(i).([]byte); + } + + return lit; +} + + +func (p *parser) parseField() expr { + var fname string; + switch p.tok { + case token.ILLEGAL: + if string(p.lit) != "@" { + return nil; + } + fname = "@"; + p.next(); + case token.MUL: + fname = "*"; + p.next(); + case token.IDENT: + fname = p.parseIdentifier(); + default: + return nil; + } + + var ruleName string; + if p.tok == token.COLON { + p.next(); + var _ bool; + ruleName, _ = p.parseRuleName(); + } + + return &field{fname, ruleName}; +} + + +func (p *parser) parseExpression() expr + +func (p *parser) parseOperand() (x expr) { + switch p.tok { + case token.STRING: + x = p.parseLiteral(); + + case token.LPAREN: + p.next(); + x = p.parseExpression(); + if p.tok == token.SHR { + p.next(); + x = &group{x, p.parseExpression()}; + } + p.expect(token.RPAREN); + + case token.LBRACK: + p.next(); + x = &option{p.parseExpression()}; + p.expect(token.RBRACK); + + case token.LBRACE: + p.next(); + x = p.parseExpression(); + var div expr; + if p.tok == token.QUO { + p.next(); + div = p.parseExpression(); + } + x = &repetition{x, div}; + p.expect(token.RBRACE); + + default: + x = p.parseField(); // may be nil + } + + return x; +} + + +func (p *parser) parseSequence() expr { + var list vector.Vector; + list.Init(0); + + for x := p.parseOperand(); x != nil; x = p.parseOperand() { + list.Push(x); + } + + // no need for a sequence if list.Len() < 2 + switch list.Len() { + case 0: return nil; + case 1: return list.At(0).(expr); + } + + // convert list into a sequence + seq := make(sequence, list.Len()); + for i := 0; i < list.Len(); i++ { + seq[i] = list.At(i).(expr); + } + return seq; +} + + +func (p *parser) parseExpression() expr { + var list vector.Vector; + list.Init(0); + + for { + x := p.parseSequence(); + if x != nil { + list.Push(x); + } + if p.tok != token.OR { + break; + } + p.next(); + } + + // no need for an alternatives if list.Len() < 2 + switch list.Len() { + case 0: return nil; + case 1: return list.At(0).(expr); + } + + // convert list into a alternatives + alt := make(alternatives, list.Len()); + for i := 0; i < list.Len(); i++ { + alt[i] = list.At(i).(expr); + } + return alt; +} + + +func (p *parser) parseFormat() { + for p.tok != token.EOF { + pos := p.pos; + + name, isIdent := p.parseRuleName(); + switch p.tok { + case token.STRING: + // package declaration + importPath := p.parseString(); + + // add package declaration + if !isIdent { + p.Error(pos, "illegal package name: " + name); + } else if _, found := p.packs[name]; !found { + p.packs[name] = importPath; + } else { + p.Error(pos, "package already declared: " + name); + } + + case token.ASSIGN: + // format rule + p.next(); + x := p.parseExpression(); + + // add rule + if _, found := p.rules[name]; !found { + p.rules[name] = x; + } else { + p.Error(pos, "format rule already declared: " + name); + } + + default: + p.errorExpected(p.pos, "package declaration or format rule"); + p.next(); // make progress in any case + } + + if p.tok == token.SEMICOLON { + p.next(); + } else { + break; + } + } + p.expect(token.EOF); +} + + +func remap(p *parser, name string) string { + i := strings.Index(name, "."); + if i >= 0 { + packageName, suffix := name[0 : i], name[i : len(name)]; + // lookup package + if importPath, found := p.packs[packageName]; found { + name = importPath + suffix; + } else { + var invalidPos token.Position; + p.Error(invalidPos, "package not declared: " + packageName); + } + } + return name; +} + + +// Parse parses a set of format productions from source src. Custom +// formatters may be provided via a map of formatter functions. If +// there are no errors, the result is a Format and the error is nil. +// Otherwise the format is nil and a non-empty ErrorList is returned. +// +func Parse(src []byte, fmap FormatterMap) (Format, os.Error) { + // parse source + var p parser; + p.init(src); + p.parseFormat(); + + // add custom formatters, if any + for name, form := range fmap { + name = remap(&p, name); + if t, found := p.rules[name]; !found { + p.rules[name] = &custom{name, form}; + } else { + var invalidPos token.Position; + p.Error(invalidPos, "formatter already declared: " + name); + } + } + + // convert errors list, if any + if p.errors.Len() > 0 { + errors := make(ErrorList, p.errors.Len()); + for i := 0; i < p.errors.Len(); i++ { + errors[i] = p.errors.At(i).(*Error); + } + return nil, errors; + } + + return p.rules, nil; +} |