diff options
Diffstat (limited to 'src/pkg/ebnf/parser.go')
| -rw-r--r-- | src/pkg/ebnf/parser.go | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/src/pkg/ebnf/parser.go b/src/pkg/ebnf/parser.go new file mode 100644 index 000000000..84905d5fe --- /dev/null +++ b/src/pkg/ebnf/parser.go @@ -0,0 +1,240 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ebnf + +import ( + "container/vector"; + "ebnf"; + "fmt"; + "go/scanner"; + "go/token"; + "os"; + "strconv"; + "strings"; + "unicode"; + "utf8"; +) + + +type parser struct { + errors vector.Vector; + scanner scanner.Scanner; + pos token.Position; // token position + tok token.Token; // one token look-ahead + lit []byte; // token literal +} + + +func (p *parser) next() { + p.pos, p.tok, p.lit = p.scanner.Scan(); + if p.tok.IsKeyword() { + // TODO Should keyword mapping always happen outside scanner? + // Or should there be a flag to scanner to enable keyword mapping? + p.tok = token.IDENT; + } +} + + +func (p *parser) init(src []byte) { + p.errors.Init(0); + p.scanner.Init(src, p, 0); + p.next(); // initializes pos, tok, lit +} + + +// The parser implements scanner.Error. +func (p *parser) Error(pos token.Position, msg string) { + // Do not collect errors that are on the same line as the previous + // error to reduce the number of spurious errors due to incorrect + // parser synchronization. + if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line { + p.errors.Push(&Error{pos, msg}); + } +} + + +func (p *parser) errorExpected(pos token.Position, msg string) { + msg = "expected " + msg; + if pos.Offset == p.pos.Offset { + // the error happened at the current position; + // make the error message more specific + msg += ", found '" + p.tok.String() + "'"; + if p.tok.IsLiteral() { + msg += " " + string(p.lit); + } + } + p.Error(pos, msg); +} + + +func (p *parser) expect(tok token.Token) token.Position { + pos := p.pos; + if p.tok != tok { + p.errorExpected(pos, "'" + tok.String() + "'"); + } + p.next(); // make progress in any case + return pos; +} + + +func (p *parser) parseIdentifier() *Name { + pos := p.pos; + name := string(p.lit); + p.expect(token.IDENT); + return &Name{pos, name}; +} + + +func (p *parser) parseToken() *Token { + pos := p.pos; + value := ""; + if p.tok == token.STRING { + var err os.Error; + value, err = strconv.Unquote(string(p.lit)); + // Unquote may fail with an error, but only if the scanner found + // an illegal string in the first place. In this case the error + // has already been reported. + p.next(); + } else { + p.expect(token.STRING); + } + return &Token{pos, value}; +} + + +func (p *parser) parseExpression() Expression + +func (p *parser) parseTerm() (x Expression) { + pos := p.pos; + + switch p.tok { + case token.IDENT: + x = p.parseIdentifier(); + + case token.STRING: + tok := p.parseToken(); + x = tok; + if p.tok == token.ELLIPSIS { + p.next(); + x = &Range{tok, p.parseToken()}; + } + + case token.LPAREN: + p.next(); + x = &Group{pos, p.parseExpression()}; + p.expect(token.RPAREN); + + case token.LBRACK: + p.next(); + x = &Option{pos, p.parseExpression()}; + p.expect(token.RBRACK); + + case token.LBRACE: + p.next(); + x = &Repetition{pos, p.parseExpression()}; + p.expect(token.RBRACE); + } + + return x; +} + + +func (p *parser) parseSequence() Expression { + var list vector.Vector; + list.Init(0); + + for x := p.parseTerm(); x != nil; x = p.parseTerm() { + list.Push(x); + } + + // no need for a sequence if list.Len() < 2 + switch list.Len() { + case 0: + return nil; + case 1: + return list.At(0).(Expression); + } + + // convert list into a sequence + seq := make(Sequence, list.Len()); + for i := 0; i < list.Len(); i++ { + seq[i] = list.At(i).(Expression); + } + return seq; +} + + +func (p *parser) parseExpression() Expression { + var list vector.Vector; + list.Init(0); + + for { + x := p.parseSequence(); + if x != nil { + list.Push(x); + } + if p.tok != token.OR { + break; + } + p.next(); + } + + // no need for an Alternative node if list.Len() < 2 + switch list.Len() { + case 0: + return nil; + case 1: + return list.At(0).(Expression); + } + + // convert list into an Alternative node + alt := make(Alternative, list.Len()); + for i := 0; i < list.Len(); i++ { + alt[i] = list.At(i).(Expression); + } + return alt; +} + + +func (p *parser) parseProduction() *Production { + name := p.parseIdentifier(); + p.expect(token.ASSIGN); + expr := p.parseExpression(); + p.expect(token.PERIOD); + return &Production{name, expr}; +} + + +func (p *parser) parse(src []byte) Grammar { + // initialize parser + p.errors.Init(0); + p.scanner.Init(src, p, 0); + p.next(); // initializes pos, tok, lit + + grammar := make(Grammar); + for p.tok != token.EOF { + prod := p.parseProduction(); + name := prod.Name.String; + if prev, found := grammar[name]; !found { + grammar[name] = prod; + } else { + p.Error(prod.Pos(), name + " declared already"); + } + } + + return grammar; +} + + +// Parse parses a set of EBNF productions from source src. +// It returns a set of productions. Errors are reported +// for incorrect syntax and if a production is declared +// more than once. +// +func Parse(src []byte) (Grammar, os.Error) { + var p parser; + grammar := p.parse(src); + return grammar, makeErrorList(&p.errors); +} |
