diff options
Diffstat (limited to 'src/pkg/ebnf/ebnf.go')
-rw-r--r-- | src/pkg/ebnf/ebnf.go | 245 |
1 files changed, 0 insertions, 245 deletions
diff --git a/src/pkg/ebnf/ebnf.go b/src/pkg/ebnf/ebnf.go deleted file mode 100644 index 69da11767..000000000 --- a/src/pkg/ebnf/ebnf.go +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package ebnf is a library for EBNF grammars. The input is text ([]byte) -// satisfying the following grammar (represented itself in EBNF): -// -// Production = name "=" [ Expression ] "." . -// Expression = Alternative { "|" Alternative } . -// Alternative = Term { Term } . -// Term = name | token [ "…" token ] | Group | Option | Repetition . -// Group = "(" Expression ")" . -// Option = "[" Expression "]" . -// Repetition = "{" Expression "}" . -// -// A name is a Go identifier, a token is a Go string, and comments -// and white space follow the same rules as for the Go language. -// Production names starting with an uppercase Unicode letter denote -// non-terminal productions (i.e., productions which allow white-space -// and comments between tokens); all other production names denote -// lexical productions. -// -package ebnf - -import ( - "go/scanner" - "go/token" - "os" - "unicode" - "utf8" -) - -// ---------------------------------------------------------------------------- -// Internal representation - -type ( - // An Expression node represents a production expression. - Expression interface { - // Pos is the position of the first character of the syntactic construct - Pos() token.Pos - } - - // An Alternative node represents a non-empty list of alternative expressions. - Alternative []Expression // x | y | z - - // A Sequence node represents a non-empty list of sequential expressions. - Sequence []Expression // x y z - - // A Name node represents a production name. - Name struct { - StringPos token.Pos - String string - } - - // A Token node represents a literal. - Token struct { - StringPos token.Pos - String string - } - - // A List node represents a range of characters. - Range struct { - Begin, End *Token // begin ... end - } - - // A Group node represents a grouped expression. - Group struct { - Lparen token.Pos - Body Expression // (body) - } - - // An Option node represents an optional expression. - Option struct { - Lbrack token.Pos - Body Expression // [body] - } - - // A Repetition node represents a repeated expression. - Repetition struct { - Lbrace token.Pos - Body Expression // {body} - } - - // A Bad node stands for pieces of source code that lead to a parse error. - Bad struct { - TokPos token.Pos - Error string // parser error message - } - - // A Production node represents an EBNF production. - Production struct { - Name *Name - Expr Expression - } - - // A Grammar is a set of EBNF productions. The map - // is indexed by production name. - // - Grammar map[string]*Production -) - -func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative -func (x Sequence) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Sequences -func (x *Name) Pos() token.Pos { return x.StringPos } -func (x *Token) Pos() token.Pos { return x.StringPos } -func (x *Range) Pos() token.Pos { return x.Begin.Pos() } -func (x *Group) Pos() token.Pos { return x.Lparen } -func (x *Option) Pos() token.Pos { return x.Lbrack } -func (x *Repetition) Pos() token.Pos { return x.Lbrace } -func (x *Bad) Pos() token.Pos { return x.TokPos } -func (x *Production) Pos() token.Pos { return x.Name.Pos() } - -// ---------------------------------------------------------------------------- -// Grammar verification - -func isLexical(name string) bool { - ch, _ := utf8.DecodeRuneInString(name) - return !unicode.IsUpper(ch) -} - -type verifier struct { - fset *token.FileSet - scanner.ErrorVector - worklist []*Production - reached Grammar // set of productions reached from (and including) the root production - grammar Grammar -} - -func (v *verifier) error(pos token.Pos, msg string) { - v.Error(v.fset.Position(pos), msg) -} - -func (v *verifier) push(prod *Production) { - name := prod.Name.String - if _, found := v.reached[name]; !found { - v.worklist = append(v.worklist, prod) - v.reached[name] = prod - } -} - -func (v *verifier) verifyChar(x *Token) int { - s := x.String - if utf8.RuneCountInString(s) != 1 { - v.error(x.Pos(), "single char expected, found "+s) - return 0 - } - ch, _ := utf8.DecodeRuneInString(s) - return ch -} - -func (v *verifier) verifyExpr(expr Expression, lexical bool) { - switch x := expr.(type) { - case nil: - // empty expression - case Alternative: - for _, e := range x { - v.verifyExpr(e, lexical) - } - case Sequence: - for _, e := range x { - v.verifyExpr(e, lexical) - } - case *Name: - // a production with this name must exist; - // add it to the worklist if not yet processed - if prod, found := v.grammar[x.String]; found { - v.push(prod) - } else { - v.error(x.Pos(), "missing production "+x.String) - } - // within a lexical production references - // to non-lexical productions are invalid - if lexical && !isLexical(x.String) { - v.error(x.Pos(), "reference to non-lexical production "+x.String) - } - case *Token: - // nothing to do for now - case *Range: - i := v.verifyChar(x.Begin) - j := v.verifyChar(x.End) - if i >= j { - v.error(x.Pos(), "decreasing character range") - } - case *Group: - v.verifyExpr(x.Body, lexical) - case *Option: - v.verifyExpr(x.Body, lexical) - case *Repetition: - v.verifyExpr(x.Body, lexical) - default: - panic("unreachable") - } -} - -func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) { - // find root production - root, found := grammar[start] - if !found { - // token.NoPos doesn't require a file set; - // ok to set v.fset only afterwards - v.error(token.NoPos, "no start production "+start) - return - } - - // initialize verifier - v.fset = fset - v.ErrorVector.Reset() - v.worklist = v.worklist[0:0] - v.reached = make(Grammar) - v.grammar = grammar - - // work through the worklist - v.push(root) - for { - n := len(v.worklist) - 1 - if n < 0 { - break - } - prod := v.worklist[n] - v.worklist = v.worklist[0:n] - v.verifyExpr(prod.Expr, isLexical(prod.Name.String)) - } - - // check if all productions were reached - if len(v.reached) < len(v.grammar) { - for name, prod := range v.grammar { - if _, found := v.reached[name]; !found { - v.error(prod.Pos(), name+" is unreachable") - } - } - } -} - -// Verify checks that: -// - all productions used are defined -// - all productions defined are used when beginning at start -// - lexical productions refer only to other lexical productions -// -// Position information is interpreted relative to the file set fset. -// -func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error { - var v verifier - v.verify(fset, grammar, start) - return v.GetError(scanner.Sorted) -} |