author     Rob Pike <r@golang.org>    2009-06-09 09:53:44 -0700
committer  Rob Pike <r@golang.org>    2009-06-09 09:53:44 -0700
commit     7249ea4df2b4f12a4e7ed446f270cea87e4ffd34 (patch)
tree       7032a11d0cac2ae4d3e90f7a189b575b5a50f848 /src/pkg/go
parent     acf6ef7a82b3fe61516a1bac4563706552bdf078 (diff)
mv src/lib to src/pkg
tests: all.bash passes, gobuild still works, godoc still works.
R=rsc
OCL=30096
CL=30102
Diffstat (limited to 'src/pkg/go'; all 14 files are new, mode -rw-r--r--)

 src/pkg/go/ast/Makefile            |   68
 src/pkg/go/ast/ast.go              |  772
 src/pkg/go/ast/format.go           |  123
 src/pkg/go/doc/Makefile            |   68
 src/pkg/go/doc/comment.go          |  310
 src/pkg/go/doc/doc.go              |  486
 src/pkg/go/parser/Makefile         |   60
 src/pkg/go/parser/parser.go        | 1975
 src/pkg/go/parser/parser_test.go   |   68
 src/pkg/go/scanner/Makefile        |   60
 src/pkg/go/scanner/scanner.go      |  501
 src/pkg/go/scanner/scanner_test.go |  276
 src/pkg/go/token/Makefile          |   60
 src/pkg/go/token/token.go          |  347

14 files changed, 5174 insertions(+), 0 deletions(-)
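
The five packages added here (go/ast, go/doc, go/parser, go/scanner, go/token) are direct ancestors of today's standard-library packages at the same import paths. For orientation, the sketch below shows what they are for, using the modern API rather than the one in this commit: the Do*/Visit visitor interfaces declared in ast.go were later replaced by ast.Walk and ast.Inspect, ast.Program became ast.File, and parsing now goes through a token.FileSet.

package main

import (
	"fmt"
	"go/ast"
	"go/parser"
	"go/token"
)

func main() {
	src := `package demo

// Add returns x + y.
func Add(x, y int) int { return x + y }
`
	// Parse one source file into an *ast.File (this commit's ast.Program).
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "demo.go", src, parser.ParseComments)
	if err != nil {
		panic(err)
	}
	// Walk the syntax tree and report every function declaration.
	ast.Inspect(f, func(n ast.Node) bool {
		if fn, ok := n.(*ast.FuncDecl); ok {
			fmt.Printf("%s: func %s\n", fset.Position(fn.Pos()), fn.Name.Name)
		}
		return true
	})
}

Run on the demo source, this prints demo.go:4:1: func Add; the same parse tree is what go/doc mines to extract package documentation.
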
diff --git a/src/pkg/go/ast/Makefile b/src/pkg/go/ast/Makefile new file mode 100644 index 000000000..1fd22ae71 --- /dev/null +++ b/src/pkg/go/ast/Makefile @@ -0,0 +1,68 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# DO NOT EDIT. Automatically generated by gobuild. +# gobuild -m >Makefile + +D=/go/ + +include $(GOROOT)/src/Make.$(GOARCH) +AR=gopack + +default: packages + +clean: + rm -rf *.[$(OS)] *.a [$(OS)].out _obj + +test: packages + gotest + +coverage: packages + gotest + 6cov -g `pwd` | grep -v '_test\.go:' + +%.$O: %.go + $(GC) -I_obj $*.go + +%.$O: %.c + $(CC) $*.c + +%.$O: %.s + $(AS) $*.s + +O1=\ + ast.$O\ + +O2=\ + format.$O\ + + +phases: a1 a2 +_obj$D/ast.a: phases + +a1: $(O1) + $(AR) grc _obj$D/ast.a ast.$O + rm -f $(O1) + +a2: $(O2) + $(AR) grc _obj$D/ast.a format.$O + rm -f $(O2) + + +newpkg: clean + mkdir -p _obj$D + $(AR) grc _obj$D/ast.a + +$(O1): newpkg +$(O2): a1 +$(O3): a2 + +nuke: clean + rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/ast.a + +packages: _obj$D/ast.a + +install: packages + test -d $(GOROOT)/pkg && mkdir -p $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D + cp _obj$D/ast.a $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/ast.a diff --git a/src/pkg/go/ast/ast.go b/src/pkg/go/ast/ast.go new file mode 100644 index 000000000..6cac8ea1a --- /dev/null +++ b/src/pkg/go/ast/ast.go @@ -0,0 +1,772 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The AST package declares the types used to represent +// syntax trees for Go source files. +// +package ast + +import ( + "go/token"; + "unicode"; + "utf8"; +) + + +// ---------------------------------------------------------------------------- +// Interfaces +// +// There are 3 main classes of nodes: Expressions and type nodes, +// statement nodes, and declaration nodes. The node names usually +// match the corresponding Go spec production names to which they +// correspond. The node fields correspond to the individual parts +// of the respective productions. +// +// All nodes contain position information marking the beginning of +// the corresponding source text segment; it is accessible via the +// Pos accessor method. Nodes may contain additional position info +// for language constructs where comments may be found between parts +// of the construct (typically any larger, parenthesized subpart). +// That position information is needed to properly position comments +// when printing the construct. + +// TODO: For comment positioning only the byte position and not +// a complete token.Position field is needed. May be able to trim +// node sizes a bit. + + +type ( + ExprVisitor interface; + StmtVisitor interface; + DeclVisitor interface; +) + + +// All expression nodes implement the Expr interface. +type Expr interface { + // For a (dynamic) node type X, calling Visit with an expression + // visitor v invokes the node-specific DoX function of the visitor. + // + Visit(v ExprVisitor); + + // Pos returns the (beginning) position of the expression. + Pos() token.Position; +} + + +// All statement nodes implement the Stmt interface. +type Stmt interface { + // For a (dynamic) node type X, calling Visit with a statement + // visitor v invokes the node-specific DoX function of the visitor. + // + Visit(v StmtVisitor); + + // Pos returns the (beginning) position of the statement. 
+ Pos() token.Position; +} + + +// All declaration nodes implement the Decl interface. +type Decl interface { + // For a (dynamic) node type X, calling Visit with a declaration + // visitor v invokes the node-specific DoX function of the visitor. + // + Visit(v DeclVisitor); + + // Pos returns the (beginning) position of the declaration. + Pos() token.Position; +} + + +// ---------------------------------------------------------------------------- +// Comments + +// A Comment node represents a single //-style or /*-style comment. +type Comment struct { + token.Position; // beginning position of the comment + Text []byte; // the comment text (without '\n' for //-style comments) + EndLine int; // the line where the comment ends +} + + +// A Comments node represents a sequence of single comments +// with no other tokens and no empty lines between. +// +type Comments []*Comment + + +// ---------------------------------------------------------------------------- +// Expressions and types + +// Support types. +type ( + Ident struct; + StringLit struct; + FuncType struct; + BlockStmt struct; + + // A Field represents a Field declaration list in a struct type, + // a method in an interface type, or a parameter/result declaration + // in a signature. + Field struct { + Doc Comments; // associated documentation; or nil + Names []*Ident; // field/method/parameter names; nil if anonymous field + Type Expr; // field/method/parameter type + Tag []*StringLit; // field tag; nil if no tag + }; +) + + +// An expression is represented by a tree consisting of one +// or more of the following concrete expression nodes. +// +type ( + // A BadExpr node is a placeholder for expressions containing + // syntax errors for which no correct expression nodes can be + // created. + // + BadExpr struct { + token.Position; // beginning position of bad expression + }; + + // An Ident node represents an identifier. + Ident struct { + token.Position; // identifier position + Value string; // identifier string (e.g. foobar) + }; + + // An Ellipsis node stands for the "..." type in a + // parameter list or the "..." length in an array type. + // + Ellipsis struct { + token.Position; // position of "..." + }; + + // An IntLit node represents an integer literal. + IntLit struct { + token.Position; // int literal position + Value []byte; // literal string; e.g. 42 or 0x7f + }; + + // A FloatLit node represents a floating-point literal. + FloatLit struct { + token.Position; // float literal position + Value []byte; // literal string; e.g. 3.14 or 1e-9 + }; + + // A CharLit node represents a character literal. + CharLit struct { + token.Position; // char literal position + Value []byte; // literal string, including quotes; e.g. 'a' or '\x7f' + }; + + // A StringLit node represents a string literal. + StringLit struct { + token.Position; // string literal position + Value []byte; // literal string, including quotes; e.g. "foo" or `\m\n\o` + }; + + // A StringList node represents a sequence of adjacent string literals. + // A single string literal (common case) is represented by a StringLit + // node; StringList nodes are used only if there are two or more string + // literals in a sequence. + // + StringList struct { + Strings []*StringLit; // list of strings, len(Strings) > 1 + }; + + // A FuncLit node represents a function literal. + FuncLit struct { + Type *FuncType; // function type + Body *BlockStmt; // function body + }; + + // A CompositeLit node represents a composite literal. 
+ // + CompositeLit struct { + Type Expr; // literal type + Lbrace token.Position; // position of "{" + Elts []Expr; // list of composite elements + Rbrace token.Position; // position of "}" + }; + + // A ParenExpr node represents a parenthesized expression. + ParenExpr struct { + token.Position; // position of "(" + X Expr; // parenthesized expression + Rparen token.Position; // position of ")" + }; + + // A SelectorExpr node represents an expression followed by a selector. + SelectorExpr struct { + X Expr; // expression + Sel *Ident; // field selector + }; + + // An IndexExpr node represents an expression followed by an index or slice. + IndexExpr struct { + X Expr; // expression + Index Expr; // index expression or beginning of slice range + End Expr; // end of slice range; or nil + }; + + // A TypeAssertExpr node represents an expression followed by a + // type assertion. + // + TypeAssertExpr struct { + X Expr; // expression + Type Expr; // asserted type + }; + + // A CallExpr node represents an expression followed by an argument list. + CallExpr struct { + Fun Expr; // function expression + Lparen token.Position; // position of "(" + Args []Expr; // function arguments + Rparen token.Position; // positions of ")" + }; + + // A StarExpr node represents an expression of the form "*" Expression. + // Semantically it could be a unary "*" expression, or a pointer type. + StarExpr struct { + token.Position; // position of "*" + X Expr; // operand + }; + + // A UnaryExpr node represents a unary expression. + // Unary "*" expressions are represented via StarExpr nodes. + // + UnaryExpr struct { + token.Position; // position of Op + Op token.Token; // operator + X Expr; // operand + }; + + // A BinaryExpr node represents a binary expression. + // + BinaryExpr struct { + X Expr; // left operand + OpPos token.Position; // position of Op + Op token.Token; // operator + Y Expr; // right operand + }; + + // A KeyValueExpr node represents (key : value) pairs + // in composite literals. + // + KeyValueExpr struct { + Key Expr; + Colon token.Position; // position of ":" + Value Expr; + }; +) + + +// The direction of a channel type is indicated by one +// of the following constants. +// +type ChanDir int +const ( + SEND ChanDir = 1 << iota; + RECV; +) + + +// A type is represented by a tree consisting of one +// or more of the following type-specific expression +// nodes. +// +type ( + // An ArrayType node represents an array or slice type. + ArrayType struct { + token.Position; // position of "[" + Len Expr; // Ellipsis node for [...]T array types, nil for slice types + Elt Expr; // element type + }; + + // A StructType node represents a struct type. + StructType struct { + token.Position; // position of "struct" keyword + Lbrace token.Position; // position of "{" + Fields []*Field; // list of field declarations; nil if forward declaration + Rbrace token.Position; // position of "}" + }; + + // Pointer types are represented via StarExpr nodes. + + // A FuncType node represents a function type. + FuncType struct { + token.Position; // position of "func" keyword + Params []*Field; // (incoming) parameters + Results []*Field; // (outgoing) results + }; + + // An InterfaceType node represents an interface type. + InterfaceType struct { + token.Position; // position of "interface" keyword + Lbrace token.Position; // position of "{" + Methods []*Field; // list of methods; nil if forward declaration + Rbrace token.Position; // position of "}" + }; + + // A MapType node represents a map type. 
+ MapType struct { + token.Position; // position of "map" keyword + Key Expr; + Value Expr; + }; + + // A ChanType node represents a channel type. + ChanType struct { + token.Position; // position of "chan" keyword or "<-" (whichever comes first) + Dir ChanDir; // channel direction + Value Expr; // value type + }; +) + + +// Pos() implementations for expression/type where the position +// corresponds to the position of a sub-node. +// +func (x *StringList) Pos() token.Position { return x.Strings[0].Pos(); } +func (x *FuncLit) Pos() token.Position { return x.Type.Pos(); } +func (x *CompositeLit) Pos() token.Position { return x.Type.Pos(); } +func (x *SelectorExpr) Pos() token.Position { return x.X.Pos(); } +func (x *IndexExpr) Pos() token.Position { return x.X.Pos(); } +func (x *TypeAssertExpr) Pos() token.Position { return x.X.Pos(); } +func (x *CallExpr) Pos() token.Position { return x.Fun.Pos(); } +func (x *BinaryExpr) Pos() token.Position { return x.X.Pos(); } +func (x *KeyValueExpr) Pos() token.Position { return x.Key.Pos(); } + + +// All expression/type nodes implement a Visit method which takes +// an ExprVisitor as argument. For a given node x of type X, and +// an implementation v of an ExprVisitor, calling x.Visit(v) will +// result in a call of v.DoX(x) (through a double-dispatch). +// +type ExprVisitor interface { + // Expressions + DoBadExpr(x *BadExpr); + DoIdent(x *Ident); + DoIntLit(x *IntLit); + DoFloatLit(x *FloatLit); + DoCharLit(x *CharLit); + DoStringLit(x *StringLit); + DoStringList(x *StringList); + DoFuncLit(x *FuncLit); + DoCompositeLit(x *CompositeLit); + DoParenExpr(x *ParenExpr); + DoSelectorExpr(x *SelectorExpr); + DoIndexExpr(x *IndexExpr); + DoTypeAssertExpr(x *TypeAssertExpr); + DoCallExpr(x *CallExpr); + DoStarExpr(x *StarExpr); + DoUnaryExpr(x *UnaryExpr); + DoBinaryExpr(x *BinaryExpr); + DoKeyValueExpr(x *KeyValueExpr); + + // Type expressions + DoEllipsis(x *Ellipsis); + DoArrayType(x *ArrayType); + DoStructType(x *StructType); + DoFuncType(x *FuncType); + DoInterfaceType(x *InterfaceType); + DoMapType(x *MapType); + DoChanType(x *ChanType); +} + + +// Visit() implementations for all expression/type nodes. 
+// +func (x *BadExpr) Visit(v ExprVisitor) { v.DoBadExpr(x); } +func (x *Ident) Visit(v ExprVisitor) { v.DoIdent(x); } +func (x *Ellipsis) Visit(v ExprVisitor) { v.DoEllipsis(x); } +func (x *IntLit) Visit(v ExprVisitor) { v.DoIntLit(x); } +func (x *FloatLit) Visit(v ExprVisitor) { v.DoFloatLit(x); } +func (x *CharLit) Visit(v ExprVisitor) { v.DoCharLit(x); } +func (x *StringLit) Visit(v ExprVisitor) { v.DoStringLit(x); } +func (x *StringList) Visit(v ExprVisitor) { v.DoStringList(x); } +func (x *FuncLit) Visit(v ExprVisitor) { v.DoFuncLit(x); } +func (x *CompositeLit) Visit(v ExprVisitor) { v.DoCompositeLit(x); } +func (x *ParenExpr) Visit(v ExprVisitor) { v.DoParenExpr(x); } +func (x *SelectorExpr) Visit(v ExprVisitor) { v.DoSelectorExpr(x); } +func (x *IndexExpr) Visit(v ExprVisitor) { v.DoIndexExpr(x); } +func (x *TypeAssertExpr) Visit(v ExprVisitor) { v.DoTypeAssertExpr(x); } +func (x *CallExpr) Visit(v ExprVisitor) { v.DoCallExpr(x); } +func (x *StarExpr) Visit(v ExprVisitor) { v.DoStarExpr(x); } +func (x *UnaryExpr) Visit(v ExprVisitor) { v.DoUnaryExpr(x); } +func (x *BinaryExpr) Visit(v ExprVisitor) { v.DoBinaryExpr(x); } +func (x *KeyValueExpr) Visit(v ExprVisitor) { v.DoKeyValueExpr(x); } + +func (x *ArrayType) Visit(v ExprVisitor) { v.DoArrayType(x); } +func (x *StructType) Visit(v ExprVisitor) { v.DoStructType(x); } +func (x *FuncType) Visit(v ExprVisitor) { v.DoFuncType(x); } +func (x *InterfaceType) Visit(v ExprVisitor) { v.DoInterfaceType(x); } +func (x *MapType) Visit(v ExprVisitor) { v.DoMapType(x); } +func (x *ChanType) Visit(v ExprVisitor) { v.DoChanType(x); } + + +// IsExported returns whether name is an exported Go symbol +// (i.e., whether it begins with an uppercase letter). +func IsExported(name string) bool { + ch, len := utf8.DecodeRuneInString(name); + return unicode.IsUpper(ch); +} + +// IsExported returns whether name is an exported Go symbol +// (i.e., whether it begins with an uppercase letter). +func (name *ast.Ident) IsExported() bool { + return IsExported(name.Value); +} + +func (name *ast.Ident) String() string { + return name.Value; +} + + +// ---------------------------------------------------------------------------- +// Statements + +// A statement is represented by a tree consisting of one +// or more of the following concrete statement nodes. +// +type ( + // A BadStmt node is a placeholder for statements containing + // syntax errors for which no correct statement nodes can be + // created. + // + BadStmt struct { + token.Position; // beginning position of bad statement + }; + + // A DeclStmt node represents a declaration in a statement list. + DeclStmt struct { + Decl Decl; + }; + + // An EmptyStmt node represents an empty statement. + // The "position" of the empty statement is the position + // of the immediately preceeding semicolon. + // + EmptyStmt struct { + token.Position; // position of preceeding ";" + }; + + // A LabeledStmt node represents a labeled statement. + LabeledStmt struct { + Label *Ident; + Stmt Stmt; + }; + + // An ExprStmt node represents a (stand-alone) expression + // in a statement list. + // + ExprStmt struct { + X Expr; // expression + }; + + // An IncDecStmt node represents an increment or decrement statement. + IncDecStmt struct { + X Expr; + Tok token.Token; // INC or DEC + }; + + // An AssignStmt node represents an assignment or + // a short variable declaration. 
+ AssignStmt struct { + Lhs []Expr; + TokPos token.Position; // position of Tok + Tok token.Token; // assignment token, DEFINE + Rhs []Expr; + }; + + // A GoStmt node represents a go statement. + GoStmt struct { + token.Position; // position of "go" keyword + Call *CallExpr; + }; + + // A DeferStmt node represents a defer statement. + DeferStmt struct { + token.Position; // position of "defer" keyword + Call *CallExpr; + }; + + // A ReturnStmt node represents a return statement. + ReturnStmt struct { + token.Position; // position of "return" keyword + Results []Expr; + }; + + // A BranchStmt node represents a break, continue, goto, + // or fallthrough statement. + // + BranchStmt struct { + token.Position; // position of Tok + Tok token.Token; // keyword token (BREAK, CONTINUE, GOTO, FALLTHROUGH) + Label *Ident; + }; + + // A BlockStmt node represents a braced statement list. + BlockStmt struct { + token.Position; // position of "{" + List []Stmt; + Rbrace token.Position; // position of "}" + }; + + // An IfStmt node represents an if statement. + IfStmt struct { + token.Position; // position of "if" keyword + Init Stmt; + Cond Expr; + Body *BlockStmt; + Else Stmt; + }; + + // A CaseClause represents a case of an expression switch statement. + CaseClause struct { + token.Position; // position of "case" or "default" keyword + Values []Expr; // nil means default case + Colon token.Position; // position of ":" + Body []Stmt; // statement list; or nil + }; + + // A SwitchStmt node represents an expression switch statement. + SwitchStmt struct { + token.Position; // position of "switch" keyword + Init Stmt; + Tag Expr; + Body *BlockStmt; // CaseClauses only + }; + + // A TypeCaseClause represents a case of a type switch statement. + TypeCaseClause struct { + token.Position; // position of "case" or "default" keyword + Type Expr; // nil means default case + Colon token.Position; // position of ":" + Body []Stmt; // statement list; or nil + }; + + // An TypeSwitchStmt node represents a type switch statement. + TypeSwitchStmt struct { + token.Position; // position of "switch" keyword + Init Stmt; + Assign Stmt; // x := y.(type) + Body *BlockStmt; // TypeCaseClauses only + }; + + // A CommClause node represents a case of a select statement. + CommClause struct { + token.Position; // position of "case" or "default" keyword + Tok token.Token; // ASSIGN or DEFINE (valid only if Lhs != nil) + Lhs, Rhs Expr; // Rhs == nil means default case + Colon token.Position; // position of ":" + Body []Stmt; // statement list; or nil + }; + + // An SelectStmt node represents a select statement. + SelectStmt struct { + token.Position; // position of "select" keyword + Body *BlockStmt; // CommClauses only + }; + + // A ForStmt represents a for statement. + ForStmt struct { + token.Position; // position of "for" keyword + Init Stmt; + Cond Expr; + Post Stmt; + Body *BlockStmt; + }; + + // A RangeStmt represents a for statement with a range clause. + RangeStmt struct { + token.Position; // position of "for" keyword + Key, Value Expr; // Value may be nil + TokPos token.Position; // position of Tok + Tok token.Token; // ASSIGN, DEFINE + X Expr; // value to range over + Body *BlockStmt; + }; +) + + +// Pos() implementations for statement nodes where the position +// corresponds to the position of a sub-node. 
+// +func (s *DeclStmt) Pos() token.Position { return s.Decl.Pos(); } +func (s *LabeledStmt) Pos() token.Position { return s.Label.Pos(); } +func (s *ExprStmt) Pos() token.Position { return s.X.Pos(); } +func (s *IncDecStmt) Pos() token.Position { return s.X.Pos(); } +func (s *AssignStmt) Pos() token.Position { return s.Lhs[0].Pos(); } + + +// All statement nodes implement a Visit method which takes +// a StmtVisitor as argument. For a given node x of type X, and +// an implementation v of a StmtVisitor, calling x.Visit(v) will +// result in a call of v.DoX(x) (through a double-dispatch). +// +type StmtVisitor interface { + DoBadStmt(s *BadStmt); + DoDeclStmt(s *DeclStmt); + DoEmptyStmt(s *EmptyStmt); + DoLabeledStmt(s *LabeledStmt); + DoExprStmt(s *ExprStmt); + DoIncDecStmt(s *IncDecStmt); + DoAssignStmt(s *AssignStmt); + DoGoStmt(s *GoStmt); + DoDeferStmt(s *DeferStmt); + DoReturnStmt(s *ReturnStmt); + DoBranchStmt(s *BranchStmt); + DoBlockStmt(s *BlockStmt); + DoIfStmt(s *IfStmt); + DoCaseClause(s *CaseClause); + DoSwitchStmt(s *SwitchStmt); + DoTypeCaseClause(s *TypeCaseClause); + DoTypeSwitchStmt(s *TypeSwitchStmt); + DoCommClause(s *CommClause); + DoSelectStmt(s *SelectStmt); + DoForStmt(s *ForStmt); + DoRangeStmt(s *RangeStmt); +} + + +// Visit() implementations for all statement nodes. +// +func (s *BadStmt) Visit(v StmtVisitor) { v.DoBadStmt(s); } +func (s *DeclStmt) Visit(v StmtVisitor) { v.DoDeclStmt(s); } +func (s *EmptyStmt) Visit(v StmtVisitor) { v.DoEmptyStmt(s); } +func (s *LabeledStmt) Visit(v StmtVisitor) { v.DoLabeledStmt(s); } +func (s *ExprStmt) Visit(v StmtVisitor) { v.DoExprStmt(s); } +func (s *IncDecStmt) Visit(v StmtVisitor) { v.DoIncDecStmt(s); } +func (s *AssignStmt) Visit(v StmtVisitor) { v.DoAssignStmt(s); } +func (s *GoStmt) Visit(v StmtVisitor) { v.DoGoStmt(s); } +func (s *DeferStmt) Visit(v StmtVisitor) { v.DoDeferStmt(s); } +func (s *ReturnStmt) Visit(v StmtVisitor) { v.DoReturnStmt(s); } +func (s *BranchStmt) Visit(v StmtVisitor) { v.DoBranchStmt(s); } +func (s *BlockStmt) Visit(v StmtVisitor) { v.DoBlockStmt(s); } +func (s *IfStmt) Visit(v StmtVisitor) { v.DoIfStmt(s); } +func (s *CaseClause) Visit(v StmtVisitor) { v.DoCaseClause(s); } +func (s *SwitchStmt) Visit(v StmtVisitor) { v.DoSwitchStmt(s); } +func (s *TypeCaseClause) Visit(v StmtVisitor) { v.DoTypeCaseClause(s); } +func (s *TypeSwitchStmt) Visit(v StmtVisitor) { v.DoTypeSwitchStmt(s); } +func (s *CommClause) Visit(v StmtVisitor) { v.DoCommClause(s); } +func (s *SelectStmt) Visit(v StmtVisitor) { v.DoSelectStmt(s); } +func (s *ForStmt) Visit(v StmtVisitor) { v.DoForStmt(s); } +func (s *RangeStmt) Visit(v StmtVisitor) { v.DoRangeStmt(s); } + + +// ---------------------------------------------------------------------------- +// Declarations + +// A Spec node represents a single (non-parenthesized) import, +// constant, type, or variable declaration. +// +type ( + // The Spec type stands for any of *ImportSpec, *ValueSpec, and *TypeSpec. + Spec interface {}; + + // An ImportSpec node represents a single package import. + ImportSpec struct { + Doc Comments; // associated documentation; or nil + Name *Ident; // local package name (including "."); or nil + Path []*StringLit; // package path + }; + + // A ValueSpec node represents a constant or variable declaration + // (ConstSpec or VarSpec production). 
+ ValueSpec struct { + Doc Comments; // associated documentation; or nil + Names []*Ident; + Type Expr; // value type; or nil + Values []Expr; + }; + + // A TypeSpec node represents a type declaration (TypeSpec production). + TypeSpec struct { + Doc Comments; // associated documentation; or nil + Name *Ident; // type name + Type Expr; + }; +) + + +// A declaration is represented by one of the following declaration nodes. +// +type ( + // A BadDecl node is a placeholder for declarations containing + // syntax errors for which no correct declaration nodes can be + // created. + // + BadDecl struct { + token.Position; // beginning position of bad declaration + }; + + // A GenDecl node (generic declaration node) represents an import, + // constant, type or variable declaration. A valid Lparen position + // (Lparen.Line > 0) indicates a parenthesized declaration. + // + // Relationship between Tok value and Specs element type: + // + // token.IMPORT *ImportSpec + // token.CONST *ValueSpec + // token.TYPE *TypeSpec + // token.VAR *ValueSpec + // + GenDecl struct { + Doc Comments; // associated documentation; or nil + token.Position; // position of Tok + Tok token.Token; // IMPORT, CONST, TYPE, VAR + Lparen token.Position; // position of '(', if any + Specs []Spec; + Rparen token.Position; // position of ')', if any + }; + + // A FuncDecl node represents a function declaration. + FuncDecl struct { + Doc Comments; // associated documentation; or nil + Recv *Field; // receiver (methods); or nil (functions) + Name *Ident; // function/method name + Type *FuncType; // position of Func keyword, parameters and results + Body *BlockStmt; // function body; or nil (forward declaration) + }; +) + + +// The position of a FuncDecl node is the position of its function type. +func (d *FuncDecl) Pos() token.Position { return d.Type.Pos(); } + + +// All declaration nodes implement a Visit method which takes +// a DeclVisitor as argument. For a given node x of type X, and +// an implementation v of a DeclVisitor, calling x.Visit(v) will +// result in a call of v.DoX(x) (through a double-dispatch). +// +type DeclVisitor interface { + DoBadDecl(d *BadDecl); + DoGenDecl(d *GenDecl); + DoFuncDecl(d *FuncDecl); +} + + +// Visit() implementations for all declaration nodes. +// +func (d *BadDecl) Visit(v DeclVisitor) { v.DoBadDecl(d); } +func (d *GenDecl) Visit(v DeclVisitor) { v.DoGenDecl(d); } +func (d *FuncDecl) Visit(v DeclVisitor) { v.DoFuncDecl(d); } + + +// ---------------------------------------------------------------------------- +// Programs + +// A Program node represents the root node of an AST +// for an entire source file. +// +type Program struct { + Doc Comments; // associated documentation; or nil + token.Position; // position of "package" keyword + Name *Ident; // package name + Decls []Decl; // top-level declarations + Comments []*Comment; // list of unassociated comments +} diff --git a/src/pkg/go/ast/format.go b/src/pkg/go/ast/format.go new file mode 100644 index 000000000..caeca19aa --- /dev/null +++ b/src/pkg/go/ast/format.go @@ -0,0 +1,123 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ast + +import ( + "datafmt"; + "go/ast"; + "go/token"; + "io"; + "os"; +) + + +// Format is a customized datafmt.Format for printing of ASTs. 
+type Format datafmt.Format; + + +// ---------------------------------------------------------------------------- +// Custom formatters + +// The AST-specific formatting state is maintained by a state variable. +type state struct { + // for now we have very little state + // TODO maintain list of unassociated comments + optSemi bool +} + + +func (s *state) Copy() datafmt.Environment { + copy := *s; + return © +} + + +func isValidPos(s *datafmt.State, value interface{}, ruleName string) bool { + pos := value.(token.Position); + return pos.IsValid(); +} + + +func isSend(s *datafmt.State, value interface{}, ruleName string) bool { + return value.(ast.ChanDir) & ast.SEND != 0; +} + + +func isRecv(s *datafmt.State, value interface{}, ruleName string) bool { + return value.(ast.ChanDir) & ast.RECV != 0; +} + + +func isMultiLineComment(s *datafmt.State, value interface{}, ruleName string) bool { + return value.([]byte)[1] == '*'; +} + + +func clearOptSemi(s *datafmt.State, value interface{}, ruleName string) bool { + s.Env().(*state).optSemi = false; + return true; +} + + +func setOptSemi(s *datafmt.State, value interface{}, ruleName string) bool { + s.Env().(*state).optSemi = true; + return true; +} + + +func optSemi(s *datafmt.State, value interface{}, ruleName string) bool { + if !s.Env().(*state).optSemi { + s.Write([]byte{';'}); + } + return true; +} + + +var fmap = datafmt.FormatterMap { + "isValidPos": isValidPos, + "isSend": isSend, + "isRecv": isRecv, + "isMultiLineComment": isMultiLineComment, + "/": clearOptSemi, + "clearOptSemi": clearOptSemi, + "setOptSemi": setOptSemi, + "optSemi": optSemi, +} + + +// ---------------------------------------------------------------------------- +// Printing + +// NewFormat parses a datafmt format specification from a file +// and adds AST-specific custom formatter rules. The result is +// the customized format or an os.Error, if any. +// +func NewFormat(filename string) (Format, os.Error) { + src, err := io.ReadFile(filename); + if err != nil { + return nil, err; + } + f, err := datafmt.Parse(src, fmap); + return Format(f), err; +} + + +// Fprint formats each AST node provided as argument according to the +// format f and writes to standard output. The result is the total number +// of bytes written and an os.Error, if any. +// +func (f Format) Fprint(w io.Writer, nodes ...) (int, os.Error) { + var s state; + return datafmt.Format(f).Fprint(w, &s, nodes); +} + + +// Fprint formats each AST node provided as argument according to the +// format f and writes to w. The result is the total number of bytes +// written and an os.Error, if any. +// +func (f Format) Print(nodes ...) (int, os.Error) { + return f.Fprint(os.Stdout, nodes); +} diff --git a/src/pkg/go/doc/Makefile b/src/pkg/go/doc/Makefile new file mode 100644 index 000000000..d7c6acaac --- /dev/null +++ b/src/pkg/go/doc/Makefile @@ -0,0 +1,68 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# DO NOT EDIT. Automatically generated by gobuild. 
+# gobuild -m >Makefile + +D=/go/ + +include $(GOROOT)/src/Make.$(GOARCH) +AR=gopack + +default: packages + +clean: + rm -rf *.[$(OS)] *.a [$(OS)].out _obj + +test: packages + gotest + +coverage: packages + gotest + 6cov -g `pwd` | grep -v '_test\.go:' + +%.$O: %.go + $(GC) -I_obj $*.go + +%.$O: %.c + $(CC) $*.c + +%.$O: %.s + $(AS) $*.s + +O1=\ + comment.$O\ + +O2=\ + doc.$O\ + + +phases: a1 a2 +_obj$D/doc.a: phases + +a1: $(O1) + $(AR) grc _obj$D/doc.a comment.$O + rm -f $(O1) + +a2: $(O2) + $(AR) grc _obj$D/doc.a doc.$O + rm -f $(O2) + + +newpkg: clean + mkdir -p _obj$D + $(AR) grc _obj$D/doc.a + +$(O1): newpkg +$(O2): a1 +$(O3): a2 + +nuke: clean + rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/doc.a + +packages: _obj$D/doc.a + +install: packages + test -d $(GOROOT)/pkg && mkdir -p $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D + cp _obj$D/doc.a $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/doc.a diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go new file mode 100644 index 000000000..19a65a227 --- /dev/null +++ b/src/pkg/go/doc/comment.go @@ -0,0 +1,310 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Godoc comment extraction and comment -> HTML formatting. + +package doc + +import ( + "fmt"; + "io"; + "once"; + "regexp"; + "strings"; + "template"; // for htmlEscape +) + +// Comment extraction + +var ( + comment_markers *regexp.Regexp; + trailing_whitespace *regexp.Regexp; + comment_junk *regexp.Regexp; +) + +func makeRex(s string) *regexp.Regexp { + re, err := regexp.Compile(s); + if err != nil { + panic("MakeRegexp ", s, " ", err.String()); + } + return re; +} + +// TODO(rsc): Cannot use var initialization for regexps, +// because Regexp constructor needs threads. +func setupRegexps() { + comment_markers = makeRex("^/(/|\\*) ?"); + trailing_whitespace = makeRex("[ \t\r]+$"); + comment_junk = makeRex("^[ \t]*(/\\*|\\*/)[ \t]*$"); +} + +// Aggregate comment text, without comment markers. +func commentText(comments []string) string { + once.Do(setupRegexps); + lines := make([]string, 0, 20); + for i, c := range comments { + // split on newlines + cl := strings.Split(c, "\n"); + + // walk lines, stripping comment markers + w := 0; + for j, l := range cl { + // remove /* and */ lines + if comment_junk.Match(l) { + continue; + } + + // strip trailing white space + m := trailing_whitespace.Execute(l); + if len(m) > 0 { + l = l[0 : m[1]]; + } + + // strip leading comment markers + m = comment_markers.Execute(l); + if len(m) > 0 { + l = l[m[1] : len(l)]; + } + + // throw away leading blank lines + if w == 0 && l == "" { + continue; + } + + cl[w] = l; + w++; + } + + // throw away trailing blank lines + for w > 0 && cl[w-1] == "" { + w--; + } + cl = cl[0 : w]; + + // add this comment to total list + // TODO: maybe separate with a single blank line + // if there is already a comment and len(cl) > 0? + for j, l := range cl { + n := len(lines); + if n+1 >= cap(lines) { + newlines := make([]string, n, 2*cap(lines)); + for k := range newlines { + newlines[k] = lines[k]; + } + lines = newlines; + } + lines = lines[0 : n+1]; + lines[n] = l; + } + } + + // add final "" entry to get trailing newline. + // loop always leaves room for one more. + n := len(lines); + lines = lines[0 : n+1]; + + return strings.Join(lines, "\n"); +} + +// Split bytes into lines. 
+func split(text []byte) [][]byte { + // count lines + n := 0; + last := 0; + for i, c := range text { + if c == '\n' { + last = i+1; + n++; + } + } + if last < len(text) { + n++; + } + + // split + out := make([][]byte, n); + last = 0; + n = 0; + for i, c := range text { + if c == '\n' { + out[n] = text[last : i+1]; + last = i+1; + n++; + } + } + if last < len(text) { + out[n] = text[last : len(text)]; + } + + return out; +} + + +var ( + ldquo = io.StringBytes("“"); + rdquo = io.StringBytes("”"); +) + +// Escape comment text for HTML. +// Also, turn `` into “ and '' into ”. +func commentEscape(w io.Writer, s []byte) { + last := 0; + for i := 0; i < len(s)-1; i++ { + if s[i] == s[i+1] && (s[i] == '`' || s[i] == '\'') { + template.HtmlEscape(w, s[last : i]); + last = i+2; + switch s[i] { + case '`': + w.Write(ldquo); + case '\'': + w.Write(rdquo); + } + i++; // loop will add one more + } + } + template.HtmlEscape(w, s[last : len(s)]); +} + + +var ( + html_p = io.StringBytes("<p>\n"); + html_endp = io.StringBytes("</p>\n"); + html_pre = io.StringBytes("<pre>"); + html_endpre = io.StringBytes("</pre>\n"); +) + + +func indentLen(s []byte) int { + i := 0; + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++; + } + return i; +} + + +func isBlank(s []byte) bool { + return len(s) == 0 || (len(s) == 1 && s[0] == '\n') +} + + +func commonPrefix(a, b []byte) []byte { + i := 0; + for i < len(a) && i < len(b) && a[i] == b[i] { + i++; + } + return a[0 : i]; +} + + +func unindent(block [][]byte) { + if len(block) == 0 { + return; + } + + // compute maximum common white prefix + prefix := block[0][0 : indentLen(block[0])]; + for i, line := range block { + if !isBlank(line) { + prefix = commonPrefix(prefix, line[0 : indentLen(line)]); + } + } + n := len(prefix); + + // remove + for i, line := range block { + if !isBlank(line) { + block[i] = line[n : len(line)]; + } + } +} + + +// Convert comment text to formatted HTML. +// The comment was prepared by DocReader, +// so it is known not to have leading, trailing blank lines +// nor to have trailing spaces at the end of lines. +// The comment markers have already been removed. +// +// Turn each run of multiple \n into </p><p> +// Turn each run of indented lines into <pre> without indent. +// +// TODO(rsc): I'd like to pass in an array of variable names []string +// and then italicize those strings when they appear as words. +func ToHtml(w io.Writer, s []byte) { + inpara := false; + + /* TODO(rsc): 6g cant generate code for these + close := func() { + if inpara { + w.Write(html_endp); + inpara = false; + } + }; + open := func() { + if !inpara { + w.Write(html_p); + inpara = true; + } + }; + */ + + lines := split(s); + unindent(lines); + for i := 0; i < len(lines); { + line := lines[i]; + if isBlank(line) { + // close paragraph + if inpara { + w.Write(html_endp); + inpara = false; + } + i++; + continue; + } + if indentLen(line) > 0 { + // close paragraph + if inpara { + w.Write(html_endp); + inpara = false; + } + + // count indented or blank lines + j := i+1; + for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { + j++; + } + // but not trailing blank lines + for j > i && isBlank(lines[j-1]) { + j--; + } + block := lines[i : j]; + i = j; + + unindent(block); + + // put those lines in a pre block. 
+ // they don't get the nice text formatting, + // just html escaping + w.Write(html_pre); + for k, line := range block { + template.HtmlEscape(w, line); + } + w.Write(html_endpre); + continue; + } + // open paragraph + if !inpara { + w.Write(html_p); + inpara = true; + } + commentEscape(w, lines[i]); + i++; + } + if inpara { + w.Write(html_endp); + inpara = false; + } +} + diff --git a/src/pkg/go/doc/doc.go b/src/pkg/go/doc/doc.go new file mode 100644 index 000000000..03872fd14 --- /dev/null +++ b/src/pkg/go/doc/doc.go @@ -0,0 +1,486 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package doc + +import ( + "container/vector"; + "fmt"; + "go/ast"; + "go/doc"; + "go/token"; + "io"; + "regexp"; + "sort"; + "strings"; +) + + +// ---------------------------------------------------------------------------- +// Elementary support + +func hasExportedNames(names []*ast.Ident) bool { + for i, name := range names { + if name.IsExported() { + return true; + } + } + return false; +} + + +func hasExportedSpecs(specs []ast.Spec) bool { + for i, s := range specs { + // only called for []astSpec lists of *ast.ValueSpec + return hasExportedNames(s.(*ast.ValueSpec).Names); + } + return false; +} + + +// ---------------------------------------------------------------------------- + +type typeDoc struct { + decl *ast.GenDecl; // len(decl.Specs) == 1, and the element type is *ast.TypeSpec + factories map[string] *ast.FuncDecl; + methods map[string] *ast.FuncDecl; +} + + +// DocReader accumulates documentation for a single package. +// +type DocReader struct { + name string; // package name + path string; // import path + doc ast.Comments; // package documentation, if any + consts *vector.Vector; // list of *ast.GenDecl + types map[string] *typeDoc; + vars *vector.Vector; // list of *ast.GenDecl + funcs map[string] *ast.FuncDecl; +} + + +// Init initializes a DocReader to collect package documentation +// for the package with the given package name and import path. 
+// +func (doc *DocReader) Init(pkg, imp string) { + doc.name = pkg; + doc.path = imp; + doc.consts = vector.New(0); + doc.types = make(map[string] *typeDoc); + doc.vars = vector.New(0); + doc.funcs = make(map[string] *ast.FuncDecl); +} + + +func baseTypeName(typ ast.Expr) string { + switch t := typ.(type) { + case *ast.Ident: + return string(t.Value); + case *ast.StarExpr: + return baseTypeName(t.X); + } + return ""; +} + + +func (doc *DocReader) lookupTypeDoc(typ ast.Expr) *typeDoc { + tdoc, found := doc.types[baseTypeName(typ)]; + if found { + return tdoc; + } + return nil; +} + + +func (doc *DocReader) addType(decl *ast.GenDecl) { + typ := decl.Specs[0].(*ast.TypeSpec); + name := typ.Name.Value; + tdoc := &typeDoc{decl, make(map[string] *ast.FuncDecl), make(map[string] *ast.FuncDecl)}; + doc.types[name] = tdoc; +} + + +func (doc *DocReader) addFunc(fun *ast.FuncDecl) { + name := fun.Name.Value; + + // determine if it should be associated with a type + var typ *typeDoc; + if fun.Recv != nil { + // method + // (all receiver types must be declared before they are used) + typ = doc.lookupTypeDoc(fun.Recv.Type); + if typ != nil { + // type found (i.e., exported) + typ.methods[name] = fun; + } + // if the type wasn't found, it wasn't exported + // TODO(gri): a non-exported type may still have exported functions + // determine what to do in that case + return; + } + + // perhaps a factory function + // determine result type, if any + if len(fun.Type.Results) >= 1 { + res := fun.Type.Results[0]; + if len(res.Names) <= 1 { + // exactly one (named or anonymous) result type + typ = doc.lookupTypeDoc(res.Type); + if typ != nil { + typ.factories[name] = fun; + return; + } + } + } + + // ordinary function + doc.funcs[name] = fun; +} + + +func (doc *DocReader) addDecl(decl ast.Decl) { + switch d := decl.(type) { + case *ast.GenDecl: + if len(d.Specs) > 0 { + switch d.Tok { + case token.IMPORT: + // ignore + case token.CONST: + // constants are always handled as a group + if hasExportedSpecs(d.Specs) { + doc.consts.Push(d); + } + case token.TYPE: + // types are handled individually + for i, spec := range d.Specs { + s := spec.(*ast.TypeSpec); + if s.Name.IsExported() { + // make a (fake) GenDecl node for this TypeSpec + // (we need to do this here - as opposed to just + // for printing - so we don't loose the GenDecl + // documentation) + var noPos token.Position; + doc.addType(&ast.GenDecl{d.Doc, d.Pos(), token.TYPE, noPos, []ast.Spec{s}, noPos}); + } + } + case token.VAR: + // variables are always handled as a group + if hasExportedSpecs(d.Specs) { + doc.vars.Push(d); + } + } + } + case *ast.FuncDecl: + if d.Name.IsExported() { + doc.addFunc(d); + } + } +} + + +// AddProgram adds the AST for a source file to the DocReader. +// Adding the same AST multiple times is a no-op. +// +func (doc *DocReader) AddProgram(prog *ast.Program) { + if doc.name != prog.Name.Value { + panic("package names don't match"); + } + + // add package documentation + // TODO(gri) what to do if there are multiple files? 
+ if prog.Doc != nil { + doc.doc = prog.Doc + } + + // add all exported declarations + for i, decl := range prog.Decls { + doc.addDecl(decl); + } +} + +// ---------------------------------------------------------------------------- +// Conversion to external representation + +func astComment(comments ast.Comments) string { + text := make([]string, len(comments)); + for i, c := range comments { + text[i] = string(c.Text); + } + return commentText(text); +} + +// ValueDoc is the documentation for a group of declared +// values, either vars or consts. +// +type ValueDoc struct { + Doc string; + Decl *ast.GenDecl; + order int; +} + +type sortValueDoc []*ValueDoc +func (p sortValueDoc) Len() int { return len(p); } +func (p sortValueDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } + + +func declName(d *ast.GenDecl) string { + if len(d.Specs) != 1 { + return "" + } + + switch v := d.Specs[0].(type) { + case *ast.ValueSpec: + return v.Names[0].Value; + case *ast.TypeSpec: + return v.Name.Value; + } + + return ""; +} + + +func (p sortValueDoc) Less(i, j int) bool { + // sort by name + // pull blocks (name = "") up to top + // in original order + if ni, nj := declName(p[i].Decl), declName(p[j].Decl); ni != nj { + return ni < nj; + } + return p[i].order < p[j].order; +} + + +func makeValueDocs(v *vector.Vector) []*ValueDoc { + d := make([]*ValueDoc, v.Len()); + for i := range d { + decl := v.At(i).(*ast.GenDecl); + d[i] = &ValueDoc{astComment(decl.Doc), decl, i}; + } + sort.Sort(sortValueDoc(d)); + return d; +} + + +// FuncDoc is the documentation for a func declaration, +// either a top-level function or a method function. +// +type FuncDoc struct { + Doc string; + Recv ast.Expr; // TODO(rsc): Would like string here + Name string; + Decl *ast.FuncDecl; +} + +type sortFuncDoc []*FuncDoc +func (p sortFuncDoc) Len() int { return len(p); } +func (p sortFuncDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } +func (p sortFuncDoc) Less(i, j int) bool { return p[i].Name < p[j].Name; } + + +func makeFuncDocs(m map[string] *ast.FuncDecl) []*FuncDoc { + d := make([]*FuncDoc, len(m)); + i := 0; + for name, f := range m { + doc := new(FuncDoc); + doc.Doc = astComment(f.Doc); + if f.Recv != nil { + doc.Recv = f.Recv.Type; + } + doc.Name = f.Name.Value; + doc.Decl = f; + d[i] = doc; + i++; + } + sort.Sort(sortFuncDoc(d)); + return d; +} + + +// TypeDoc is the documentation for a declared type. +// Factories is a sorted list of factory functions that return that type. +// Methods is a sorted list of method functions on that type. +type TypeDoc struct { + Doc string; + Type *ast.TypeSpec; + Factories []*FuncDoc; + Methods []*FuncDoc; + Decl *ast.GenDecl; + order int; +} + +type sortTypeDoc []*TypeDoc +func (p sortTypeDoc) Len() int { return len(p); } +func (p sortTypeDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } +func (p sortTypeDoc) Less(i, j int) bool { + // sort by name + // pull blocks (name = "") up to top + // in original order + if ni, nj := p[i].Type.Name.Value, p[j].Type.Name.Value; ni != nj { + return ni < nj; + } + return p[i].order < p[j].order; +} + + +// NOTE(rsc): This would appear not to be correct for type ( ) +// blocks, but the doc extractor above has split them into +// individual statements. 
+func makeTypeDocs(m map[string] *typeDoc) []*TypeDoc { + d := make([]*TypeDoc, len(m)); + i := 0; + for name, old := range m { + typespec := old.decl.Specs[0].(*ast.TypeSpec); + t := new(TypeDoc); + t.Doc = astComment(typespec.Doc); + t.Type = typespec; + t.Factories = makeFuncDocs(old.factories); + t.Methods = makeFuncDocs(old.methods); + t.Decl = old.decl; + t.order = i; + d[i] = t; + i++; + } + sort.Sort(sortTypeDoc(d)); + return d; +} + + +// PackageDoc is the documentation for an entire package. +// +type PackageDoc struct { + PackageName string; + ImportPath string; + Doc string; + Consts []*ValueDoc; + Types []*TypeDoc; + Vars []*ValueDoc; + Funcs []*FuncDoc; +} + + +// Doc returns the accumulated documentation for the package. +// +func (doc *DocReader) Doc() *PackageDoc { + p := new(PackageDoc); + p.PackageName = doc.name; + p.ImportPath = doc.path; + p.Doc = astComment(doc.doc); + p.Consts = makeValueDocs(doc.consts); + p.Vars = makeValueDocs(doc.vars); + p.Types = makeTypeDocs(doc.types); + p.Funcs = makeFuncDocs(doc.funcs); + return p; +} + + +// ---------------------------------------------------------------------------- +// Filtering by name + +// Does s look like a regular expression? +func isRegexp(s string) bool { + metachars := ".(|)*+?^$[]"; + for i, c := range s { + for j, m := range metachars { + if c == m { + return true + } + } + } + return false +} + + +func match(s string, a []string) bool { + for i, t := range a { + if isRegexp(t) { + if matched, err := regexp.Match(t, s); matched { + return true; + } + } + if s == t { + return true; + } + } + return false; +} + + +func matchDecl(d *ast.GenDecl, names []string) bool { + for i, d := range d.Specs { + switch v := d.(type) { + case *ast.ValueSpec: + for j, name := range v.Names { + if match(name.Value, names) { + return true; + } + } + case *ast.TypeSpec: + if match(v.Name.Value, names) { + return true; + } + } + } + return false; +} + + +func filterValueDocs(a []*ValueDoc, names []string) []*ValueDoc { + w := 0; + for i, vd := range a { + if matchDecl(vd.Decl, names) { + a[w] = vd; + w++; + } + } + return a[0 : w]; +} + + +func filterFuncDocs(a []*FuncDoc, names []string) []*FuncDoc { + w := 0; + for i, fd := range a { + if match(fd.Name, names) { + a[w] = fd; + w++; + } + } + return a[0 : w]; +} + + +func filterTypeDocs(a []*TypeDoc, names []string) []*TypeDoc { + w := 0; + for i, td := range a { + match := false; + if matchDecl(td.Decl, names) { + match = true; + } else { + // type name doesn't match, but we may have matching factories or methods + td.Factories = filterFuncDocs(td.Factories, names); + td.Methods = filterFuncDocs(td.Methods, names); + match = len(td.Factories) > 0 || len(td.Methods) > 0; + } + if match { + a[w] = td; + w++; + } + } + return a[0 : w]; +} + + +// Filter eliminates information from d that is not +// about one of the given names. +// TODO: Recognize "Type.Method" as a name. +// TODO(r): maybe precompile the regexps. +// +func (p *PackageDoc) Filter(names []string) { + p.Consts = filterValueDocs(p.Consts, names); + p.Vars = filterValueDocs(p.Vars, names); + p.Types = filterTypeDocs(p.Types, names); + p.Funcs = filterFuncDocs(p.Funcs, names); + p.Doc = ""; // don't show top-level package doc +} + diff --git a/src/pkg/go/parser/Makefile b/src/pkg/go/parser/Makefile new file mode 100644 index 000000000..08d83646f --- /dev/null +++ b/src/pkg/go/parser/Makefile @@ -0,0 +1,60 @@ +# Copyright 2009 The Go Authors. All rights reserved. 
+# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# DO NOT EDIT. Automatically generated by gobuild. +# gobuild -m >Makefile + +D=/go/ + +include $(GOROOT)/src/Make.$(GOARCH) +AR=gopack + +default: packages + +clean: + rm -rf *.[$(OS)] *.a [$(OS)].out _obj + +test: packages + gotest + +coverage: packages + gotest + 6cov -g `pwd` | grep -v '_test\.go:' + +%.$O: %.go + $(GC) -I_obj $*.go + +%.$O: %.c + $(CC) $*.c + +%.$O: %.s + $(AS) $*.s + +O1=\ + parser.$O\ + + +phases: a1 +_obj$D/parser.a: phases + +a1: $(O1) + $(AR) grc _obj$D/parser.a parser.$O + rm -f $(O1) + + +newpkg: clean + mkdir -p _obj$D + $(AR) grc _obj$D/parser.a + +$(O1): newpkg +$(O2): a1 + +nuke: clean + rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/parser.a + +packages: _obj$D/parser.a + +install: packages + test -d $(GOROOT)/pkg && mkdir -p $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D + cp _obj$D/parser.a $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/parser.a diff --git a/src/pkg/go/parser/parser.go b/src/pkg/go/parser/parser.go new file mode 100644 index 000000000..056868695 --- /dev/null +++ b/src/pkg/go/parser/parser.go @@ -0,0 +1,1975 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A parser for Go source text. The input is a stream of lexical tokens +// provided via the Scanner interface. The output is an abstract syntax +// tree (AST) representing the Go source. The parser is invoked by calling +// Parse. +// +package parser + +import ( + "container/vector"; + "fmt"; + "go/ast"; + "go/scanner"; + "go/token"; + "io"; + "os"; +) + + +// A parser error is represented by an Error node. The position Pos, if +// valid, points to the beginning of the offending token, and the error +// condition is described by Msg. +// +type Error struct { + Pos token.Position; + Msg string; +} + + +func (e *Error) String() string { + pos := ""; + if e.Pos.IsValid() { + pos = fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column); + } + return pos + e.Msg; +} + + +// Parser errors are returned as an ErrorList. +type ErrorList []*Error + + +// ErrorList implements the SortInterface. +func (p ErrorList) Len() int { return len(p); } +func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } +func (p ErrorList) Less(i, j int) bool { return p[i].Pos.Offset < p[j].Pos.Offset; } + + +func (p ErrorList) String() string { + switch len(p) { + case 0: return "unspecified error"; + case 1: return p[0].String(); + } + return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1); +} + + +type interval struct { + beg, end int; +} + + +// The parser structure holds the parser's internal state. 
+type parser struct { + errors vector.Vector; + scanner scanner.Scanner; + + // Tracing/debugging + mode uint; // parsing mode + trace bool; // == (mode & Trace != 0) + indent uint; // indentation used for tracing output + + // Comments + comments vector.Vector; // list of collected, unassociated comments + last_doc interval; // last comments interval of consecutive comments + + // The next token + pos token.Position; // token position + tok token.Token; // one token look-ahead + lit []byte; // token literal + + // Non-syntactic parser control + opt_semi bool; // true if semicolon separator is optional in statement list + expr_lev int; // < 0: in control clause, >= 0: in expression +}; + + +// noPos is used when there is no corresponding source position for a token +var noPos token.Position; + + +// ---------------------------------------------------------------------------- +// Parsing support + +func (p *parser) printTrace(a ...) { + const dots = + ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "; + const n = uint(len(dots)); + fmt.Printf("%5d:%3d: ", p.pos.Line, p.pos.Column); + i := 2*p.indent; + for ; i > n; i -= n { + fmt.Print(dots); + } + fmt.Print(dots[0 : i]); + fmt.Println(a); +} + + +func trace(p *parser, msg string) *parser { + p.printTrace(msg, "("); + p.indent++; + return p; +} + + +func un/*trace*/(p *parser) { + p.indent--; + p.printTrace(")"); +} + + +func (p *parser) next0() { + // Because of one-token look-ahead, print the previous token + // when tracing as it provides a more readable output. The + // very first token (p.pos.Line == 0) is not initialized (it + // is token.ILLEGAL), so don't print it . + if p.trace && p.pos.Line > 0 { + s := p.tok.String(); + switch { + case p.tok.IsLiteral(): + p.printTrace(s, string(p.lit)); + case p.tok.IsOperator(), p.tok.IsKeyword(): + p.printTrace("\"" + s + "\""); + default: + p.printTrace(s); + } + } + + p.pos, p.tok, p.lit = p.scanner.Scan(); + p.opt_semi = false; +} + + +// Collect a comment in the parser's comment list and return the line +// on which the comment ends. +// +func (p *parser) collectComment() int { + // For /*-style comments, the comment may end on a different line. + // Scan the comment for '\n' chars and adjust the end line accordingly. + // (Note that the position of the next token may be even further down + // as there may be more whitespace lines after the comment.) 
+ endline := p.pos.Line; + if p.lit[1] == '*' { + for _, b := range p.lit { + if b == '\n' { + endline++; + } + } + } + p.comments.Push(&ast.Comment{p.pos, p.lit, endline}); + p.next0(); + + return endline; +} + + +func (p *parser) getComments() interval { + // group adjacent comments, an empty line terminates a group + beg := p.comments.Len(); + endline := p.pos.Line; + for p.tok == token.COMMENT && endline+1 >= p.pos.Line { + endline = p.collectComment(); + } + end := p.comments.Len(); + return interval {beg, end}; +} + + +func (p *parser) getDoc() ast.Comments { + doc := p.last_doc; + n := doc.end - doc.beg; + + if n <= 0 || p.comments.At(doc.end - 1).(*ast.Comment).EndLine + 1 < p.pos.Line { + // no comments or empty line between last comment and current token; + // do not use as documentation + return nil; + } + + // found immediately adjacent comment interval; + // use as documentation + c := make(ast.Comments, n); + for i := 0; i < n; i++ { + c[i] = p.comments.At(doc.beg + i).(*ast.Comment); + } + + // remove comments from the general list + p.comments.Cut(doc.beg, doc.end); + + return c; +} + + +func (p *parser) next() { + p.next0(); + p.last_doc = interval{0, 0}; + for p.tok == token.COMMENT { + p.last_doc = p.getComments(); + } +} + + +// The parser implements scanner.Error. +func (p *parser) Error(pos token.Position, msg string) { + // Don't collect errors that are on the same line as the previous error + // in the hope to reduce the number of spurious errors due to incorrect + // parser synchronization. + if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line { + p.errors.Push(&Error{pos, msg}); + } +} + + +func (p *parser) error_expected(pos token.Position, msg string) { + msg = "expected " + msg; + if pos.Offset == p.pos.Offset { + // the error happened at the current position; + // make the error message more specific + msg += ", found '" + p.tok.String() + "'"; + if p.tok.IsLiteral() { + msg += " " + string(p.lit); + } + } + p.Error(pos, msg); +} + + +func (p *parser) expect(tok token.Token) token.Position { + pos := p.pos; + if p.tok != tok { + p.error_expected(pos, "'" + tok.String() + "'"); + } + p.next(); // make progress in any case + return pos; +} + + +// ---------------------------------------------------------------------------- +// Common productions + +func (p *parser) tryType() ast.Expr; +func (p *parser) parseStringList(x *ast.StringLit) []*ast.StringLit +func (p *parser) parseExpression() ast.Expr; +func (p *parser) parseStatement() ast.Stmt; +func (p *parser) parseDeclaration() ast.Decl; + + +func (p *parser) parseIdent() *ast.Ident { + if p.tok == token.IDENT { + x := &ast.Ident{p.pos, string(p.lit)}; + p.next(); + return x; + } + p.expect(token.IDENT); // use expect() error handling + return &ast.Ident{p.pos, ""}; +} + + +func (p *parser) parseIdentList(x ast.Expr) []*ast.Ident { + if p.trace { + defer un(trace(p, "IdentList")); + } + + list := vector.New(0); + if x == nil { + x = p.parseIdent(); + } + list.Push(x); + for p.tok == token.COMMA { + p.next(); + list.Push(p.parseIdent()); + } + + // convert vector + idents := make([]*ast.Ident, list.Len()); + for i := 0; i < list.Len(); i++ { + idents[i] = list.At(i).(*ast.Ident); + } + + return idents; +} + + +func (p *parser) parseExpressionList() []ast.Expr { + if p.trace { + defer un(trace(p, "ExpressionList")); + } + + list := vector.New(0); + list.Push(p.parseExpression()); + for p.tok == token.COMMA { + p.next(); + list.Push(p.parseExpression()); + } + + // convert list + exprs := 
make([]ast.Expr, list.Len()); + for i := 0; i < list.Len(); i++ { + exprs[i] = list.At(i).(ast.Expr); + } + + return exprs; +} + + +// ---------------------------------------------------------------------------- +// Types + +func (p *parser) parseType() ast.Expr { + if p.trace { + defer un(trace(p, "Type")); + } + + typ := p.tryType(); + + if typ == nil { + p.error_expected(p.pos, "type"); + p.next(); // make progress + return &ast.BadExpr{p.pos}; + } + + return typ; +} + + +func (p *parser) parseQualifiedIdent() ast.Expr { + if p.trace { + defer un(trace(p, "QualifiedIdent")); + } + + var x ast.Expr = p.parseIdent(); + if p.tok == token.PERIOD { + // first identifier is a package identifier + p.next(); + sel := p.parseIdent(); + x = &ast.SelectorExpr{x, sel}; + } + return x; +} + + +func (p *parser) parseTypeName() ast.Expr { + if p.trace { + defer un(trace(p, "TypeName")); + } + + return p.parseQualifiedIdent(); +} + + +func (p *parser) parseArrayType(ellipsis_ok bool) ast.Expr { + if p.trace { + defer un(trace(p, "ArrayType")); + } + + lbrack := p.expect(token.LBRACK); + var len ast.Expr; + if ellipsis_ok && p.tok == token.ELLIPSIS { + len = &ast.Ellipsis{p.pos}; + p.next(); + } else if p.tok != token.RBRACK { + len = p.parseExpression(); + } + p.expect(token.RBRACK); + elt := p.parseType(); + + return &ast.ArrayType{lbrack, len, elt}; +} + + +func (p *parser) makeIdentList(list *vector.Vector) []*ast.Ident { + idents := make([]*ast.Ident, list.Len()); + for i := 0; i < list.Len(); i++ { + ident, is_ident := list.At(i).(*ast.Ident); + if !is_ident { + pos := list.At(i).(ast.Expr).Pos(); + p.error_expected(pos, "identifier"); + idents[i] = &ast.Ident{pos, ""}; + } + idents[i] = ident; + } + return idents; +} + + +func (p *parser) parseFieldDecl() *ast.Field { + if p.trace { + defer un(trace(p, "FieldDecl")); + } + + doc := p.getDoc(); + + // a list of identifiers looks like a list of type names + list := vector.New(0); + for { + // TODO do not allow ()'s here + list.Push(p.parseType()); + if p.tok == token.COMMA { + p.next(); + } else { + break; + } + } + + // if we had a list of identifiers, it must be followed by a type + typ := p.tryType(); + + // optional tag + var tag []*ast.StringLit; + if p.tok == token.STRING { + tag = p.parseStringList(nil); + } + + // analyze case + var idents []*ast.Ident; + if typ != nil { + // IdentifierList Type + idents = p.makeIdentList(list); + } else { + // Type (anonymous field) + if list.Len() == 1 { + // TODO check that this looks like a type + typ = list.At(0).(ast.Expr); + } else { + p.error_expected(p.pos, "anonymous field"); + typ = &ast.BadExpr{p.pos}; + } + } + + return &ast.Field{doc, idents, typ, tag}; +} + + +func (p *parser) parseStructType() *ast.StructType { + if p.trace { + defer un(trace(p, "StructType")); + } + + pos := p.expect(token.STRUCT); + var lbrace, rbrace token.Position; + var fields []*ast.Field; + if p.tok == token.LBRACE { + lbrace = p.pos; + p.next(); + + list := vector.New(0); + for p.tok != token.RBRACE && p.tok != token.EOF { + list.Push(p.parseFieldDecl()); + if p.tok == token.SEMICOLON { + p.next(); + } else { + break; + } + } + if p.tok == token.SEMICOLON { + p.next(); + } + + rbrace = p.expect(token.RBRACE); + p.opt_semi = true; + + // convert vector + fields = make([]*ast.Field, list.Len()); + for i := list.Len() - 1; i >= 0; i-- { + fields[i] = list.At(i).(*ast.Field); + } + } + + return &ast.StructType{pos, lbrace, fields, rbrace}; +} + + +func (p *parser) parsePointerType() *ast.StarExpr { + if p.trace { + defer 
un(trace(p, "PointerType")); + } + + star := p.expect(token.MUL); + base := p.parseType(); + + return &ast.StarExpr{star, base}; +} + + +func (p *parser) tryParameterType(ellipsis_ok bool) ast.Expr { + if ellipsis_ok && p.tok == token.ELLIPSIS { + pos := p.pos; + p.next(); + if p.tok != token.RPAREN { + // "..." always must be at the very end of a parameter list + p.Error(pos, "expected type, found '...'"); + } + return &ast.Ellipsis{pos}; + } + return p.tryType(); +} + + +func (p *parser) parseParameterType(ellipsis_ok bool) ast.Expr { + typ := p.tryParameterType(ellipsis_ok); + if typ == nil { + p.error_expected(p.pos, "type"); + p.next(); // make progress + typ = &ast.BadExpr{p.pos}; + } + return typ; +} + + +func (p *parser) parseParameterDecl(ellipsis_ok bool) (*vector.Vector, ast.Expr) { + if p.trace { + defer un(trace(p, "ParameterDecl")); + } + + // a list of identifiers looks like a list of type names + list := vector.New(0); + for { + // TODO do not allow ()'s here + list.Push(p.parseParameterType(ellipsis_ok)); + if p.tok == token.COMMA { + p.next(); + } else { + break; + } + } + + // if we had a list of identifiers, it must be followed by a type + typ := p.tryParameterType(ellipsis_ok); + + return list, typ; +} + + +func (p *parser) parseParameterList(ellipsis_ok bool) []*ast.Field { + if p.trace { + defer un(trace(p, "ParameterList")); + } + + list, typ := p.parseParameterDecl(ellipsis_ok); + if typ != nil { + // IdentifierList Type + idents := p.makeIdentList(list); + list.Init(0); + list.Push(&ast.Field{nil, idents, typ, nil}); + + for p.tok == token.COMMA { + p.next(); + idents := p.parseIdentList(nil); + typ := p.parseParameterType(ellipsis_ok); + list.Push(&ast.Field{nil, idents, typ, nil}); + } + + } else { + // Type { "," Type } (anonymous parameters) + // convert list of types into list of *Param + for i := 0; i < list.Len(); i++ { + list.Set(i, &ast.Field{nil, nil, list.At(i).(ast.Expr), nil}); + } + } + + // convert list + params := make([]*ast.Field, list.Len()); + for i := 0; i < list.Len(); i++ { + params[i] = list.At(i).(*ast.Field); + } + + return params; +} + + +func (p *parser) parseParameters(ellipsis_ok bool) []*ast.Field { + if p.trace { + defer un(trace(p, "Parameters")); + } + + var params []*ast.Field; + p.expect(token.LPAREN); + if p.tok != token.RPAREN { + params = p.parseParameterList(ellipsis_ok); + } + p.expect(token.RPAREN); + + return params; +} + + +func (p *parser) parseResult() []*ast.Field { + if p.trace { + defer un(trace(p, "Result")); + } + + var results []*ast.Field; + if p.tok == token.LPAREN { + results = p.parseParameters(false); + } else if p.tok != token.FUNC { + typ := p.tryType(); + if typ != nil { + results = make([]*ast.Field, 1); + results[0] = &ast.Field{nil, nil, typ, nil}; + } + } + + return results; +} + + +func (p *parser) parseSignature() (params []*ast.Field, results []*ast.Field) { + if p.trace { + defer un(trace(p, "Signature")); + } + + params = p.parseParameters(true); + results = p.parseResult(); + + return params, results; +} + + +func (p *parser) parseFuncType() *ast.FuncType { + if p.trace { + defer un(trace(p, "FuncType")); + } + + pos := p.expect(token.FUNC); + params, results := p.parseSignature(); + + return &ast.FuncType{pos, params, results}; +} + + +func (p *parser) parseMethodSpec() *ast.Field { + if p.trace { + defer un(trace(p, "MethodSpec")); + } + + doc := p.getDoc(); + var idents []*ast.Ident; + var typ ast.Expr; + x := p.parseQualifiedIdent(); + if tmp, is_ident := x.(*ast.Ident); is_ident && (p.tok 
== token.COMMA || p.tok == token.LPAREN) { + // methods + idents = p.parseIdentList(x); + params, results := p.parseSignature(); + typ = &ast.FuncType{noPos, params, results}; + } else { + // embedded interface + typ = x; + } + + return &ast.Field{doc, idents, typ, nil}; +} + + +func (p *parser) parseInterfaceType() *ast.InterfaceType { + if p.trace { + defer un(trace(p, "InterfaceType")); + } + + pos := p.expect(token.INTERFACE); + var lbrace, rbrace token.Position; + var methods []*ast.Field; + if p.tok == token.LBRACE { + lbrace = p.pos; + p.next(); + + list := vector.New(0); + for p.tok == token.IDENT { + list.Push(p.parseMethodSpec()); + if p.tok != token.RBRACE { + p.expect(token.SEMICOLON); + } + } + + rbrace = p.expect(token.RBRACE); + p.opt_semi = true; + + // convert vector + methods = make([]*ast.Field, list.Len()); + for i := list.Len() - 1; i >= 0; i-- { + methods[i] = list.At(i).(*ast.Field); + } + } + + return &ast.InterfaceType{pos, lbrace, methods, rbrace}; +} + + +func (p *parser) parseMapType() *ast.MapType { + if p.trace { + defer un(trace(p, "MapType")); + } + + pos := p.expect(token.MAP); + p.expect(token.LBRACK); + key := p.parseType(); + p.expect(token.RBRACK); + value := p.parseType(); + + return &ast.MapType{pos, key, value}; +} + + +func (p *parser) parseChanType() *ast.ChanType { + if p.trace { + defer un(trace(p, "ChanType")); + } + + pos := p.pos; + dir := ast.SEND | ast.RECV; + if p.tok == token.CHAN { + p.next(); + if p.tok == token.ARROW { + p.next(); + dir = ast.SEND; + } + } else { + p.expect(token.ARROW); + p.expect(token.CHAN); + dir = ast.RECV; + } + value := p.parseType(); + + return &ast.ChanType{pos, dir, value}; +} + + +func (p *parser) tryRawType(ellipsis_ok bool) ast.Expr { + switch p.tok { + case token.IDENT: return p.parseTypeName(); + case token.LBRACK: return p.parseArrayType(ellipsis_ok); + case token.STRUCT: return p.parseStructType(); + case token.MUL: return p.parsePointerType(); + case token.FUNC: return p.parseFuncType(); + case token.INTERFACE: return p.parseInterfaceType(); + case token.MAP: return p.parseMapType(); + case token.CHAN, token.ARROW: return p.parseChanType(); + case token.LPAREN: + lparen := p.pos; + p.next(); + typ := p.parseType(); + rparen := p.expect(token.RPAREN); + return &ast.ParenExpr{lparen, typ, rparen}; + } + + // no type found + return nil; +} + + +func (p *parser) tryType() ast.Expr { + return p.tryRawType(false); +} + + +// ---------------------------------------------------------------------------- +// Blocks + +func makeStmtList(list *vector.Vector) []ast.Stmt { + stats := make([]ast.Stmt, list.Len()); + for i := 0; i < list.Len(); i++ { + stats[i] = list.At(i).(ast.Stmt); + } + return stats; +} + + +func (p *parser) parseStatementList() []ast.Stmt { + if p.trace { + defer un(trace(p, "StatementList")); + } + + list := vector.New(0); + expect_semi := false; + for p.tok != token.CASE && p.tok != token.DEFAULT && p.tok != token.RBRACE && p.tok != token.EOF { + if expect_semi { + p.expect(token.SEMICOLON); + expect_semi = false; + } + list.Push(p.parseStatement()); + if p.tok == token.SEMICOLON { + p.next(); + } else if p.opt_semi { + p.opt_semi = false; // "consume" optional semicolon + } else { + expect_semi = true; + } + } + + return makeStmtList(list); +} + + +func (p *parser) parseBlockStmt() *ast.BlockStmt { + if p.trace { + defer un(trace(p, "BlockStmt")); + } + + lbrace := p.expect(token.LBRACE); + list := p.parseStatementList(); + rbrace := p.expect(token.RBRACE); + p.opt_semi = true; + + return 
&ast.BlockStmt{lbrace, list, rbrace}; +} + + +// ---------------------------------------------------------------------------- +// Expressions + +func (p *parser) parseStringList(x *ast.StringLit) []*ast.StringLit { + if p.trace { + defer un(trace(p, "StringList")); + } + + list := vector.New(0); + if x != nil { + list.Push(x); + } + + for p.tok == token.STRING { + list.Push(&ast.StringLit{p.pos, p.lit}); + p.next(); + } + + // convert list + strings := make([]*ast.StringLit, list.Len()); + for i := 0; i < list.Len(); i++ { + strings[i] = list.At(i).(*ast.StringLit); + } + + return strings; +} + + +func (p *parser) parseFuncLit() ast.Expr { + if p.trace { + defer un(trace(p, "FuncLit")); + } + + typ := p.parseFuncType(); + p.expr_lev++; + body := p.parseBlockStmt(); + p.opt_semi = false; // function body requires separating ";" + p.expr_lev--; + + return &ast.FuncLit{typ, body}; +} + + +// parseOperand may return an expression or a raw type (incl. array +// types of the form [...]T. Callers must verify the result. +// +func (p *parser) parseOperand() ast.Expr { + if p.trace { + defer un(trace(p, "Operand")); + } + + switch p.tok { + case token.IDENT: + return p.parseIdent(); + + case token.INT: + x := &ast.IntLit{p.pos, p.lit}; + p.next(); + return x; + + case token.FLOAT: + x := &ast.FloatLit{p.pos, p.lit}; + p.next(); + return x; + + case token.CHAR: + x := &ast.CharLit{p.pos, p.lit}; + p.next(); + return x; + + case token.STRING: + x := &ast.StringLit{p.pos, p.lit}; + p.next(); + if p.tok == token.STRING { + return &ast.StringList{p.parseStringList(x)}; + } + return x; + + case token.LPAREN: + lparen := p.pos; + p.next(); + p.expr_lev++; + x := p.parseExpression(); + p.expr_lev--; + rparen := p.expect(token.RPAREN); + return &ast.ParenExpr{lparen, x, rparen}; + + case token.FUNC: + return p.parseFuncLit(); + + default: + t := p.tryRawType(true); // could be type for composite literal + if t != nil { + return t; + } + } + + p.error_expected(p.pos, "operand"); + p.next(); // make progress + return &ast.BadExpr{p.pos}; +} + + +func (p *parser) parseSelectorOrTypeAssertion(x ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "SelectorOrTypeAssertion")); + } + + p.expect(token.PERIOD); + if p.tok == token.IDENT { + // selector + sel := p.parseIdent(); + return &ast.SelectorExpr{x, sel}; + } + + // type assertion + p.expect(token.LPAREN); + var typ ast.Expr; + if p.tok == token.TYPE { + // special case for type switch + typ = &ast.Ident{p.pos, "type"}; + p.next(); + } else { + typ = p.parseType(); + } + p.expect(token.RPAREN); + + return &ast.TypeAssertExpr{x, typ}; +} + + +func (p *parser) parseIndex(x ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "Index")); + } + + p.expect(token.LBRACK); + p.expr_lev++; + begin := p.parseExpression(); + var end ast.Expr; + if p.tok == token.COLON { + p.next(); + end = p.parseExpression(); + } + p.expr_lev--; + p.expect(token.RBRACK); + + return &ast.IndexExpr{x, begin, end}; +} + + +func (p *parser) parseCallOrConversion(fun ast.Expr) *ast.CallExpr { + if p.trace { + defer un(trace(p, "CallOrConversion")); + } + + lparen := p.expect(token.LPAREN); + var args []ast.Expr; + if p.tok != token.RPAREN { + args = p.parseExpressionList(); + } + rparen := p.expect(token.RPAREN); + + return &ast.CallExpr{fun, lparen, args, rparen}; +} + + +func (p *parser) parseElement() ast.Expr { + if p.trace { + defer un(trace(p, "Element")); + } + + x := p.parseExpression(); + if p.tok == token.COLON { + colon := p.pos; + p.next(); + x = &ast.KeyValueExpr{x, 
colon, p.parseExpression()};
+	}
+
+	return x;
+}
+
+
+func (p *parser) parseElementList() []ast.Expr {
+	if p.trace {
+		defer un(trace(p, "ElementList"));
+	}
+
+	list := vector.New(0);
+	for p.tok != token.RBRACE && p.tok != token.EOF {
+		list.Push(p.parseElement());
+		if p.tok == token.COMMA {
+			p.next();
+		} else {
+			break;
+		}
+	}
+
+	// convert list
+	elts := make([]ast.Expr, list.Len());
+	for i := 0; i < list.Len(); i++ {
+		elts[i] = list.At(i).(ast.Expr);
+	}
+
+	return elts;
+}
+
+
+func (p *parser) parseCompositeLit(typ ast.Expr) ast.Expr {
+	if p.trace {
+		defer un(trace(p, "CompositeLit"));
+	}
+
+	lbrace := p.expect(token.LBRACE);
+	var elts []ast.Expr;
+	if p.tok != token.RBRACE {
+		elts = p.parseElementList();
+	}
+	rbrace := p.expect(token.RBRACE);
+	return &ast.CompositeLit{typ, lbrace, elts, rbrace};
+}
+
+
+// TODO Consider different approach to checking syntax after parsing:
+//      Provide arguments (a set of flags) to parsing functions
+//      restricting what they are supposed to accept depending
+//      on context.
+
+// checkExpr checks that x is an expression (and not a type).
+func (p *parser) checkExpr(x ast.Expr) ast.Expr {
+	// TODO should provide predicate in AST nodes
+	switch t := x.(type) {
+	case *ast.BadExpr:
+	case *ast.Ident:
+	case *ast.IntLit:
+	case *ast.FloatLit:
+	case *ast.CharLit:
+	case *ast.StringLit:
+	case *ast.StringList:
+	case *ast.FuncLit:
+	case *ast.CompositeLit:
+	case *ast.ParenExpr:
+	case *ast.SelectorExpr:
+	case *ast.IndexExpr:
+	case *ast.TypeAssertExpr:
+	case *ast.CallExpr:
+	case *ast.StarExpr:
+	case *ast.UnaryExpr:
+		if t.Op == token.RANGE {
+			// the range operator is only allowed at the top of a for statement
+			p.error_expected(x.Pos(), "expression");
+			x = &ast.BadExpr{x.Pos()};
+		}
+	case *ast.BinaryExpr:
+	default:
+		// all other nodes are not proper expressions
+		p.error_expected(x.Pos(), "expression");
+		x = &ast.BadExpr{x.Pos()};
+	}
+	return x;
+}
+
+
+// isTypeName returns true iff x is a type name.
+func isTypeName(x ast.Expr) bool {
+	// TODO should provide predicate in AST nodes
+	switch t := x.(type) {
+	case *ast.BadExpr:
+	case *ast.Ident:
+	case *ast.ParenExpr: return isTypeName(t.X);  // TODO should (TypeName) be illegal?
+	case *ast.SelectorExpr: return isTypeName(t.X);
+	default: return false;  // all other nodes are not type names
+	}
+	return true;
+}
+
+
+// isCompositeLitType returns true iff x is a legal composite literal type.
+func isCompositeLitType(x ast.Expr) bool {
+	// TODO should provide predicate in AST nodes
+	switch t := x.(type) {
+	case *ast.BadExpr:
+	case *ast.Ident:
+	case *ast.ParenExpr: return isCompositeLitType(t.X);
+	case *ast.SelectorExpr: return isTypeName(t.X);
+	case *ast.ArrayType:
+	case *ast.StructType:
+	case *ast.MapType:
+	default: return false;  // all other nodes are not legal composite literal types
+	}
+	return true;
+}
+
+
+// checkExprOrType checks that x is an expression or a type
+// (and not a raw type such as [...]T).
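+// For example (sketch): in "a := [...]int{1, 2, 3}" the operand parser
+// first yields the raw type "[...]int", which becomes an expression only
+// once the composite literal braces are attached; a bare "[...]int" that
+// still reaches checkExprOrType is rejected by the *ast.ArrayType case
+// below ("expected array length, found '...'").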
+// +func (p *parser) checkExprOrType(x ast.Expr) ast.Expr { + // TODO should provide predicate in AST nodes + switch t := x.(type) { + case *ast.UnaryExpr: + if t.Op == token.RANGE { + // the range operator is only allowed at the top of a for statement + p.error_expected(x.Pos(), "expression"); + x = &ast.BadExpr{x.Pos()}; + } + case *ast.ArrayType: + if len, is_ellipsis := t.Len.(*ast.Ellipsis); is_ellipsis { + p.Error(len.Pos(), "expected array length, found '...'"); + x = &ast.BadExpr{x.Pos()}; + } + } + + // all other nodes are expressions or types + return x; +} + + +func (p *parser) parsePrimaryExpr() ast.Expr { + if p.trace { + defer un(trace(p, "PrimaryExpr")); + } + + x := p.parseOperand(); +L: for { + switch p.tok { + case token.PERIOD: x = p.parseSelectorOrTypeAssertion(p.checkExpr(x)); + case token.LBRACK: x = p.parseIndex(p.checkExpr(x)); + case token.LPAREN: x = p.parseCallOrConversion(p.checkExprOrType(x)); + case token.LBRACE: + if isCompositeLitType(x) && (p.expr_lev >= 0 || !isTypeName(x)) { + x = p.parseCompositeLit(x); + } else { + break L; + } + default: + break L; + } + } + + return p.checkExprOrType(x); +} + + +func (p *parser) parseUnaryExpr() ast.Expr { + if p.trace { + defer un(trace(p, "UnaryExpr")); + } + + switch p.tok { + case token.ADD, token.SUB, token.NOT, token.XOR, token.ARROW, token.AND, token.RANGE: + pos, op := p.pos, p.tok; + p.next(); + x := p.parseUnaryExpr(); + return &ast.UnaryExpr{pos, op, p.checkExpr(x)}; + + case token.MUL: + // unary "*" expression or pointer type + pos := p.pos; + p.next(); + x := p.parseUnaryExpr(); + return &ast.StarExpr{pos, p.checkExprOrType(x)}; + } + + return p.parsePrimaryExpr(); +} + + +func (p *parser) parseBinaryExpr(prec1 int) ast.Expr { + if p.trace { + defer un(trace(p, "BinaryExpr")); + } + + x := p.parseUnaryExpr(); + for prec := p.tok.Precedence(); prec >= prec1; prec-- { + for p.tok.Precedence() == prec { + pos, op := p.pos, p.tok; + p.next(); + y := p.parseBinaryExpr(prec + 1); + x = &ast.BinaryExpr{p.checkExpr(x), pos, op, p.checkExpr(y)}; + } + } + + return x; +} + + +func (p *parser) parseExpression() ast.Expr { + if p.trace { + defer un(trace(p, "Expression")); + } + + return p.parseBinaryExpr(token.LowestPrec + 1); +} + + +// ---------------------------------------------------------------------------- +// Statements + + +func (p *parser) parseSimpleStmt(label_ok bool) ast.Stmt { + if p.trace { + defer un(trace(p, "SimpleStmt")); + } + + x := p.parseExpressionList(); + + switch p.tok { + case token.COLON: + // labeled statement + p.next(); + if label_ok && len(x) == 1 { + if label, is_ident := x[0].(*ast.Ident); is_ident { + return &ast.LabeledStmt{label, p.parseStatement()}; + } + } + p.Error(x[0].Pos(), "illegal label declaration"); + return &ast.BadStmt{x[0].Pos()}; + + case + token.DEFINE, token.ASSIGN, token.ADD_ASSIGN, + token.SUB_ASSIGN, token.MUL_ASSIGN, token.QUO_ASSIGN, + token.REM_ASSIGN, token.AND_ASSIGN, token.OR_ASSIGN, + token.XOR_ASSIGN, token.SHL_ASSIGN, token.SHR_ASSIGN, token.AND_NOT_ASSIGN: + // assignment statement + pos, tok := p.pos, p.tok; + p.next(); + y := p.parseExpressionList(); + if len(x) > 1 && len(y) > 1 && len(x) != len(y) { + p.Error(x[0].Pos(), "arity of lhs doesn't match rhs"); + } + return &ast.AssignStmt{x, pos, tok, y}; + } + + if len(x) > 1 { + p.Error(x[0].Pos(), "only one expression allowed"); + // continue with first expression + } + + if p.tok == token.INC || p.tok == token.DEC { + // increment or decrement + s := &ast.IncDecStmt{x[0], p.tok}; + p.next(); // 
consume "++" or "--" + return s; + } + + // expression + return &ast.ExprStmt{x[0]}; +} + + +func (p *parser) parseCallExpr() *ast.CallExpr { + x := p.parseExpression(); + if call, is_call := x.(*ast.CallExpr); is_call { + return call; + } + p.error_expected(x.Pos(), "function/method call"); + return nil; +} + + +func (p *parser) parseGoStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "GoStmt")); + } + + pos := p.expect(token.GO); + call := p.parseCallExpr(); + if call != nil { + return &ast.GoStmt{pos, call}; + } + return &ast.BadStmt{pos}; +} + + +func (p *parser) parseDeferStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "DeferStmt")); + } + + pos := p.expect(token.DEFER); + call := p.parseCallExpr(); + if call != nil { + return &ast.DeferStmt{pos, call}; + } + return &ast.BadStmt{pos}; +} + + +func (p *parser) parseReturnStmt() *ast.ReturnStmt { + if p.trace { + defer un(trace(p, "ReturnStmt")); + } + + pos := p.pos; + p.expect(token.RETURN); + var x []ast.Expr; + if p.tok != token.SEMICOLON && p.tok != token.RBRACE { + x = p.parseExpressionList(); + } + + return &ast.ReturnStmt{pos, x}; +} + + +func (p *parser) parseBranchStmt(tok token.Token) *ast.BranchStmt { + if p.trace { + defer un(trace(p, "BranchStmt")); + } + + s := &ast.BranchStmt{p.pos, tok, nil}; + p.expect(tok); + if tok != token.FALLTHROUGH && p.tok == token.IDENT { + s.Label = p.parseIdent(); + } + + return s; +} + + +func (p *parser) isExpr(s ast.Stmt) bool { + if s == nil { + return true; + } + dummy, is_expr := s.(*ast.ExprStmt); + return is_expr; +} + + +func (p *parser) makeExpr(s ast.Stmt) ast.Expr { + if s == nil { + return nil; + } + if es, is_expr := s.(*ast.ExprStmt); is_expr { + return p.checkExpr(es.X); + } + p.Error(s.Pos(), "expected condition, found simple statement"); + return &ast.BadExpr{s.Pos()}; +} + + +func (p *parser) parseControlClause(isForStmt bool) (s1, s2, s3 ast.Stmt) { + if p.tok != token.LBRACE { + prev_lev := p.expr_lev; + p.expr_lev = -1; + + if p.tok != token.SEMICOLON { + s1 = p.parseSimpleStmt(false); + } + if p.tok == token.SEMICOLON { + p.next(); + if p.tok != token.LBRACE && p.tok != token.SEMICOLON { + s2 = p.parseSimpleStmt(false); + } + if isForStmt { + // for statements have a 3rd section + p.expect(token.SEMICOLON); + if p.tok != token.LBRACE { + s3 = p.parseSimpleStmt(false); + } + } + } else { + s1, s2 = nil, s1; + } + + p.expr_lev = prev_lev; + } + + return s1, s2, s3; +} + + +func (p *parser) parseIfStmt() *ast.IfStmt { + if p.trace { + defer un(trace(p, "IfStmt")); + } + + pos := p.expect(token.IF); + s1, s2, dummy := p.parseControlClause(false); + body := p.parseBlockStmt(); + var else_ ast.Stmt; + if p.tok == token.ELSE { + p.next(); + else_ = p.parseStatement(); + } + + return &ast.IfStmt{pos, s1, p.makeExpr(s2), body, else_}; +} + + +func (p *parser) parseCaseClause() *ast.CaseClause { + if p.trace { + defer un(trace(p, "CaseClause")); + } + + // SwitchCase + pos := p.pos; + var x []ast.Expr; + if p.tok == token.CASE { + p.next(); + x = p.parseExpressionList(); + } else { + p.expect(token.DEFAULT); + } + + colon := p.expect(token.COLON); + body := p.parseStatementList(); + + return &ast.CaseClause{pos, x, colon, body}; +} + + +func (p *parser) parseTypeCaseClause() *ast.TypeCaseClause { + if p.trace { + defer un(trace(p, "CaseClause")); + } + + // TypeSwitchCase + pos := p.pos; + var typ ast.Expr; + if p.tok == token.CASE { + p.next(); + typ = p.parseType(); + } else { + p.expect(token.DEFAULT); + } + + colon := p.expect(token.COLON); + body := 
p.parseStatementList(); + + return &ast.TypeCaseClause{pos, typ, colon, body}; +} + + +func (p *parser) parseSwitchStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "SwitchStmt")); + } + + pos := p.expect(token.SWITCH); + s1, s2, dummy := p.parseControlClause(false); + + if p.isExpr(s2) { + // expression switch + lbrace := p.expect(token.LBRACE); + cases := vector.New(0); + for p.tok == token.CASE || p.tok == token.DEFAULT { + cases.Push(p.parseCaseClause()); + } + rbrace := p.expect(token.RBRACE); + p.opt_semi = true; + body := &ast.BlockStmt{lbrace, makeStmtList(cases), rbrace}; + return &ast.SwitchStmt{pos, s1, p.makeExpr(s2), body}; + } + + // type switch + // TODO do all the checks! + lbrace := p.expect(token.LBRACE); + cases := vector.New(0); + for p.tok == token.CASE || p.tok == token.DEFAULT { + cases.Push(p.parseTypeCaseClause()); + } + rbrace := p.expect(token.RBRACE); + p.opt_semi = true; + body := &ast.BlockStmt{lbrace, makeStmtList(cases), rbrace}; + return &ast.TypeSwitchStmt{pos, s1, s2, body}; +} + + +func (p *parser) parseCommClause() *ast.CommClause { + if p.trace { + defer un(trace(p, "CommClause")); + } + + // CommCase + pos := p.pos; + var tok token.Token; + var lhs, rhs ast.Expr; + if p.tok == token.CASE { + p.next(); + if p.tok == token.ARROW { + // RecvExpr without assignment + rhs = p.parseExpression(); + } else { + // SendExpr or RecvExpr + rhs = p.parseExpression(); + if p.tok == token.ASSIGN || p.tok == token.DEFINE { + // RecvExpr with assignment + tok = p.tok; + p.next(); + lhs = rhs; + if p.tok == token.ARROW { + rhs = p.parseExpression(); + } else { + p.expect(token.ARROW); // use expect() error handling + } + } + // else SendExpr + } + } else { + p.expect(token.DEFAULT); + } + + colon := p.expect(token.COLON); + body := p.parseStatementList(); + + return &ast.CommClause{pos, tok, lhs, rhs, colon, body}; +} + + +func (p *parser) parseSelectStmt() *ast.SelectStmt { + if p.trace { + defer un(trace(p, "SelectStmt")); + } + + pos := p.expect(token.SELECT); + lbrace := p.expect(token.LBRACE); + cases := vector.New(0); + for p.tok == token.CASE || p.tok == token.DEFAULT { + cases.Push(p.parseCommClause()); + } + rbrace := p.expect(token.RBRACE); + p.opt_semi = true; + body := &ast.BlockStmt{lbrace, makeStmtList(cases), rbrace}; + + return &ast.SelectStmt{pos, body}; +} + + +func (p *parser) parseForStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "ForStmt")); + } + + pos := p.expect(token.FOR); + s1, s2, s3 := p.parseControlClause(true); + body := p.parseBlockStmt(); + + if as, is_as := s2.(*ast.AssignStmt); is_as { + // possibly a for statement with a range clause; check assignment operator + if as.Tok != token.ASSIGN && as.Tok != token.DEFINE { + p.error_expected(as.TokPos, "'=' or ':='"); + return &ast.BadStmt{pos}; + } + // check lhs + var key, value ast.Expr; + switch len(as.Lhs) { + case 2: + value = as.Lhs[1]; + fallthrough; + case 1: + key = as.Lhs[0]; + default: + p.error_expected(as.Lhs[0].Pos(), "1 or 2 expressions"); + return &ast.BadStmt{pos}; + } + // check rhs + if len(as.Rhs) != 1 { + p.error_expected(as.Rhs[0].Pos(), "1 expressions"); + return &ast.BadStmt{pos}; + } + if rhs, is_unary := as.Rhs[0].(*ast.UnaryExpr); is_unary && rhs.Op == token.RANGE { + // rhs is range expression; check lhs + return &ast.RangeStmt{pos, key, value, as.TokPos, as.Tok, rhs.X, body} + } else { + p.error_expected(s2.Pos(), "range clause"); + return &ast.BadStmt{pos}; + } + } else { + // regular for statement + return &ast.ForStmt{pos, s1, p.makeExpr(s2), s3, 
body}; + } + + panic(); // unreachable + return nil; +} + + +func (p *parser) parseStatement() ast.Stmt { + if p.trace { + defer un(trace(p, "Statement")); + } + + switch p.tok { + case token.CONST, token.TYPE, token.VAR: + return &ast.DeclStmt{p.parseDeclaration()}; + case + // tokens that may start a top-level expression + token.IDENT, token.INT, token.FLOAT, token.CHAR, token.STRING, token.FUNC, token.LPAREN, // operand + token.LBRACK, token.STRUCT, // composite type + token.MUL, token.AND, token.ARROW: // unary operators + return p.parseSimpleStmt(true); + case token.GO: + return p.parseGoStmt(); + case token.DEFER: + return p.parseDeferStmt(); + case token.RETURN: + return p.parseReturnStmt(); + case token.BREAK, token.CONTINUE, token.GOTO, token.FALLTHROUGH: + return p.parseBranchStmt(p.tok); + case token.LBRACE: + return p.parseBlockStmt(); + case token.IF: + return p.parseIfStmt(); + case token.SWITCH: + return p.parseSwitchStmt(); + case token.SELECT: + return p.parseSelectStmt(); + case token.FOR: + return p.parseForStmt(); + case token.SEMICOLON, token.RBRACE: + // don't consume the ";", it is the separator following the empty statement + return &ast.EmptyStmt{p.pos}; + } + + // no statement found + p.error_expected(p.pos, "statement"); + p.next(); // make progress + return &ast.BadStmt{p.pos}; +} + + +// ---------------------------------------------------------------------------- +// Declarations + +type parseSpecFunction func(p *parser, doc ast.Comments) ast.Spec + +func parseImportSpec(p *parser, doc ast.Comments) ast.Spec { + if p.trace { + defer un(trace(p, "ImportSpec")); + } + + var ident *ast.Ident; + if p.tok == token.PERIOD { + ident = &ast.Ident{p.pos, "."}; + p.next(); + } else if p.tok == token.IDENT { + ident = p.parseIdent(); + } + + var path []*ast.StringLit; + if p.tok == token.STRING { + path = p.parseStringList(nil); + } else { + p.expect(token.STRING); // use expect() error handling + } + + return &ast.ImportSpec{doc, ident, path}; +} + + +func parseConstSpec(p *parser, doc ast.Comments) ast.Spec { + if p.trace { + defer un(trace(p, "ConstSpec")); + } + + idents := p.parseIdentList(nil); + typ := p.tryType(); + var values []ast.Expr; + if typ != nil || p.tok == token.ASSIGN { + p.expect(token.ASSIGN); + values = p.parseExpressionList(); + } + + return &ast.ValueSpec{doc, idents, typ, values}; +} + + +func parseTypeSpec(p *parser, doc ast.Comments) ast.Spec { + if p.trace { + defer un(trace(p, "TypeSpec")); + } + + ident := p.parseIdent(); + typ := p.parseType(); + + return &ast.TypeSpec{doc, ident, typ}; +} + + +func parseVarSpec(p *parser, doc ast.Comments) ast.Spec { + if p.trace { + defer un(trace(p, "VarSpec")); + } + + idents := p.parseIdentList(nil); + typ := p.tryType(); + var values []ast.Expr; + if typ == nil || p.tok == token.ASSIGN { + p.expect(token.ASSIGN); + values = p.parseExpressionList(); + } + + return &ast.ValueSpec{doc, idents, typ, values}; +} + + +func (p *parser) parseGenDecl(keyword token.Token, f parseSpecFunction) *ast.GenDecl { + if p.trace { + defer un(trace(p, keyword.String() + "Decl")); + } + + doc := p.getDoc(); + pos := p.expect(keyword); + var lparen, rparen token.Position; + list := vector.New(0); + if p.tok == token.LPAREN { + lparen = p.pos; + p.next(); + for p.tok != token.RPAREN && p.tok != token.EOF { + doc := p.getDoc(); + list.Push(f(p, doc)); + if p.tok == token.SEMICOLON { + p.next(); + } else { + break; + } + } + rparen = p.expect(token.RPAREN); + p.opt_semi = true; + } else { + list.Push(f(p, doc)); + } + + // 
convert vector + specs := make([]ast.Spec, list.Len()); + for i := 0; i < list.Len(); i++ { + specs[i] = list.At(i); + } + return &ast.GenDecl{doc, pos, keyword, lparen, specs, rparen}; +} + + +func (p *parser) parseReceiver() *ast.Field { + if p.trace { + defer un(trace(p, "Receiver")); + } + + pos := p.pos; + par := p.parseParameters(false); + + // must have exactly one receiver + if len(par) != 1 || len(par) == 1 && len(par[0].Names) > 1 { + p.error_expected(pos, "exactly one receiver"); + return &ast.Field{nil, nil, &ast.BadExpr{noPos}, nil}; + } + + recv := par[0]; + + // recv type must be TypeName or *TypeName + base := recv.Type; + if ptr, is_ptr := base.(*ast.StarExpr); is_ptr { + base = ptr.X; + } + if !isTypeName(base) { + p.error_expected(base.Pos(), "type name"); + } + + return recv; +} + + +func (p *parser) parseFunctionDecl() *ast.FuncDecl { + if p.trace { + defer un(trace(p, "FunctionDecl")); + } + + doc := p.getDoc(); + pos := p.expect(token.FUNC); + + var recv *ast.Field; + if p.tok == token.LPAREN { + recv = p.parseReceiver(); + } + + ident := p.parseIdent(); + params, results := p.parseSignature(); + + var body *ast.BlockStmt; + if p.tok == token.LBRACE { + body = p.parseBlockStmt(); + } + + return &ast.FuncDecl{doc, recv, ident, &ast.FuncType{pos, params, results}, body}; +} + + +func (p *parser) parseDeclaration() ast.Decl { + if p.trace { + defer un(trace(p, "Declaration")); + } + + var f parseSpecFunction; + switch p.tok { + case token.CONST: f = parseConstSpec; + case token.TYPE: f = parseTypeSpec; + case token.VAR: f = parseVarSpec; + case token.FUNC: + return p.parseFunctionDecl(); + default: + pos := p.pos; + p.error_expected(pos, "declaration"); + p.next(); // make progress + return &ast.BadDecl{pos}; + } + + return p.parseGenDecl(p.tok, f); +} + + +// ---------------------------------------------------------------------------- +// Packages + +// The mode parameter to the Parse function is a set of flags (or 0). +// They control the amount of source code parsed and other optional +// parser functionality. +// +const ( + PackageClauseOnly uint = 1 << iota; // parsing stops after package clause + ImportsOnly; // parsing stops after import declarations + ParseComments; // parse comments and add them to AST + Trace; // print a trace of parsed productions +) + + +func (p *parser) parsePackage() *ast.Program { + if p.trace { + defer un(trace(p, "Program")); + } + + // package clause + comment := p.getDoc(); + pos := p.expect(token.PACKAGE); + ident := p.parseIdent(); + var decls []ast.Decl; + + // Don't bother parsing the rest if we had errors already. + // Likely not a Go source file at all. 
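+	// For example (usage sketch): a caller interested only in the
+	// package name can invoke Parse(src, PackageClauseOnly); the mode
+	// test below then skips imports and declarations entirely.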
+
+	if p.errors.Len() == 0 && p.mode & PackageClauseOnly == 0 {
+		// import decls
+		list := vector.New(0);
+		for p.tok == token.IMPORT {
+			list.Push(p.parseGenDecl(token.IMPORT, parseImportSpec));
+			if p.tok == token.SEMICOLON {
+				p.next();
+			}
+		}
+
+		if p.mode & ImportsOnly == 0 {
+			// rest of package body
+			for p.tok != token.EOF {
+				list.Push(p.parseDeclaration());
+				if p.tok == token.SEMICOLON {
+					p.next();
+				}
+			}
+		}
+
+		// convert declaration list
+		decls = make([]ast.Decl, list.Len());
+		for i := 0; i < list.Len(); i++ {
+			decls[i] = list.At(i).(ast.Decl);
+		}
+	}
+
+	// convert comments list
+	comments := make([]*ast.Comment, p.comments.Len());
+	for i := 0; i < p.comments.Len(); i++ {
+		comments[i] = p.comments.At(i).(*ast.Comment);
+	}
+
+	return &ast.Program{comment, pos, ident, decls, comments};
+}
+
+
+// ----------------------------------------------------------------------------
+// Parsing of entire programs.
+
+func readSource(src interface{}) ([]byte, os.Error) {
+	if src != nil {
+		switch s := src.(type) {
+		case string:
+			return io.StringBytes(s), nil;
+		case []byte:
+			return s, nil;
+		case *io.ByteBuffer:
+			// is io.Reader, but src is already available in []byte form
+			if s != nil {
+				return s.Data(), nil;
+			}
+		case io.Reader:
+			var buf io.ByteBuffer;
+			n, err := io.Copy(s, &buf);
+			if err != nil {
+				return nil, err;
+			}
+			return buf.Data(), nil;
+		}
+	}
+	return nil, os.ErrorString("invalid source");
+}
+
+
+// scannerMode returns the scanner mode bits given the parser's mode bits.
+func scannerMode(mode uint) uint {
+	if mode & ParseComments != 0 {
+		return scanner.ScanComments;
+	}
+	return 0;
+}
+
+
+// Parse parses a Go program.
+//
+// The program source src may be provided in a variety of formats. At the
+// moment the following types are supported: string, []byte, and io.Reader.
+// The mode parameter controls the amount of source text parsed and other
+// optional parser functionality.
+//
+// Parse returns a complete AST if no error occurred. Otherwise, if the
+// source couldn't be read, the returned program is nil and the error
+// indicates the specific failure. If the source was read but syntax
+// errors were found, the result is a partial AST (with ast.BadX nodes
+// representing the fragments of erroneous source code) and an ErrorList
+// describing the syntax errors.
+//
+func Parse(src interface{}, mode uint) (*ast.Program, os.Error) {
+	data, err := readSource(src);
+	if err != nil {
+		return nil, err;
+	}
+
+	// initialize parser state
+	var p parser;
+	p.errors.Init(0);
+	p.scanner.Init(data, &p, scannerMode(mode));
+	p.mode = mode;
+	p.trace = mode & Trace != 0;  // for convenience (p.trace is used frequently)
+	p.comments.Init(0);
+	p.next();
+
+	// parse program
+	prog := p.parsePackage();
+
+	// convert errors list, if any
+	if p.errors.Len() > 0 {
+		errors := make(ErrorList, p.errors.Len());
+		for i := 0; i < p.errors.Len(); i++ {
+			errors[i] = p.errors.At(i).(*Error);
+		}
+		return prog, errors;
+	}
+
+	return prog, nil;
+}
diff --git a/src/pkg/go/parser/parser_test.go b/src/pkg/go/parser/parser_test.go
new file mode 100644
index 000000000..887fcf80f
--- /dev/null
+++ b/src/pkg/go/parser/parser_test.go
@@ -0,0 +1,68 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
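+
+// A minimal usage sketch of the API under test (in-package names; error
+// handling abbreviated):
+//
+//	prog, err := Parse(`package main`, ParseComments);
+//	if err != nil {
+//		// on syntax errors, prog may still be a partial AST and
+//		// err an ErrorList describing the problems
+//	}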
+ +package parser + +import ( + "go/ast"; + "go/parser"; + "os"; + "testing"; +) + + +var illegalInputs = []interface{} { + nil, + 3.14, + []byte(nil), + "foo!", +} + + +func TestParseIllegalInputs(t *testing.T) { + for _, src := range illegalInputs { + prog, err := Parse(src, 0); + if err == nil { + t.Errorf("Parse(%v) should have failed", src); + } + } +} + + +var validPrograms = []interface{} { + `package main`, + `package main import "fmt" func main() { fmt.Println("Hello, World!") }`, +} + + +func TestParseValidPrograms(t *testing.T) { + for _, src := range validPrograms { + prog, err := Parse(src, 0); + if err != nil { + t.Errorf("Parse(%q) failed: %v", src, err); + } + } +} + + +var validFiles = []string { + "parser.go", + "parser_test.go", +} + + +func TestParse3(t *testing.T) { + for _, filename := range validFiles { + src, err := os.Open(filename, os.O_RDONLY, 0); + defer src.Close(); + if err != nil { + t.Fatalf("os.Open(%s): %v\n", filename, err); + } + + prog, err := Parse(src, 0); + if err != nil { + t.Errorf("Parse(%q): %v", src, err); + } + } +} diff --git a/src/pkg/go/scanner/Makefile b/src/pkg/go/scanner/Makefile new file mode 100644 index 000000000..d47fecb7c --- /dev/null +++ b/src/pkg/go/scanner/Makefile @@ -0,0 +1,60 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# DO NOT EDIT. Automatically generated by gobuild. +# gobuild -m >Makefile + +D=/go/ + +include $(GOROOT)/src/Make.$(GOARCH) +AR=gopack + +default: packages + +clean: + rm -rf *.[$(OS)] *.a [$(OS)].out _obj + +test: packages + gotest + +coverage: packages + gotest + 6cov -g `pwd` | grep -v '_test\.go:' + +%.$O: %.go + $(GC) -I_obj $*.go + +%.$O: %.c + $(CC) $*.c + +%.$O: %.s + $(AS) $*.s + +O1=\ + scanner.$O\ + + +phases: a1 +_obj$D/scanner.a: phases + +a1: $(O1) + $(AR) grc _obj$D/scanner.a scanner.$O + rm -f $(O1) + + +newpkg: clean + mkdir -p _obj$D + $(AR) grc _obj$D/scanner.a + +$(O1): newpkg +$(O2): a1 + +nuke: clean + rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/scanner.a + +packages: _obj$D/scanner.a + +install: packages + test -d $(GOROOT)/pkg && mkdir -p $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D + cp _obj$D/scanner.a $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/scanner.a diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go new file mode 100644 index 000000000..a90e6f259 --- /dev/null +++ b/src/pkg/go/scanner/scanner.go @@ -0,0 +1,501 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A scanner for Go source text. Takes a []byte as source which can +// then be tokenized through repeated calls to the Scan function. +// For a sample use of a scanner, see the implementation of Tokenize. +// +package scanner + +import ( + "go/token"; + "strconv"; + "unicode"; + "utf8"; +) + + +// An implementation of an ErrorHandler may be provided to the Scanner. +// If a syntax error is encountered and a handler was installed, Error +// is called with a position and an error message. The position points +// to the beginning of the offending token. +// +type ErrorHandler interface { + Error(pos token.Position, msg string); +} + + +// A Scanner holds the scanner's internal state while processing +// a given text. It can be allocated as part of another data +// structure but must be initialized via Init before use. For +// a sample use, see the implementation of Tokenize. 
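+//
+// A minimal scan loop (sketch; src is a []byte):
+//
+//	var s Scanner;
+//	s.Init(src, nil, 0);  // nil: no error handler installed
+//	for {
+//		pos, tok, lit := s.Scan();
+//		if tok == token.EOF {
+//			break;
+//		}
+//		// use pos, tok, and lit here
+//	}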
+// +type Scanner struct { + // immutable state + src []byte; // source + err ErrorHandler; // error reporting; or nil + mode uint; // scanning mode + + // scanning state + pos token.Position; // previous reading position (position before ch) + offset int; // current reading offset (position after ch) + ch int; // one char look-ahead + + // public state - ok to modify + ErrorCount int; // number of errors encountered +} + + +// Read the next Unicode char into S.ch. +// S.ch < 0 means end-of-file. +// +func (S *Scanner) next() { + if S.offset < len(S.src) { + S.pos.Offset = S.offset; + S.pos.Column++; + r, w := int(S.src[S.offset]), 1; + switch { + case r == '\n': + S.pos.Line++; + S.pos.Column = 0; + case r >= 0x80: + // not ASCII + r, w = utf8.DecodeRune(S.src[S.offset : len(S.src)]); + } + S.offset += w; + S.ch = r; + } else { + S.pos.Offset = len(S.src); + S.ch = -1; // eof + } +} + + +// The mode parameter to the Init function is a set of flags (or 0). +// They control scanner behavior. +// +const ( + ScanComments = 1 << iota; // return comments as COMMENT tokens + AllowIllegalChars; // do not report an error for illegal chars +) + + +// Init prepares the scanner S to tokenize the text src. Calls to Scan +// will use the error handler err if they encounter a syntax error and +// err is not nil. Also, for each error encountered, the Scanner field +// ErrorCount is incremented by one. The mode parameter determines how +// comments and illegal characters are handled. +// +func (S *Scanner) Init(src []byte, err ErrorHandler, mode uint) { + // Explicitly initialize all fields since a scanner may be reused. + S.src = src; + S.err = err; + S.mode = mode; + S.pos = token.Position{0, 1, 0}; + S.offset = 0; + S.ErrorCount = 0; + S.next(); +} + + +func charString(ch int) string { + var s string; + switch ch { + case '\a': s = `\a`; + case '\b': s = `\b`; + case '\f': s = `\f`; + case '\n': s = `\n`; + case '\r': s = `\r`; + case '\t': s = `\t`; + case '\v': s = `\v`; + case '\\': s = `\\`; + case '\'': s = `\'`; + default : s = string(ch); + } + return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")"; +} + + +func (S *Scanner) error(pos token.Position, msg string) { + if S.err != nil { + S.err.Error(pos, msg); + } + S.ErrorCount++; +} + + +func (S *Scanner) expect(ch int) { + if S.ch != ch { + S.error(S.pos, "expected " + charString(ch) + ", found " + charString(S.ch)); + } + S.next(); // always make progress +} + + +func (S *Scanner) scanComment(pos token.Position) { + // first '/' already consumed + + if S.ch == '/' { + //-style comment + for S.ch >= 0 { + S.next(); + if S.ch == '\n' { + S.next(); // '\n' belongs to the comment + return; + } + } + + } else { + /*-style comment */ + S.expect('*'); + for S.ch >= 0 { + ch := S.ch; + S.next(); + if ch == '*' && S.ch == '/' { + S.next(); + return; + } + } + } + + S.error(pos, "comment not terminated"); +} + + +func isLetter(ch int) bool { + return + 'a' <= ch && ch <= 'z' || + 'A' <= ch && ch <= 'Z' || + ch == '_' || + ch >= 0x80 && unicode.IsLetter(ch); +} + + +func isDigit(ch int) bool { + return + '0' <= ch && ch <= '9' || + ch >= 0x80 && unicode.IsDecimalDigit(ch); +} + + +func (S *Scanner) scanIdentifier() token.Token { + pos := S.pos.Offset; + for isLetter(S.ch) || isDigit(S.ch) { + S.next(); + } + return token.Lookup(S.src[pos : S.pos.Offset]); +} + + +func digitVal(ch int) int { + switch { + case '0' <= ch && ch <= '9': return ch - '0'; + case 'a' <= ch && ch <= 'f': return ch - 'a' + 10; + case 'A' <= ch && ch <= 'F': return ch - 'A' + 10; + 
} + return 16; // larger than any legal digit val +} + + +func (S *Scanner) scanMantissa(base int) { + for digitVal(S.ch) < base { + S.next(); + } +} + + +func (S *Scanner) scanNumber(seen_decimal_point bool) token.Token { + tok := token.INT; + + if seen_decimal_point { + tok = token.FLOAT; + S.scanMantissa(10); + goto exponent; + } + + if S.ch == '0' { + // int or float + S.next(); + if S.ch == 'x' || S.ch == 'X' { + // hexadecimal int + S.next(); + S.scanMantissa(16); + } else { + // octal int or float + S.scanMantissa(8); + if digitVal(S.ch) < 10 || S.ch == '.' || S.ch == 'e' || S.ch == 'E' { + // float + tok = token.FLOAT; + goto mantissa; + } + // octal int + } + goto exit; + } + +mantissa: + // decimal int or float + S.scanMantissa(10); + + if S.ch == '.' { + // float + tok = token.FLOAT; + S.next(); + S.scanMantissa(10) + } + +exponent: + if S.ch == 'e' || S.ch == 'E' { + // float + tok = token.FLOAT; + S.next(); + if S.ch == '-' || S.ch == '+' { + S.next(); + } + S.scanMantissa(10); + } + +exit: + return tok; +} + + +func (S *Scanner) scanDigits(base, length int) { + for length > 0 && digitVal(S.ch) < base { + S.next(); + length--; + } + if length > 0 { + S.error(S.pos, "illegal char escape"); + } +} + + +func (S *Scanner) scanEscape(quote int) { + pos := S.pos; + ch := S.ch; + S.next(); + switch ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: + // nothing to do + case '0', '1', '2', '3', '4', '5', '6', '7': + S.scanDigits(8, 3 - 1); // 1 char read already + case 'x': + S.scanDigits(16, 2); + case 'u': + S.scanDigits(16, 4); + case 'U': + S.scanDigits(16, 8); + default: + S.error(pos, "illegal char escape"); + } +} + + +func (S *Scanner) scanChar() { + // '\'' already consumed + + ch := S.ch; + S.next(); + if ch == '\\' { + S.scanEscape('\''); + } + + S.expect('\''); +} + + +func (S *Scanner) scanString(pos token.Position) { + // '"' already consumed + + for S.ch != '"' { + ch := S.ch; + S.next(); + if ch == '\n' || ch < 0 { + S.error(pos, "string not terminated"); + break; + } + if ch == '\\' { + S.scanEscape('"'); + } + } + + S.next(); +} + + +func (S *Scanner) scanRawString(pos token.Position) { + // '`' already consumed + + for S.ch != '`' { + ch := S.ch; + S.next(); + if ch == '\n' || ch < 0 { + S.error(pos, "string not terminated"); + break; + } + } + + S.next(); +} + + +// Helper functions for scanning multi-byte tokens such as >> += >>= . +// Different routines recognize different length tok_i based on matches +// of ch_i. If a token ends in '=', the result is tok1 or tok3 +// respectively. Otherwise, the result is tok0 if there was no other +// matching character, or tok2 if the matching character was ch2. + +func (S *Scanner) switch2(tok0, tok1 token.Token) token.Token { + if S.ch == '=' { + S.next(); + return tok1; + } + return tok0; +} + + +func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 int, tok2 token.Token) token.Token { + if S.ch == '=' { + S.next(); + return tok1; + } + if S.ch == ch2 { + S.next(); + return tok2; + } + return tok0; +} + + +func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Token) token.Token { + if S.ch == '=' { + S.next(); + return tok1; + } + if S.ch == ch2 { + S.next(); + if S.ch == '=' { + S.next(); + return tok3; + } + return tok2; + } + return tok0; +} + + +// Scan scans the next token and returns the token position pos, +// the token tok, and the literal text lit corresponding to the +// token. The source end is indicated by token.EOF. 
+// +// For more tolerant parsing, Scan will return a valid token if +// possible even if a syntax error was encountered. Thus, even +// if the resulting token sequence contains no illegal tokens, +// a client may not assume that no error occurred. Instead it +// must check the scanner's ErrorCount or the number of calls +// of the error handler, if there was one installed. +// +func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) { +scan_again: + // skip white space + for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' { + S.next(); + } + + // current token start + pos, tok = S.pos, token.ILLEGAL; + + // determine token value + switch ch := S.ch; { + case isLetter(ch): + tok = S.scanIdentifier(); + case digitVal(ch) < 10: + tok = S.scanNumber(false); + default: + S.next(); // always make progress + switch ch { + case -1 : tok = token.EOF; + case '"' : tok = token.STRING; S.scanString(pos); + case '\'': tok = token.CHAR; S.scanChar(); + case '`' : tok = token.STRING; S.scanRawString(pos); + case ':' : tok = S.switch2(token.COLON, token.DEFINE); + case '.' : + if digitVal(S.ch) < 10 { + tok = S.scanNumber(true); + } else if S.ch == '.' { + S.next(); + if S.ch == '.' { + S.next(); + tok = token.ELLIPSIS; + } + } else { + tok = token.PERIOD; + } + case ',': tok = token.COMMA; + case ';': tok = token.SEMICOLON; + case '(': tok = token.LPAREN; + case ')': tok = token.RPAREN; + case '[': tok = token.LBRACK; + case ']': tok = token.RBRACK; + case '{': tok = token.LBRACE; + case '}': tok = token.RBRACE; + case '+': tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC); + case '-': tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC); + case '*': tok = S.switch2(token.MUL, token.MUL_ASSIGN); + case '/': + if S.ch == '/' || S.ch == '*' { + S.scanComment(pos); + tok = token.COMMENT; + if S.mode & ScanComments == 0 { + goto scan_again; + } + } else { + tok = S.switch2(token.QUO, token.QUO_ASSIGN); + } + case '%': tok = S.switch2(token.REM, token.REM_ASSIGN); + case '^': tok = S.switch2(token.XOR, token.XOR_ASSIGN); + case '<': + if S.ch == '-' { + S.next(); + tok = token.ARROW; + } else { + tok = S.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN); + } + case '>': tok = S.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN); + case '=': tok = S.switch2(token.ASSIGN, token.EQL); + case '!': tok = S.switch2(token.NOT, token.NEQ); + case '&': + if S.ch == '^' { + S.next(); + tok = S.switch2(token.AND_NOT, token.AND_NOT_ASSIGN); + } else { + tok = S.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND); + } + case '|': tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR); + default: + if S.mode & AllowIllegalChars == 0 { + S.error(pos, "illegal character " + charString(ch)); + } + } + } + + return pos, tok, S.src[pos.Offset : S.pos.Offset]; +} + + +// Tokenize calls a function f with the token position, token value, and token +// text for each token in the source src. The other parameters have the same +// meaning as for the Init function. Tokenize keeps scanning until f returns +// false (usually when the token value is token.EOF). The result is the number +// of errors encountered. 
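+//
+// For example (sketch), counting all tokens in src:
+//
+//	n := 0;
+//	Tokenize(src, nil, 0, func(pos token.Position, tok token.Token, lit []byte) bool {
+//		n++;
+//		return tok != token.EOF;
+//	});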
+// +func Tokenize(src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int { + var s Scanner; + s.Init(src, err, mode); + for f(s.Scan()) { + // action happens in f + } + return s.ErrorCount; +} diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go new file mode 100644 index 000000000..0defece8b --- /dev/null +++ b/src/pkg/go/scanner/scanner_test.go @@ -0,0 +1,276 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package scanner + +import ( + "go/scanner"; + "go/token"; + "io"; + "testing"; +) + + +const /* class */ ( + special = iota; + literal; + operator; + keyword; +) + + +func tokenclass(tok token.Token) int { + switch { + case tok.IsLiteral(): return literal; + case tok.IsOperator(): return operator; + case tok.IsKeyword(): return keyword; + } + return special; +} + + +type elt struct { + tok token.Token; + lit string; + class int; +} + + +var tokens = [...]elt{ + // Special tokens + elt{ token.COMMENT, "/* a comment */", special }, + elt{ token.COMMENT, "// a comment \n", special }, + + // Identifiers and basic type literals + elt{ token.IDENT, "foobar", literal }, + elt{ token.IDENT, "a۰۱۸", literal }, + elt{ token.IDENT, "foo६४", literal }, + elt{ token.IDENT, "bar9876", literal }, + elt{ token.INT, "0", literal }, + elt{ token.INT, "01234567", literal }, + elt{ token.INT, "0xcafebabe", literal }, + elt{ token.FLOAT, "0.", literal }, + elt{ token.FLOAT, ".0", literal }, + elt{ token.FLOAT, "3.14159265", literal }, + elt{ token.FLOAT, "1e0", literal }, + elt{ token.FLOAT, "1e+100", literal }, + elt{ token.FLOAT, "1e-100", literal }, + elt{ token.FLOAT, "2.71828e-1000", literal }, + elt{ token.CHAR, "'a'", literal }, + elt{ token.CHAR, "'\\000'", literal }, + elt{ token.CHAR, "'\\xFF'", literal }, + elt{ token.CHAR, "'\\uff16'", literal }, + elt{ token.CHAR, "'\\U0000ff16'", literal }, + elt{ token.STRING, "`foobar`", literal }, + + // Operators and delimitors + elt{ token.ADD, "+", operator }, + elt{ token.SUB, "-", operator }, + elt{ token.MUL, "*", operator }, + elt{ token.QUO, "/", operator }, + elt{ token.REM, "%", operator }, + + elt{ token.AND, "&", operator }, + elt{ token.OR, "|", operator }, + elt{ token.XOR, "^", operator }, + elt{ token.SHL, "<<", operator }, + elt{ token.SHR, ">>", operator }, + elt{ token.AND_NOT, "&^", operator }, + + elt{ token.ADD_ASSIGN, "+=", operator }, + elt{ token.SUB_ASSIGN, "-=", operator }, + elt{ token.MUL_ASSIGN, "*=", operator }, + elt{ token.QUO_ASSIGN, "/=", operator }, + elt{ token.REM_ASSIGN, "%=", operator }, + + elt{ token.AND_ASSIGN, "&=", operator }, + elt{ token.OR_ASSIGN, "|=", operator }, + elt{ token.XOR_ASSIGN, "^=", operator }, + elt{ token.SHL_ASSIGN, "<<=", operator }, + elt{ token.SHR_ASSIGN, ">>=", operator }, + elt{ token.AND_NOT_ASSIGN, "&^=", operator }, + + elt{ token.LAND, "&&", operator }, + elt{ token.LOR, "||", operator }, + elt{ token.ARROW, "<-", operator }, + elt{ token.INC, "++", operator }, + elt{ token.DEC, "--", operator }, + + elt{ token.EQL, "==", operator }, + elt{ token.LSS, "<", operator }, + elt{ token.GTR, ">", operator }, + elt{ token.ASSIGN, "=", operator }, + elt{ token.NOT, "!", operator }, + + elt{ token.NEQ, "!=", operator }, + elt{ token.LEQ, "<=", operator }, + elt{ token.GEQ, ">=", operator }, + elt{ token.DEFINE, ":=", operator }, + elt{ token.ELLIPSIS, "...", operator }, + + elt{ 
token.LPAREN, "(", operator }, + elt{ token.LBRACK, "[", operator }, + elt{ token.LBRACE, "{", operator }, + elt{ token.COMMA, ",", operator }, + elt{ token.PERIOD, ".", operator }, + + elt{ token.RPAREN, ")", operator }, + elt{ token.RBRACK, "]", operator }, + elt{ token.RBRACE, "}", operator }, + elt{ token.SEMICOLON, ";", operator }, + elt{ token.COLON, ":", operator }, + + // Keywords + elt{ token.BREAK, "break", keyword }, + elt{ token.CASE, "case", keyword }, + elt{ token.CHAN, "chan", keyword }, + elt{ token.CONST, "const", keyword }, + elt{ token.CONTINUE, "continue", keyword }, + + elt{ token.DEFAULT, "default", keyword }, + elt{ token.DEFER, "defer", keyword }, + elt{ token.ELSE, "else", keyword }, + elt{ token.FALLTHROUGH, "fallthrough", keyword }, + elt{ token.FOR, "for", keyword }, + + elt{ token.FUNC, "func", keyword }, + elt{ token.GO, "go", keyword }, + elt{ token.GOTO, "goto", keyword }, + elt{ token.IF, "if", keyword }, + elt{ token.IMPORT, "import", keyword }, + + elt{ token.INTERFACE, "interface", keyword }, + elt{ token.MAP, "map", keyword }, + elt{ token.PACKAGE, "package", keyword }, + elt{ token.RANGE, "range", keyword }, + elt{ token.RETURN, "return", keyword }, + + elt{ token.SELECT, "select", keyword }, + elt{ token.STRUCT, "struct", keyword }, + elt{ token.SWITCH, "switch", keyword }, + elt{ token.TYPE, "type", keyword }, + elt{ token.VAR, "var", keyword }, +} + + +const whitespace = " \t \n\n\n"; // to separate tokens + +type TestErrorHandler struct { + t *testing.T +} + +func (h *TestErrorHandler) Error(pos token.Position, msg string) { + h.t.Errorf("Error() called (msg = %s)", msg); +} + + +func NewlineCount(s string) int { + n := 0; + for i := 0; i < len(s); i++ { + if s[i] == '\n' { + n++; + } + } + return n; +} + + +// Verify that calling Scan() provides the correct results. +func TestScan(t *testing.T) { + // make source + var src string; + for i, e := range tokens { + src += e.lit + whitespace; + } + whitespace_linecount := NewlineCount(whitespace); + + // verify scan + index := 0; + eloc := token.Position{0, 1, 1}; + nerrors := scanner.Tokenize(io.StringBytes(src), &TestErrorHandler{t}, scanner.ScanComments, + func (pos token.Position, tok token.Token, litb []byte) bool { + e := elt{token.EOF, "", special}; + if index < len(tokens) { + e = tokens[index]; + } + lit := string(litb); + if tok == token.EOF { + lit = "<EOF>"; + eloc.Column = 0; + } + if pos.Offset != eloc.Offset { + t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, eloc.Offset); + } + if pos.Line != eloc.Line { + t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, eloc.Line); + } + if pos.Column!= eloc.Column { + t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, eloc.Column); + } + if tok != e.tok { + t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String()); + } + if e.tok.IsLiteral() && lit != e.lit { + t.Errorf("bad literal for %s: got %s, expected %s", lit, lit, e.lit); + } + if tokenclass(tok) != e.class { + t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class); + } + eloc.Offset += len(lit) + len(whitespace); + eloc.Line += NewlineCount(lit) + whitespace_linecount; + index++; + return tok != token.EOF; + } + ); + if nerrors != 0 { + t.Errorf("found %d errors", nerrors); + } +} + + +// Verify that initializing the same scanner more then once works correctly. 
+
+
+// Verify that initializing the same scanner more than once works correctly.
+func TestInit(t *testing.T) {
+	var s scanner.Scanner;
+
+	// 1st init
+	s.Init(io.StringBytes("if true { }"), nil, 0);
+	s.Scan();  // if
+	s.Scan();  // true
+	_, tok, _ := s.Scan();  // {
+	if tok != token.LBRACE {
+		t.Errorf("bad token: got %s, expected %s", tok.String(), token.LBRACE.String());
+	}
+
+	// 2nd init
+	s.Init(io.StringBytes("go true { ]"), nil, 0);
+	_, tok, _ = s.Scan();  // go
+	if tok != token.GO {
+		t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO.String());
+	}
+
+	if s.ErrorCount != 0 {
+		t.Errorf("found %d errors", s.ErrorCount);
+	}
+}
+
+
+func TestIllegalChars(t *testing.T) {
+	var s scanner.Scanner;
+
+	const src = "*?*$*@*";
+	s.Init(io.StringBytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
+	for offs, ch := range src {
+		pos, tok, lit := s.Scan();
+		if pos.Offset != offs {
+			t.Errorf("bad position for %s: got %d, expected %d", string(lit), pos.Offset, offs);
+		}
+		if tok == token.ILLEGAL && string(lit) != string(ch) {
+			t.Errorf("bad token: got %s, expected %s", string(lit), string(ch));
+		}
+	}
+
+	if s.ErrorCount != 0 {
+		t.Errorf("found %d errors", s.ErrorCount);
+	}
+}
diff --git a/src/pkg/go/token/Makefile b/src/pkg/go/token/Makefile
new file mode 100644
index 000000000..12ef2a4aa
--- /dev/null
+++ b/src/pkg/go/token/Makefile
@@ -0,0 +1,60 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# DO NOT EDIT. Automatically generated by gobuild.
+# gobuild -m >Makefile
+
+D=/go/
+
+include $(GOROOT)/src/Make.$(GOARCH)
+AR=gopack
+
+default: packages
+
+clean:
+	rm -rf *.[$(OS)] *.a [$(OS)].out _obj
+
+test: packages
+	gotest
+
+coverage: packages
+	gotest
+	6cov -g `pwd` | grep -v '_test\.go:'
+
+%.$O: %.go
+	$(GC) -I_obj $*.go
+
+%.$O: %.c
+	$(CC) $*.c
+
+%.$O: %.s
+	$(AS) $*.s
+
+O1=\
+	token.$O\
+
+
+phases: a1
+_obj$D/token.a: phases
+
+a1: $(O1)
+	$(AR) grc _obj$D/token.a token.$O
+	rm -f $(O1)
+
+
+newpkg: clean
+	mkdir -p _obj$D
+	$(AR) grc _obj$D/token.a
+
+$(O1): newpkg
+$(O2): a1
+
+nuke: clean
+	rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/token.a
+
+packages: _obj$D/token.a
+
+install: packages
+	test -d $(GOROOT)/pkg && mkdir -p $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D
+	cp _obj$D/token.a $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/token.a
diff --git a/src/pkg/go/token/token.go b/src/pkg/go/token/token.go
new file mode 100644
index 000000000..a70a75a54
--- /dev/null
+++ b/src/pkg/go/token/token.go
@@ -0,0 +1,347 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This package defines constants representing the lexical
+// tokens of the Go programming language and basic operations
+// on tokens (printing, predicates).
+//
+package token
+
+import "strconv"
+
+// Token is the set of lexical tokens of the Go programming language.
+type Token int
+
+// The list of tokens.
+const (
+	// Special tokens
+	ILLEGAL Token = iota;
+	EOF;
+	COMMENT;
+
+	// Identifiers and basic type literals
+	// (these tokens stand for classes of literals)
+	literal_beg;
+	IDENT;
+	INT;
+	FLOAT;
+	CHAR;
+	STRING;
+	literal_end;
+
+	// Operators and delimiters
+	operator_beg;
+	ADD;
+	SUB;
+	MUL;
+	QUO;
+	REM;
+
+	AND;
+	OR;
+	XOR;
+	SHL;
+	SHR;
+	AND_NOT;
+
+	ADD_ASSIGN;
+	SUB_ASSIGN;
+	MUL_ASSIGN;
+	QUO_ASSIGN;
+	REM_ASSIGN;
+
+	AND_ASSIGN;
+	OR_ASSIGN;
+	XOR_ASSIGN;
+	SHL_ASSIGN;
+	SHR_ASSIGN;
+	AND_NOT_ASSIGN;
+
+	LAND;
+	LOR;
+	ARROW;
+	INC;
+	DEC;
+
+	EQL;
+	LSS;
+	GTR;
+	ASSIGN;
+	NOT;
+
+	NEQ;
+	LEQ;
+	GEQ;
+	DEFINE;
+	ELLIPSIS;
+
+	LPAREN;
+	LBRACK;
+	LBRACE;
+	COMMA;
+	PERIOD;
+
+	RPAREN;
+	RBRACK;
+	RBRACE;
+	SEMICOLON;
+	COLON;
+	operator_end;
+
+	// Keywords
+	keyword_beg;
+	BREAK;
+	CASE;
+	CHAN;
+	CONST;
+	CONTINUE;
+
+	DEFAULT;
+	DEFER;
+	ELSE;
+	FALLTHROUGH;
+	FOR;
+
+	FUNC;
+	GO;
+	GOTO;
+	IF;
+	IMPORT;
+
+	INTERFACE;
+	MAP;
+	PACKAGE;
+	RANGE;
+	RETURN;
+
+	SELECT;
+	STRUCT;
+	SWITCH;
+	TYPE;
+	VAR;
+	keyword_end;
+)
+
+
+// At the moment we have no array literal syntax that lets us describe
+// the index for each element - use a map for now to make sure they are
+// in sync.
+var tokens = map [Token] string {
+	ILLEGAL : "ILLEGAL",
+
+	EOF : "EOF",
+	COMMENT : "COMMENT",
+
+	IDENT : "IDENT",
+	INT : "INT",
+	FLOAT : "FLOAT",
+	CHAR : "CHAR",
+	STRING : "STRING",
+
+	ADD : "+",
+	SUB : "-",
+	MUL : "*",
+	QUO : "/",
+	REM : "%",
+
+	AND : "&",
+	OR : "|",
+	XOR : "^",
+	SHL : "<<",
+	SHR : ">>",
+	AND_NOT : "&^",
+
+	ADD_ASSIGN : "+=",
+	SUB_ASSIGN : "-=",
+	MUL_ASSIGN : "*=",
+	QUO_ASSIGN : "/=",
+	REM_ASSIGN : "%=",
+
+	AND_ASSIGN : "&=",
+	OR_ASSIGN : "|=",
+	XOR_ASSIGN : "^=",
+	SHL_ASSIGN : "<<=",
+	SHR_ASSIGN : ">>=",
+	AND_NOT_ASSIGN : "&^=",
+
+	LAND : "&&",
+	LOR : "||",
+	ARROW : "<-",
+	INC : "++",
+	DEC : "--",
+
+	EQL : "==",
+	LSS : "<",
+	GTR : ">",
+	ASSIGN : "=",
+	NOT : "!",
+
+	NEQ : "!=",
+	LEQ : "<=",
+	GEQ : ">=",
+	DEFINE : ":=",
+	ELLIPSIS : "...",
+
+	LPAREN : "(",
+	LBRACK : "[",
+	LBRACE : "{",
+	COMMA : ",",
+	PERIOD : ".",
+
+	RPAREN : ")",
+	RBRACK : "]",
+	RBRACE : "}",
+	SEMICOLON : ";",
+	COLON : ":",
+
+	BREAK : "break",
+	CASE : "case",
+	CHAN : "chan",
+	CONST : "const",
+	CONTINUE : "continue",
+
+	DEFAULT : "default",
+	DEFER : "defer",
+	ELSE : "else",
+	FALLTHROUGH : "fallthrough",
+	FOR : "for",
+
+	FUNC : "func",
+	GO : "go",
+	GOTO : "goto",
+	IF : "if",
+	IMPORT : "import",
+
+	INTERFACE : "interface",
+	MAP : "map",
+	PACKAGE : "package",
+	RANGE : "range",
+	RETURN : "return",
+
+	SELECT : "select",
+	STRUCT : "struct",
+	SWITCH : "switch",
+	TYPE : "type",
+	VAR : "var",
+}
+
+
+// String returns the string corresponding to the token tok.
+// For operators, delimiters, and keywords the string is the actual
+// token character sequence (e.g., for the token ADD, the string is
+// "+"). For all other tokens the string corresponds to the token
+// constant name (e.g. for the token IDENT, the string is "IDENT").
+//
+func (tok Token) String() string {
+	if str, exists := tokens[tok]; exists {
+		return str;
+	}
+	return "token(" + strconv.Itoa(int(tok)) + ")";
+}
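To illustrate the documented behavior, a few hypothetical calls and the strings they should yield; operators map to their source text, token classes to their constant names, and values outside the map fall back to the strconv-based default:

fmt.Println(token.ADD.String());         // "+"
fmt.Println(token.IDENT.String());       // "IDENT"
fmt.Println(token.Token(9999).String()); // "token(9999)"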
+
+
+// A set of constants for precedence-based expression parsing.
+// Non-operators have lowest precedence, followed by operators
+// starting with precedence 1 up to unary operators. The highest
+// precedence serves as a "catch-all" precedence for selector,
+// indexing, and other operator and delimiter tokens.
+//
+const (
+	LowestPrec = 0;  // non-operators
+	UnaryPrec = 7;
+	HighestPrec = 8;
+)
+
+
+// Precedence returns the operator precedence of the binary
+// operator op. If op is not a binary operator, the result
+// is LowestPrec.
+//
+func (op Token) Precedence() int {
+	switch op {
+	case LOR:
+		return 1;
+	case LAND:
+		return 2;
+	case ARROW:
+		return 3;
+	case EQL, NEQ, LSS, LEQ, GTR, GEQ:
+		return 4;
+	case ADD, SUB, OR, XOR:
+		return 5;
+	case MUL, QUO, REM, SHL, SHR, AND, AND_NOT:
+		return 6;
+	}
+	return LowestPrec;
+}
+
+
+var keywords map [string] Token;
+
+func init() {
+	keywords = make(map [string] Token);
+	for i := keyword_beg + 1; i < keyword_end; i++ {
+		keywords[tokens[i]] = i;
+	}
+}
+
+
+// Lookup maps an identifier to its keyword token or IDENT (if not a keyword).
+//
+func Lookup(ident []byte) Token {
+	// TODO: maps with []byte keys are illegal because []byte does not
+	// support ==; find a more efficient solution eventually.
+	if tok, is_keyword := keywords[string(ident)]; is_keyword {
+		return tok;
+	}
+	return IDENT;
+}
+
+
+// Predicates
+
+// IsLiteral returns true for tokens corresponding to identifiers
+// and basic type literals; returns false otherwise.
+//
+func (tok Token) IsLiteral() bool {
+	return literal_beg < tok && tok < literal_end;
+}
+
+// IsOperator returns true for tokens corresponding to operators and
+// delimiters; returns false otherwise.
+//
+func (tok Token) IsOperator() bool {
+	return operator_beg < tok && tok < operator_end;
+}
+
+// IsKeyword returns true for tokens corresponding to keywords;
+// returns false otherwise.
+//
+func (tok Token) IsKeyword() bool {
+	return keyword_beg < tok && tok < keyword_end;
+}
+
+
+// Token source positions are represented by a Position value.
+// A Position is valid if the line number is > 0.
+//
+type Position struct {
+	Offset int;  // byte offset, starting at 0
+	Line int;  // line number, starting at 1
+	Column int;  // column number, starting at 1 (character count)
+}
+
+
+// Pos is an accessor method for anonymous Position fields.
+// It returns its receiver.
+//
+func (pos *Position) Pos() Position {
+	return *pos;
+}
+
+
+// IsValid returns true if the position is valid.
+func (pos *Position) IsValid() bool {
+	return pos.Line > 0;
+}
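Taken together, Lookup, the predicates, and Precedence supply a parser's basic decisions: is this identifier a keyword, what class does this token belong to, and how tightly does this operator bind. A small sketch in the same era syntax; the describe helper and its output format are assumptions for illustration:

func describe(ident string) {
	tok := token.Lookup(io.StringBytes(ident));  // Lookup takes the raw []byte
	if tok.IsKeyword() {
		fmt.Printf("%q scans as the keyword token %s\n", ident, tok.String());
	} else {
		fmt.Printf("%q scans as an identifier\n", ident);
	}
}

// Precedence drives binary-expression parsing: token.ADD.Precedence()
// yields 5 and token.MUL.Precedence() yields 6, so a precedence-climbing
// parser binds a*b+c as (a*b)+c.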