Diffstat (limited to 'src/pkg/go')
42 files changed, 4296 insertions, 1665 deletions
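The central change across these files is the switch from storing a full token.Position in every AST node to a compact token.Pos offset that is resolved on demand through a *token.FileSet, plus a new End() method on every node. A minimal sketch of the resolution step (written against today's signatures; at the time of this patch the parser functions returned os.Error rather than error):

    package main

    import (
        "fmt"
        "go/parser"
        "go/token"
    )

    func main() {
        fset := token.NewFileSet() // owns the Pos -> file/line/column mapping
        src := "package p\n\nfunc f() {}\n"
        f, err := parser.ParseFile(fset, "p.go", src, 0)
        if err != nil {
            panic(err)
        }
        // A token.Pos is only an offset; the FileSet expands it.
        fmt.Println(fset.Position(f.Decls[0].Pos())) // p.go:3:1
    }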
diff --git a/src/pkg/go/ast/Makefile b/src/pkg/go/ast/Makefile index 9b5c904c1..e9b885c70 100644 --- a/src/pkg/go/ast/Makefile +++ b/src/pkg/go/ast/Makefile @@ -2,12 +2,13 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -include ../../../Make.$(GOARCH) +include ../../../Make.inc TARG=go/ast GOFILES=\ ast.go\ filter.go\ + print.go\ scope.go\ walk.go\ diff --git a/src/pkg/go/ast/ast.go b/src/pkg/go/ast/ast.go index 2fc8b215f..cf2ce36df 100644 --- a/src/pkg/go/ast/ast.go +++ b/src/pkg/go/ast/ast.go @@ -34,8 +34,8 @@ import ( // All node types implement the Node interface. type Node interface { - // Pos returns the (beginning) position of the node. - Pos() token.Position + Pos() token.Pos // position of first character belonging to the node + End() token.Pos // position of first character immediately after the node } @@ -65,19 +65,27 @@ type Decl interface { // A Comment node represents a single //-style or /*-style comment. type Comment struct { - token.Position // beginning position of the comment - Text []byte // comment text (excluding '\n' for //-style comments) + Slash token.Pos // position of "/" starting the comment + Text []byte // comment text (excluding '\n' for //-style comments) } +func (c *Comment) Pos() token.Pos { return c.Slash } +func (c *Comment) End() token.Pos { return token.Pos(int(c.Slash) + len(c.Text)) } + + // A CommentGroup represents a sequence of comments // with no other tokens and no empty lines between. // type CommentGroup struct { - List []*Comment + List []*Comment // len(List) > 0 } +func (g *CommentGroup) Pos() token.Pos { return g.List[0].Pos() } +func (g *CommentGroup) End() token.Pos { return g.List[len(g.List)-1].End() } + + // ---------------------------------------------------------------------------- // Expressions and types @@ -94,7 +102,7 @@ type Field struct { } -func (f *Field) Pos() token.Position { +func (f *Field) Pos() token.Pos { if len(f.Names) > 0 { return f.Names[0].Pos() } @@ -102,11 +110,45 @@ func (f *Field) Pos() token.Position { } +func (f *Field) End() token.Pos { + if f.Tag != nil { + return f.Tag.End() + } + return f.Type.End() +} + + // A FieldList represents a list of Fields, enclosed by parentheses or braces. type FieldList struct { - Opening token.Position // position of opening parenthesis/brace - List []*Field // field list - Closing token.Position // position of closing parenthesis/brace + Opening token.Pos // position of opening parenthesis/brace, if any + List []*Field // field list + Closing token.Pos // position of closing parenthesis/brace, if any +} + + +func (f *FieldList) Pos() token.Pos { + if f.Opening.IsValid() { + return f.Opening + } + // the list should not be empty in this case; + // be conservative and guard against bad ASTs + if len(f.List) > 0 { + return f.List[0].Pos() + } + return token.NoPos +} + + +func (f *FieldList) End() token.Pos { + if f.Closing.IsValid() { + return f.Closing + 1 + } + // the list should not be empty in this case; + // be conservative and guard against bad ASTs + if n := len(f.List); n > 0 { + return f.List[n-1].End() + } + return token.NoPos } @@ -135,28 +177,29 @@ type ( // created. // BadExpr struct { - token.Position // beginning position of bad expression + From, To token.Pos // position range of bad expression } // An Ident node represents an identifier. 
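Note the contract established here: Pos() is the first character of the node and End() is the first character immediately after it, so a node's source text is the half-open range [Pos, End). A hedged helper sketch (the name and setup are assumptions, not from the patch; it needs the usual go/ast and go/token imports plus the original source bytes):

    // nodeText returns the source text covered by n. Because End() is
    // one past the last character, no +1 adjustment is needed.
    func nodeText(src []byte, fset *token.FileSet, n ast.Node) []byte {
        return src[fset.Position(n.Pos()).Offset:fset.Position(n.End()).Offset]
    }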
Ident struct { - token.Position // identifier position - Obj *Object // denoted object + NamePos token.Pos // identifier position + Name string // identifier name + Obj *Object // denoted object; or nil } // An Ellipsis node stands for the "..." type in a // parameter list or the "..." length in an array type. // Ellipsis struct { - token.Position // position of "..." - Elt Expr // ellipsis element type (parameter lists only) + Ellipsis token.Pos // position of "..." + Elt Expr // ellipsis element type (parameter lists only); or nil } // A BasicLit node represents a literal of basic type. BasicLit struct { - token.Position // literal position - Kind token.Token // token.INT, token.FLOAT, token.IMAG, token.CHAR, or token.STRING - Value []byte // literal string; e.g. 42, 0x7f, 3.14, 1e-9, 'a', '\x7f', "foo" or `\m\n\o` + ValuePos token.Pos // literal position + Kind token.Token // token.INT, token.FLOAT, token.IMAG, token.CHAR, or token.STRING + Value []byte // literal string; e.g. 42, 0x7f, 3.14, 1e-9, 2.4i, 'a', '\x7f', "foo" or `\m\n\o` } // A FuncLit node represents a function literal. @@ -166,19 +209,18 @@ type ( } // A CompositeLit node represents a composite literal. - // CompositeLit struct { - Type Expr // literal type - Lbrace token.Position // position of "{" - Elts []Expr // list of composite elements - Rbrace token.Position // position of "}" + Type Expr // literal type; or nil + Lbrace token.Pos // position of "{" + Elts []Expr // list of composite elements; or nil + Rbrace token.Pos // position of "}" } // A ParenExpr node represents a parenthesized expression. ParenExpr struct { - token.Position // position of "(" - X Expr // parenthesized expression - Rparen token.Position // position of ")" + Lparen token.Pos // position of "(" + X Expr // parenthesized expression + Rparen token.Pos // position of ")" } // A SelectorExpr node represents an expression followed by a selector. @@ -189,15 +231,19 @@ type ( // An IndexExpr node represents an expression followed by an index. IndexExpr struct { - X Expr // expression - Index Expr // index expression + X Expr // expression + Lbrack token.Pos // position of "[" + Index Expr // index expression + Rbrack token.Pos // position of "]" } // An SliceExpr node represents an expression followed by slice indices. SliceExpr struct { - X Expr // expression - Index Expr // beginning of slice range - End Expr // end of slice range; or nil + X Expr // expression + Lbrack token.Pos // position of "[" + Low Expr // begin of slice range; or nil + High Expr // end of slice range; or nil + Rbrack token.Pos // position of "]" } // A TypeAssertExpr node represents an expression followed by a @@ -210,35 +256,36 @@ type ( // A CallExpr node represents an expression followed by an argument list. CallExpr struct { - Fun Expr // function expression - Lparen token.Position // position of "(" - Args []Expr // function arguments - Rparen token.Position // positions of ")" + Fun Expr // function expression + Lparen token.Pos // position of "(" + Args []Expr // function arguments; or nil + Ellipsis token.Pos // position of "...", if any + Rparen token.Pos // position of ")" } // A StarExpr node represents an expression of the form "*" Expression. // Semantically it could be a unary "*" expression, or a pointer type. + // StarExpr struct { - token.Position // position of "*" - X Expr // operand + Star token.Pos // position of "*" + X Expr // operand } // A UnaryExpr node represents a unary expression. // Unary "*" expressions are represented via StarExpr nodes. 
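Two changes in this hunk are easy to miss: SliceExpr's Index/End fields become Low/High (with explicit Lbrack/Rbrack positions), and CallExpr gains an Ellipsis position, which lets tools detect variadic calls without rescanning the source. A small assumed helper:

    // isVariadicCall reports whether a call uses the f(args...) form.
    // Ellipsis holds token.NoPos (invalid) when no "..." is present.
    func isVariadicCall(call *ast.CallExpr) bool {
        return call.Ellipsis.IsValid()
    }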
// UnaryExpr struct { - token.Position // position of Op - Op token.Token // operator - X Expr // operand + OpPos token.Pos // position of Op + Op token.Token // operator + X Expr // operand } // A BinaryExpr node represents a binary expression. - // BinaryExpr struct { - X Expr // left operand - OpPos token.Position // position of Op - Op token.Token // operator - Y Expr // right operand + X Expr // left operand + OpPos token.Pos // position of Op + Op token.Token // operator + Y Expr // right operand } // A KeyValueExpr node represents (key : value) pairs @@ -246,7 +293,7 @@ type ( // KeyValueExpr struct { Key Expr - Colon token.Position // position of ":" + Colon token.Pos // position of ":" Value Expr } ) @@ -270,66 +317,123 @@ const ( type ( // An ArrayType node represents an array or slice type. ArrayType struct { - token.Position // position of "[" - Len Expr // Ellipsis node for [...]T array types, nil for slice types - Elt Expr // element type + Lbrack token.Pos // position of "[" + Len Expr // Ellipsis node for [...]T array types, nil for slice types + Elt Expr // element type } // A StructType node represents a struct type. StructType struct { - token.Position // position of "struct" keyword - Fields *FieldList // list of field declarations - Incomplete bool // true if (source) fields are missing in the Fields list + Struct token.Pos // position of "struct" keyword + Fields *FieldList // list of field declarations + Incomplete bool // true if (source) fields are missing in the Fields list } // Pointer types are represented via StarExpr nodes. // A FuncType node represents a function type. FuncType struct { - token.Position // position of "func" keyword - Params *FieldList // (incoming) parameters - Results *FieldList // (outgoing) results + Func token.Pos // position of "func" keyword + Params *FieldList // (incoming) parameters + Results *FieldList // (outgoing) results; or nil } // An InterfaceType node represents an interface type. InterfaceType struct { - token.Position // position of "interface" keyword - Methods *FieldList // list of methods - Incomplete bool // true if (source) methods are missing in the Methods list + Interface token.Pos // position of "interface" keyword + Methods *FieldList // list of methods + Incomplete bool // true if (source) methods are missing in the Methods list } // A MapType node represents a map type. MapType struct { - token.Position // position of "map" keyword - Key Expr - Value Expr + Map token.Pos // position of "map" keyword + Key Expr + Value Expr } // A ChanType node represents a channel type. ChanType struct { - token.Position // position of "chan" keyword or "<-" (whichever comes first) - Dir ChanDir // channel direction - Value Expr // value type + Begin token.Pos // position of "chan" keyword or "<-" (whichever comes first) + Dir ChanDir // channel direction + Value Expr // value type } ) -// Pos() implementations for expression/type where the position -// corresponds to the position of a sub-node. +// Pos and End implementations for expression/type nodes. 
// -func (x *FuncLit) Pos() token.Position { return x.Type.Pos() } -func (x *CompositeLit) Pos() token.Position { return x.Type.Pos() } -func (x *SelectorExpr) Pos() token.Position { return x.X.Pos() } -func (x *IndexExpr) Pos() token.Position { return x.X.Pos() } -func (x *SliceExpr) Pos() token.Position { return x.X.Pos() } -func (x *TypeAssertExpr) Pos() token.Position { return x.X.Pos() } -func (x *CallExpr) Pos() token.Position { return x.Fun.Pos() } -func (x *BinaryExpr) Pos() token.Position { return x.X.Pos() } -func (x *KeyValueExpr) Pos() token.Position { return x.Key.Pos() } +func (x *BadExpr) Pos() token.Pos { return x.From } +func (x *Ident) Pos() token.Pos { return x.NamePos } +func (x *Ellipsis) Pos() token.Pos { return x.Ellipsis } +func (x *BasicLit) Pos() token.Pos { return x.ValuePos } +func (x *FuncLit) Pos() token.Pos { return x.Type.Pos() } +func (x *CompositeLit) Pos() token.Pos { + if x.Type != nil { + return x.Type.Pos() + } + return x.Lbrace +} +func (x *ParenExpr) Pos() token.Pos { return x.Lparen } +func (x *SelectorExpr) Pos() token.Pos { return x.X.Pos() } +func (x *IndexExpr) Pos() token.Pos { return x.X.Pos() } +func (x *SliceExpr) Pos() token.Pos { return x.X.Pos() } +func (x *TypeAssertExpr) Pos() token.Pos { return x.X.Pos() } +func (x *CallExpr) Pos() token.Pos { return x.Fun.Pos() } +func (x *StarExpr) Pos() token.Pos { return x.Star } +func (x *UnaryExpr) Pos() token.Pos { return x.OpPos } +func (x *BinaryExpr) Pos() token.Pos { return x.X.Pos() } +func (x *KeyValueExpr) Pos() token.Pos { return x.Key.Pos() } +func (x *ArrayType) Pos() token.Pos { return x.Lbrack } +func (x *StructType) Pos() token.Pos { return x.Struct } +func (x *FuncType) Pos() token.Pos { return x.Func } +func (x *InterfaceType) Pos() token.Pos { return x.Interface } +func (x *MapType) Pos() token.Pos { return x.Map } +func (x *ChanType) Pos() token.Pos { return x.Begin } + + +func (x *BadExpr) End() token.Pos { return x.To } +func (x *Ident) End() token.Pos { return token.Pos(int(x.NamePos) + len(x.Name)) } +func (x *Ellipsis) End() token.Pos { + if x.Elt != nil { + return x.Elt.End() + } + return x.Ellipsis + 3 // len("...") +} +func (x *BasicLit) End() token.Pos { return token.Pos(int(x.ValuePos) + len(x.Value)) } +func (x *FuncLit) End() token.Pos { return x.Body.End() } +func (x *CompositeLit) End() token.Pos { return x.Rbrace + 1 } +func (x *ParenExpr) End() token.Pos { return x.Rparen + 1 } +func (x *SelectorExpr) End() token.Pos { return x.Sel.End() } +func (x *IndexExpr) End() token.Pos { return x.Rbrack + 1 } +func (x *SliceExpr) End() token.Pos { return x.Rbrack + 1 } +func (x *TypeAssertExpr) End() token.Pos { + if x.Type != nil { + return x.Type.End() + } + return x.X.End() +} +func (x *CallExpr) End() token.Pos { return x.Rparen + 1 } +func (x *StarExpr) End() token.Pos { return x.X.End() } +func (x *UnaryExpr) End() token.Pos { return x.X.End() } +func (x *BinaryExpr) End() token.Pos { return x.Y.End() } +func (x *KeyValueExpr) End() token.Pos { return x.Value.End() } +func (x *ArrayType) End() token.Pos { return x.Elt.End() } +func (x *StructType) End() token.Pos { return x.Fields.End() } +func (x *FuncType) End() token.Pos { + if x.Results != nil { + return x.Results.End() + } + return x.Params.End() +} +func (x *InterfaceType) End() token.Pos { return x.Methods.End() } +func (x *MapType) End() token.Pos { return x.Value.End() } +func (x *ChanType) End() token.Pos { return x.Value.End() } // exprNode() ensures that only expression/type nodes can be // assigned to an 
ExprNode. +// func (x *BadExpr) exprNode() {} func (x *Ident) exprNode() {} func (x *Ellipsis) exprNode() {} @@ -358,17 +462,17 @@ func (x *ChanType) exprNode() {} // ---------------------------------------------------------------------------- // Convenience functions for Idents -var noPos token.Position +var noPos token.Pos -// NewIdent creates a new Ident without position and minimal object -// information. Useful for ASTs generated by code other than the Go -// parser. +// NewIdent creates a new Ident without position. +// Useful for ASTs generated by code other than the Go parser. // -func NewIdent(name string) *Ident { return &Ident{noPos, NewObj(Err, noPos, name)} } +func NewIdent(name string) *Ident { return &Ident{noPos, name, nil} } // IsExported returns whether name is an exported Go symbol // (i.e., whether it begins with an uppercase letter). +// func IsExported(name string) bool { ch, _ := utf8.DecodeRuneInString(name) return unicode.IsUpper(ch) @@ -377,16 +481,13 @@ func IsExported(name string) bool { // IsExported returns whether id is an exported Go symbol // (i.e., whether it begins with an uppercase letter). -func (id *Ident) IsExported() bool { return id.Obj.IsExported() } - - -// Name returns an identifier's name. -func (id *Ident) Name() string { return id.Obj.Name } +// +func (id *Ident) IsExported() bool { return IsExported(id.Name) } func (id *Ident) String() string { - if id != nil && id.Obj != nil { - return id.Obj.Name + if id != nil { + return id.Name } return "<nil>" } @@ -404,7 +505,7 @@ type ( // created. // BadStmt struct { - token.Position // beginning position of bad statement + From, To token.Pos // position range of bad statement } // A DeclStmt node represents a declaration in a statement list. @@ -417,12 +518,13 @@ type ( // of the immediately preceeding semicolon. // EmptyStmt struct { - token.Position // position of preceeding ";" + Semicolon token.Pos // position of preceeding ";" } // A LabeledStmt node represents a labeled statement. LabeledStmt struct { Label *Ident + Colon token.Pos // position of ":" Stmt Stmt } @@ -435,138 +537,212 @@ type ( // An IncDecStmt node represents an increment or decrement statement. IncDecStmt struct { - X Expr - Tok token.Token // INC or DEC + X Expr + TokPos token.Pos // position of Tok + Tok token.Token // INC or DEC } // An AssignStmt node represents an assignment or // a short variable declaration. + // AssignStmt struct { Lhs []Expr - TokPos token.Position // position of Tok - Tok token.Token // assignment token, DEFINE + TokPos token.Pos // position of Tok + Tok token.Token // assignment token, DEFINE Rhs []Expr } // A GoStmt node represents a go statement. GoStmt struct { - token.Position // position of "go" keyword - Call *CallExpr + Go token.Pos // position of "go" keyword + Call *CallExpr } // A DeferStmt node represents a defer statement. DeferStmt struct { - token.Position // position of "defer" keyword - Call *CallExpr + Defer token.Pos // position of "defer" keyword + Call *CallExpr } // A ReturnStmt node represents a return statement. ReturnStmt struct { - token.Position // position of "return" keyword - Results []Expr + Return token.Pos // position of "return" keyword + Results []Expr // result expressions; or nil } // A BranchStmt node represents a break, continue, goto, // or fallthrough statement. 
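With Name now a plain string field (the old Name() method and its detour through Obj are removed), client code simply drops the call parentheses; the filter.go and doc.go hunks later in this diff are exactly that mechanical rewrite. The new identifier API in isolation (a fragment; fmt import assumed):

    id := ast.NewIdent("Reader") // position-less; Obj is nil after this patch
    fmt.Println(id.Name)         // "Reader": a field, no longer a method
    fmt.Println(id.IsExported()) // true, computed from the name string alone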
// BranchStmt struct { - token.Position // position of Tok - Tok token.Token // keyword token (BREAK, CONTINUE, GOTO, FALLTHROUGH) - Label *Ident + TokPos token.Pos // position of Tok + Tok token.Token // keyword token (BREAK, CONTINUE, GOTO, FALLTHROUGH) + Label *Ident // label name; or nil } // A BlockStmt node represents a braced statement list. BlockStmt struct { - token.Position // position of "{" - List []Stmt - Rbrace token.Position // position of "}" + Lbrace token.Pos // position of "{" + List []Stmt + Rbrace token.Pos // position of "}" } // An IfStmt node represents an if statement. IfStmt struct { - token.Position // position of "if" keyword - Init Stmt - Cond Expr - Body *BlockStmt - Else Stmt + If token.Pos // position of "if" keyword + Init Stmt // initalization statement; or nil + Cond Expr // condition; or nil + Body *BlockStmt + Else Stmt // else branch; or nil } // A CaseClause represents a case of an expression switch statement. CaseClause struct { - token.Position // position of "case" or "default" keyword - Values []Expr // nil means default case - Colon token.Position // position of ":" - Body []Stmt // statement list; or nil + Case token.Pos // position of "case" or "default" keyword + Values []Expr // nil means default case + Colon token.Pos // position of ":" + Body []Stmt // statement list; or nil } // A SwitchStmt node represents an expression switch statement. SwitchStmt struct { - token.Position // position of "switch" keyword - Init Stmt - Tag Expr - Body *BlockStmt // CaseClauses only + Switch token.Pos // position of "switch" keyword + Init Stmt // initalization statement; or nil + Tag Expr // tag expression; or nil + Body *BlockStmt // CaseClauses only } // A TypeCaseClause represents a case of a type switch statement. TypeCaseClause struct { - token.Position // position of "case" or "default" keyword - Types []Expr // nil means default case - Colon token.Position // position of ":" - Body []Stmt // statement list; or nil + Case token.Pos // position of "case" or "default" keyword + Types []Expr // nil means default case + Colon token.Pos // position of ":" + Body []Stmt // statement list; or nil } // An TypeSwitchStmt node represents a type switch statement. TypeSwitchStmt struct { - token.Position // position of "switch" keyword - Init Stmt - Assign Stmt // x := y.(type) - Body *BlockStmt // TypeCaseClauses only + Switch token.Pos // position of "switch" keyword + Init Stmt // initalization statement; or nil + Assign Stmt // x := y.(type) + Body *BlockStmt // TypeCaseClauses only } // A CommClause node represents a case of a select statement. CommClause struct { - token.Position // position of "case" or "default" keyword - Tok token.Token // ASSIGN or DEFINE (valid only if Lhs != nil) - Lhs, Rhs Expr // Rhs == nil means default case - Colon token.Position // position of ":" - Body []Stmt // statement list; or nil + Case token.Pos // position of "case" or "default" keyword + Tok token.Token // ASSIGN or DEFINE (valid only if Lhs != nil) + Lhs, Rhs Expr // Rhs == nil means default case + Colon token.Pos // position of ":" + Body []Stmt // statement list; or nil } // An SelectStmt node represents a select statement. SelectStmt struct { - token.Position // position of "select" keyword - Body *BlockStmt // CommClauses only + Select token.Pos // position of "select" keyword + Body *BlockStmt // CommClauses only } // A ForStmt represents a for statement. 
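Since every delimiter is now recorded explicitly (Lbrace/Rbrace, Case, Colon, and so on), ranges that previously had to be reconstructed from sub-nodes can be read off directly. For example, a diagnostic spanning a whole block, sketched with assumed names:

    // blockSpan formats the extent of a braced block; note that per the
    // End() rules above, b.End() is b.Rbrace+1.
    func blockSpan(fset *token.FileSet, b *ast.BlockStmt) string {
        return fmt.Sprintf("%s-%s", fset.Position(b.Lbrace), fset.Position(b.Rbrace))
    }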
ForStmt struct { - token.Position // position of "for" keyword - Init Stmt - Cond Expr - Post Stmt - Body *BlockStmt + For token.Pos // position of "for" keyword + Init Stmt // initalization statement; or nil + Cond Expr // condition; or nil + Post Stmt // post iteration statement; or nil + Body *BlockStmt } // A RangeStmt represents a for statement with a range clause. RangeStmt struct { - token.Position // position of "for" keyword - Key, Value Expr // Value may be nil - TokPos token.Position // position of Tok - Tok token.Token // ASSIGN, DEFINE - X Expr // value to range over - Body *BlockStmt + For token.Pos // position of "for" keyword + Key, Value Expr // Value may be nil + TokPos token.Pos // position of Tok + Tok token.Token // ASSIGN, DEFINE + X Expr // value to range over + Body *BlockStmt } ) -// Pos() implementations for statement nodes where the position -// corresponds to the position of a sub-node. +// Pos and End implementations for statement nodes. // -func (s *DeclStmt) Pos() token.Position { return s.Decl.Pos() } -func (s *LabeledStmt) Pos() token.Position { return s.Label.Pos() } -func (s *ExprStmt) Pos() token.Position { return s.X.Pos() } -func (s *IncDecStmt) Pos() token.Position { return s.X.Pos() } -func (s *AssignStmt) Pos() token.Position { return s.Lhs[0].Pos() } +func (s *BadStmt) Pos() token.Pos { return s.From } +func (s *DeclStmt) Pos() token.Pos { return s.Decl.Pos() } +func (s *EmptyStmt) Pos() token.Pos { return s.Semicolon } +func (s *LabeledStmt) Pos() token.Pos { return s.Label.Pos() } +func (s *ExprStmt) Pos() token.Pos { return s.X.Pos() } +func (s *IncDecStmt) Pos() token.Pos { return s.X.Pos() } +func (s *AssignStmt) Pos() token.Pos { return s.Lhs[0].Pos() } +func (s *GoStmt) Pos() token.Pos { return s.Go } +func (s *DeferStmt) Pos() token.Pos { return s.Defer } +func (s *ReturnStmt) Pos() token.Pos { return s.Return } +func (s *BranchStmt) Pos() token.Pos { return s.TokPos } +func (s *BlockStmt) Pos() token.Pos { return s.Lbrace } +func (s *IfStmt) Pos() token.Pos { return s.If } +func (s *CaseClause) Pos() token.Pos { return s.Case } +func (s *SwitchStmt) Pos() token.Pos { return s.Switch } +func (s *TypeCaseClause) Pos() token.Pos { return s.Case } +func (s *TypeSwitchStmt) Pos() token.Pos { return s.Switch } +func (s *CommClause) Pos() token.Pos { return s.Case } +func (s *SelectStmt) Pos() token.Pos { return s.Select } +func (s *ForStmt) Pos() token.Pos { return s.For } +func (s *RangeStmt) Pos() token.Pos { return s.For } + + +func (s *BadStmt) End() token.Pos { return s.To } +func (s *DeclStmt) End() token.Pos { return s.Decl.End() } +func (s *EmptyStmt) End() token.Pos { + return s.Semicolon + 1 /* len(";") */ +} +func (s *LabeledStmt) End() token.Pos { return s.Stmt.End() } +func (s *ExprStmt) End() token.Pos { return s.X.End() } +func (s *IncDecStmt) End() token.Pos { + return s.TokPos + 2 /* len("++") */ +} +func (s *AssignStmt) End() token.Pos { return s.Rhs[len(s.Rhs)-1].End() } +func (s *GoStmt) End() token.Pos { return s.Call.End() } +func (s *DeferStmt) End() token.Pos { return s.Call.End() } +func (s *ReturnStmt) End() token.Pos { + if n := len(s.Results); n > 0 { + return s.Results[n-1].End() + } + return s.Return + 6 // len("return") +} +func (s *BranchStmt) End() token.Pos { + if s.Label != nil { + return s.Label.End() + } + return token.Pos(int(s.TokPos) + len(s.Tok.String())) +} +func (s *BlockStmt) End() token.Pos { return s.Rbrace + 1 } +func (s *IfStmt) End() token.Pos { + if s.Else != nil { + return s.Else.End() + } + 
return s.Body.End() +} +func (s *CaseClause) End() token.Pos { + if n := len(s.Body); n > 0 { + return s.Body[n-1].End() + } + return s.Colon + 1 +} +func (s *SwitchStmt) End() token.Pos { return s.Body.End() } +func (s *TypeCaseClause) End() token.Pos { + if n := len(s.Body); n > 0 { + return s.Body[n-1].End() + } + return s.Colon + 1 +} +func (s *TypeSwitchStmt) End() token.Pos { return s.Body.End() } +func (s *CommClause) End() token.Pos { + if n := len(s.Body); n > 0 { + return s.Body[n-1].End() + } + return s.Colon + 1 +} +func (s *SelectStmt) End() token.Pos { return s.Body.End() } +func (s *ForStmt) End() token.Pos { return s.Body.End() } +func (s *RangeStmt) End() token.Pos { return s.Body.End() } // stmtNode() ensures that only statement nodes can be @@ -618,9 +794,10 @@ type ( // A ValueSpec node represents a constant or variable declaration // (ConstSpec or VarSpec production). + // ValueSpec struct { Doc *CommentGroup // associated documentation; or nil - Names []*Ident // value names + Names []*Ident // value names (len(Names) > 0) Type Expr // value type; or nil Values []Expr // initial values; or nil Comment *CommentGroup // line comments; or nil @@ -630,23 +807,35 @@ type ( TypeSpec struct { Doc *CommentGroup // associated documentation; or nil Name *Ident // type name - Type Expr // *ArrayType, *StructType, *FuncType, *InterfaceType, *MapType, *ChanType or *Ident + Type Expr // *Ident, *ParenExpr, *SelectorExpr, *StarExpr, or any of the *XxxTypes Comment *CommentGroup // line comments; or nil } ) -// Pos() implementations for spec nodes. +// Pos and End implementations for spec nodes. // -func (s *ImportSpec) Pos() token.Position { +func (s *ImportSpec) Pos() token.Pos { if s.Name != nil { return s.Name.Pos() } return s.Path.Pos() } +func (s *ValueSpec) Pos() token.Pos { return s.Names[0].Pos() } +func (s *TypeSpec) Pos() token.Pos { return s.Name.Pos() } + -func (s *ValueSpec) Pos() token.Position { return s.Names[0].Pos() } -func (s *TypeSpec) Pos() token.Position { return s.Name.Pos() } +func (s *ImportSpec) End() token.Pos { return s.Path.End() } +func (s *ValueSpec) End() token.Pos { + if n := len(s.Values); n > 0 { + return s.Values[n-1].End() + } + if s.Type != nil { + return s.Type.End() + } + return s.Names[len(s.Names)-1].End() +} +func (s *TypeSpec) End() token.Pos { return s.Type.End() } // specNode() ensures that only spec nodes can be @@ -665,7 +854,7 @@ type ( // created. // BadDecl struct { - token.Position // beginning position of bad declaration + From, To token.Pos // position range of bad declaration } // A GenDecl node (generic declaration node) represents an import, @@ -680,12 +869,12 @@ type ( // token.VAR *ValueSpec // GenDecl struct { - Doc *CommentGroup // associated documentation; or nil - token.Position // position of Tok - Tok token.Token // IMPORT, CONST, TYPE, VAR - Lparen token.Position // position of '(', if any - Specs []Spec - Rparen token.Position // position of ')', if any + Doc *CommentGroup // associated documentation; or nil + TokPos token.Pos // position of Tok + Tok token.Token // IMPORT, CONST, TYPE, VAR + Lparen token.Pos // position of '(', if any + Specs []Spec + Rparen token.Pos // position of ')', if any } // A FuncDecl node represents a function declaration. @@ -699,8 +888,26 @@ type ( ) -// The position of a FuncDecl node is the position of its function type. -func (d *FuncDecl) Pos() token.Position { return d.Type.Pos() } +// Pos and End implementations for declaration nodes. 
+// +func (d *BadDecl) Pos() token.Pos { return d.From } +func (d *GenDecl) Pos() token.Pos { return d.TokPos } +func (d *FuncDecl) Pos() token.Pos { return d.Type.Pos() } + + +func (d *BadDecl) End() token.Pos { return d.To } +func (d *GenDecl) End() token.Pos { + if d.Rparen.IsValid() { + return d.Rparen + 1 + } + return d.Specs[0].End() +} +func (d *FuncDecl) End() token.Pos { + if d.Body != nil { + return d.Body.End() + } + return d.Type.End() +} // declNode() ensures that only declaration nodes can be @@ -721,11 +928,20 @@ func (d *FuncDecl) declNode() {} // via Doc and Comment fields. // type File struct { - Doc *CommentGroup // associated documentation; or nil - token.Position // position of "package" keyword - Name *Ident // package name - Decls []Decl // top-level declarations - Comments []*CommentGroup // list of all comments in the source file + Doc *CommentGroup // associated documentation; or nil + Package token.Pos // position of "package" keyword + Name *Ident // package name + Decls []Decl // top-level declarations; or nil + Comments []*CommentGroup // list of all comments in the source file +} + + +func (f *File) Pos() token.Pos { return f.Package } +func (f *File) End() token.Pos { + if n := len(f.Decls); n > 0 { + return f.Decls[n-1].End() + } + return f.Name.End() } @@ -734,6 +950,10 @@ type File struct { // type Package struct { Name string // package name - Scope *Scope // package scope + Scope *Scope // package scope; or nil Files map[string]*File // Go source files by filename } + + +func (p *Package) Pos() token.Pos { return token.NoPos } +func (p *Package) End() token.Pos { return token.NoPos } diff --git a/src/pkg/go/ast/filter.go b/src/pkg/go/ast/filter.go index 009ffc21d..0c3cef4b2 100644 --- a/src/pkg/go/ast/filter.go +++ b/src/pkg/go/ast/filter.go @@ -197,7 +197,7 @@ type Filter func(string) bool func filterIdentList(list []*Ident, f Filter) []*Ident { j := 0 for _, x := range list { - if f(x.Name()) { + if f(x.Name) { list[j] = x j++ } @@ -212,7 +212,7 @@ func filterSpec(spec Spec, f Filter) bool { s.Names = filterIdentList(s.Names, f) return len(s.Names) > 0 case *TypeSpec: - return f(s.Name.Name()) + return f(s.Name.Name) } return false } @@ -236,7 +236,7 @@ func filterDecl(decl Decl, f Filter) bool { d.Specs = filterSpecList(d.Specs, f) return len(d.Specs) > 0 case *FuncDecl: - return f(d.Name.Name()) + return f(d.Name.Name) } return false } @@ -307,27 +307,6 @@ const ( var separator = &Comment{noPos, []byte("//")} -// lineAfterComment computes the position of the beginning -// of the line immediately following a comment. -func lineAfterComment(c *Comment) token.Position { - pos := c.Pos() - line := pos.Line - text := c.Text - if text[1] == '*' { - /*-style comment - determine endline */ - for _, ch := range text { - if ch == '\n' { - line++ - } - } - } - pos.Offset += len(text) + 1 // +1 for newline - pos.Line = line + 1 // line after comment - pos.Column = 1 // beginning of line - return pos -} - - // MergePackageFiles creates a file AST by merging the ASTs of the // files belonging to a package. The mode flags control merging behavior. // @@ -351,7 +330,7 @@ func MergePackageFiles(pkg *Package, mode MergeMode) *File { // a package comment; but it's better to collect extra comments // than drop them on the floor. 
var doc *CommentGroup - var pos token.Position + var pos token.Pos if ndocs > 0 { list := make([]*Comment, ndocs-1) // -1: no separator before first group i := 0 @@ -366,11 +345,11 @@ func MergePackageFiles(pkg *Package, mode MergeMode) *File { list[i] = c i++ } - end := lineAfterComment(f.Doc.List[len(f.Doc.List)-1]) - if end.Offset > pos.Offset { - // Keep the maximum end position as - // position for the package clause. - pos = end + if f.Package > pos { + // Keep the maximum package clause position as + // position for the package clause of the merged + // files. + pos = f.Package } } } @@ -397,7 +376,7 @@ func MergePackageFiles(pkg *Package, mode MergeMode) *File { // entities (const, type, vars) if // multiple declarations are common. if f, isFun := d.(*FuncDecl); isFun { - name := f.Name.Name() + name := f.Name.Name if j, exists := funcs[name]; exists { // function declared already if decls[j] != nil && decls[j].(*FuncDecl).Doc == nil { diff --git a/src/pkg/go/ast/print.go b/src/pkg/go/ast/print.go new file mode 100644 index 000000000..d71490d4a --- /dev/null +++ b/src/pkg/go/ast/print.go @@ -0,0 +1,217 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains printing suppport for ASTs. + +package ast + +import ( + "fmt" + "go/token" + "io" + "os" + "reflect" +) + + +// A FieldFilter may be provided to Fprint to control the output. +type FieldFilter func(name string, value reflect.Value) bool + + +// NotNilFilter returns true for field values that are not nil; +// it returns false otherwise. +func NotNilFilter(_ string, value reflect.Value) bool { + v, ok := value.(interface { + IsNil() bool + }) + return !ok || !v.IsNil() +} + + +// Fprint prints the (sub-)tree starting at AST node x to w. +// +// A non-nil FieldFilter f may be provided to control the output: +// struct fields for which f(fieldname, fieldvalue) is true are +// are printed; all others are filtered from the output. +// +func Fprint(w io.Writer, x interface{}, f FieldFilter) (n int, err os.Error) { + // setup printer + p := printer{ + output: w, + filter: f, + ptrmap: make(map[interface{}]int), + last: '\n', // force printing of line number on first line + } + + // install error handler + defer func() { + n = p.written + if e := recover(); e != nil { + err = e.(localError).err // re-panics if it's not a localError + } + }() + + // print x + if x == nil { + p.printf("nil\n") + return + } + p.print(reflect.NewValue(x)) + p.printf("\n") + + return +} + + +// Print prints x to standard output, skipping nil fields. +// Print(x) is the same as Fprint(os.Stdout, x, NotNilFilter). +func Print(x interface{}) (int, os.Error) { + return Fprint(os.Stdout, x, NotNilFilter) +} + + +type printer struct { + output io.Writer + filter FieldFilter + ptrmap map[interface{}]int // *reflect.PtrValue -> line number + written int // number of bytes written to output + indent int // current indentation level + last byte // the last byte processed by Write + line int // current line number +} + + +var indent = []byte(". 
") + +func (p *printer) Write(data []byte) (n int, err os.Error) { + var m int + for i, b := range data { + // invariant: data[0:n] has been written + if b == '\n' { + m, err = p.output.Write(data[n : i+1]) + n += m + if err != nil { + return + } + p.line++ + } else if p.last == '\n' { + _, err = fmt.Fprintf(p.output, "%6d ", p.line) + if err != nil { + return + } + for j := p.indent; j > 0; j-- { + _, err = p.output.Write(indent) + if err != nil { + return + } + } + } + p.last = b + } + m, err = p.output.Write(data[n:]) + n += m + return +} + + +// localError wraps locally caught os.Errors so we can distinguish +// them from genuine panics which we don't want to return as errors. +type localError struct { + err os.Error +} + + +// printf is a convenience wrapper that takes care of print errors. +func (p *printer) printf(format string, args ...interface{}) { + n, err := fmt.Fprintf(p, format, args...) + p.written += n + if err != nil { + panic(localError{err}) + } +} + + +// Implementation note: Print is written for AST nodes but could be +// used to print arbitrary data structures; such a version should +// probably be in a different package. + +func (p *printer) print(x reflect.Value) { + // Note: This test is only needed because AST nodes + // embed a token.Position, and thus all of them + // understand the String() method (but it only + // applies to the Position field). + // TODO: Should reconsider this AST design decision. + if pos, ok := x.Interface().(token.Position); ok { + p.printf("%s", pos) + return + } + + if !NotNilFilter("", x) { + p.printf("nil") + return + } + + switch v := x.(type) { + case *reflect.InterfaceValue: + p.print(v.Elem()) + + case *reflect.MapValue: + p.printf("%s (len = %d) {\n", x.Type().String(), v.Len()) + p.indent++ + for _, key := range v.Keys() { + p.print(key) + p.printf(": ") + p.print(v.Elem(key)) + } + p.indent-- + p.printf("}") + + case *reflect.PtrValue: + p.printf("*") + // type-checked ASTs may contain cycles - use ptrmap + // to keep track of objects that have been printed + // already and print the respective line number instead + ptr := v.Interface() + if line, exists := p.ptrmap[ptr]; exists { + p.printf("(obj @ %d)", line) + } else { + p.ptrmap[ptr] = p.line + p.print(v.Elem()) + } + + case *reflect.SliceValue: + if s, ok := v.Interface().([]byte); ok { + p.printf("%#q", s) + return + } + p.printf("%s (len = %d) {\n", x.Type().String(), v.Len()) + p.indent++ + for i, n := 0, v.Len(); i < n; i++ { + p.printf("%d: ", i) + p.print(v.Elem(i)) + p.printf("\n") + } + p.indent-- + p.printf("}") + + case *reflect.StructValue: + p.printf("%s {\n", x.Type().String()) + p.indent++ + t := v.Type().(*reflect.StructType) + for i, n := 0, t.NumField(); i < n; i++ { + name := t.Field(i).Name + value := v.Field(i) + if p.filter == nil || p.filter(name, value) { + p.printf("%s: ", name) + p.print(value) + p.printf("\n") + } + } + p.indent-- + p.printf("}") + + default: + p.printf("%v", x.Interface()) + } +} diff --git a/src/pkg/go/ast/scope.go b/src/pkg/go/ast/scope.go index b5a38484e..956a208ae 100644 --- a/src/pkg/go/ast/scope.go +++ b/src/pkg/go/ast/scope.go @@ -2,25 +2,110 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// This file implements scopes, the objects they contain, +// and object types. + package ast -import "go/token" +// A Scope maintains the set of named language entities declared +// in the scope and a link to the immediately surrounding (outer) +// scope. 
+// +type Scope struct { + Outer *Scope + Objects []*Object // in declaration order + // Implementation note: In some cases (struct fields, + // function parameters) we need the source order of + // variables. Thus for now, we store scope entries + // in a linear list. If scopes become very large + // (say, for packages), we may need to change this + // to avoid slow lookups. +} + + +// NewScope creates a new scope nested in the outer scope. +func NewScope(outer *Scope) *Scope { + const n = 4 // initial scope capacity, must be > 0 + return &Scope{outer, make([]*Object, 0, n)} +} + + +// Lookup returns the object with the given name if it is +// found in scope s, otherwise it returns nil. Outer scopes +// are ignored. +// +// Lookup always returns nil if name is "_", even if the scope +// contains objects with that name. +// +func (s *Scope) Lookup(name string) *Object { + if name != "_" { + for _, obj := range s.Objects { + if obj.Name == name { + return obj + } + } + } + return nil +} + + +// Insert attempts to insert a named object into the scope s. +// If the scope does not contain an object with that name yet +// or if the object is named "_", Insert inserts the object +// and returns it. Otherwise, Insert leaves the scope unchanged +// and returns the object found in the scope instead. +// +func (s *Scope) Insert(obj *Object) *Object { + alt := s.Lookup(obj.Name) + if alt == nil { + s.append(obj) + alt = obj + } + return alt +} + + +func (s *Scope) append(obj *Object) { + s.Objects = append(s.Objects, obj) +} + +// ---------------------------------------------------------------------------- +// Objects + +// An Object describes a language entity such as a package, +// constant, type, variable, or function (incl. methods). +// +type Object struct { + Kind Kind + Name string // declared name + Type *Type + Decl interface{} // corresponding Field, XxxSpec or FuncDecl + N int // value of iota for this declaration +} + + +// NewObj creates a new object of a given kind and name. +func NewObj(kind Kind, name string) *Object { + return &Object{Kind: kind, Name: name} +} + -type ObjKind int +// Kind describes what an object represents. +type Kind int // The list of possible Object kinds. const ( - Err ObjKind = iota // object kind unknown (forward reference or error) - Pkg // package - Con // constant - Typ // type - Var // variable - Fun // function or method + Bad Kind = iota // for error handling + Pkg // package + Con // constant + Typ // type + Var // variable + Fun // function or method ) var objKindStrings = [...]string{ - Err: "<unknown object kind>", + Bad: "bad", Pkg: "package", Con: "const", Typ: "type", @@ -29,65 +114,129 @@ var objKindStrings = [...]string{ } -func (kind ObjKind) String() string { return objKindStrings[kind] } +func (kind Kind) String() string { return objKindStrings[kind] } -// An Object describes a language entity such as a package, -// constant, type, variable, or function (incl. methods). -// -type Object struct { - Kind ObjKind - Pos token.Position // declaration position - Name string // declared name +// IsExported returns whether obj is exported. +func (obj *Object) IsExported() bool { return IsExported(obj.Name) } + + +// ---------------------------------------------------------------------------- +// Types + +// A Type represents a Go type. 
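The new print.go introduced above gives go/ast a reflection-based tree dumper. A minimal use, sketched against today's signature (in this patch Print takes only the node and returns (int, os.Error); the leading *token.FileSet argument was added later):

    package main

    import (
        "go/ast"
        "go/token"
    )

    func main() {
        // Hand-built tree for "a + b"; positions are left as token.NoPos.
        e := &ast.BinaryExpr{
            X:  ast.NewIdent("a"),
            Op: token.ADD,
            Y:  ast.NewIdent("b"),
        }
        ast.Print(nil, e) // nil FileSet: positions print as raw offsets
    }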
+type Type struct { + Form Form + Obj *Object // corresponding type name, or nil + Scope *Scope // fields and methods, always present + N uint // basic type id, array length, number of function results, or channel direction + Key, Elt *Type // map key and array, pointer, slice, map or channel element + Params *Scope // function (receiver, input and result) parameters, tuple expressions (results of function calls), or nil + Expr Expr // corresponding AST expression } -func NewObj(kind ObjKind, pos token.Position, name string) *Object { - return &Object{kind, pos, name} +// NewType creates a new type of a given form. +func NewType(form Form) *Type { + return &Type{Form: form, Scope: NewScope(nil)} } -// IsExported returns whether obj is exported. -func (obj *Object) IsExported() bool { return IsExported(obj.Name) } +// Form describes the form of a type. +type Form int +// The list of possible type forms. +const ( + BadType Form = iota // for error handling + Unresolved // type not fully setup + Basic + Array + Struct + Pointer + Function + Method + Interface + Slice + Map + Channel + Tuple +) -// A Scope maintains the set of named language entities visible -// in the scope and a link to the immediately surrounding (outer) -// scope. -// -type Scope struct { - Outer *Scope - Objects map[string]*Object + +var formStrings = [...]string{ + BadType: "badType", + Unresolved: "unresolved", + Basic: "basic", + Array: "array", + Struct: "struct", + Pointer: "pointer", + Function: "function", + Method: "method", + Interface: "interface", + Slice: "slice", + Map: "map", + Channel: "channel", + Tuple: "tuple", } -// NewScope creates a new scope nested in the outer scope. -func NewScope(outer *Scope) *Scope { return &Scope{outer, make(map[string]*Object)} } - - -// Declare attempts to insert a named object into the scope s. -// If the scope does not contain an object with that name yet, -// Declare inserts the object, and returns it. Otherwise, the -// scope remains unchanged and Declare returns the object found -// in the scope instead. -func (s *Scope) Declare(obj *Object) *Object { - decl, found := s.Objects[obj.Name] - if !found { - s.Objects[obj.Name] = obj - decl = obj - } - return decl -} +func (form Form) String() string { return formStrings[form] } -// Lookup looks up an object in the current scope chain. -// The result is nil if the object is not found. -// -func (s *Scope) Lookup(name string) *Object { - for ; s != nil; s = s.Outer { - if obj, found := s.Objects[name]; found { - return obj - } - } - return nil +// The list of basic type id's. 
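The scope rewrite above replaces the map-backed scope with a slice kept in declaration order, makes Lookup non-recursive (outer scopes are deliberately ignored), and special-cases the blank identifier. A sketch against the Insert/Lookup semantics defined in this patch (they differ in detail from today's go/ast, where Insert returns nil on success):

    s := ast.NewScope(nil)
    obj := ast.NewObj(ast.Var, "x") // NewObj now takes kind and name only
    if s.Insert(obj) == obj {
        // first "x" in s: inserted and returned
    }
    dup := ast.NewObj(ast.Var, "x")
    _ = s.Insert(dup) // scope unchanged; returns the earlier obj
    _ = s.Lookup("_") // always nil per this patch, even after inserting "_"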
+const ( + Bool = iota + Byte + Uint + Int + Float + Complex + Uintptr + String + + Uint8 + Uint16 + Uint32 + Uint64 + + Int8 + Int16 + Int32 + Int64 + + Float32 + Float64 + + Complex64 + Complex128 + + // TODO(gri) ideal types are missing +) + + +var BasicTypes = map[uint]string{ + Bool: "bool", + Byte: "byte", + Uint: "uint", + Int: "int", + Float: "float", + Complex: "complex", + Uintptr: "uintptr", + String: "string", + + Uint8: "uint8", + Uint16: "uint16", + Uint32: "uint32", + Uint64: "uint64", + + Int8: "int8", + Int16: "int16", + Int32: "int32", + Int64: "int64", + + Float32: "float32", + Float64: "float64", + + Complex64: "complex64", + Complex128: "complex128", } diff --git a/src/pkg/go/ast/walk.go b/src/pkg/go/ast/walk.go index 6c9837a01..875a92f3f 100644 --- a/src/pkg/go/ast/walk.go +++ b/src/pkg/go/ast/walk.go @@ -10,51 +10,57 @@ import "fmt" // If the result visitor w is not nil, Walk visits each of the children // of node with the visitor w, followed by a call of w.Visit(nil). type Visitor interface { - Visit(node interface{}) (w Visitor) + Visit(node Node) (w Visitor) } -func walkIdent(v Visitor, x *Ident) { - if x != nil { +// Helper functions for common node lists. They may be empty. + +func walkIdentList(v Visitor, list []*Ident) { + for _, x := range list { Walk(v, x) } } -func walkCommentGroup(v Visitor, g *CommentGroup) { - if g != nil { - Walk(v, g) +func walkExprList(v Visitor, list []Expr) { + for _, x := range list { + Walk(v, x) } } -func walkBlockStmt(v Visitor, b *BlockStmt) { - if b != nil { - Walk(v, b) +func walkStmtList(v Visitor, list []Stmt) { + for _, x := range list { + Walk(v, x) } } -// Walk traverses an AST in depth-first order: If node != nil, it -// invokes v.Visit(node). If the visitor w returned by v.Visit(node) is -// not nil, Walk visits each of the children of node with the visitor w, -// followed by a call of w.Visit(nil). -// -// Walk may be called with any of the named ast node types. It also -// accepts arguments of type []*Field, []*Ident, []Expr, []Stmt and []Decl; -// the respective children are the slice elements. -// -func Walk(v Visitor, node interface{}) { - if node == nil { - return +func walkDeclList(v Visitor, list []Decl) { + for _, x := range list { + Walk(v, x) } +} + + +// TODO(gri): Investigate if providing a closure to Walk leads to +// simpler use (and may help eliminate Inspect in turn). + +// Walk traverses an AST in depth-first order: It starts by calling +// v.Visit(node); node must not be nil. If the visitor w returned by +// v.Visit(node) is not nil, Walk is invoked recursively with visitor +// w for each of the non-nil children of node, followed by a call of +// w.Visit(nil). 
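Walk's signature tightens from interface{} to the new Node interface, and the old slice cases ([]*Ident, []Expr, []Stmt, []Decl) move into the unexported list helpers above. A small Visitor under the new contract (an assumed example, not from the patch):

    // identCounter counts identifiers. Returning a non-nil Visitor from
    // Visit tells Walk to keep descending; Walk finishes each subtree
    // with a Visit(nil) call, which the type assertion below tolerates.
    type identCounter int

    func (c *identCounter) Visit(n ast.Node) ast.Visitor {
        if _, ok := n.(*ast.Ident); ok {
            *c++
        }
        return c
    }

    // usage: var c identCounter; ast.Walk(&c, file); fmt.Println(int(c))

The Inspect helper added at the end of this file wraps the same traversal in a closure, e.g. ast.Inspect(file, func(n ast.Node) bool { return true }).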
+// +func Walk(v Visitor, node Node) { if v = v.Visit(node); v == nil { return } // walk children // (the order of the cases matches the order - // of the corresponding declaration in ast.go) + // of the corresponding node types in ast.go) switch n := node.(type) { // Comments and fields case *Comment: @@ -66,11 +72,17 @@ func Walk(v Visitor, node interface{}) { } case *Field: - walkCommentGroup(v, n.Doc) - Walk(v, n.Names) + if n.Doc != nil { + Walk(v, n.Doc) + } + walkIdentList(v, n.Names) Walk(v, n.Type) - Walk(v, n.Tag) - walkCommentGroup(v, n.Comment) + if n.Tag != nil { + Walk(v, n.Tag) + } + if n.Comment != nil { + Walk(v, n.Comment) + } case *FieldList: for _, f := range n.List { @@ -78,25 +90,30 @@ func Walk(v Visitor, node interface{}) { } // Expressions - case *BadExpr, *Ident, *Ellipsis, *BasicLit: + case *BadExpr, *Ident, *BasicLit: // nothing to do - case *FuncLit: - if n != nil { - Walk(v, n.Type) + case *Ellipsis: + if n.Elt != nil { + Walk(v, n.Elt) } - walkBlockStmt(v, n.Body) - case *CompositeLit: + case *FuncLit: Walk(v, n.Type) - Walk(v, n.Elts) + Walk(v, n.Body) + + case *CompositeLit: + if n.Type != nil { + Walk(v, n.Type) + } + walkExprList(v, n.Elts) case *ParenExpr: Walk(v, n.X) case *SelectorExpr: Walk(v, n.X) - walkIdent(v, n.Sel) + Walk(v, n.Sel) case *IndexExpr: Walk(v, n.X) @@ -104,16 +121,22 @@ func Walk(v Visitor, node interface{}) { case *SliceExpr: Walk(v, n.X) - Walk(v, n.Index) - Walk(v, n.End) + if n.Low != nil { + Walk(v, n.Low) + } + if n.High != nil { + Walk(v, n.High) + } case *TypeAssertExpr: Walk(v, n.X) - Walk(v, n.Type) + if n.Type != nil { + Walk(v, n.Type) + } case *CallExpr: Walk(v, n.Fun) - Walk(v, n.Args) + walkExprList(v, n.Args) case *StarExpr: Walk(v, n.X) @@ -131,7 +154,9 @@ func Walk(v Visitor, node interface{}) { // Types case *ArrayType: - Walk(v, n.Len) + if n.Len != nil { + Walk(v, n.Len) + } Walk(v, n.Elt) case *StructType: @@ -164,7 +189,7 @@ func Walk(v Visitor, node interface{}) { // nothing to do case *LabeledStmt: - walkIdent(v, n.Label) + Walk(v, n.Label) Walk(v, n.Stmt) case *ExprStmt: @@ -174,150 +199,198 @@ func Walk(v Visitor, node interface{}) { Walk(v, n.X) case *AssignStmt: - Walk(v, n.Lhs) - Walk(v, n.Rhs) + walkExprList(v, n.Lhs) + walkExprList(v, n.Rhs) case *GoStmt: - if n.Call != nil { - Walk(v, n.Call) - } + Walk(v, n.Call) case *DeferStmt: - if n.Call != nil { - Walk(v, n.Call) - } + Walk(v, n.Call) case *ReturnStmt: - Walk(v, n.Results) + walkExprList(v, n.Results) case *BranchStmt: - walkIdent(v, n.Label) + if n.Label != nil { + Walk(v, n.Label) + } case *BlockStmt: - Walk(v, n.List) + walkStmtList(v, n.List) case *IfStmt: - Walk(v, n.Init) - Walk(v, n.Cond) - walkBlockStmt(v, n.Body) - Walk(v, n.Else) + if n.Init != nil { + Walk(v, n.Init) + } + if n.Cond != nil { + Walk(v, n.Cond) + } + Walk(v, n.Body) + if n.Else != nil { + Walk(v, n.Else) + } case *CaseClause: - Walk(v, n.Values) - Walk(v, n.Body) + walkExprList(v, n.Values) + walkStmtList(v, n.Body) case *SwitchStmt: - Walk(v, n.Init) - Walk(v, n.Tag) - walkBlockStmt(v, n.Body) + if n.Init != nil { + Walk(v, n.Init) + } + if n.Tag != nil { + Walk(v, n.Tag) + } + Walk(v, n.Body) case *TypeCaseClause: - Walk(v, n.Types) - Walk(v, n.Body) + for _, x := range n.Types { + Walk(v, x) + } + walkStmtList(v, n.Body) case *TypeSwitchStmt: - Walk(v, n.Init) + if n.Init != nil { + Walk(v, n.Init) + } Walk(v, n.Assign) - walkBlockStmt(v, n.Body) + Walk(v, n.Body) case *CommClause: - Walk(v, n.Lhs) - Walk(v, n.Rhs) - Walk(v, n.Body) + if n.Lhs != nil { + Walk(v, 
n.Lhs) + } + if n.Rhs != nil { + Walk(v, n.Rhs) + } + walkStmtList(v, n.Body) case *SelectStmt: - walkBlockStmt(v, n.Body) + Walk(v, n.Body) case *ForStmt: - Walk(v, n.Init) - Walk(v, n.Cond) - Walk(v, n.Post) - walkBlockStmt(v, n.Body) + if n.Init != nil { + Walk(v, n.Init) + } + if n.Cond != nil { + Walk(v, n.Cond) + } + if n.Post != nil { + Walk(v, n.Post) + } + Walk(v, n.Body) case *RangeStmt: Walk(v, n.Key) - Walk(v, n.Value) + if n.Value != nil { + Walk(v, n.Value) + } Walk(v, n.X) - walkBlockStmt(v, n.Body) + Walk(v, n.Body) // Declarations case *ImportSpec: - walkCommentGroup(v, n.Doc) - walkIdent(v, n.Name) + if n.Doc != nil { + Walk(v, n.Doc) + } + if n.Name != nil { + Walk(v, n.Name) + } Walk(v, n.Path) - walkCommentGroup(v, n.Comment) + if n.Comment != nil { + Walk(v, n.Comment) + } case *ValueSpec: - walkCommentGroup(v, n.Doc) - Walk(v, n.Names) - Walk(v, n.Type) - Walk(v, n.Values) - walkCommentGroup(v, n.Comment) + if n.Doc != nil { + Walk(v, n.Doc) + } + walkIdentList(v, n.Names) + if n.Type != nil { + Walk(v, n.Type) + } + walkExprList(v, n.Values) + if n.Comment != nil { + Walk(v, n.Comment) + } case *TypeSpec: - walkCommentGroup(v, n.Doc) - walkIdent(v, n.Name) + if n.Doc != nil { + Walk(v, n.Doc) + } + Walk(v, n.Name) Walk(v, n.Type) - walkCommentGroup(v, n.Comment) + if n.Comment != nil { + Walk(v, n.Comment) + } case *BadDecl: // nothing to do case *GenDecl: - walkCommentGroup(v, n.Doc) + if n.Doc != nil { + Walk(v, n.Doc) + } for _, s := range n.Specs { Walk(v, s) } case *FuncDecl: - walkCommentGroup(v, n.Doc) + if n.Doc != nil { + Walk(v, n.Doc) + } if n.Recv != nil { Walk(v, n.Recv) } - walkIdent(v, n.Name) - if n.Type != nil { - Walk(v, n.Type) + Walk(v, n.Name) + Walk(v, n.Type) + if n.Body != nil { + Walk(v, n.Body) } - walkBlockStmt(v, n.Body) // Files and packages case *File: - walkCommentGroup(v, n.Doc) - walkIdent(v, n.Name) - Walk(v, n.Decls) + if n.Doc != nil { + Walk(v, n.Doc) + } + Walk(v, n.Name) + walkDeclList(v, n.Decls) for _, g := range n.Comments { Walk(v, g) } + // don't walk n.Comments - they have been + // visited already through the individual + // nodes case *Package: for _, f := range n.Files { Walk(v, f) } - case []*Ident: - for _, x := range n { - Walk(v, x) - } + default: + fmt.Printf("ast.Walk: unexpected node type %T", n) + panic("ast.Walk") + } - case []Expr: - for _, x := range n { - Walk(v, x) - } + v.Visit(nil) +} - case []Stmt: - for _, x := range n { - Walk(v, x) - } - case []Decl: - for _, x := range n { - Walk(v, x) - } +type inspector func(Node) bool - default: - fmt.Printf("ast.Walk: unexpected type %T", n) - panic("ast.Walk") +func (f inspector) Visit(node Node) Visitor { + if f(node) { + return f } + return nil +} - v.Visit(nil) + +// Inspect traverses an AST in depth-first order: It starts by calling +// f(node); node must not be nil. If f returns true, Inspect invokes f +// for all the non-nil children of node, recursively. +// +func Inspect(node Node, f func(Node) bool) { + Walk(inspector(f), node) } diff --git a/src/pkg/go/doc/Makefile b/src/pkg/go/doc/Makefile index 1558ac30a..a5152c793 100644 --- a/src/pkg/go/doc/Makefile +++ b/src/pkg/go/doc/Makefile @@ -2,7 +2,7 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. 
-include ../../../Make.$(GOARCH) +include ../../../Make.inc TARG=go/doc GOFILES=\ diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go index bbbc6a3c2..9ff0bd536 100644 --- a/src/pkg/go/doc/comment.go +++ b/src/pkg/go/doc/comment.go @@ -8,7 +8,6 @@ package doc import ( "go/ast" - "http" // for URLEscape "io" "regexp" "strings" @@ -63,16 +62,7 @@ func CommentText(comment *ast.CommentGroup) string { // Walk lines, stripping trailing white space and adding to list. for _, l := range cl { - l = stripTrailingWhitespace(l) - // Add to list. - n := len(lines) - if n+1 >= cap(lines) { - newlines := make([]string, n, 2*cap(lines)) - copy(newlines, lines) - lines = newlines - } - lines = lines[0 : n+1] - lines[n] = l + lines = append(lines, stripTrailingWhitespace(l)) } } @@ -88,10 +78,8 @@ func CommentText(comment *ast.CommentGroup) string { lines = lines[0:n] // Add final "" entry to get trailing newline from Join. - // The original loop always leaves room for one more. if n > 0 && lines[n-1] != "" { - lines = lines[0 : n+1] - lines[n] = "" + lines = append(lines, "") } return strings.Join(lines, "\n") @@ -195,12 +183,12 @@ var ( // into a link). Go identifiers that appear in the words map are italicized; if // the corresponding map value is not the empty string, it is considered a URL // and the word is converted into a link. If nice is set, the remaining text's -// appearance is improved where is makes sense (e.g., `` is turned into “ +// appearance is improved where it makes sense (e.g., `` is turned into “ // and '' into ”). func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) { for { - m := matchRx.Execute(line) - if len(m) == 0 { + m := matchRx.FindSubmatchIndex(line) + if m == nil { break } // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx) @@ -224,11 +212,10 @@ func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) { italics = false // don't italicize URLs } - // write match if len(url) > 0 { w.Write(html_a) - w.Write([]byte(http.URLEscape(url))) + template.HTMLEscape(w, []byte(url)) w.Write(html_aq) } if italics { diff --git a/src/pkg/go/doc/doc.go b/src/pkg/go/doc/doc.go index b73fd285c..fb1c4e03d 100644 --- a/src/pkg/go/doc/doc.go +++ b/src/pkg/go/doc/doc.go @@ -6,7 +6,6 @@ package doc import ( - "container/vector" "go/ast" "go/token" "regexp" @@ -21,7 +20,7 @@ type typeDoc struct { // if the type declaration hasn't been seen yet, decl is nil decl *ast.GenDecl // values, factory functions, and methods associated with the type - values *vector.Vector // list of *ast.GenDecl (consts and vars) + values []*ast.GenDecl // consts and vars factories map[string]*ast.FuncDecl methods map[string]*ast.FuncDecl } @@ -37,19 +36,17 @@ type typeDoc struct { type docReader struct { doc *ast.CommentGroup // package documentation, if any pkgName string - values *vector.Vector // list of *ast.GenDecl (consts and vars) + values []*ast.GenDecl // consts and vars types map[string]*typeDoc funcs map[string]*ast.FuncDecl - bugs *vector.Vector // list of *ast.CommentGroup + bugs []*ast.CommentGroup } func (doc *docReader) init(pkgName string) { doc.pkgName = pkgName - doc.values = new(vector.Vector) doc.types = make(map[string]*typeDoc) doc.funcs = make(map[string]*ast.FuncDecl) - doc.bugs = new(vector.Vector) } @@ -69,7 +66,7 @@ func (doc *docReader) addDoc(comments *ast.CommentGroup) { n2 := len(comments.List) list := make([]*ast.Comment, n1+1+n2) // + 1 for separator line copy(list, doc.doc.List) - list[n1] = 
&ast.Comment{token.Position{}, []byte("//")} // separator line + list[n1] = &ast.Comment{token.NoPos, []byte("//")} // separator line copy(list[n1+1:], comments.List) doc.doc = &ast.CommentGroup{list} } @@ -77,7 +74,7 @@ func (doc *docReader) addDoc(comments *ast.CommentGroup) { func (doc *docReader) addType(decl *ast.GenDecl) { spec := decl.Specs[0].(*ast.TypeSpec) - typ := doc.lookupTypeDoc(spec.Name.Name()) + typ := doc.lookupTypeDoc(spec.Name.Name) // typ should always be != nil since declared types // are always named - be conservative and check if typ != nil { @@ -96,7 +93,7 @@ func (doc *docReader) lookupTypeDoc(name string) *typeDoc { return tdoc } // type wasn't found - add one without declaration - tdoc := &typeDoc{nil, new(vector.Vector), make(map[string]*ast.FuncDecl), make(map[string]*ast.FuncDecl)} + tdoc := &typeDoc{nil, nil, make(map[string]*ast.FuncDecl), make(map[string]*ast.FuncDecl)} doc.types[name] = tdoc return tdoc } @@ -108,7 +105,7 @@ func baseTypeName(typ ast.Expr) string { // if the type is not exported, the effect to // a client is as if there were no type name if t.IsExported() { - return string(t.Name()) + return string(t.Name) } case *ast.StarExpr: return baseTypeName(t.X) @@ -130,7 +127,7 @@ func (doc *docReader) addValue(decl *ast.GenDecl) { name := "" switch { case v.Type != nil: - // a type is present; determine it's name + // a type is present; determine its name name = baseTypeName(v.Type) case decl.Tok == token.CONST: // no type is present but we have a constant declaration; @@ -156,16 +153,16 @@ func (doc *docReader) addValue(decl *ast.GenDecl) { // determine values list const threshold = 0.75 - values := doc.values + values := &doc.values if domName != "" && domFreq >= int(float(len(decl.Specs))*threshold) { // typed entries are sufficiently frequent typ := doc.lookupTypeDoc(domName) if typ != nil { - values = typ.values // associate with that type + values = &typ.values // associate with that type } } - values.Push(decl) + *values = append(*values, decl) } @@ -173,7 +170,7 @@ func (doc *docReader) addValue(decl *ast.GenDecl) { // at least one f with associated documentation is stored in table, if there // are multiple f's with the same name. func setFunc(table map[string]*ast.FuncDecl, f *ast.FuncDecl) { - name := f.Name.Name() + name := f.Name.Name if g, exists := table[name]; exists && g.Doc != nil { // a function with the same name has already been registered; // since it has documentation, assume f is simply another @@ -188,7 +185,7 @@ func setFunc(table map[string]*ast.FuncDecl, f *ast.FuncDecl) { func (doc *docReader) addFunc(fun *ast.FuncDecl) { - name := fun.Name.Name() + name := fun.Name.Name // determine if it should be associated with a type if fun.Recv != nil { @@ -252,7 +249,6 @@ func (doc *docReader) addDecl(decl ast.Decl) { doc.addValue(d) case token.TYPE: // types are handled individually - var noPos token.Position for _, spec := range d.Specs { // make a (fake) GenDecl node for this TypeSpec // (we need to do this here - as opposed to just @@ -265,7 +261,7 @@ func (doc *docReader) addDecl(decl ast.Decl) { // makeTypeDocs below). Simpler data structures, but // would lose GenDecl documentation if the TypeSpec // has documentation as well. - doc.addType(&ast.GenDecl{d.Doc, d.Pos(), token.TYPE, noPos, []ast.Spec{spec}, noPos}) + doc.addType(&ast.GenDecl{d.Doc, d.Pos(), token.TYPE, token.NoPos, []ast.Spec{spec}, token.NoPos}) // A new GenDecl node is created, no need to nil out d.Doc. 
} } @@ -277,14 +273,9 @@ func (doc *docReader) addDecl(decl ast.Decl) { func copyCommentList(list []*ast.Comment) []*ast.Comment { - copy := make([]*ast.Comment, len(list)) - for i, c := range list { - copy[i] = c - } - return copy + return append([]*ast.Comment(nil), list...) } - var ( bug_markers = regexp.MustCompile("^/[/*][ \t]*BUG\\(.*\\):[ \t]*") // BUG(uid): bug_content = regexp.MustCompile("[^ \n\r\t]+") // at least one non-whitespace char @@ -309,13 +300,13 @@ func (doc *docReader) addFile(src *ast.File) { // collect BUG(...) comments for _, c := range src.Comments { text := c.List[0].Text - if m := bug_markers.Execute(text); len(m) > 0 { + if m := bug_markers.FindIndex(text); m != nil { // found a BUG comment; maybe empty if btxt := text[m[1]:]; bug_content.Match(btxt) { // non-empty BUG comment; collect comment without BUG prefix list := copyCommentList(c.List) list[0].Text = text[m[1]:] - doc.bugs.Push(&ast.CommentGroup{list}) + doc.bugs = append(doc.bugs, &ast.CommentGroup{list}) } } } @@ -325,7 +316,7 @@ func (doc *docReader) addFile(src *ast.File) { func NewFileDoc(file *ast.File) *PackageDoc { var r docReader - r.init(file.Name.Name()) + r.init(file.Name.Name) r.addFile(file) return r.newDoc("", nil) } @@ -370,9 +361,9 @@ func declName(d *ast.GenDecl) string { switch v := d.Specs[0].(type) { case *ast.ValueSpec: - return v.Names[0].Name() + return v.Names[0].Name case *ast.TypeSpec: - return v.Name.Name() + return v.Name.Name } return "" @@ -390,11 +381,10 @@ func (p sortValueDoc) Less(i, j int) bool { } -func makeValueDocs(v *vector.Vector, tok token.Token) []*ValueDoc { - d := make([]*ValueDoc, v.Len()) // big enough in any case +func makeValueDocs(list []*ast.GenDecl, tok token.Token) []*ValueDoc { + d := make([]*ValueDoc, len(list)) // big enough in any case n := 0 - for i := range d { - decl := v.At(i).(*ast.GenDecl) + for i, decl := range list { if decl.Tok == tok { d[n] = &ValueDoc{CommentText(decl.Doc), decl, i} n++ @@ -434,7 +424,7 @@ func makeFuncDocs(m map[string]*ast.FuncDecl) []*FuncDoc { if f.Recv != nil { doc.Recv = f.Recv.List[0].Type } - doc.Name = f.Name.Name() + doc.Name = f.Name.Name doc.Decl = f d[i] = doc i++ @@ -467,7 +457,7 @@ func (p sortTypeDoc) Less(i, j int) bool { // sort by name // pull blocks (name = "") up to top // in original order - if ni, nj := p[i].Type.Name.Name(), p[j].Type.Name.Name(); ni != nj { + if ni, nj := p[i].Type.Name.Name, p[j].Type.Name.Name; ni != nj { return ni < nj } return p[i].order < p[j].order @@ -511,7 +501,7 @@ func (doc *docReader) makeTypeDocs(m map[string]*typeDoc) []*TypeDoc { // file containing the explicit type declaration is missing or if // an unqualified type name was used after a "." import) // 1) move values - doc.values.AppendVector(old.values) + doc.values = append(doc.values, old.values...) 
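copyCommentList above collapses to a single line because appending to a nil slice always allocates fresh storage, so append([]T(nil), list...) is an idiomatic copy; the same append form merges old.values into doc.values in makeTypeDocs. A tiny demonstration of the copy (illustrative only):

	package main

	import "fmt"

	func main() {
		orig := []int{1, 2, 3}
		dup := append([]int(nil), orig...) // copies into a fresh backing array
		dup[0] = 99
		fmt.Println(orig[0], dup[0]) // 1 99: orig is unaffected
	}
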
// 2) move factory functions for name, f := range old.factories { doc.funcs[name] = f @@ -531,10 +521,10 @@ func (doc *docReader) makeTypeDocs(m map[string]*typeDoc) []*TypeDoc { } -func makeBugDocs(v *vector.Vector) []string { - d := make([]string, v.Len()) - for i := 0; i < v.Len(); i++ { - d[i] = CommentText(v.At(i).(*ast.CommentGroup)) +func makeBugDocs(list []*ast.CommentGroup) []string { + d := make([]string, len(list)) + for i, g := range list { + d[i] = CommentText(g) } return d } @@ -587,12 +577,12 @@ func matchDecl(d *ast.GenDecl, f Filter) bool { switch v := d.(type) { case *ast.ValueSpec: for _, name := range v.Names { - if f(name.Name()) { + if f(name.Name) { return true } } case *ast.TypeSpec: - if f(v.Name.Name()) { + if f(v.Name.Name) { return true } } diff --git a/src/pkg/go/parser/Makefile b/src/pkg/go/parser/Makefile index d9b52a7d9..d301f41eb 100644 --- a/src/pkg/go/parser/Makefile +++ b/src/pkg/go/parser/Makefile @@ -2,7 +2,7 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -include ../../../Make.$(GOARCH) +include ../../../Make.inc TARG=go/parser GOFILES=\ diff --git a/src/pkg/go/parser/interface.go b/src/pkg/go/parser/interface.go index e1ddb37c3..84d699a67 100644 --- a/src/pkg/go/parser/interface.go +++ b/src/pkg/go/parser/interface.go @@ -57,72 +57,73 @@ func (p *parser) parseEOF() os.Error { // ParseExpr parses a Go expression and returns the corresponding -// AST node. The filename, src, and scope arguments have the same interpretation +// AST node. The fset, filename, and src arguments have the same interpretation // as for ParseFile. If there is an error, the result expression // may be nil or contain a partial AST. // -func ParseExpr(filename string, src interface{}, scope *ast.Scope) (ast.Expr, os.Error) { +func ParseExpr(fset *token.FileSet, filename string, src interface{}) (ast.Expr, os.Error) { data, err := readSource(filename, src) if err != nil { return nil, err } var p parser - p.init(filename, data, scope, 0) - return p.parseExpr(), p.parseEOF() + p.init(fset, filename, data, 0) + x := p.parseExpr() + if p.tok == token.SEMICOLON { + p.next() // consume automatically inserted semicolon, if any + } + return x, p.parseEOF() } // ParseStmtList parses a list of Go statements and returns the list -// of corresponding AST nodes. The filename, src, and scope arguments have the same +// of corresponding AST nodes. The fset, filename, and src arguments have the same // interpretation as for ParseFile. If there is an error, the node // list may be nil or contain partial ASTs. // -func ParseStmtList(filename string, src interface{}, scope *ast.Scope) ([]ast.Stmt, os.Error) { +func ParseStmtList(fset *token.FileSet, filename string, src interface{}) ([]ast.Stmt, os.Error) { data, err := readSource(filename, src) if err != nil { return nil, err } var p parser - p.init(filename, data, scope, 0) + p.init(fset, filename, data, 0) return p.parseStmtList(), p.parseEOF() } // ParseDeclList parses a list of Go declarations and returns the list -// of corresponding AST nodes. The filename, src, and scope arguments have the same +// of corresponding AST nodes. The fset, filename, and src arguments have the same // interpretation as for ParseFile. If there is an error, the node // list may be nil or contain partial ASTs. 
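All of these parser entry points now take the *token.FileSet first; ParseExpr additionally consumes the semicolon that automatic semicolon insertion adds after the expression, so a trailing newline in src no longer produces a spurious error. A hedged caller sketch against the signatures in this revision (later releases changed them again):

	package main

	import (
		"fmt"
		"go/parser"
		"go/token"
	)

	func main() {
		fset := token.NewFileSet() // records positions for everything parsed
		x, err := parser.ParseExpr(fset, "", "a + b*c\n")
		if err != nil {
			fmt.Println(err)
			return
		}
		fmt.Printf("%T\n", x) // *ast.BinaryExpr
	}
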
// -func ParseDeclList(filename string, src interface{}, scope *ast.Scope) ([]ast.Decl, os.Error) { +func ParseDeclList(fset *token.FileSet, filename string, src interface{}) ([]ast.Decl, os.Error) { data, err := readSource(filename, src) if err != nil { return nil, err } var p parser - p.init(filename, data, scope, 0) + p.init(fset, filename, data, 0) return p.parseDeclList(), p.parseEOF() } -// ParseFile parses a Go source file and returns a File node. +// ParseFile parses the source code of a single Go source file and returns +// the corresponding ast.File node. The source code may be provided via +// the filename of the source file, or via the src parameter. // -// If src != nil, ParseFile parses the file source from src. src may -// be provided in a variety of formats. At the moment the following types -// are supported: string, []byte, and io.Reader. In this case, filename is -// only used for source position information and error messages. +// If src != nil, ParseFile parses the source from src and the filename is +// only used when recording position information. The type of the argument +// for the src parameter must be string, []byte, or io.Reader. // // If src == nil, ParseFile parses the file specified by filename. // -// If scope != nil, it is the immediately surrounding scope for the file -// (the package scope) and it is used to lookup and declare identifiers. -// When parsing multiple files belonging to a package, the same scope should -// be provided to all files. -// // The mode parameter controls the amount of source text parsed and other -// optional parser functionality. +// optional parser functionality. Position information is recorded in the +// file set fset. // // If the source couldn't be read, the returned AST is nil and the error // indicates the specific failure. If the source was read but syntax @@ -130,57 +131,57 @@ func ParseDeclList(filename string, src interface{}, scope *ast.Scope) ([]ast.De // representing the fragments of erroneous source code). Multiple errors // are returned via a scanner.ErrorList which is sorted by file position. // -func ParseFile(filename string, src interface{}, scope *ast.Scope, mode uint) (*ast.File, os.Error) { +func ParseFile(fset *token.FileSet, filename string, src interface{}, mode uint) (*ast.File, os.Error) { data, err := readSource(filename, src) if err != nil { return nil, err } var p parser - p.init(filename, data, scope, mode) + p.init(fset, filename, data, mode) return p.parseFile(), p.GetError(scanner.NoMultiples) // parseFile() reads to EOF } // ParseFiles calls ParseFile for each file in the filenames list and returns // a map of package name -> package AST with all the packages found. The mode -// bits are passed to ParseFile unchanged. +// bits are passed to ParseFile unchanged. Position information is recorded +// in the file set fset. // // Files with parse errors are ignored. In this case the map of packages may -// be incomplete (missing packages and/or incomplete packages) and the last +// be incomplete (missing packages and/or incomplete packages) and the first // error encountered is returned. 
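The revised ParseFile contract in brief: positions are recorded in fset, src may be a string, []byte, or io.Reader, and src == nil means parse the file named by filename. A short usage sketch, under the same revision-specific caveat as above:

	package main

	import (
		"fmt"
		"go/parser"
		"go/token"
	)

	func main() {
		fset := token.NewFileSet()
		src := "package main\n\nfunc main() {}\n"
		f, err := parser.ParseFile(fset, "main.go", src, 0)
		if err != nil {
			fmt.Println(err)
			return
		}
		// Nodes store compact token.Pos values; the FileSet expands
		// them to file:line:column only when asked.
		fmt.Println(f.Name.Name)                     // main
		fmt.Println(fset.Position(f.Decls[0].Pos())) // main.go:3:1
	}
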
// -func ParseFiles(filenames []string, scope *ast.Scope, mode uint) (map[string]*ast.Package, os.Error) { - pkgs := make(map[string]*ast.Package) - var err os.Error +func ParseFiles(fset *token.FileSet, filenames []string, mode uint) (pkgs map[string]*ast.Package, first os.Error) { + pkgs = make(map[string]*ast.Package) for _, filename := range filenames { - var src *ast.File - src, err = ParseFile(filename, nil, scope, mode) - if err == nil { - name := src.Name.Name() + if src, err := ParseFile(fset, filename, nil, mode); err == nil { + name := src.Name.Name pkg, found := pkgs[name] if !found { - pkg = &ast.Package{name, scope, make(map[string]*ast.File)} + pkg = &ast.Package{name, nil, make(map[string]*ast.File)} pkgs[name] = pkg } pkg.Files[filename] = src + } else if first == nil { + first = err } } - - return pkgs, err + return } // ParseDir calls ParseFile for the files in the directory specified by path and // returns a map of package name -> package AST with all the packages found. If // filter != nil, only the files with os.FileInfo entries passing through the filter -// are considered. The mode bits are passed to ParseFile unchanged. +// are considered. The mode bits are passed to ParseFile unchanged. Position +// information is recorded in the file set fset. // // If the directory couldn't be read, a nil map and the respective error are -// returned. If a parse error occured, a non-nil but incomplete map and the +// returned. If a parse error occurred, a non-nil but incomplete map and the // error are returned. // -func ParseDir(path string, filter func(*os.FileInfo) bool, mode uint) (map[string]*ast.Package, os.Error) { +func ParseDir(fset *token.FileSet, path string, filter func(*os.FileInfo) bool, mode uint) (map[string]*ast.Package, os.Error) { fd, err := os.Open(path, os.O_RDONLY, 0) if err != nil { return nil, err @@ -203,6 +204,5 @@ func ParseDir(path string, filter func(*os.FileInfo) bool, mode uint) (map[strin } filenames = filenames[0:n] - var scope *ast.Scope = nil // for now tracking of declarations is disabled - return ParseFiles(filenames, scope, mode) + return ParseFiles(fset, filenames, mode) } diff --git a/src/pkg/go/parser/parser.go b/src/pkg/go/parser/parser.go index c1914005a..3b2fe4577 100644 --- a/src/pkg/go/parser/parser.go +++ b/src/pkg/go/parser/parser.go @@ -10,7 +10,6 @@ package parser import ( - "container/vector" "fmt" "go/ast" "go/scanner" @@ -36,6 +35,7 @@ const ( // The parser structure holds the parser's internal state. 
type parser struct { + file *token.File scanner.ErrorVector scanner scanner.Scanner @@ -45,23 +45,17 @@ type parser struct { indent uint // indentation used for tracing output // Comments - comments vector.Vector // list of *CommentGroup + comments []*ast.CommentGroup leadComment *ast.CommentGroup // the last lead comment lineComment *ast.CommentGroup // the last line comment // Next token - pos token.Position // token position - tok token.Token // one token look-ahead - lit []byte // token literal + pos token.Pos // token position + tok token.Token // one token look-ahead + lit []byte // token literal // Non-syntactic parser control exprLev int // < 0: in control clause, >= 0: in expression - - // Scopes - checkDecl bool // if set, check declarations - pkgScope *ast.Scope - fileScope *ast.Scope - funcScope *ast.Scope } @@ -75,16 +69,10 @@ func scannerMode(mode uint) uint { } -func (p *parser) init(filename string, src []byte, scope *ast.Scope, mode uint) { - p.scanner.Init(filename, src, p, scannerMode(mode)) +func (p *parser) init(fset *token.FileSet, filename string, src []byte, mode uint) { + p.file = p.scanner.Init(fset, filename, src, p, scannerMode(mode)) p.mode = mode p.trace = mode&Trace != 0 // for convenience (p.trace is used frequently) - if scope != nil { - p.checkDecl = true - } else { - scope = ast.NewScope(nil) // provide a dummy scope - } - p.pkgScope = scope p.next() } @@ -96,13 +84,14 @@ func (p *parser) printTrace(a ...interface{}) { const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " const n = uint(len(dots)) - fmt.Printf("%5d:%3d: ", p.pos.Line, p.pos.Column) + pos := p.file.Position(p.pos) + fmt.Printf("%5d:%3d: ", pos.Line, pos.Column) i := 2 * p.indent for ; i > n; i -= n { fmt.Print(dots) } fmt.Print(dots[0:i]) - fmt.Println(a) + fmt.Println(a...) } @@ -124,9 +113,9 @@ func un(p *parser) { func (p *parser) next0() { // Because of one-token look-ahead, print the previous token // when tracing as it provides a more readable output. The - // very first token (p.pos.Line == 0) is not initialized (it - // is token.ILLEGAL), so don't print it . - if p.trace && p.pos.Line > 0 { + // very first token (!p.pos.IsValid()) is not initialized + // (it is token.ILLEGAL), so don't print it . + if p.trace && p.pos.IsValid() { s := p.tok.String() switch { case p.tok.IsLiteral(): @@ -145,7 +134,7 @@ func (p *parser) next0() { func (p *parser) consumeComment() (comment *ast.Comment, endline int) { // /*-style comments may end on a different line than where they start. // Scan the comment for '\n' chars and adjust endline accordingly. - endline = p.pos.Line + endline = p.file.Line(p.pos) if p.lit[1] == '*' { for _, b := range p.lit { if b == '\n' { @@ -167,23 +156,17 @@ func (p *parser) consumeComment() (comment *ast.Comment, endline int) { // token terminates a comment group. 
// func (p *parser) consumeCommentGroup() (comments *ast.CommentGroup, endline int) { - var list vector.Vector - endline = p.pos.Line - for p.tok == token.COMMENT && endline+1 >= p.pos.Line { + var list []*ast.Comment + endline = p.file.Line(p.pos) + for p.tok == token.COMMENT && endline+1 >= p.file.Line(p.pos) { var comment *ast.Comment comment, endline = p.consumeComment() - list.Push(comment) - } - - // convert list - group := make([]*ast.Comment, len(list)) - for i, x := range list { - group[i] = x.(*ast.Comment) + list = append(list, comment) } // add comment group to the comments list - comments = &ast.CommentGroup{group} - p.comments.Push(comments) + comments = &ast.CommentGroup{list} + p.comments = append(p.comments, comments) return } @@ -207,18 +190,18 @@ func (p *parser) consumeCommentGroup() (comments *ast.CommentGroup, endline int) func (p *parser) next() { p.leadComment = nil p.lineComment = nil - line := p.pos.Line // current line + line := p.file.Line(p.pos) // current line p.next0() if p.tok == token.COMMENT { var comment *ast.CommentGroup var endline int - if p.pos.Line == line { + if p.file.Line(p.pos) == line { // The comment is on same line as previous token; it // cannot be a lead comment but may be a line comment. comment, endline = p.consumeCommentGroup() - if p.pos.Line != endline { + if p.file.Line(p.pos) != endline { // The next token is on a different line, thus // the last comment group is a line comment. p.lineComment = comment @@ -231,7 +214,7 @@ func (p *parser) next() { comment, endline = p.consumeCommentGroup() } - if endline+1 == p.pos.Line { + if endline+1 == p.file.Line(p.pos) { // The next token is following on the line immediately after the // comment group, thus the last comment group is a lead comment. p.leadComment = comment @@ -240,26 +223,35 @@ func (p *parser) next() { } -func (p *parser) errorExpected(pos token.Position, msg string) { +func (p *parser) error(pos token.Pos, msg string) { + p.Error(p.file.Position(pos), msg) +} + + +func (p *parser) errorExpected(pos token.Pos, msg string) { msg = "expected " + msg - if pos.Offset == p.pos.Offset { + if pos == p.pos { // the error happened at the current position; // make the error message more specific - msg += ", found '" + p.tok.String() + "'" - if p.tok.IsLiteral() { - msg += " " + string(p.lit) + if p.tok == token.SEMICOLON && p.lit[0] == '\n' { + msg += ", found newline" + } else { + msg += ", found '" + p.tok.String() + "'" + if p.tok.IsLiteral() { + msg += " " + string(p.lit) + } } } - p.Error(pos, msg) + p.error(pos, msg) } -func (p *parser) expect(tok token.Token) token.Position { +func (p *parser) expect(tok token.Token) token.Pos { pos := p.pos if p.tok != tok { p.errorExpected(pos, "'"+tok.String()+"'") } - p.next() // make progress in any case + p.next() // make progress return pos } @@ -272,152 +264,51 @@ func (p *parser) expectSemi() { // ---------------------------------------------------------------------------- -// Scope support - -func (p *parser) openScope() *ast.Scope { - p.funcScope = ast.NewScope(p.funcScope) - return p.funcScope -} - - -func (p *parser) closeScope() { p.funcScope = p.funcScope.Outer } - +// Identifiers -func (p *parser) parseIdent(kind ast.ObjKind) *ast.Ident { - obj := ast.NewObj(kind, p.pos, "_") +func (p *parser) parseIdent() *ast.Ident { + pos := p.pos + name := "_" if p.tok == token.IDENT { - obj.Name = string(p.lit) + name = string(p.lit) p.next() } else { p.expect(token.IDENT) // use expect() error handling } - return &ast.Ident{obj.Pos, obj} + 
return &ast.Ident{pos, name, nil} } -func (p *parser) parseIdentList(kind ast.ObjKind) []*ast.Ident { +func (p *parser) parseIdentList() (list []*ast.Ident) { if p.trace { defer un(trace(p, "IdentList")) } - var list vector.Vector - list.Push(p.parseIdent(kind)) + list = append(list, p.parseIdent()) for p.tok == token.COMMA { p.next() - list.Push(p.parseIdent(kind)) + list = append(list, p.parseIdent()) } - // convert vector - idents := make([]*ast.Ident, len(list)) - for i, x := range list { - idents[i] = x.(*ast.Ident) - } - - return idents -} - - -func (p *parser) declIdent(scope *ast.Scope, id *ast.Ident) { - decl := scope.Declare(id.Obj) - if p.checkDecl && decl != id.Obj { - if decl.Kind == ast.Err { - // declared object is a forward declaration - update it - *decl = *id.Obj - id.Obj = decl - return - } - p.Error(id.Pos(), "'"+id.Name()+"' declared already at "+decl.Pos.String()) - } -} - - -func (p *parser) declIdentList(scope *ast.Scope, list []*ast.Ident) { - for _, id := range list { - p.declIdent(scope, id) - } -} - - -func (p *parser) declFieldList(scope *ast.Scope, list []*ast.Field) { - for _, f := range list { - p.declIdentList(scope, f.Names) - } -} - - -func (p *parser) findIdent() *ast.Ident { - pos := p.pos - name := "_" - var obj *ast.Object - if p.tok == token.IDENT { - name = string(p.lit) - obj = p.funcScope.Lookup(name) - p.next() - } else { - p.expect(token.IDENT) // use expect() error handling - } - if obj == nil { - // No declaration found: either we are outside any function - // (p.funcScope == nil) or the identifier is not declared - // in any function. Try the file and package scope. - obj = p.fileScope.Lookup(name) // file scope is nested in package scope - if obj == nil { - // No declaration found anywhere: track as - // unresolved identifier in the package scope. - obj = ast.NewObj(ast.Err, pos, name) - p.pkgScope.Declare(obj) - } - } - return &ast.Ident{pos, obj} -} - - -func (p *parser) findIdentInScope(scope *ast.Scope) *ast.Ident { - pos := p.pos - name := "_" - var obj *ast.Object - if p.tok == token.IDENT { - name = string(p.lit) - obj = scope.Lookup(name) - p.next() - } else { - p.expect(token.IDENT) // use expect() error handling - } - if obj == nil { - // TODO(gri) At the moment we always arrive here because - // we don't track the lookup scope (and sometimes - // we can't). Just create a useable ident for now. 
- obj = ast.NewObj(ast.Err, pos, name) - } - return &ast.Ident{pos, obj} + return } // ---------------------------------------------------------------------------- // Common productions -func makeExprList(list *vector.Vector) []ast.Expr { - exprs := make([]ast.Expr, len(*list)) - for i, x := range *list { - exprs[i] = x.(ast.Expr) - } - return exprs -} - - -func (p *parser) parseExprList() []ast.Expr { +func (p *parser) parseExprList() (list []ast.Expr) { if p.trace { defer un(trace(p, "ExpressionList")) } - var list vector.Vector - list.Push(p.parseExpr()) + list = append(list, p.parseExpr()) for p.tok == token.COMMA { p.next() - list.Push(p.parseExpr()) + list = append(list, p.parseExpr()) } - return makeExprList(&list) + return } @@ -432,9 +323,10 @@ func (p *parser) parseType() ast.Expr { typ := p.tryType() if typ == nil { - p.errorExpected(p.pos, "type") + pos := p.pos + p.errorExpected(pos, "type") p.next() // make progress - return &ast.BadExpr{p.pos} + return &ast.BadExpr{pos, p.pos} } return typ @@ -446,11 +338,11 @@ func (p *parser) parseQualifiedIdent() ast.Expr { defer un(trace(p, "QualifiedIdent")) } - var x ast.Expr = p.findIdent() + var x ast.Expr = p.parseIdent() if p.tok == token.PERIOD { // first identifier is a package identifier p.next() - sel := p.findIdentInScope(nil) + sel := p.parseIdent() x = &ast.SelectorExpr{x, sel} } return x @@ -486,14 +378,14 @@ func (p *parser) parseArrayType(ellipsisOk bool) ast.Expr { } -func (p *parser) makeIdentList(list *vector.Vector) []*ast.Ident { - idents := make([]*ast.Ident, len(*list)) - for i, x := range *list { +func (p *parser) makeIdentList(list []ast.Expr) []*ast.Ident { + idents := make([]*ast.Ident, len(list)) + for i, x := range list { ident, isIdent := x.(*ast.Ident) if !isIdent { pos := x.(ast.Expr).Pos() p.errorExpected(pos, "identifier") - ident = &ast.Ident{pos, ast.NewObj(ast.Err, pos, "_")} + ident = &ast.Ident{pos, "_", nil} } idents[i] = ident } @@ -508,19 +400,8 @@ func (p *parser) parseFieldDecl() *ast.Field { doc := p.leadComment - // a list of identifiers looks like a list of type names - var list vector.Vector - for { - // TODO(gri): do not allow ()'s here - list.Push(p.parseType()) - if p.tok != token.COMMA { - break - } - p.next() - } - - // if we had a list of identifiers, it must be followed by a type - typ := p.tryType() + // fields + list, typ := p.parseVarList(false) // optional tag var tag *ast.BasicLit @@ -533,15 +414,14 @@ func (p *parser) parseFieldDecl() *ast.Field { var idents []*ast.Ident if typ != nil { // IdentifierList Type - idents = p.makeIdentList(&list) + idents = p.makeIdentList(list) } else { - // Type (anonymous field) - if len(list) == 1 { - // TODO(gri): check that this looks like a type - typ = list.At(0).(ast.Expr) - } else { - p.errorExpected(p.pos, "anonymous field") - typ = &ast.BadExpr{p.pos} + // ["*"] TypeName (AnonymousField) + typ = list[0] // we always have at least one element + if n := len(list); n > 1 || !isTypeName(deref(typ)) { + pos := typ.Pos() + p.errorExpected(pos, "anonymous field") + typ = &ast.BadExpr{pos, list[n-1].End()} } } @@ -558,22 +438,16 @@ func (p *parser) parseStructType() *ast.StructType { pos := p.expect(token.STRUCT) lbrace := p.expect(token.LBRACE) - var list vector.Vector - for p.tok == token.IDENT || p.tok == token.MUL { - list.Push(p.parseFieldDecl()) + var list []*ast.Field + for p.tok == token.IDENT || p.tok == token.MUL || p.tok == token.LPAREN { + // a field declaration cannot start with a '(' but we accept + // it here for more robust 
parsing and better error messages + // (parseFieldDecl will check and complain if necessary) + list = append(list, p.parseFieldDecl()) } rbrace := p.expect(token.RBRACE) - // convert vector - fields := make([]*ast.Field, len(list)) - for i, x := range list { - fields[i] = x.(*ast.Field) - } - - // TODO(gri) The struct scope shouldn't get lost. - p.declFieldList(ast.NewScope(nil), fields) - - return &ast.StructType{pos, &ast.FieldList{lbrace, fields, rbrace}, false} + return &ast.StructType{pos, &ast.FieldList{lbrace, list, rbrace}, false} } @@ -589,13 +463,17 @@ func (p *parser) parsePointerType() *ast.StarExpr { } -func (p *parser) tryParameterType(ellipsisOk bool) ast.Expr { - if ellipsisOk && p.tok == token.ELLIPSIS { +func (p *parser) tryVarType(isParam bool) ast.Expr { + if isParam && p.tok == token.ELLIPSIS { pos := p.pos p.next() - typ := p.tryType() + typ := p.tryType() // don't use parseType so we can provide better error message + if typ == nil { + p.error(pos, "'...' parameter is missing type") + typ = &ast.BadExpr{pos, p.pos} + } if p.tok != token.RPAREN { - p.Error(pos, "can use '...' for last parameter only") + p.error(pos, "can use '...' with last parameter type only") } return &ast.Ellipsis{pos, typ} } @@ -603,27 +481,30 @@ func (p *parser) tryParameterType(ellipsisOk bool) ast.Expr { } -func (p *parser) parseParameterType(ellipsisOk bool) ast.Expr { - typ := p.tryParameterType(ellipsisOk) +func (p *parser) parseVarType(isParam bool) ast.Expr { + typ := p.tryVarType(isParam) if typ == nil { - p.errorExpected(p.pos, "type") + pos := p.pos + p.errorExpected(pos, "type") p.next() // make progress - typ = &ast.BadExpr{p.pos} + typ = &ast.BadExpr{pos, p.pos} } return typ } -func (p *parser) parseParameterDecl(ellipsisOk bool) (*vector.Vector, ast.Expr) { +func (p *parser) parseVarList(isParam bool) (list []ast.Expr, typ ast.Expr) { if p.trace { - defer un(trace(p, "ParameterDecl")) + defer un(trace(p, "VarList")) } // a list of identifiers looks like a list of type names - var list vector.Vector for { - // TODO(gri): do not allow ()'s here - list.Push(p.parseParameterType(ellipsisOk)) + // parseVarType accepts any type (including parenthesized ones) + // even though the syntax does not permit them here: we + // accept them all for more robust parsing and complain + // afterwards + list = append(list, p.parseVarType(isParam)) if p.tok != token.COMMA { break } @@ -631,31 +512,30 @@ func (p *parser) parseParameterDecl(ellipsisOk bool) (*vector.Vector, ast.Expr) } // if we had a list of identifiers, it must be followed by a type - typ := p.tryParameterType(ellipsisOk) + typ = p.tryVarType(isParam) - return &list, typ + return } -func (p *parser) parseParameterList(ellipsisOk bool) []*ast.Field { +func (p *parser) parseParameterList(ellipsisOk bool) (params []*ast.Field) { if p.trace { defer un(trace(p, "ParameterList")) } - list, typ := p.parseParameterDecl(ellipsisOk) + list, typ := p.parseVarList(ellipsisOk) if typ != nil { // IdentifierList Type idents := p.makeIdentList(list) - list.Resize(0, 0) - list.Push(&ast.Field{nil, idents, typ, nil, nil}) + params = append(params, &ast.Field{nil, idents, typ, nil, nil}) if p.tok == token.COMMA { p.next() } for p.tok != token.RPAREN && p.tok != token.EOF { - idents := p.parseIdentList(ast.Var) - typ := p.parseParameterType(ellipsisOk) - list.Push(&ast.Field{nil, idents, typ, nil, nil}) + idents := p.parseIdentList() + typ := p.parseVarType(ellipsisOk) + params = append(params, &ast.Field{nil, idents, typ, nil, nil}) if p.tok != 
token.COMMA { break } @@ -664,23 +544,17 @@ func (p *parser) parseParameterList(ellipsisOk bool) []*ast.Field { } else { // Type { "," Type } (anonymous parameters) - // convert list of types into list of *Param - for i, x := range *list { - list.Set(i, &ast.Field{Type: x.(ast.Expr)}) + params = make([]*ast.Field, len(list)) + for i, x := range list { + params[i] = &ast.Field{Type: x} } } - // convert list - params := make([]*ast.Field, len(*list)) - for i, x := range *list { - params[i] = x.(*ast.Field) - } - - return params + return } -func (p *parser) parseParameters(scope *ast.Scope, ellipsisOk bool) *ast.FieldList { +func (p *parser) parseParameters(ellipsisOk bool) *ast.FieldList { if p.trace { defer un(trace(p, "Parameters")) } @@ -689,7 +563,6 @@ func (p *parser) parseParameters(scope *ast.Scope, ellipsisOk bool) *ast.FieldLi lparen := p.expect(token.LPAREN) if p.tok != token.RPAREN { params = p.parseParameterList(ellipsisOk) - p.declFieldList(scope, params) } rparen := p.expect(token.RPAREN) @@ -697,13 +570,13 @@ func (p *parser) parseParameters(scope *ast.Scope, ellipsisOk bool) *ast.FieldLi } -func (p *parser) parseResult(scope *ast.Scope) *ast.FieldList { +func (p *parser) parseResult() *ast.FieldList { if p.trace { defer un(trace(p, "Result")) } if p.tok == token.LPAREN { - return p.parseParameters(scope, false) + return p.parseParameters(false) } typ := p.tryType() @@ -717,28 +590,27 @@ func (p *parser) parseResult(scope *ast.Scope) *ast.FieldList { } -func (p *parser) parseSignature(scope *ast.Scope) (params, results *ast.FieldList) { +func (p *parser) parseSignature() (params, results *ast.FieldList) { if p.trace { defer un(trace(p, "Signature")) } - params = p.parseParameters(scope, true) - results = p.parseResult(scope) + params = p.parseParameters(true) + results = p.parseResult() return } -func (p *parser) parseFuncType() (*ast.Scope, *ast.FuncType) { +func (p *parser) parseFuncType() *ast.FuncType { if p.trace { defer un(trace(p, "FuncType")) } pos := p.expect(token.FUNC) - scope := ast.NewScope(p.funcScope) - params, results := p.parseSignature(scope) + params, results := p.parseSignature() - return scope, &ast.FuncType{pos, params, results} + return &ast.FuncType{pos, params, results} } @@ -754,8 +626,8 @@ func (p *parser) parseMethodSpec() *ast.Field { if ident, isIdent := x.(*ast.Ident); isIdent && p.tok == token.LPAREN { // method idents = []*ast.Ident{ident} - params, results := p.parseSignature(ast.NewScope(p.funcScope)) - typ = &ast.FuncType{noPos, params, results} + params, results := p.parseSignature() + typ = &ast.FuncType{token.NoPos, params, results} } else { // embedded interface typ = x @@ -773,22 +645,13 @@ func (p *parser) parseInterfaceType() *ast.InterfaceType { pos := p.expect(token.INTERFACE) lbrace := p.expect(token.LBRACE) - var list vector.Vector + var list []*ast.Field for p.tok == token.IDENT { - list.Push(p.parseMethodSpec()) + list = append(list, p.parseMethodSpec()) } rbrace := p.expect(token.RBRACE) - // convert vector - methods := make([]*ast.Field, len(list)) - for i, x := range list { - methods[i] = x.(*ast.Field) - } - - // TODO(gri) The interface scope shouldn't get lost. 
- p.declFieldList(ast.NewScope(nil), methods) - - return &ast.InterfaceType{pos, &ast.FieldList{lbrace, methods, rbrace}, false} + return &ast.InterfaceType{pos, &ast.FieldList{lbrace, list, rbrace}, false} } @@ -842,8 +705,7 @@ func (p *parser) tryRawType(ellipsisOk bool) ast.Expr { case token.MUL: return p.parsePointerType() case token.FUNC: - _, typ := p.parseFuncType() - return typ + return p.parseFuncType() case token.INTERFACE: return p.parseInterfaceType() case token.MAP: @@ -869,43 +731,28 @@ func (p *parser) tryType() ast.Expr { return p.tryRawType(false) } // ---------------------------------------------------------------------------- // Blocks -func makeStmtList(list *vector.Vector) []ast.Stmt { - stats := make([]ast.Stmt, len(*list)) - for i, x := range *list { - stats[i] = x.(ast.Stmt) - } - return stats -} - - -func (p *parser) parseStmtList() []ast.Stmt { +func (p *parser) parseStmtList() (list []ast.Stmt) { if p.trace { defer un(trace(p, "StatementList")) } - var list vector.Vector for p.tok != token.CASE && p.tok != token.DEFAULT && p.tok != token.RBRACE && p.tok != token.EOF { - list.Push(p.parseStmt()) + list = append(list, p.parseStmt()) } - return makeStmtList(&list) + return } -func (p *parser) parseBody(scope *ast.Scope) *ast.BlockStmt { +func (p *parser) parseBody() *ast.BlockStmt { if p.trace { defer un(trace(p, "Body")) } - savedScope := p.funcScope - p.funcScope = scope - lbrace := p.expect(token.LBRACE) list := p.parseStmtList() rbrace := p.expect(token.RBRACE) - p.funcScope = savedScope - return &ast.BlockStmt{lbrace, list, rbrace} } @@ -915,9 +762,6 @@ func (p *parser) parseBlockStmt() *ast.BlockStmt { defer un(trace(p, "BlockStmt")) } - p.openScope() - defer p.closeScope() - lbrace := p.expect(token.LBRACE) list := p.parseStmtList() rbrace := p.expect(token.RBRACE) @@ -934,14 +778,14 @@ func (p *parser) parseFuncTypeOrLit() ast.Expr { defer un(trace(p, "FuncTypeOrLit")) } - scope, typ := p.parseFuncType() + typ := p.parseFuncType() if p.tok != token.LBRACE { // function type only return typ } p.exprLev++ - body := p.parseBody(scope) + body := p.parseBody() p.exprLev-- return &ast.FuncLit{typ, body} @@ -958,7 +802,7 @@ func (p *parser) parseOperand() ast.Expr { switch p.tok { case token.IDENT: - return p.findIdent() + return p.parseIdent() case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING: x := &ast.BasicLit{p.pos, p.tok, p.lit} @@ -984,9 +828,10 @@ func (p *parser) parseOperand() ast.Expr { } } - p.errorExpected(p.pos, "operand") + pos := p.pos + p.errorExpected(pos, "operand") p.next() // make progress - return &ast.BadExpr{p.pos} + return &ast.BadExpr{pos, p.pos} } @@ -998,7 +843,7 @@ func (p *parser) parseSelectorOrTypeAssertion(x ast.Expr) ast.Expr { p.expect(token.PERIOD) if p.tok == token.IDENT { // selector - sel := p.findIdentInScope(nil) + sel := p.parseIdent() return &ast.SelectorExpr{x, sel} } @@ -1022,23 +867,27 @@ func (p *parser) parseIndexOrSlice(x ast.Expr) ast.Expr { defer un(trace(p, "IndexOrSlice")) } - p.expect(token.LBRACK) + lbrack := p.expect(token.LBRACK) p.exprLev++ - index := p.parseExpr() + var low, high ast.Expr + isSlice := false + if p.tok != token.COLON { + low = p.parseExpr() + } if p.tok == token.COLON { + isSlice = true p.next() - var end ast.Expr if p.tok != token.RBRACK { - end = p.parseExpr() + high = p.parseExpr() } - x = &ast.SliceExpr{x, index, end} - } else { - x = &ast.IndexExpr{x, index} } p.exprLev-- - p.expect(token.RBRACK) + rbrack := p.expect(token.RBRACK) - return x + if isSlice { + return 
&ast.SliceExpr{x, lbrack, low, high, rbrack} + } + return &ast.IndexExpr{x, lbrack, low, rbrack} } @@ -1049,9 +898,14 @@ func (p *parser) parseCallOrConversion(fun ast.Expr) *ast.CallExpr { lparen := p.expect(token.LPAREN) p.exprLev++ - var list vector.Vector - for p.tok != token.RPAREN && p.tok != token.EOF { - list.Push(p.parseExpr()) + var list []ast.Expr + var ellipsis token.Pos + for p.tok != token.RPAREN && p.tok != token.EOF && !ellipsis.IsValid() { + list = append(list, p.parseExpr()) + if p.tok == token.ELLIPSIS { + ellipsis = p.pos + p.next() + } if p.tok != token.COMMA { break } @@ -1060,47 +914,49 @@ func (p *parser) parseCallOrConversion(fun ast.Expr) *ast.CallExpr { p.exprLev-- rparen := p.expect(token.RPAREN) - return &ast.CallExpr{fun, lparen, makeExprList(&list), rparen} + return &ast.CallExpr{fun, lparen, list, ellipsis, rparen} } -func (p *parser) parseElement() ast.Expr { +func (p *parser) parseElement(keyOk bool) ast.Expr { if p.trace { defer un(trace(p, "Element")) } + if p.tok == token.LBRACE { + return p.parseLiteralValue(nil) + } + x := p.parseExpr() - if p.tok == token.COLON { + if keyOk && p.tok == token.COLON { colon := p.pos p.next() - x = &ast.KeyValueExpr{x, colon, p.parseExpr()} + x = &ast.KeyValueExpr{x, colon, p.parseElement(false)} } - return x } -func (p *parser) parseElementList() []ast.Expr { +func (p *parser) parseElementList() (list []ast.Expr) { if p.trace { defer un(trace(p, "ElementList")) } - var list vector.Vector for p.tok != token.RBRACE && p.tok != token.EOF { - list.Push(p.parseElement()) + list = append(list, p.parseElement(true)) if p.tok != token.COMMA { break } p.next() } - return makeExprList(&list) + return } -func (p *parser) parseCompositeLit(typ ast.Expr) ast.Expr { +func (p *parser) parseLiteralValue(typ ast.Expr) ast.Expr { if p.trace { - defer un(trace(p, "CompositeLit")) + defer un(trace(p, "LiteralValue")) } lbrace := p.expect(token.LBRACE) @@ -1115,21 +971,16 @@ func (p *parser) parseCompositeLit(typ ast.Expr) ast.Expr { } -// TODO(gri): Consider different approach to checking syntax after parsing: -// Provide a arguments (set of flags) to parsing functions -// restricting what they are supposed to accept depending -// on context. - // checkExpr checks that x is an expression (and not a type). func (p *parser) checkExpr(x ast.Expr) ast.Expr { - // TODO(gri): should provide predicate in AST nodes - switch t := x.(type) { + switch t := unparen(x).(type) { case *ast.BadExpr: case *ast.Ident: case *ast.BasicLit: case *ast.FuncLit: case *ast.CompositeLit: case *ast.ParenExpr: + panic("unreachable") case *ast.SelectorExpr: case *ast.IndexExpr: case *ast.SliceExpr: @@ -1137,7 +988,7 @@ func (p *parser) checkExpr(x ast.Expr) ast.Expr { if t.Type == nil { // the form X.(type) is only allowed in type switch expressions p.errorExpected(x.Pos(), "expression") - x = &ast.BadExpr{x.Pos()} + x = &ast.BadExpr{x.Pos(), x.End()} } case *ast.CallExpr: case *ast.StarExpr: @@ -1145,28 +996,26 @@ func (p *parser) checkExpr(x ast.Expr) ast.Expr { if t.Op == token.RANGE { // the range operator is only allowed at the top of a for statement p.errorExpected(x.Pos(), "expression") - x = &ast.BadExpr{x.Pos()} + x = &ast.BadExpr{x.Pos(), x.End()} } case *ast.BinaryExpr: default: // all other nodes are not proper expressions p.errorExpected(x.Pos(), "expression") - x = &ast.BadExpr{x.Pos()} + x = &ast.BadExpr{x.Pos(), x.End()} } return x } -// isTypeName returns true iff x is type name. +// isTypeName returns true iff x is a (qualified) TypeName. 
func isTypeName(x ast.Expr) bool { - // TODO(gri): should provide predicate in AST nodes switch t := x.(type) { case *ast.BadExpr: case *ast.Ident: - case *ast.ParenExpr: - return isTypeName(t.X) // TODO(gri): should (TypeName) be illegal? case *ast.SelectorExpr: - return isTypeName(t.X) + _, isIdent := t.X.(*ast.Ident) + return isIdent default: return false // all other nodes are not type names } @@ -1174,16 +1023,14 @@ func isTypeName(x ast.Expr) bool { } -// isCompositeLitType returns true iff x is a legal composite literal type. -func isCompositeLitType(x ast.Expr) bool { - // TODO(gri): should provide predicate in AST nodes +// isLiteralType returns true iff x is a legal composite literal type. +func isLiteralType(x ast.Expr) bool { switch t := x.(type) { case *ast.BadExpr: case *ast.Ident: - case *ast.ParenExpr: - return isCompositeLitType(t.X) case *ast.SelectorExpr: - return isTypeName(t.X) + _, isIdent := t.X.(*ast.Ident) + return isIdent case *ast.ArrayType: case *ast.StructType: case *ast.MapType: @@ -1194,22 +1041,41 @@ func isCompositeLitType(x ast.Expr) bool { } +// If x is of the form *T, deref returns T, otherwise it returns x. +func deref(x ast.Expr) ast.Expr { + if p, isPtr := x.(*ast.StarExpr); isPtr { + x = p.X + } + return x +} + + +// If x is of the form (T), unparen returns unparen(T), otherwise it returns x. +func unparen(x ast.Expr) ast.Expr { + if p, isParen := x.(*ast.ParenExpr); isParen { + x = unparen(p.X) + } + return x +} + + // checkExprOrType checks that x is an expression or a type // (and not a raw type such as [...]T). // func (p *parser) checkExprOrType(x ast.Expr) ast.Expr { - // TODO(gri): should provide predicate in AST nodes - switch t := x.(type) { + switch t := unparen(x).(type) { + case *ast.ParenExpr: + panic("unreachable") case *ast.UnaryExpr: if t.Op == token.RANGE { // the range operator is only allowed at the top of a for statement p.errorExpected(x.Pos(), "expression") - x = &ast.BadExpr{x.Pos()} + x = &ast.BadExpr{x.Pos(), x.End()} } case *ast.ArrayType: if len, isEllipsis := t.Len.(*ast.Ellipsis); isEllipsis { - p.Error(len.Pos(), "expected array length, found '...'") - x = &ast.BadExpr{x.Pos()} + p.error(len.Pos(), "expected array length, found '...'") + x = &ast.BadExpr{x.Pos(), x.End()} } } @@ -1234,8 +1100,8 @@ L: case token.LPAREN: x = p.parseCallOrConversion(p.checkExprOrType(x)) case token.LBRACE: - if isCompositeLitType(x) && (p.exprLev >= 0 || !isTypeName(x)) { - x = p.parseCompositeLit(x) + if isLiteralType(x) && (p.exprLev >= 0 || !isTypeName(x)) { + x = p.parseLiteralValue(x) } else { break L } @@ -1328,14 +1194,15 @@ func (p *parser) parseSimpleStmt(labelOk bool) ast.Stmt { switch p.tok { case token.COLON: // labeled statement + colon := p.pos p.next() if labelOk && len(x) == 1 { if label, isIdent := x[0].(*ast.Ident); isIdent { - return &ast.LabeledStmt{label, p.parseStmt()} + return &ast.LabeledStmt{label, colon, p.parseStmt()} } } - p.Error(x[0].Pos(), "illegal label declaration") - return &ast.BadStmt{x[0].Pos()} + p.error(x[0].Pos(), "illegal label declaration") + return &ast.BadStmt{x[0].Pos(), colon + 1} case token.DEFINE, token.ASSIGN, token.ADD_ASSIGN, @@ -1350,13 +1217,13 @@ func (p *parser) parseSimpleStmt(labelOk bool) ast.Stmt { } if len(x) > 1 { - p.Error(x[0].Pos(), "only one expression allowed") + p.error(x[0].Pos(), "only one expression allowed") // continue with first expression } if p.tok == token.INC || p.tok == token.DEC { // increment or decrement - s := &ast.IncDecStmt{x[0], p.tok} + s := 
&ast.IncDecStmt{x[0], p.pos, p.tok} p.next() // consume "++" or "--" return s } @@ -1385,7 +1252,7 @@ func (p *parser) parseGoStmt() ast.Stmt { call := p.parseCallExpr() p.expectSemi() if call == nil { - return &ast.BadStmt{pos} + return &ast.BadStmt{pos, pos + 2} // len("go") } return &ast.GoStmt{pos, call} @@ -1401,7 +1268,7 @@ func (p *parser) parseDeferStmt() ast.Stmt { call := p.parseCallExpr() p.expectSemi() if call == nil { - return &ast.BadStmt{pos} + return &ast.BadStmt{pos, pos + 5} // len("defer") } return &ast.DeferStmt{pos, call} @@ -1433,7 +1300,7 @@ func (p *parser) parseBranchStmt(tok token.Token) *ast.BranchStmt { s := &ast.BranchStmt{p.pos, tok, nil} p.expect(tok) if tok != token.FALLTHROUGH && p.tok == token.IDENT { - s.Label = p.findIdentInScope(nil) + s.Label = p.parseIdent() } p.expectSemi() @@ -1448,8 +1315,8 @@ func (p *parser) makeExpr(s ast.Stmt) ast.Expr { if es, isExpr := s.(*ast.ExprStmt); isExpr { return p.checkExpr(es.X) } - p.Error(s.Pos(), "expected condition, found simple statement") - return &ast.BadExpr{s.Pos()} + p.error(s.Pos(), "expected condition, found simple statement") + return &ast.BadExpr{s.Pos(), s.End()} } @@ -1489,10 +1356,6 @@ func (p *parser) parseIfStmt() *ast.IfStmt { defer un(trace(p, "IfStmt")) } - // IfStmt block - p.openScope() - defer p.closeScope() - pos := p.expect(token.IF) s1, s2, _ := p.parseControlClause(false) body := p.parseBlockStmt() @@ -1513,10 +1376,6 @@ func (p *parser) parseCaseClause() *ast.CaseClause { defer un(trace(p, "CaseClause")) } - // CaseClause block - p.openScope() - defer p.closeScope() - // SwitchCase pos := p.pos var x []ast.Expr @@ -1534,19 +1393,18 @@ func (p *parser) parseCaseClause() *ast.CaseClause { } -func (p *parser) parseTypeList() []ast.Expr { +func (p *parser) parseTypeList() (list []ast.Expr) { if p.trace { defer un(trace(p, "TypeList")) } - var list vector.Vector - list.Push(p.parseType()) + list = append(list, p.parseType()) for p.tok == token.COMMA { p.next() - list.Push(p.parseType()) + list = append(list, p.parseType()) } - return makeExprList(&list) + return } @@ -1555,10 +1413,6 @@ func (p *parser) parseTypeCaseClause() *ast.TypeCaseClause { defer un(trace(p, "TypeCaseClause")) } - // TypeCaseClause block - p.openScope() - defer p.closeScope() - // TypeSwitchCase pos := p.pos var types []ast.Expr @@ -1595,21 +1449,17 @@ func (p *parser) parseSwitchStmt() ast.Stmt { defer un(trace(p, "SwitchStmt")) } - // SwitchStmt block - p.openScope() - defer p.closeScope() - pos := p.expect(token.SWITCH) s1, s2, _ := p.parseControlClause(false) if isExprSwitch(s2) { lbrace := p.expect(token.LBRACE) - var cases vector.Vector + var list []ast.Stmt for p.tok == token.CASE || p.tok == token.DEFAULT { - cases.Push(p.parseCaseClause()) + list = append(list, p.parseCaseClause()) } rbrace := p.expect(token.RBRACE) - body := &ast.BlockStmt{lbrace, makeStmtList(&cases), rbrace} + body := &ast.BlockStmt{lbrace, list, rbrace} p.expectSemi() return &ast.SwitchStmt{pos, s1, p.makeExpr(s2), body} } @@ -1617,13 +1467,13 @@ func (p *parser) parseSwitchStmt() ast.Stmt { // type switch // TODO(gri): do all the checks! 
lbrace := p.expect(token.LBRACE) - var cases vector.Vector + var list []ast.Stmt for p.tok == token.CASE || p.tok == token.DEFAULT { - cases.Push(p.parseTypeCaseClause()) + list = append(list, p.parseTypeCaseClause()) } rbrace := p.expect(token.RBRACE) p.expectSemi() - body := &ast.BlockStmt{lbrace, makeStmtList(&cases), rbrace} + body := &ast.BlockStmt{lbrace, list, rbrace} return &ast.TypeSwitchStmt{pos, s1, s2, body} } @@ -1633,10 +1483,6 @@ func (p *parser) parseCommClause() *ast.CommClause { defer un(trace(p, "CommClause")) } - // CommClause block - p.openScope() - defer p.closeScope() - // CommCase pos := p.pos var tok token.Token @@ -1680,13 +1526,13 @@ func (p *parser) parseSelectStmt() *ast.SelectStmt { pos := p.expect(token.SELECT) lbrace := p.expect(token.LBRACE) - var cases vector.Vector + var list []ast.Stmt for p.tok == token.CASE || p.tok == token.DEFAULT { - cases.Push(p.parseCommClause()) + list = append(list, p.parseCommClause()) } rbrace := p.expect(token.RBRACE) p.expectSemi() - body := &ast.BlockStmt{lbrace, makeStmtList(&cases), rbrace} + body := &ast.BlockStmt{lbrace, list, rbrace} return &ast.SelectStmt{pos, body} } @@ -1697,10 +1543,6 @@ func (p *parser) parseForStmt() ast.Stmt { defer un(trace(p, "ForStmt")) } - // ForStmt block - p.openScope() - defer p.closeScope() - pos := p.expect(token.FOR) s1, s2, s3 := p.parseControlClause(true) body := p.parseBlockStmt() @@ -1710,7 +1552,7 @@ func (p *parser) parseForStmt() ast.Stmt { // possibly a for statement with a range clause; check assignment operator if as.Tok != token.ASSIGN && as.Tok != token.DEFINE { p.errorExpected(as.TokPos, "'=' or ':='") - return &ast.BadStmt{pos} + return &ast.BadStmt{pos, body.End()} } // check lhs var key, value ast.Expr @@ -1721,19 +1563,19 @@ func (p *parser) parseForStmt() ast.Stmt { key = as.Lhs[0] default: p.errorExpected(as.Lhs[0].Pos(), "1 or 2 expressions") - return &ast.BadStmt{pos} + return &ast.BadStmt{pos, body.End()} } // check rhs if len(as.Rhs) != 1 { p.errorExpected(as.Rhs[0].Pos(), "1 expressions") - return &ast.BadStmt{pos} + return &ast.BadStmt{pos, body.End()} } if rhs, isUnary := as.Rhs[0].(*ast.UnaryExpr); isUnary && rhs.Op == token.RANGE { // rhs is range expression; check lhs return &ast.RangeStmt{pos, key, value, as.TokPos, as.Tok, rhs.X, body} } else { p.errorExpected(s2.Pos(), "range clause") - return &ast.BadStmt{pos} + return &ast.BadStmt{pos, body.End()} } } else { // regular for statement @@ -1791,9 +1633,10 @@ func (p *parser) parseStmt() (s ast.Stmt) { s = &ast.EmptyStmt{p.pos} default: // no statement found - p.errorExpected(p.pos, "statement") + pos := p.pos + p.errorExpected(pos, "statement") p.next() // make progress - s = &ast.BadStmt{p.pos} + s = &ast.BadStmt{pos, p.pos} } return @@ -1813,14 +1656,10 @@ func parseImportSpec(p *parser, doc *ast.CommentGroup) ast.Spec { var ident *ast.Ident if p.tok == token.PERIOD { - ident = &ast.Ident{p.pos, ast.NewObj(ast.Pkg, p.pos, ".")} + ident = &ast.Ident{p.pos, ".", nil} p.next() } else if p.tok == token.IDENT { - ident = p.parseIdent(ast.Pkg) - // TODO(gri) Make sure the ident is not already declared in the - // package scope. Also, cannot add the same name to - // the package scope later. 
- p.declIdent(p.fileScope, ident) + ident = p.parseIdent() } var path *ast.BasicLit @@ -1841,23 +1680,13 @@ func parseConstSpec(p *parser, doc *ast.CommentGroup) ast.Spec { defer un(trace(p, "ConstSpec")) } - idents := p.parseIdentList(ast.Con) - if p.funcScope == nil { - // the scope of a constant outside any function - // is the package scope - p.declIdentList(p.pkgScope, idents) - } + idents := p.parseIdentList() typ := p.tryType() var values []ast.Expr if typ != nil || p.tok == token.ASSIGN { p.expect(token.ASSIGN) values = p.parseExprList() } - if p.funcScope != nil { - // the scope of a constant inside a function - // begins after the the ConstSpec - p.declIdentList(p.funcScope, idents) - } p.expectSemi() return &ast.ValueSpec{doc, idents, typ, values, p.lineComment} @@ -1869,15 +1698,7 @@ func parseTypeSpec(p *parser, doc *ast.CommentGroup) ast.Spec { defer un(trace(p, "TypeSpec")) } - ident := p.parseIdent(ast.Typ) - // the scope of a type outside any function is - // the package scope; the scope of a type inside - // a function starts at the type identifier - scope := p.funcScope - if scope == nil { - scope = p.pkgScope - } - p.declIdent(scope, ident) + ident := p.parseIdent() typ := p.parseType() p.expectSemi() @@ -1890,23 +1711,13 @@ func parseVarSpec(p *parser, doc *ast.CommentGroup) ast.Spec { defer un(trace(p, "VarSpec")) } - idents := p.parseIdentList(ast.Var) - if p.funcScope == nil { - // the scope of a variable outside any function - // is the pkgScope - p.declIdentList(p.pkgScope, idents) - } + idents := p.parseIdentList() typ := p.tryType() var values []ast.Expr if typ == nil || p.tok == token.ASSIGN { p.expect(token.ASSIGN) values = p.parseExprList() } - if p.funcScope != nil { - // the scope of a variable inside a function - // begins after the the VarSpec - p.declIdentList(p.funcScope, idents) - } p.expectSemi() return &ast.ValueSpec{doc, idents, typ, values, p.lineComment} @@ -1915,58 +1726,51 @@ func parseVarSpec(p *parser, doc *ast.CommentGroup) ast.Spec { func (p *parser) parseGenDecl(keyword token.Token, f parseSpecFunction) *ast.GenDecl { if p.trace { - defer un(trace(p, keyword.String()+"Decl")) + defer un(trace(p, "GenDecl("+keyword.String()+")")) } doc := p.leadComment pos := p.expect(keyword) - var lparen, rparen token.Position - var list vector.Vector + var lparen, rparen token.Pos + var list []ast.Spec if p.tok == token.LPAREN { lparen = p.pos p.next() for p.tok != token.RPAREN && p.tok != token.EOF { - list.Push(f(p, p.leadComment)) + list = append(list, f(p, p.leadComment)) } rparen = p.expect(token.RPAREN) p.expectSemi() } else { - list.Push(f(p, nil)) + list = append(list, f(p, nil)) } - // convert vector - specs := make([]ast.Spec, len(list)) - for i, x := range list { - specs[i] = x.(ast.Spec) - } - - return &ast.GenDecl{doc, pos, keyword, lparen, specs, rparen} + return &ast.GenDecl{doc, pos, keyword, lparen, list, rparen} } -func (p *parser) parseReceiver(scope *ast.Scope) *ast.FieldList { +func (p *parser) parseReceiver() *ast.FieldList { if p.trace { defer un(trace(p, "Receiver")) } pos := p.pos - par := p.parseParameters(scope, false) + par := p.parseParameters(false) // must have exactly one receiver if par.NumFields() != 1 { p.errorExpected(pos, "exactly one receiver") - par.List = []*ast.Field{&ast.Field{Type: &ast.BadExpr{noPos}}} + // TODO determine a better range for BadExpr below + par.List = []*ast.Field{&ast.Field{Type: &ast.BadExpr{pos, pos}}} + return par } + // recv type must be of the form ["*"] identifier recv := par.List[0] - - 
// recv type must be TypeName or *TypeName - base := recv.Type - if ptr, isPtr := base.(*ast.StarExpr); isPtr { - base = ptr.X - } - if !isTypeName(base) { - p.errorExpected(base.Pos(), "type name") + base := deref(recv.Type) + if _, isIdent := base.(*ast.Ident); !isIdent { + p.errorExpected(base.Pos(), "(unqualified) identifier") + par.List = []*ast.Field{&ast.Field{Type: &ast.BadExpr{recv.Pos(), recv.End()}}} } return par @@ -1980,20 +1784,18 @@ func (p *parser) parseFuncDecl() *ast.FuncDecl { doc := p.leadComment pos := p.expect(token.FUNC) - scope := ast.NewScope(p.funcScope) var recv *ast.FieldList if p.tok == token.LPAREN { - recv = p.parseReceiver(scope) + recv = p.parseReceiver() } - ident := p.parseIdent(ast.Fun) - p.declIdent(p.pkgScope, ident) // there are no local function declarations - params, results := p.parseSignature(scope) + ident := p.parseIdent() + params, results := p.parseSignature() var body *ast.BlockStmt if p.tok == token.LBRACE { - body = p.parseBody(scope) + body = p.parseBody() } p.expectSemi() @@ -2023,8 +1825,8 @@ func (p *parser) parseDecl() ast.Decl { default: pos := p.pos p.errorExpected(pos, "declaration") - decl := &ast.BadDecl{pos} - p.next() // make progress in any case + p.next() // make progress + decl := &ast.BadDecl{pos, p.pos} return decl } @@ -2032,23 +1834,16 @@ func (p *parser) parseDecl() ast.Decl { } -func (p *parser) parseDeclList() []ast.Decl { +func (p *parser) parseDeclList() (list []ast.Decl) { if p.trace { defer un(trace(p, "DeclList")) } - var list vector.Vector for p.tok != token.EOF { - list.Push(p.parseDecl()) + list = append(list, p.parseDecl()) } - // convert vector - decls := make([]ast.Decl, len(list)) - for i, x := range list { - decls[i] = x.(ast.Decl) - } - - return decls + return } @@ -2063,10 +1858,9 @@ func (p *parser) parseFile() *ast.File { // package clause doc := p.leadComment pos := p.expect(token.PACKAGE) - ident := p.parseIdent(ast.Pkg) // package name is in no scope + ident := p.parseIdent() p.expectSemi() - p.fileScope = ast.NewScope(p.pkgScope) var decls []ast.Decl // Don't bother parsing the rest if we had errors already. 
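A pattern worth noting across these hunks: error recovery now records a range, not a point. parseDecl above saves pos, advances, and builds ast.BadDecl{pos, p.pos}, just as parseForStmt produced BadStmt{pos, body.End()}, so even malformed nodes answer Pos and End consistently. For well-formed nodes that interval is exactly the node's source extent, as this small sketch against the new Node interface shows:

	package main

	import (
		"fmt"
		"go/parser"
		"go/token"
	)

	func main() {
		fset := token.NewFileSet()
		src := "package p\nvar answer = 42\n"
		f, err := parser.ParseFile(fset, "p.go", src, 0)
		if err != nil {
			fmt.Println(err)
			return
		}
		decl := f.Decls[0]
		// [Pos(), End()) brackets the characters belonging to the node.
		from := fset.Position(decl.Pos()).Offset
		to := fset.Position(decl.End()).Offset
		fmt.Printf("%q\n", src[from:to]) // "var answer = 42"
	}
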
@@ -2074,30 +1868,17 @@ func (p *parser) parseFile() *ast.File { if p.ErrorCount() == 0 && p.mode&PackageClauseOnly == 0 { // import decls - var list vector.Vector for p.tok == token.IMPORT { - list.Push(p.parseGenDecl(token.IMPORT, parseImportSpec)) + decls = append(decls, p.parseGenDecl(token.IMPORT, parseImportSpec)) } if p.mode&ImportsOnly == 0 { // rest of package body for p.tok != token.EOF { - list.Push(p.parseDecl()) + decls = append(decls, p.parseDecl()) } } - - // convert declaration list - decls = make([]ast.Decl, len(list)) - for i, x := range list { - decls[i] = x.(ast.Decl) - } - } - - // convert comments list - comments := make([]*ast.CommentGroup, len(p.comments)) - for i, x := range p.comments { - comments[i] = x.(*ast.CommentGroup) } - return &ast.File{doc, pos, ident, decls, comments} + return &ast.File{doc, pos, ident, decls, p.comments} } diff --git a/src/pkg/go/parser/parser_test.go b/src/pkg/go/parser/parser_test.go index 01327a41d..56bd80ef1 100644 --- a/src/pkg/go/parser/parser_test.go +++ b/src/pkg/go/parser/parser_test.go @@ -5,12 +5,14 @@ package parser import ( - "go/ast" + "go/token" "os" "testing" ) +var fset = token.NewFileSet() + var illegalInputs = []interface{}{ nil, 3.14, @@ -21,7 +23,7 @@ var illegalInputs = []interface{}{ func TestParseIllegalInputs(t *testing.T) { for _, src := range illegalInputs { - _, err := ParseFile("", src, nil, 0) + _, err := ParseFile(fset, "", src, 0) if err == nil { t.Errorf("ParseFile(%v) should have failed", src) } @@ -30,23 +32,26 @@ func TestParseIllegalInputs(t *testing.T) { var validPrograms = []interface{}{ + "package main\n", `package main;`, - `package main; import "fmt"; func main() { fmt.Println("Hello, World!") }` + "\n", - `package main; func main() { if f(T{}) {} }` + "\n", - `package main; func main() { _ = (<-chan int)(x) }` + "\n", - `package main; func main() { _ = (<-chan <-chan int)(x) }` + "\n", - `package main; func f(func() func() func())` + "\n", - `package main; func f(...)` + "\n", - `package main; func f(float, ...int)` + "\n", - `package main; type T []int; var a []bool; func f() { if a[T{42}[0]] {} }` + "\n", - `package main; type T []int; func g(int) bool { return true }; func f() { if g(T{42}[0]) {} }` + "\n", - `package main; type T []int; func f() { for _ = range []int{T{42}[0]} {} }` + "\n", + `package main; import "fmt"; func main() { fmt.Println("Hello, World!") };`, + `package main; func main() { if f(T{}) {} };`, + `package main; func main() { _ = (<-chan int)(x) };`, + `package main; func main() { _ = (<-chan <-chan int)(x) };`, + `package main; func f(func() func() func());`, + `package main; func f(...T);`, + `package main; func f(float, ...int);`, + `package main; func f(x int, a ...int) { f(0, a...); f(1, a...,) };`, + `package main; type T []int; var a []bool; func f() { if a[T{42}[0]] {} };`, + `package main; type T []int; func g(int) bool { return true }; func f() { if g(T{42}[0]) {} };`, + `package main; type T []int; func f() { for _ = range []int{T{42}[0]} {} };`, + `package main; var a = T{{1, 2}, {3, 4}}`, } func TestParseValidPrograms(t *testing.T) { for _, src := range validPrograms { - _, err := ParseFile("", src, ast.NewScope(nil), 0) + _, err := ParseFile(fset, "", src, 0) if err != nil { t.Errorf("ParseFile(%q): %v", src, err) } @@ -62,7 +67,7 @@ var validFiles = []string{ func TestParse3(t *testing.T) { for _, filename := range validFiles { - _, err := ParseFile(filename, nil, ast.NewScope(nil), 0) + _, err := ParseFile(fset, filename, nil, 0) if err != nil { 
t.Errorf("ParseFile(%s): %v", filename, err) } @@ -87,7 +92,7 @@ func dirFilter(f *os.FileInfo) bool { return nameFilter(f.Name) } func TestParse4(t *testing.T) { path := "." - pkgs, err := ParseDir(path, dirFilter, 0) + pkgs, err := ParseDir(fset, path, dirFilter, 0) if err != nil { t.Fatalf("ParseDir(%s): %v", path, err) } @@ -99,7 +104,7 @@ func TestParse4(t *testing.T) { t.Errorf(`package "parser" not found`) return } - for filename, _ := range pkg.Files { + for filename := range pkg.Files { if !nameFilter(filename) { t.Errorf("unexpected package file: %s", filename) } diff --git a/src/pkg/go/printer/Makefile b/src/pkg/go/printer/Makefile index a0fe22e42..6a71efc93 100644 --- a/src/pkg/go/printer/Makefile +++ b/src/pkg/go/printer/Makefile @@ -2,7 +2,7 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -include ../../../Make.$(GOARCH) +include ../../../Make.inc TARG=go/printer GOFILES=\ diff --git a/src/pkg/go/printer/nodes.go b/src/pkg/go/printer/nodes.go index a98af4a2a..1ee0846f6 100644 --- a/src/pkg/go/printer/nodes.go +++ b/src/pkg/go/printer/nodes.go @@ -10,7 +10,6 @@ package printer import ( "bytes" - "container/vector" "go/ast" "go/token" ) @@ -28,32 +27,30 @@ import ( // ---------------------------------------------------------------------------- // Common AST nodes. -// Print as many newlines as necessary (but at least min and and at most -// max newlines) to get to the current line. ws is printed before the first -// line break. If newSection is set, the first line break is printed as -// formfeed. Returns true if any line break was printed; returns false otherwise. +// Print as many newlines as necessary (but at least min newlines) to get to +// the current line. ws is printed before the first line break. If newSection +// is set, the first line break is printed as formfeed. Returns true if any +// line break was printed; returns false otherwise. // -// TODO(gri): Reconsider signature (provide position instead of line) +// TODO(gri): linebreak may add too many lines if the next statement at "line" +// is preceeded by comments because the computation of n assumes +// the current position before the comment and the target position +// after the comment. Thus, after interspersing such comments, the +// space taken up by them is not considered to reduce the number of +// linebreaks. At the moment there is no easy way to know about +// future (not yet interspersed) comments in this function. // -func (p *printer) linebreak(line, min, max int, ws whiteSpace, newSection bool) (printedBreak bool) { - n := line - p.pos.Line - switch { - case n < min: - n = min - case n > max: - n = max - } - +func (p *printer) linebreak(line, min int, ws whiteSpace, newSection bool) (printedBreak bool) { + n := p.nlines(line-p.pos.Line, min) if n > 0 { p.print(ws) if newSection { p.print(formfeed) n-- - printedBreak = true } - } - for ; n > 0; n-- { - p.print(newline) + for ; n > 0; n-- { + p.print(newline) + } printedBreak = true } return @@ -75,7 +72,7 @@ func (p *printer) setComment(g *ast.CommentGroup) { // for some reason there are pending comments; this // should never happen - handle gracefully and flush // all comments up to g, ignore anything after that - p.flush(g.List[0].Pos(), token.ILLEGAL) + p.flush(p.fset.Position(g.List[0].Pos()), token.ILLEGAL) } p.comments[0] = g p.cindex = 0 @@ -95,7 +92,7 @@ const ( // Sets multiLine to true if the identifier list spans multiple lines. 
-// If ident is set, a multi-line identifier list is indented after the +// If indent is set, a multi-line identifier list is indented after the // first linebreak encountered. func (p *printer) identList(list []*ast.Ident, indent bool, multiLine *bool) { // convert into an expression list so we can re-use exprList formatting @@ -107,7 +104,7 @@ func (p *printer) identList(list []*ast.Ident, indent bool, multiLine *bool) { if !indent { mode |= noIndent } - p.exprList(noPos, xlist, 1, mode, multiLine, noPos) + p.exprList(token.NoPos, xlist, 1, mode, multiLine, token.NoPos) } @@ -130,7 +127,7 @@ func (p *printer) keySize(pair *ast.KeyValueExpr) int { // TODO(gri) Consider rewriting this to be independent of []ast.Expr // so that we can use the algorithm for any kind of list // (e.g., pass list via a channel over which to range). -func (p *printer) exprList(prev token.Position, list []ast.Expr, depth int, mode exprListMode, multiLine *bool, next token.Position) { +func (p *printer) exprList(prev0 token.Pos, list []ast.Expr, depth int, mode exprListMode, multiLine *bool, next0 token.Pos) { if len(list) == 0 { return } @@ -139,14 +136,10 @@ func (p *printer) exprList(prev token.Position, list []ast.Expr, depth int, mode p.print(blank) } - line := list[0].Pos().Line - endLine := next.Line - if endLine == 0 { - // TODO(gri): endLine may be incorrect as it is really the beginning - // of the last list entry. There may be only one, very long - // entry in which case line == endLine. - endLine = list[len(list)-1].Pos().Line - } + prev := p.fset.Position(prev0) + next := p.fset.Position(next0) + line := p.fset.Position(list[0].Pos()).Line + endLine := p.fset.Position(list[len(list)-1].End()).Line if prev.IsValid() && prev.Line == line && line == endLine { // all list entries on a single line @@ -190,7 +183,7 @@ func (p *printer) exprList(prev token.Position, list []ast.Expr, depth int, mode // lines for them. 
linebreakMin = 0 } - if prev.IsValid() && prev.Line < line && p.linebreak(line, linebreakMin, 2, ws, true) { + if prev.IsValid() && prev.Line < line && p.linebreak(line, linebreakMin, ws, true) { ws = ignore *multiLine = true prevBreak = 0 @@ -202,7 +195,7 @@ func (p *printer) exprList(prev token.Position, list []ast.Expr, depth int, mode // print all list elements for i, x := range list { prevLine := line - line = x.Pos().Line + line = p.fset.Position(x.Pos()).Line // determine if the next linebreak, if any, needs to use formfeed: // in general, use the entire node size to make the decision; for @@ -252,7 +245,7 @@ func (p *printer) exprList(prev token.Position, list []ast.Expr, depth int, mode // unless forceFF is set or there are multiple expressions on // the same line in which case formfeed is used // broken with a formfeed - if p.linebreak(line, linebreakMin, 2, ws, useFF || prevBreak+1 < i) { + if p.linebreak(line, linebreakMin, ws, useFF || prevBreak+1 < i) { ws = ignore *multiLine = true prevBreak = i @@ -301,15 +294,27 @@ func (p *printer) exprList(prev token.Position, list []ast.Expr, depth int, mode func (p *printer) parameters(fields *ast.FieldList, multiLine *bool) { p.print(fields.Opening, token.LPAREN) if len(fields.List) > 0 { + var prevLine, line int for i, par := range fields.List { if i > 0 { - p.print(token.COMMA, blank) + p.print(token.COMMA) + if len(par.Names) > 0 { + line = p.fset.Position(par.Names[0].Pos()).Line + } else { + line = p.fset.Position(par.Type.Pos()).Line + } + if 0 < prevLine && prevLine < line && p.linebreak(line, 0, ignore, true) { + *multiLine = true + } else { + p.print(blank) + } } if len(par.Names) > 0 { p.identList(par.Names, false, multiLine) p.print(blank) } p.expr(par.Type, multiLine) + prevLine = p.fset.Position(par.Type.Pos()).Line } } p.print(fields.Closing, token.RPAREN) @@ -337,7 +342,7 @@ func identListSize(list []*ast.Ident, maxSize int) (size int) { if i > 0 { size += 2 // ", " } - size += len(x.Name()) + size += len(x.Name) if size >= maxSize { break } @@ -366,16 +371,21 @@ func (p *printer) isOneLineFieldList(list []*ast.Field) bool { func (p *printer) setLineComment(text string) { - p.setComment(&ast.CommentGroup{[]*ast.Comment{&ast.Comment{noPos, []byte(text)}}}) + p.setComment(&ast.CommentGroup{[]*ast.Comment{&ast.Comment{token.NoPos, []byte(text)}}}) } func (p *printer) fieldList(fields *ast.FieldList, isIncomplete bool, ctxt exprContext) { + p.nesting++ + defer func() { + p.nesting-- + }() + lbrace := fields.Opening list := fields.List rbrace := fields.Closing - if !isIncomplete && !p.commentBefore(rbrace) { + if !isIncomplete && !p.commentBefore(p.fset.Position(rbrace)) { // possibly a one-line struct/interface if len(list) == 0 { // no blank between keyword and {} in this case @@ -413,7 +423,7 @@ func (p *printer) fieldList(fields *ast.FieldList, isIncomplete bool, ctxt exprC var ml bool for i, f := range list { if i > 0 { - p.linebreak(f.Pos().Line, 1, 2, ignore, ml) + p.linebreak(p.fset.Position(f.Pos()).Line, 1, ignore, ml) } ml = false extraTabs := 0 @@ -448,7 +458,7 @@ func (p *printer) fieldList(fields *ast.FieldList, isIncomplete bool, ctxt exprC if len(list) > 0 { p.print(formfeed) } - p.flush(rbrace, token.RBRACE) // make sure we don't loose the last line comment + p.flush(p.fset.Position(rbrace), token.RBRACE) // make sure we don't lose the last line comment p.setLineComment("// contains unexported fields") } @@ -457,7 +467,7 @@ func (p *printer) fieldList(fields *ast.FieldList, isIncomplete bool, ctxt exprC var
ml bool for i, f := range list { if i > 0 { - p.linebreak(f.Pos().Line, 1, 2, ignore, ml) + p.linebreak(p.fset.Position(f.Pos()).Line, 1, ignore, ml) } ml = false p.setComment(f.Doc) @@ -475,7 +485,7 @@ func (p *printer) fieldList(fields *ast.FieldList, isIncomplete bool, ctxt exprC if len(list) > 0 { p.print(formfeed) } - p.flush(rbrace, token.RBRACE) // make sure we don't loose the last line comment + p.flush(p.fset.Position(rbrace), token.RBRACE) // make sure we don't lose the last line comment p.setLineComment("// contains unexported methods") } @@ -540,7 +550,7 @@ func walkBinary(e *ast.BinaryExpr) (has5, has6 bool, maxProblem int) { case *ast.UnaryExpr: switch e.Op.String() + r.Op.String() { - case "/*": + case "/*", "&&", "&^": maxProblem = 6 case "++", "--": if maxProblem < 5 { @@ -609,11 +619,14 @@ func reduceDepth(depth int) int { // 1) If there is a binary operator with a right side unary operand // that would clash without a space, the cutoff must be (in order): // -// &^ 7 // /* 7 +// && 7 +// &^ 7 // ++ 6 // -- 6 // +// (Comparison operators always have spaces around them.) +// // 2) If there is a mix of level 6 and level 5 operators, then the cutoff // is 6 (use spaces to distinguish precedence) in Normal mode // and 5 (never use spaces) in Compact mode. @@ -643,12 +656,12 @@ func (p *printer) binaryExpr(x *ast.BinaryExpr, prec1, cutoff, depth int, multiL p.print(blank) } xline := p.pos.Line // before the operator (it may be on the next line!) - yline := x.Y.Pos().Line + yline := p.fset.Position(x.Y.Pos()).Line p.print(x.OpPos, x.Op) if xline != yline && xline > 0 && yline > 0 { // at least one line break, but respect an extra empty line // in the source - if p.linebreak(yline, 1, 2, ws, true) { + if p.linebreak(yline, 1, ws, true) { ws = ignore *multiLine = true printBlank = false // no blank after line break @@ -683,19 +696,19 @@ func splitSelector(expr ast.Expr) (body, suffix ast.Expr) { case *ast.CallExpr: body, suffix = splitSelector(x.Fun) if body != nil { - suffix = &ast.CallExpr{suffix, x.Lparen, x.Args, x.Rparen} + suffix = &ast.CallExpr{suffix, x.Lparen, x.Args, x.Ellipsis, x.Rparen} return } case *ast.IndexExpr: body, suffix = splitSelector(x.X) if body != nil { - suffix = &ast.IndexExpr{suffix, x.Index} + suffix = &ast.IndexExpr{suffix, x.Lbrack, x.Index, x.Rbrack} return } case *ast.SliceExpr: body, suffix = splitSelector(x.X) if body != nil { - suffix = &ast.SliceExpr{suffix, x.Index, x.End} + suffix = &ast.SliceExpr{suffix, x.Lbrack, x.Low, x.High, x.Rbrack} return } case *ast.TypeAssertExpr: @@ -712,23 +725,20 @@ func splitSelector(expr ast.Expr) (body, suffix ast.Expr) { // Convert an expression into an expression list split at the periods of // selector expressions.
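Aside: every line computation in the hunks above now goes through p.fset.Position: the AST keeps compact token.Pos indices, and the printer expands them to file/line/column information only at the point of use. A standalone illustration of the expansion, assuming the ParseFile signature shown earlier:

	package main

	import (
		"fmt"
		"go/parser"
		"go/token"
	)

	func main() {
		fset := token.NewFileSet()
		f, _ := parser.ParseFile(fset, "x.go", "package p\n\nvar x int\n", 0)
		pos := f.Decls[0].Pos()         // compact index into the file set
		fmt.Println(fset.Position(pos)) // expanded: x.go:3:1
	}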
-func selectorExprList(expr ast.Expr) []ast.Expr { +func selectorExprList(expr ast.Expr) (list []ast.Expr) { // split expression - var list vector.Vector for expr != nil { var suffix ast.Expr expr, suffix = splitSelector(expr) - list.Push(suffix) + list = append(list, suffix) } - // convert expression list - result := make([]ast.Expr, len(list)) - i := len(result) - for _, x := range list { - i-- - result[i] = x.(ast.Expr) + // reverse list + for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 { + list[i], list[j] = list[j], list[i] } - return result + + return } @@ -791,16 +801,22 @@ func (p *printer) expr1(expr ast.Expr, prec1, depth int, ctxt exprContext, multi case *ast.FuncLit: p.expr(x.Type, multiLine) - p.funcBody(x.Body, distance(x.Type.Pos(), p.pos), true, multiLine) + p.funcBody(x.Body, p.distance(x.Type.Pos(), p.pos), true, multiLine) case *ast.ParenExpr: - p.print(token.LPAREN) - p.expr0(x.X, reduceDepth(depth), multiLine) // parentheses undo one level of depth - p.print(x.Rparen, token.RPAREN) + if _, hasParens := x.X.(*ast.ParenExpr); hasParens { + // don't print parentheses around an already parenthesized expression + // TODO(gri) consider making this more general and incorporate precedence levels + p.expr0(x.X, reduceDepth(depth), multiLine) // parentheses undo one level of depth + } else { + p.print(token.LPAREN) + p.expr0(x.X, reduceDepth(depth), multiLine) // parentheses undo one level of depth + p.print(x.Rparen, token.RPAREN) + } case *ast.SelectorExpr: parts := selectorExprList(expr) - p.exprList(noPos, parts, depth, periodSep, multiLine, noPos) + p.exprList(token.NoPos, parts, depth, periodSep, multiLine, token.NoPos) case *ast.TypeAssertExpr: p.expr1(x.X, token.HighestPrec, depth, 0, multiLine) @@ -815,25 +831,27 @@ func (p *printer) expr1(expr ast.Expr, prec1, depth int, ctxt exprContext, multi case *ast.IndexExpr: // TODO(gri): should treat[] like parentheses and undo one level of depth p.expr1(x.X, token.HighestPrec, 1, 0, multiLine) - p.print(token.LBRACK) + p.print(x.Lbrack, token.LBRACK) p.expr0(x.Index, depth+1, multiLine) - p.print(token.RBRACK) + p.print(x.Rbrack, token.RBRACK) case *ast.SliceExpr: // TODO(gri): should treat[] like parentheses and undo one level of depth p.expr1(x.X, token.HighestPrec, 1, 0, multiLine) - p.print(token.LBRACK) - p.expr0(x.Index, depth+1, multiLine) + p.print(x.Lbrack, token.LBRACK) + if x.Low != nil { + p.expr0(x.Low, depth+1, multiLine) + } // blanks around ":" if both sides exist and either side is a binary expression - if depth <= 1 && x.End != nil && (isBinary(x.Index) || isBinary(x.End)) { + if depth <= 1 && x.Low != nil && x.High != nil && (isBinary(x.Low) || isBinary(x.High)) { p.print(blank, token.COLON, blank) } else { p.print(token.COLON) } - if x.End != nil { - p.expr0(x.End, depth+1, multiLine) + if x.High != nil { + p.expr0(x.High, depth+1, multiLine) } - p.print(token.RBRACK) + p.print(x.Rbrack, token.RBRACK) case *ast.CallExpr: if len(x.Args) > 1 { @@ -842,10 +860,16 @@ func (p *printer) expr1(expr ast.Expr, prec1, depth int, ctxt exprContext, multi p.expr1(x.Fun, token.HighestPrec, depth, 0, multiLine) p.print(x.Lparen, token.LPAREN) p.exprList(x.Lparen, x.Args, depth, commaSep|commaTerm, multiLine, x.Rparen) + if x.Ellipsis.IsValid() { + p.print(x.Ellipsis, token.ELLIPSIS) + } p.print(x.Rparen, token.RPAREN) case *ast.CompositeLit: - p.expr1(x.Type, token.HighestPrec, depth, compositeLit, multiLine) + // composite literal elements that are composite literals themselves may have the type omitted + if x.Type != 
nil { + p.expr1(x.Type, token.HighestPrec, depth, compositeLit, multiLine) + } p.print(x.Lbrace, token.LBRACE) p.exprList(x.Lbrace, x.Elts, 1, commaSep|commaTerm, multiLine, x.Rbrace) p.print(x.Rbrace, token.RBRACE) @@ -917,8 +941,6 @@ func (p *printer) expr(x ast.Expr, multiLine *bool) { // ---------------------------------------------------------------------------- // Statements -const maxStmtNewlines = 2 // maximum number of newlines between statements - // Print the statement list indented, but without a newline after the last statement. // Extra line breaks between statements in the source are respected but at most one // empty line is printed between statements. @@ -931,7 +953,7 @@ func (p *printer) stmtList(list []ast.Stmt, _indent int, nextIsRBrace bool) { for i, s := range list { // _indent == 0 only for lists of switch/select case clauses; // in those cases each clause is a new section - p.linebreak(s.Pos().Line, 1, maxStmtNewlines, ignore, i == 0 || _indent == 0 || multiLine) + p.linebreak(p.fset.Position(s.Pos()).Line, 1, ignore, i == 0 || _indent == 0 || multiLine) multiLine = false p.stmt(s, nextIsRBrace && i == len(list)-1, &multiLine) } @@ -945,7 +967,7 @@ func (p *printer) stmtList(list []ast.Stmt, _indent int, nextIsRBrace bool) { func (p *printer) block(s *ast.BlockStmt, indent int) { p.print(s.Pos(), token.LBRACE) p.stmtList(s.List, indent, true) - p.linebreak(s.Rbrace.Line, 1, maxStmtNewlines, ignore, true) + p.linebreak(p.fset.Position(s.Rbrace).Line, 1, ignore, true) p.print(s.Rbrace, token.RBRACE) } @@ -961,16 +983,27 @@ func isTypeName(x ast.Expr) bool { } -// TODO(gri): Decide if this should be used more broadly. The printing code -// knows when to insert parentheses for precedence reasons, but -// need to be careful to keep them around type expressions. 
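Aside: the stripParens rewrite below relies on ast.Inspect, a depth-first walker: returning false from the callback prunes the subtree, which is how enclosed parentheses protect their composite literals. A minimal, self-contained use of the same helper (the source text is illustrative):

	package main

	import (
		"fmt"
		"go/ast"
		"go/parser"
		"go/token"
	)

	func main() {
		fset := token.NewFileSet()
		f, _ := parser.ParseFile(fset, "x.go", "package p\nvar x = (T{1})\n", 0)
		ast.Inspect(f, func(n ast.Node) bool {
			if _, ok := n.(*ast.CompositeLit); ok {
				fmt.Println("composite literal at", fset.Position(n.Pos()))
				return false // prune: don't descend further
			}
			return true // keep inspecting
		})
	}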
-func stripParens(x ast.Expr, inControlClause bool) ast.Expr { - for px, hasParens := x.(*ast.ParenExpr); hasParens; px, hasParens = x.(*ast.ParenExpr) { - x = px.X - if cx, isCompositeLit := x.(*ast.CompositeLit); inControlClause && isCompositeLit && isTypeName(cx.Type) { - // composite literals inside control clauses need parens if they start with a type name; - // don't strip innermost layer - return px +func stripParens(x ast.Expr) ast.Expr { + if px, strip := x.(*ast.ParenExpr); strip { + // parentheses must not be stripped if there are any + // unparenthesized composite literals starting with + // a type name + ast.Inspect(px.X, func(node ast.Node) bool { + switch x := node.(type) { + case *ast.ParenExpr: + // parentheses protect enclosed composite literals + return false + case *ast.CompositeLit: + if isTypeName(x.Type) { + strip = false // do not strip parentheses + } + return false + } + // in all other cases, keep inspecting + return true + }) + if strip { + return stripParens(px.X) } } return x @@ -983,7 +1016,7 @@ func (p *printer) controlClause(isForStmt bool, init ast.Stmt, expr ast.Expr, po if init == nil && post == nil { // no semicolons required if expr != nil { - p.expr(stripParens(expr, true), ignoreMultiLine) + p.expr(stripParens(expr), ignoreMultiLine) needsBlank = true } } else { @@ -994,7 +1027,7 @@ func (p *printer) controlClause(isForStmt bool, init ast.Stmt, expr ast.Expr, po } p.print(token.SEMICOLON, blank) if expr != nil { - p.expr(stripParens(expr, true), ignoreMultiLine) + p.expr(stripParens(expr), ignoreMultiLine) needsBlank = true } if isForStmt { @@ -1032,14 +1065,14 @@ func (p *printer) stmt(stmt ast.Stmt, nextIsRBrace bool, multiLine *bool) { // between (see writeWhitespace) p.print(unindent) p.expr(s.Label, multiLine) - p.print(token.COLON, indent) + p.print(s.Colon, token.COLON, indent) if e, isEmpty := s.Stmt.(*ast.EmptyStmt); isEmpty { if !nextIsRBrace { p.print(newline, e.Pos(), token.SEMICOLON) break } } else { - p.print(newline) + p.linebreak(p.fset.Position(s.Stmt.Pos()).Line, 1, ignore, true) } p.stmt(s.Stmt, nextIsRBrace, multiLine) @@ -1050,7 +1083,7 @@ func (p *printer) stmt(stmt ast.Stmt, nextIsRBrace bool, multiLine *bool) { case *ast.IncDecStmt: const depth = 1 p.expr0(s.X, depth+1, multiLine) - p.print(s.Tok) + p.print(s.TokPos, s.Tok) case *ast.AssignStmt: var depth = 1 @@ -1059,7 +1092,7 @@ func (p *printer) stmt(stmt ast.Stmt, nextIsRBrace bool, multiLine *bool) { } p.exprList(s.Pos(), s.Lhs, depth, commaSep, multiLine, s.TokPos) p.print(blank, s.TokPos, s.Tok) - p.exprList(s.TokPos, s.Rhs, depth, blankStart|commaSep, multiLine, noPos) + p.exprList(s.TokPos, s.Rhs, depth, blankStart|commaSep, multiLine, token.NoPos) case *ast.GoStmt: p.print(token.GO, blank) @@ -1072,7 +1105,7 @@ func (p *printer) stmt(stmt ast.Stmt, nextIsRBrace bool, multiLine *bool) { case *ast.ReturnStmt: p.print(token.RETURN) if s.Results != nil { - p.exprList(s.Pos(), s.Results, 1, blankStart|commaSep, multiLine, noPos) + p.exprList(s.Pos(), s.Results, 1, blankStart|commaSep, multiLine, token.NoPos) } case *ast.BranchStmt: @@ -1175,7 +1208,7 @@ func (p *printer) stmt(stmt ast.Stmt, nextIsRBrace bool, multiLine *bool) { p.expr(s.Value, multiLine) } p.print(blank, s.TokPos, s.Tok, blank, token.RANGE, blank) - p.expr(stripParens(s.X, true), multiLine) + p.expr(stripParens(s.X), multiLine) p.print(blank) p.block(s.Body, 1) *multiLine = true @@ -1191,25 +1224,25 @@ func (p *printer) stmt(stmt ast.Stmt, nextIsRBrace bool, multiLine *bool) { // 
---------------------------------------------------------------------------- // Declarations -// The parameter n is the number of specs in the group. If indent is set, +// The parameter n is the number of specs in the group. If doIndent is set, // multi-line identifier lists in the spec are indented when the first // linebreak is encountered. // Sets multiLine to true if the spec spans multiple lines. // -func (p *printer) spec(spec ast.Spec, n int, indent bool, multiLine *bool) { +func (p *printer) spec(spec ast.Spec, n int, doIndent bool, multiLine *bool) { switch s := spec.(type) { case *ast.ImportSpec: p.setComment(s.Doc) if s.Name != nil { p.expr(s.Name, multiLine) - p.print(blank) + p.print(vtab) } p.expr(s.Path, multiLine) p.setComment(s.Comment) case *ast.ValueSpec: p.setComment(s.Doc) - p.identList(s.Names, indent, multiLine) // always present + p.identList(s.Names, doIndent, multiLine) // always present if n == 1 { if s.Type != nil { p.print(blank) @@ -1217,7 +1250,7 @@ func (p *printer) spec(spec ast.Spec, n int, indent bool, multiLine *bool) { } if s.Values != nil { p.print(blank, token.ASSIGN) - p.exprList(noPos, s.Values, 1, blankStart|commaSep, multiLine, noPos) + p.exprList(token.NoPos, s.Values, 1, blankStart|commaSep, multiLine, token.NoPos) } p.setComment(s.Comment) @@ -1230,7 +1263,7 @@ func (p *printer) spec(spec ast.Spec, n int, indent bool, multiLine *bool) { } if s.Values != nil { p.print(vtab, token.ASSIGN) - p.exprList(noPos, s.Values, 1, blankStart|commaSep, multiLine, noPos) + p.exprList(token.NoPos, s.Values, 1, blankStart|commaSep, multiLine, token.NoPos) extraTabs-- } if s.Comment != nil { @@ -1271,7 +1304,7 @@ func (p *printer) genDecl(d *ast.GenDecl, multiLine *bool) { var ml bool for i, s := range d.Specs { if i > 0 { - p.linebreak(s.Pos().Line, 1, 2, ignore, ml) + p.linebreak(p.fset.Position(s.Pos()).Line, 1, ignore, ml) } ml = false p.spec(s, len(d.Specs), false, &ml) @@ -1300,7 +1333,7 @@ func (p *printer) nodeSize(n ast.Node, maxSize int) (size int) { // in RawFormat cfg := Config{Mode: RawFormat} var buf bytes.Buffer - if _, err := cfg.Fprint(&buf, n); err != nil { + if _, err := cfg.Fprint(&buf, p.fset, n); err != nil { return } if buf.Len() <= maxSize { @@ -1318,11 +1351,11 @@ func (p *printer) nodeSize(n ast.Node, maxSize int) (size int) { func (p *printer) isOneLineFunc(b *ast.BlockStmt, headerSize int) bool { pos1 := b.Pos() pos2 := b.Rbrace - if pos1.IsValid() && pos2.IsValid() && pos1.Line != pos2.Line { + if pos1.IsValid() && pos2.IsValid() && p.fset.Position(pos1).Line != p.fset.Position(pos2).Line { // opening and closing brace are on different lines - don't make it a one-liner return false } - if len(b.List) > 5 || p.commentBefore(pos2) { + if len(b.List) > 5 || p.commentBefore(p.fset.Position(pos2)) { // too many statements or there is a comment inside - don't make it a one-liner return false } @@ -1345,6 +1378,11 @@ func (p *printer) funcBody(b *ast.BlockStmt, headerSize int, isLit bool, multiLi return } + p.nesting++ + defer func() { + p.nesting-- + }() + if p.isOneLineFunc(b, headerSize) { sep := vtab if isLit { @@ -1374,7 +1412,8 @@ func (p *printer) funcBody(b *ast.BlockStmt, headerSize int, isLit bool, multiLi // distance returns the column difference between from and to if both // are on the same line; if they are on different lines (or unknown) // the result is infinity. 
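Aside: nodeSize above captures the printer's one-liner heuristic: render the candidate node once in RawFormat into a throwaway buffer and measure the result, now passing the FileSet through. Sketched outside the printer internals (the size limit of 40 is illustrative):

	package main

	import (
		"bytes"
		"fmt"
		"go/parser"
		"go/printer"
		"go/token"
	)

	func main() {
		fset := token.NewFileSet()
		f, _ := parser.ParseFile(fset, "x.go", "package p\nfunc f() { return }\n", 0)
		var buf bytes.Buffer
		cfg := printer.Config{Mode: printer.RawFormat}
		cfg.Fprint(&buf, fset, f.Decls[0]) // render to measure, not to emit
		fmt.Println(buf.Len() <= 40)       // small enough for a one-liner?
	}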
-func distance(from, to token.Position) int { +func (p *printer) distance(from0 token.Pos, to token.Position) int { + from := p.fset.Position(from0) if from.IsValid() && to.IsValid() && from.Line == to.Line { return to.Column - from.Column } @@ -1392,7 +1431,7 @@ func (p *printer) funcDecl(d *ast.FuncDecl, multiLine *bool) { } p.expr(d.Name, multiLine) p.signature(d.Type.Params, d.Type.Results, multiLine) - p.funcBody(d.Body, distance(d.Pos(), p.pos), false, multiLine) + p.funcBody(d.Body, p.distance(d.Pos(), p.pos), false, multiLine) } @@ -1414,8 +1453,6 @@ func (p *printer) decl(decl ast.Decl, multiLine *bool) { // ---------------------------------------------------------------------------- // Files -const maxDeclNewlines = 3 // maximum number of newlines between declarations - func declToken(decl ast.Decl) (tok token.Token) { tok = token.ILLEGAL switch d := decl.(type) { @@ -1444,7 +1481,7 @@ func (p *printer) file(src *ast.File) { if prev != tok { min = 2 } - p.linebreak(d.Pos().Line, min, maxDeclNewlines, ignore, false) + p.linebreak(p.fset.Position(d.Pos()).Line, min, ignore, false) p.decl(d, ignoreMultiLine) } } diff --git a/src/pkg/go/printer/printer.go b/src/pkg/go/printer/printer.go index 53632c83d..a4ddad50e 100644 --- a/src/pkg/go/printer/printer.go +++ b/src/pkg/go/printer/printer.go @@ -18,10 +18,7 @@ import ( ) -const ( - debug = false // enable for debugging - maxNewlines = 3 // maximum vertical white space -) +const debug = false // enable for debugging type whiteSpace int @@ -41,8 +38,8 @@ var ( esc = []byte{tabwriter.Escape} htab = []byte{'\t'} htabs = []byte("\t\t\t\t\t\t\t\t") - newlines = []byte("\n\n\n\n\n\n\n\n") // more than maxNewlines - formfeeds = []byte("\f\f\f\f\f\f\f\f") // more than maxNewlines + newlines = []byte("\n\n\n\n\n\n\n\n") // more than the max determined by nlines + formfeeds = []byte("\f\f\f\f\f\f\f\f") // more than the max determined by nlines esc_quot = []byte("&#34;") // shorter than "&quot;" esc_apos = []byte("&#39;") // shorter than "&apos;" @@ -65,12 +62,15 @@ type printer struct { // Configuration (does not change after initialization) output io.Writer Config + fset *token.FileSet errors chan os.Error // Current state - written int // number of bytes written - indent int // current indentation - escape bool // true if in escape sequence + nesting int // nesting level (0: top-level (package scope), >0: functions/decls.) + written int // number of bytes written + indent int // current indentation + escape bool // true if in escape sequence + lastTok token.Token // the last token printed (token.ILLEGAL if it's whitespace) // Buffered whitespace buffer []whiteSpace @@ -95,9 +95,10 @@ type printer struct { } -func (p *printer) init(output io.Writer, cfg *Config) { +func (p *printer) init(output io.Writer, cfg *Config, fset *token.FileSet) { p.output = output p.Config = *cfg + p.fset = fset p.errors = make(chan os.Error) p.buffer = make([]whiteSpace, 0, 16) // whitespace sequences are short } @@ -106,12 +107,31 @@ func (p *printer) init(output io.Writer, cfg *Config) { func (p *printer) internalError(msg ...interface{}) { if debug { fmt.Print(p.pos.String() + ": ") - fmt.Println(msg) + fmt.Println(msg...) panic("go/printer") } } +// nlines returns the adjusted number of linebreaks given the desired number +// of breaks n such that min <= result <= max where max depends on the current +// nesting level. +// +func (p *printer) nlines(n, min int) int { + if n < min { + return min + } + max := 3 // max.
number of newlines at the top level (p.nesting == 0) + if p.nesting > 0 { + max = 2 // max. number of newlines everywhere else + } + if n > max { + return max + } + return n +} + + // write0 writes raw (uninterpreted) data to p.output and handles errors. // write0 does not indent after newlines, and does not HTML-escape or update p.pos. // @@ -192,6 +212,11 @@ func (p *printer) write(data []byte) { case tabwriter.Escape: p.escape = !p.escape + + // ignore escape chars introduced by printer - they are + // invisible and must not affect p.pos (was issue #1089) + p.pos.Offset-- + p.pos.Column-- } } @@ -207,9 +232,7 @@ func (p *printer) write(data []byte) { func (p *printer) writeNewlines(n int, useFF bool) { if n > 0 { - if n > maxNewlines { - n = maxNewlines - } + n = p.nlines(n, 0) if useFF { p.write(formfeeds[0:n]) } else { @@ -292,8 +315,8 @@ func (p *printer) writeCommentPrefix(pos, next token.Position, isFirst, isKeywor } if pos.IsValid() && pos.Filename != p.last.Filename { - // comment in a different file - separate with newlines - p.writeNewlines(maxNewlines, true) + // comment in a different file - separate with newlines (writeNewlines will limit the number) + p.writeNewlines(10, true) return } @@ -380,7 +403,6 @@ func (p *printer) writeCommentPrefix(pos, next token.Position, isFirst, isKeywor func (p *printer) writeCommentLine(comment *ast.Comment, pos token.Position, line []byte) { // line must pass through unchanged, bracket it with tabwriter.Escape - esc := []byte{tabwriter.Escape} line = bytes.Join([][]byte{esc, line, esc}, nil) // apply styler, if any @@ -576,7 +598,7 @@ func (p *printer) writeComment(comment *ast.Comment) { // shortcut common case of //-style comments if text[1] == '/' { - p.writeCommentLine(comment, comment.Pos(), text) + p.writeCommentLine(comment, p.fset.Position(comment.Pos()), text) return } @@ -588,7 +610,7 @@ func (p *printer) writeComment(comment *ast.Comment) { // write comment lines, separated by formfeed, // without a line break after the last line linebreak := formfeeds[0:1] - pos := comment.Pos() + pos := p.fset.Position(comment.Pos()) for i, line := range lines { if i > 0 { p.write(linebreak) @@ -649,14 +671,14 @@ func (p *printer) intersperseComments(next token.Position, tok token.Token) (dro var last *ast.Comment for ; p.commentBefore(next); p.cindex++ { for _, c := range p.comments[p.cindex].List { - p.writeCommentPrefix(c.Pos(), next, last == nil, tok.IsKeyword()) + p.writeCommentPrefix(p.fset.Position(c.Pos()), next, last == nil, tok.IsKeyword()) p.writeComment(c) last = c } } if last != nil { - if last.Text[1] == '*' && last.Pos().Line == next.Line { + if last.Text[1] == '*' && p.fset.Position(last.Pos()).Line == next.Line { // the last comment is a /*-style comment and the next item // follows on the same line: separate with an extra blank p.write([]byte{' '}) @@ -726,6 +748,26 @@ func (p *printer) writeWhitespace(n int) { // ---------------------------------------------------------------------------- // Printing interface + +func mayCombine(prev token.Token, next byte) (b bool) { + switch prev { + case token.INT: + b = next == '.' // 1. 
+ case token.ADD: + b = next == '+' // ++ + case token.SUB: + b = next == '-' // -- + case token.QUO: + b = next == '*' // /* + case token.LSS: + b = next == '-' || next == '<' // <- or << + case token.AND: + b = next == '&' || next == '^' // && or &^ + } + return +} + + // print prints a list of "items" (roughly corresponding to syntactic // tokens, but also including whitespace and formatting information). // It is the only print function that should be called directly from @@ -743,6 +785,7 @@ func (p *printer) print(args ...interface{}) { var data []byte var tag HTMLTag var tok token.Token + switch x := f.(type) { case whiteSpace: if x == ignore { @@ -765,7 +808,7 @@ func (p *printer) print(args ...interface{}) { if p.Styler != nil { data, tag = p.Styler.Ident(x) } else { - data = []byte(x.Name()) + data = []byte(x.Name) } tok = token.IDENT case *ast.BasicLit: @@ -779,22 +822,38 @@ func (p *printer) print(args ...interface{}) { // bytes since they do not appear in legal UTF-8 sequences) // TODO(gri): do this more efficiently. data = []byte("\xff" + string(data) + "\xff") - tok = token.INT // representing all literal tokens + tok = x.Kind case token.Token: + s := x.String() + if mayCombine(p.lastTok, s[0]) { + // the previous and the current token must be + // separated by a blank otherwise they combine + // into a different incorrect token sequence + // (except for token.INT followed by a '.' this + // should never happen because it is taken care + // of via binary expression formatting) + if len(p.buffer) != 0 { + p.internalError("whitespace buffer not empty") + } + p.buffer = p.buffer[0:1] + p.buffer[0] = ' ' + } if p.Styler != nil { data, tag = p.Styler.Token(x) } else { - data = []byte(x.String()) + data = []byte(s) } tok = x - case token.Position: + case token.Pos: if x.IsValid() { - next = x // accurate position of next item + next = p.fset.Position(x) // accurate position of next item } + tok = p.lastTok default: fmt.Fprintf(os.Stderr, "print: unsupported argument type %T\n", f) panic("go/printer type") } + p.lastTok = tok p.pos = next if data != nil { @@ -816,11 +875,11 @@ func (p *printer) print(args ...interface{}) { // before the next position in the source code. // func (p *printer) commentBefore(next token.Position) bool { - return p.cindex < len(p.comments) && p.comments[p.cindex].List[0].Pos().Offset < next.Offset + return p.cindex < len(p.comments) && p.fset.Position(p.comments[p.cindex].List[0].Pos()).Offset < next.Offset } -// Flush prints any pending comments and whitespace occuring +// Flush prints any pending comments and whitespace occurring // textually before the position of the next token tok. Flush // returns true if a pending formfeed character was dropped // from the whitespace buffer as a result of interspersing @@ -844,81 +903,85 @@ func (p *printer) flush(next token.Position, tok token.Token) (droppedFF bool) { // A trimmer is an io.Writer filter for stripping tabwriter.Escape // characters, trailing blanks and tabs, and for converting formfeed // and vtab characters into newlines and htabs (in case no tabwriter -// is used). +// is used). Text bracketed by tabwriter.Escape characters is passed +// through unchanged. // type trimmer struct { output io.Writer - buf bytes.Buffer + space bytes.Buffer + state int } -// Design note: It is tempting to eliminate extra blanks occuring in +// trimmer is implemented as a state machine. 
+// It can be in one of the following states: +const ( + inSpace = iota + inEscape + inText +) + + +// Design note: It is tempting to eliminate extra blanks occurring in // whitespace in this function as it could simplify some // of the blanks logic in the node printing functions. // However, this would mess up any formatting done by // the tabwriter. func (p *trimmer) Write(data []byte) (n int, err os.Error) { - // m < 0: no unwritten data except for whitespace - // m >= 0: data[m:n] unwritten and no whitespace - m := 0 - if p.buf.Len() > 0 { - m = -1 - } - + m := 0 // if p.state != inSpace, data[m:n] is unwritten var b byte for n, b = range data { - switch b { - default: - // write any pending whitespace - if m < 0 { - if _, err = p.output.Write(p.buf.Bytes()); err != nil { - return - } - p.buf.Reset() - m = n - } - - case '\v': + if b == '\v' { b = '\t' // convert to htab - fallthrough - - case '\t', ' ', tabwriter.Escape: - // write any pending (non-whitespace) data - if m >= 0 { - if _, err = p.output.Write(data[m:n]); err != nil { - return - } - m = -1 + } + switch p.state { + case inSpace: + switch b { + case '\t', ' ': + p.space.WriteByte(b) // WriteByte returns no errors + case '\f', '\n': + p.space.Reset() // discard trailing space + _, err = p.output.Write(newlines[0:1]) // write newline + case tabwriter.Escape: + _, err = p.output.Write(p.space.Bytes()) + p.space.Reset() + p.state = inEscape + m = n + 1 // drop tabwriter.Escape + default: + _, err = p.output.Write(p.space.Bytes()) + p.space.Reset() + p.state = inText + m = n } - // collect whitespace but discard tabwriter.Escapes. - if b != tabwriter.Escape { - p.buf.WriteByte(b) // WriteByte returns no errors + case inEscape: + if b == tabwriter.Escape { + _, err = p.output.Write(data[m:n]) + p.state = inSpace } - - case '\f', '\n': - // discard whitespace - p.buf.Reset() - // write any pending (non-whitespace) data - if m >= 0 { - if _, err = p.output.Write(data[m:n]); err != nil { - return - } - m = -1 - } - // convert formfeed into newline - if _, err = p.output.Write(newlines[0:1]); err != nil { - return + case inText: + switch b { + case '\t', ' ': + _, err = p.output.Write(data[m:n]) + p.state = inSpace + p.space.WriteByte(b) // WriteByte returns no errors + case '\f': + data[n] = '\n' // convert to newline + case tabwriter.Escape: + _, err = p.output.Write(data[m:n]) + p.state = inEscape + m = n + 1 // drop tabwriter.Escape } } + if err != nil { + return + } } n = len(data) - // write any pending non-whitespace - if m >= 0 { - if _, err = p.output.Write(data[m:n]); err != nil { - return - } + if p.state != inSpace { + _, err = p.output.Write(data[m:n]) + p.state = inSpace } return @@ -965,10 +1028,11 @@ type Config struct { // Fprint "pretty-prints" an AST node to output and returns the number // of bytes written and an error (if any) for a given configuration cfg. +// Position information is interpreted relative to the file set fset. // The node type must be *ast.File, or assignment-compatible to ast.Expr, // ast.Decl, ast.Spec, or ast.Stmt. // -func (cfg *Config) Fprint(output io.Writer, node interface{}) (int, os.Error) { +func (cfg *Config) Fprint(output io.Writer, fset *token.FileSet, node interface{}) (int, os.Error) { // redirect output through a trimmer to eliminate trailing whitespace // (Input to a tabwriter must be untrimmed since trailing tabs provide // formatting information. 
The tabwriter could provide trimming @@ -1000,13 +1064,15 @@ func (cfg *Config) Fprint(output io.Writer, node interface{}) (int, os.Error) { // setup printer and print node var p printer - p.init(output, cfg) + p.init(output, cfg, fset) go func() { switch n := node.(type) { case ast.Expr: + p.nesting = 1 p.useNodeComments = true p.expr(n, ignoreMultiLine) case ast.Stmt: + p.nesting = 1 p.useNodeComments = true // A labeled statement will un-indent to position the // label. Set indent to 1 so we don't get indent "underflow". @@ -1015,17 +1081,20 @@ func (cfg *Config) Fprint(output io.Writer, node interface{}) (int, os.Error) { } p.stmt(n, false, ignoreMultiLine) case ast.Decl: + p.nesting = 1 p.useNodeComments = true p.decl(n, ignoreMultiLine) case ast.Spec: + p.nesting = 1 p.useNodeComments = true p.spec(n, 1, false, ignoreMultiLine) case *ast.File: + p.nesting = 0 p.comments = n.Comments p.useNodeComments = n.Comments == nil p.file(n) default: - p.errors <- os.NewError(fmt.Sprintf("printer.Fprint: unsupported node type %T", n)) + p.errors <- fmt.Errorf("printer.Fprint: unsupported node type %T", n) runtime.Goexit() } p.flush(token.Position{Offset: infinity, Line: infinity}, token.EOF) @@ -1045,7 +1114,7 @@ func (cfg *Config) Fprint(output io.Writer, node interface{}) (int, os.Error) { // Fprint "pretty-prints" an AST node to output. // It calls Config.Fprint with default settings. // -func Fprint(output io.Writer, node interface{}) os.Error { - _, err := (&Config{Tabwidth: 8}).Fprint(output, node) // don't care about number of bytes written +func Fprint(output io.Writer, fset *token.FileSet, node interface{}) os.Error { + _, err := (&Config{Tabwidth: 8}).Fprint(output, fset, node) // don't care about number of bytes written return err } diff --git a/src/pkg/go/printer/printer_test.go b/src/pkg/go/printer/printer_test.go index a5de3774a..c66471b92 100644 --- a/src/pkg/go/printer/printer_test.go +++ b/src/pkg/go/printer/printer_test.go @@ -10,6 +10,7 @@ import ( "io/ioutil" "go/ast" "go/parser" + "go/token" "path" "testing" ) @@ -24,6 +25,9 @@ const ( var update = flag.Bool("update", false, "update golden files") +var fset = token.NewFileSet() + + func lineString(text []byte, i int) string { i0 := i for i < len(text) && text[i] != '\n' { @@ -43,7 +47,7 @@ const ( func check(t *testing.T, source, golden string, mode checkMode) { // parse source - prog, err := parser.ParseFile(source, nil, nil, parser.ParseComments) + prog, err := parser.ParseFile(fset, source, nil, parser.ParseComments) if err != nil { t.Error(err) return @@ -63,7 +67,7 @@ func check(t *testing.T, source, golden string, mode checkMode) { // format source var buf bytes.Buffer - if _, err := cfg.Fprint(&buf, prog); err != nil { + if _, err := cfg.Fprint(&buf, fset, prog); err != nil { t.Error(err) } res := buf.Bytes() @@ -112,14 +116,14 @@ type entry struct { // Use gotest -update to create/update the respective golden files. 
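Aside: with the signature change above, the printer's entry points mirror the parser's: Fprint needs the same FileSet the node was parsed against, since comment interspersion and line breaks are derived from positions resolved through it. A sketch of the full round trip (filename and source are illustrative):

	package main

	import (
		"go/parser"
		"go/printer"
		"go/token"
		"os"
	)

	func main() {
		fset := token.NewFileSet()
		f, err := parser.ParseFile(fset, "x.go", "package p\nvar   x   int\n", 0)
		if err != nil {
			return
		}
		// the same fset must accompany the node it was parsed into
		printer.Fprint(os.Stdout, fset, f)
	}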
var data = []entry{ - entry{"empty.input", "empty.golden", 0}, - entry{"comments.input", "comments.golden", 0}, - entry{"comments.input", "comments.x", export}, - entry{"linebreaks.input", "linebreaks.golden", 0}, - entry{"expressions.input", "expressions.golden", 0}, - entry{"expressions.input", "expressions.raw", rawFormat}, - entry{"declarations.input", "declarations.golden", 0}, - entry{"statements.input", "statements.golden", 0}, + {"empty.input", "empty.golden", 0}, + {"comments.input", "comments.golden", 0}, + {"comments.input", "comments.x", export}, + {"linebreaks.input", "linebreaks.golden", 0}, + {"expressions.input", "expressions.golden", 0}, + {"expressions.input", "expressions.raw", rawFormat}, + {"declarations.input", "declarations.golden", 0}, + {"statements.input", "statements.golden", 0}, } diff --git a/src/pkg/go/printer/testdata/comments.golden b/src/pkg/go/printer/testdata/comments.golden index 4c9f71d95..200ea332f 100644 --- a/src/pkg/go/printer/testdata/comments.golden +++ b/src/pkg/go/printer/testdata/comments.golden @@ -431,6 +431,38 @@ func _() { } +// Comments immediately adjacent to punctuation (for which the go/printer +// may only have estimated position information) must remain after the punctuation. +func _() { + _ = T{ + 1, // comment after comma + 2, /* comment after comma */ + 3, // comment after comma + } + _ = T{ + 1, // comment after comma + 2, /* comment after comma */ + 3, // comment after comma + } + _ = T{ + /* comment before literal */ 1, + 2, /* comment before comma - ok to move after comma */ + 3, /* comment before comma - ok to move after comma */ + } + + for i = 0; // comment after semicolon + i < 9; /* comment after semicolon */ + i++ { // comment after opening curly brace + } + + // TODO(gri) the last comment in this example should be aligned */ + for i = 0; // comment after semicolon + i < 9; /* comment before semicolon - ok to move after semicolon */ + i++ /* comment before opening curly brace */ { + } +} + + // Line comments with tabs func _() { var finput *bufio.Reader // input file diff --git a/src/pkg/go/printer/testdata/comments.input b/src/pkg/go/printer/testdata/comments.input index 335e81391..4a9ea4742 100644 --- a/src/pkg/go/printer/testdata/comments.input +++ b/src/pkg/go/printer/testdata/comments.input @@ -429,6 +429,40 @@ func _() { /* closing curly brace should be on new line */ } +// Comments immediately adjacent to punctuation (for which the go/printer +// may only have estimated position information) must remain after the punctuation.
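Aside: the test table at the top of this hunk already uses the feature these parser and printer changes support: composite literal elements may elide a repeated element type. A small illustration (the entry type and values are hypothetical):

	package main

	import "fmt"

	type entry struct{ in, out string }

	var data = []entry{
		{"a.input", "a.golden"},      // element type entry elided
		entry{"b.input", "b.golden"}, // equivalent explicit form
	}

	func main() { fmt.Println(len(data)) }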
+func _() { + _ = T{ + 1, // comment after comma + 2, /* comment after comma */ + 3 , // comment after comma + } + _ = T{ + 1 ,// comment after comma + 2 ,/* comment after comma */ + 3,// comment after comma + } + _ = T{ + /* comment before literal */1, + 2/* comment before comma - ok to move after comma */, + 3 /* comment before comma - ok to move after comma */ , + } + + for + i=0;// comment after semicolon + i<9;/* comment after semicolon */ + i++{// comment after opening curly brace + } + + // TODO(gri) the last comment in this example should be aligned */ + for + i=0;// comment after semicolon + i<9/* comment before semicolon - ok to move after semicolon */; + i++ /* comment before opening curly brace */ { + } +} + + // Line comments with tabs func _() { var finput *bufio.Reader // input file diff --git a/src/pkg/go/printer/testdata/declarations.golden b/src/pkg/go/printer/testdata/declarations.golden index 67f16b805..1c091b929 100644 --- a/src/pkg/go/printer/testdata/declarations.golden +++ b/src/pkg/go/printer/testdata/declarations.golden @@ -7,10 +7,10 @@ package imports import "io" import ( - _ "io" + _ "io" ) -import _ "io" +import _ "io" import ( "io" @@ -20,40 +20,60 @@ import ( import ( "io" - aLongRename "io" + aLongRename "io" - b "io" + b "io" +) + +import ( + "unrenamed" + renamed "renameMe" + . "io" + _ "io" + "io" + . "os" ) // no newlines between consecutive single imports, but // respect extra line breaks in the source (at most one empty line) -import _ "io" -import _ "io" -import _ "io" +import _ "io" +import _ "io" +import _ "io" -import _ "os" -import _ "os" -import _ "os" +import _ "os" +import _ "os" +import _ "os" -import _ "fmt" -import _ "fmt" -import _ "fmt" +import _ "fmt" +import _ "fmt" +import _ "fmt" import "foo" // a comment import "bar" // a comment import ( - _ "foo" + _ "foo" // a comment "bar" "foo" // a comment "bar" // a comment ) +// comments + renames +import ( + "unrenamed" // a comment + renamed "renameMe" + . "io" /* a comment */ + _ "io/ioutil" // a comment + "io" // testing alignment + . "os" + // a comment +) + // a case that caused problems in the past (comment placement) import ( - . "fmt" + . "fmt" "io" "malloc" // for the malloc count test only "math" @@ -63,7 +83,7 @@ import ( // at least one empty line between declarations of different kind -import _ "io" +import _ "io" var _ int @@ -617,24 +637,79 @@ func _() { // ellipsis parameters -func _(...) func _(...int) func _(...*int) func _(...[]int) func _(...struct{}) func _(bool, ...interface{}) func _(bool, ...func()) -func _(bool, ...func(...)) +func _(bool, ...func(...int)) func _(bool, ...map[string]int) func _(bool, ...chan int) -func _(b bool, x ...) 
func _(b bool, x ...int) func _(b bool, x ...*int) func _(b bool, x ...[]int) func _(b bool, x ...struct{}) func _(x ...interface{}) func _(x ...func()) -func _(x ...func(...)) +func _(x ...func(...int)) func _(x ...map[string]int) func _(x ...chan int) + + +// these parameter lists must remain multi-line since they are multi-line in the source +func _(bool, +int) { +} +func _(x bool, +y int) { +} +func _(x, +y bool) { +} +func _(bool, // comment +int) { +} +func _(x bool, // comment +y int) { +} +func _(x, // comment +y bool) { +} +func _(bool, // comment +// comment +int) { +} +func _(x bool, // comment +// comment +y int) { +} +func _(x, // comment +// comment +y bool) { +} +func _(bool, +// comment +int) { +} +func _(x bool, +// comment +y int) { +} +func _(x, +// comment +y bool) { +} +func _(x, // comment +y, // comment +z bool) { +} +func _(x, // comment +y, // comment +z bool) { +} +func _(x int, // comment +y float, // comment +z bool) { +} diff --git a/src/pkg/go/printer/testdata/declarations.input b/src/pkg/go/printer/testdata/declarations.input index 095d1ddac..c826462f9 100644 --- a/src/pkg/go/printer/testdata/declarations.input +++ b/src/pkg/go/printer/testdata/declarations.input @@ -25,6 +25,15 @@ import ( b "io" ) +import ( + "unrenamed" + renamed "renameMe" + . "io" + _ "io" + "io" + . "os" +) + // no newlines between consecutive single imports, but // respect extra line breaks in the source (at most one empty line) import _ "io" @@ -51,6 +60,17 @@ import ( "bar" // a comment ) +// comments + renames +import ( + "unrenamed" // a comment + renamed "renameMe" + . "io" /* a comment */ + _ "io/ioutil" // a comment + "io" // testing alignment + . "os" + // a comment +) + // a case that caused problems in the past (comment placement) import ( . "fmt" @@ -605,24 +625,79 @@ func _() { // ellipsis parameters -func _(...) func _(...int) func _(...*int) func _(...[]int) func _(...struct{}) func _(bool, ...interface{}) func _(bool, ...func()) -func _(bool, ...func(...)) +func _(bool, ...func(...int)) func _(bool, ...map[string]int) func _(bool, ...chan int) -func _(b bool, x ...) 
func _(b bool, x ...int) func _(b bool, x ...*int) func _(b bool, x ...[]int) func _(b bool, x ...struct{}) func _(x ...interface{}) func _(x ...func()) -func _(x ...func(...)) +func _(x ...func(...int)) func _(x ...map[string]int) func _(x ...chan int) + + +// these parameter lists must remain multi-line since they are multi-line in the source +func _(bool, +int) { +} +func _(x bool, +y int) { +} +func _(x, +y bool) { +} +func _(bool, // comment +int) { +} +func _(x bool, // comment +y int) { +} +func _(x, // comment +y bool) { +} +func _(bool, // comment +// comment +int) { +} +func _(x bool, // comment +// comment +y int) { +} +func _(x, // comment +// comment +y bool) { +} +func _(bool, +// comment +int) { +} +func _(x bool, +// comment +y int) { +} +func _(x, +// comment +y bool) { +} +func _(x, // comment +y,// comment +z bool) { +} +func _(x, // comment + y,// comment + z bool) { +} +func _(x int, // comment + y float, // comment + z bool) { +} diff --git a/src/pkg/go/printer/testdata/expressions.golden b/src/pkg/go/printer/testdata/expressions.golden index 95e5502d3..882c7624c 100644 --- a/src/pkg/go/printer/testdata/expressions.golden +++ b/src/pkg/go/printer/testdata/expressions.golden @@ -31,6 +31,9 @@ func _() { _ = 1 + a _ = a + 1 _ = a + b + 1 + _ = s[a] + _ = s[a:] + _ = s[:b] _ = s[1:2] _ = s[a:b] _ = s[0:len(s)] @@ -56,6 +59,7 @@ func _() { _ = s[a : b-c] _ = s[0:] _ = s[a+b] + _ = s[:b-c] _ = s[a+b:] _ = a[a<<b+1] _ = a[a<<b+1:] @@ -165,6 +169,45 @@ func _() { } +func f(x int, args ...int) { + f(0, args...) + f(1, args) + f(2, args[0]) + + // make sure syntactically legal code remains syntactically legal + f(3, 42 ...) // a blank must remain between 42 and ... + f(4, 42....) + f(5, 42....) + f(6, 42.0...) + f(7, 42.0...) + f(8, .42...) + f(9, .42...) + f(10, 42e0...) + f(11, 42e0...) 
+ + _ = 42 .x // a blank must remain between 42 and .x + _ = 42..x + _ = 42..x + _ = 42.0.x + _ = 42.0.x + _ = .42.x + _ = .42.x + _ = 42e0.x + _ = 42e0.x + + // a blank must remain between the binary operator and the 2nd operand + _ = x / *y + _ = x < -1 + _ = x < <-1 + _ = x + +1 + _ = x - -1 + _ = x & &x + _ = x & ^x + + _ = f(x / *y, x < -1, x < <-1, x + +1, x - -1, x & &x, x & ^x) +} + + func _() { _ = T{} _ = struct{}{} @@ -172,13 +215,6 @@ func _() { _ = [...]T{} _ = []T{} _ = map[int]T{} - - _ = (T){} - _ = (struct{}){} - _ = ([10]T){} - _ = ([...]T){} - _ = ([]T){} - _ = (map[int]T){} } @@ -206,6 +242,8 @@ func _() { ` _ = `foo bar` + _ = `three spaces before the end of the line starting here: +they must not be removed` } @@ -239,6 +277,27 @@ func _() { func _() { + _ = [][]int{ + []int{1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int{ + {1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int{ + {1}, + {1, 2}, + {1, 2, 3}, + } + _ = [][]int{{1}, {1, 2}, {1, 2, 3}} +} + + +// various multi-line expressions +func _() { // do not add extra indentation to multi-line string lists _ = "foo" + "bar" _ = "foo" + @@ -336,7 +395,6 @@ func _() { 2, 3, ) - // TODO(gri) the cases below are not correct yet f(1, 2, 3) // comment @@ -349,8 +407,7 @@ func _() { 3) // comment f(1, 2, - 3 // comment - , + 3, // comment ) } diff --git a/src/pkg/go/printer/testdata/expressions.input b/src/pkg/go/printer/testdata/expressions.input index 13891d971..647706b09 100644 --- a/src/pkg/go/printer/testdata/expressions.input +++ b/src/pkg/go/printer/testdata/expressions.input @@ -31,6 +31,9 @@ func _() { _ = 1+a _ = a+1 _ = a+b+1 + _ = s[a] + _ = s[a:] + _ = s[:b] _ = s[1:2] _ = s[a:b] _ = s[0:len(s)] @@ -56,6 +59,7 @@ func _() { _ = s[a : b-c] _ = s[0:] _ = s[a+b] + _ = s[: b-c] _ = s[a+b :] _ = a[a<<b+1] _ = a[a<<b+1 :] @@ -165,6 +169,45 @@ func _() { } +func f(x int, args ...int) { + f(0, args...) + f(1, args) + f(2, args[0]) + + // make sure syntactically legal code remains syntactically legal + f(3, 42 ...) // a blank must remain between 42 and ... + f(4, 42. ...) + f(5, 42....) + f(6, 42.0 ...) + f(7, 42.0...) + f(8, .42 ...) + f(9, .42...) + f(10, 42e0 ...) + f(11, 42e0...) + + _ = 42 .x // a blank must remain between 42 and .x + _ = 42. 
.x + _ = 42..x + _ = 42.0 .x + _ = 42.0.x + _ = .42 .x + _ = .42.x + _ = 42e0 .x + _ = 42e0.x + + // a blank must remain between the binary operator and the 2nd operand + _ = x/ *y + _ = x< -1 + _ = x< <-1 + _ = x+ +1 + _ = x- -1 + _ = x& &x + _ = x& ^x + + _ = f(x/ *y, x< -1, x< <-1, x+ +1, x- -1, x& &x, x& ^x) +} + + func _() { _ = T{} _ = struct{}{} @@ -172,13 +215,6 @@ func _() { _ = [...]T{} _ = []T{} _ = map[int]T{} - - _ = (T){} - _ = (struct{}){} - _ = ([10]T){} - _ = ([...]T){} - _ = ([]T){} - _ = (map[int]T){} } @@ -202,6 +238,8 @@ func _() { ` _ = `foo bar` + _ = `three spaces before the end of the line starting here: +they must not be removed` } @@ -231,6 +269,27 @@ func _() { func _() { + _ = [][]int { + []int{1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int { + {1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int { + {1}, + {1, 2}, + {1, 2, 3}, + } + _ = [][]int {{1}, {1, 2}, {1, 2, 3}} +} + + +// various multi-line expressions +func _() { // do not add extra indentation to multi-line string lists _ = "foo" + "bar" _ = "foo" + @@ -329,7 +388,6 @@ func _() { 2, 3, ) - // TODO(gri) the cases below are not correct yet f(1, 2, 3) // comment diff --git a/src/pkg/go/printer/testdata/expressions.raw b/src/pkg/go/printer/testdata/expressions.raw index dccc8d122..62be00cc3 100644 --- a/src/pkg/go/printer/testdata/expressions.raw +++ b/src/pkg/go/printer/testdata/expressions.raw @@ -31,6 +31,9 @@ func _() { _ = 1 + a _ = a + 1 _ = a + b + 1 + _ = s[a] + _ = s[a:] + _ = s[:b] _ = s[1:2] _ = s[a:b] _ = s[0:len(s)] @@ -56,6 +59,7 @@ func _() { _ = s[a : b-c] _ = s[0:] _ = s[a+b] + _ = s[:b-c] _ = s[a+b:] _ = a[a<<b+1] _ = a[a<<b+1:] @@ -165,6 +169,45 @@ func _() { } +func f(x int, args ...int) { + f(0, args...) + f(1, args) + f(2, args[0]) + + // make sure syntactically legal code remains syntactically legal + f(3, 42 ...) // a blank must remain between 42 and ... + f(4, 42....) + f(5, 42....) + f(6, 42.0...) + f(7, 42.0...) + f(8, .42...) + f(9, .42...) + f(10, 42e0...) + f(11, 42e0...) 
+ + _ = 42 .x // a blank must remain between 42 and .x + _ = 42..x + _ = 42..x + _ = 42.0.x + _ = 42.0.x + _ = .42.x + _ = .42.x + _ = 42e0.x + _ = 42e0.x + + // a blank must remain between the binary operator and the 2nd operand + _ = x / *y + _ = x < -1 + _ = x < <-1 + _ = x + +1 + _ = x - -1 + _ = x & &x + _ = x & ^x + + _ = f(x / *y, x < -1, x < <-1, x + +1, x - -1, x & &x, x & ^x) +} + + func _() { _ = T{} _ = struct{}{} @@ -172,13 +215,6 @@ func _() { _ = [...]T{} _ = []T{} _ = map[int]T{} - - _ = (T){} - _ = (struct{}){} - _ = ([10]T){} - _ = ([...]T){} - _ = ([]T){} - _ = (map[int]T){} } @@ -206,6 +242,8 @@ func _() { ` _ = `foo bar` + _ = `three spaces before the end of the line starting here: +they must not be removed` } @@ -239,6 +277,27 @@ func _() { func _() { + _ = [][]int{ + []int{1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int{ + {1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int{ + {1}, + {1, 2}, + {1, 2, 3}, + } + _ = [][]int{{1}, {1, 2}, {1, 2, 3}} +} + + +// various multi-line expressions +func _() { // do not add extra indentation to multi-line string lists _ = "foo" + "bar" _ = "foo" + @@ -336,7 +395,6 @@ func _() { 2, 3, ) - // TODO(gri) the cases below are not correct yet f(1, 2, 3) // comment @@ -349,8 +407,7 @@ func _() { 3) // comment f(1, 2, - 3 // comment - , + 3, // comment ) } diff --git a/src/pkg/go/printer/testdata/statements.golden b/src/pkg/go/printer/testdata/statements.golden index 73a3e1236..5eceb7dd5 100644 --- a/src/pkg/go/printer/testdata/statements.golden +++ b/src/pkg/go/printer/testdata/statements.golden @@ -209,6 +209,38 @@ func _() { for _ = range (T1{T{42}}) { } + + if x == (T{42}[0]) { + } + if (x == T{42}[0]) { + } + if x == (T{42}[0]) { + } + if x == (T{42}[0]) { + } + if x == (T{42}[0]) { + } + if x == a+b*(T{42}[0]) { + } + if (x == a+b*T{42}[0]) { + } + if x == a+b*(T{42}[0]) { + } + if x == a+(b * (T{42}[0])) { + } + if x == a+b*(T{42}[0]) { + } + if (a + b*(T{42}[0])) == x { + } + if (a + b*(T{42}[0])) == x { + } + + if struct{ x bool }{false}.x { + } + if (struct{ x bool }{false}.x) == false { + } + if struct{ x bool }{false}.x == false { + } } @@ -227,7 +259,8 @@ func _() { var x = 1 // Each use(x) call below should have at most one empty line before and after. - + // Known bug: The first use call may have more than one empty line before + // (see go/printer/nodes.go, func linebreak). use(x) @@ -336,3 +369,49 @@ AnOverlongLabel: L: _ = 0 } + + +func _() { + for { + goto L + } +L: + + MoreCode() +} + + +func _() { + for { + goto L + } +L: // A comment on the same line as the label, followed by a single empty line. + // Known bug: There may be more than one empty line before MoreCode() + // (see go/printer/nodes.go, func linebreak). + + + MoreCode() +} + + +func _() { + for { + goto L + } +L: + + // There should be a single empty line before this comment. + MoreCode() +} + + +func _() { + for { + goto AVeryLongLabelThatShouldNotAffectFormatting + } +AVeryLongLabelThatShouldNotAffectFormatting: + // There should be a single empty line after this comment. + + // There should be a single empty line before this comment. 
+ MoreCode() +} diff --git a/src/pkg/go/printer/testdata/statements.input b/src/pkg/go/printer/testdata/statements.input index 53f16c050..7819820ed 100644 --- a/src/pkg/go/printer/testdata/statements.input +++ b/src/pkg/go/printer/testdata/statements.input @@ -146,6 +146,23 @@ func _() { switch ; ((((T{})))) {} for _ = range (((T1{T{42}}))) {} + + if x == (T{42}[0]) {} + if (x == T{42}[0]) {} + if (x == (T{42}[0])) {} + if (x == (((T{42}[0])))) {} + if (((x == (T{42}[0])))) {} + if x == a + b*(T{42}[0]) {} + if (x == a + b*T{42}[0]) {} + if (x == a + b*(T{42}[0])) {} + if (x == a + ((b * (T{42}[0])))) {} + if (((x == a + b * (T{42}[0])))) {} + if (((a + b * (T{42}[0])) == x)) {} + if (((a + b * (T{42}[0])))) == x {} + + if (struct{x bool}{false}.x) {} + if (struct{x bool}{false}.x) == false {} + if (struct{x bool}{false}.x == false) {} } @@ -164,6 +181,8 @@ func _() { var x = 1 // Each use(x) call below should have at most one empty line before and after. + // Known bug: The first use call may have more than one empty line before + // (see go/printer/nodes.go, func linebreak). @@ -266,3 +285,54 @@ AnOverlongLabel: L: _ = 0 } + + +func _() { + for { + goto L + } +L: + + MoreCode() +} + + +func _() { + for { + goto L + } +L: // A comment on the same line as the label, followed by a single empty line. + // Known bug: There may be more than one empty line before MoreCode() + // (see go/printer/nodes.go, func linebreak). + + + + + MoreCode() +} + + +func _() { + for { + goto L + } +L: + + + + + // There should be a single empty line before this comment. + MoreCode() +} + + +func _() { + for { + goto AVeryLongLabelThatShouldNotAffectFormatting + } +AVeryLongLabelThatShouldNotAffectFormatting: + // There should be a single empty line after this comment. + + // There should be a single empty line before this comment. + MoreCode() +} diff --git a/src/pkg/go/scanner/Makefile b/src/pkg/go/scanner/Makefile index 70d21a972..453faac00 100644 --- a/src/pkg/go/scanner/Makefile +++ b/src/pkg/go/scanner/Makefile @@ -2,7 +2,7 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -include ../../../Make.$(GOARCH) +include ../../../Make.inc TARG=go/scanner GOFILES=\ diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index e5ac9d772..6ce846cd8 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -4,13 +4,25 @@ // A scanner for Go source text. Takes a []byte as source which can // then be tokenized through repeated calls to the Scan function. -// For a sample use of a scanner, see the implementation of Tokenize. +// Typical use: +// +// var s Scanner +// fset := token.NewFileSet() // position information is relative to fset +// s.Init(fset, filename, src, nil /* no error handler */, 0) +// for { +// pos, tok, lit := s.Scan() +// if tok == token.EOF { +// break +// } +// // do something here with pos, tok, and lit +// } // package scanner import ( "bytes" "go/token" + "path" "strconv" "unicode" "utf8" @@ -19,20 +31,22 @@ import ( // A Scanner holds the scanner's internal state while processing // a given text. It can be allocated as part of another data -// structure but must be initialized via Init before use. For -// a sample use, see the implementation of Tokenize. +// structure but must be initialized via Init before use. 
// type Scanner struct { // immutable state + file *token.File // source file handle + dir string // directory portion of file.Name() src []byte // source err ErrorHandler // error reporting; or nil mode uint // scanning mode // scanning state - pos token.Position // previous reading position (position before ch) - offset int // current reading offset (position after ch) - ch int // one char look-ahead - insertSemi bool // insert a semicolon before next newline + ch int // current character + offset int // character offset + rdOffset int // reading offset (position after current character) + lineOffset int // current line offset + insertSemi bool // insert a semicolon before next newline // public state - ok to modify ErrorCount int // number of errors encountered @@ -43,29 +57,31 @@ type Scanner struct { // S.ch < 0 means end-of-file. // func (S *Scanner) next() { - if S.offset < len(S.src) { - S.pos.Offset = S.offset - S.pos.Column++ + if S.rdOffset < len(S.src) { + S.offset = S.rdOffset if S.ch == '\n' { - // next character starts a new line - S.pos.Line++ - S.pos.Column = 1 + S.lineOffset = S.offset + S.file.AddLine(S.offset) } - r, w := int(S.src[S.offset]), 1 + r, w := int(S.src[S.rdOffset]), 1 switch { case r == 0: - S.error(S.pos, "illegal character NUL") + S.error(S.offset, "illegal character NUL") case r >= 0x80: // not ASCII - r, w = utf8.DecodeRune(S.src[S.offset:]) + r, w = utf8.DecodeRune(S.src[S.rdOffset:]) if r == utf8.RuneError && w == 1 { - S.error(S.pos, "illegal UTF-8 encoding") + S.error(S.offset, "illegal UTF-8 encoding") } } - S.offset += w + S.rdOffset += w S.ch = r } else { - S.pos.Offset = len(S.src) + S.offset = len(S.src) + if S.ch == '\n' { + S.lineOffset = S.offset + S.file.AddLine(S.offset) + } S.ch = -1 // eof } } @@ -80,24 +96,38 @@ const ( InsertSemis // automatically insert semicolons ) +// TODO(gri) Would it be better to simply provide *token.File to Init +// instead of fset, and filename, and then return the file? +// It could cause an error/panic if the provided file.Size() +// doesn't match len(src). -// Init prepares the scanner S to tokenize the text src. Calls to Scan -// will use the error handler err if they encounter a syntax error and -// err is not nil. Also, for each error encountered, the Scanner field -// ErrorCount is incremented by one. The filename parameter is used as -// filename in the token.Position returned by Scan for each token. The -// mode parameter determines how comments and illegal characters are -// handled. +// Init prepares the scanner S to tokenize the text src. It sets the +// scanner at the beginning of the source text, adds a new file with +// the given filename to the file set fset, and returns that file. +// +// Calls to Scan will use the error handler err if they encounter a +// syntax error and err is not nil. Also, for each error encountered, +// the Scanner field ErrorCount is incremented by one. The mode parameter +// determines how comments, illegal characters, and semicolons are handled. // -func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) { +func (S *Scanner) Init(fset *token.FileSet, filename string, src []byte, err ErrorHandler, mode uint) *token.File { // Explicitly initialize all fields since a scanner may be reused. 
+ S.file = fset.AddFile(filename, fset.Base(), len(src)) + S.dir, _ = path.Split(filename) S.src = src S.err = err S.mode = mode - S.pos = token.Position{filename, 0, 1, 0} + + S.ch = ' ' S.offset = 0 + S.rdOffset = 0 + S.lineOffset = 0 + S.insertSemi = false S.ErrorCount = 0 + S.next() + + return S.file } @@ -131,111 +161,109 @@ func charString(ch int) string { } -func (S *Scanner) error(pos token.Position, msg string) { +func (S *Scanner) error(offs int, msg string) { if S.err != nil { - S.err.Error(pos, msg) + S.err.Error(S.file.Position(S.file.Pos(offs)), msg) } S.ErrorCount++ } -func (S *Scanner) expect(ch int) { - if S.ch != ch { - S.error(S.pos, "expected "+charString(ch)+", found "+charString(S.ch)) +var prefix = []byte("//line ") + +func (S *Scanner) interpretLineComment(text []byte) { + if bytes.HasPrefix(text, prefix) { + // get filename and line number, if any + if i := bytes.Index(text, []byte{':'}); i > 0 { + if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 { + // valid //line filename:line comment; + filename := path.Clean(string(text[len(prefix):i])) + if filename[0] != '/' { + // make filename relative to current directory + filename = path.Join(S.dir, filename) + } + // update scanner position + S.file.AddLineInfo(S.lineOffset, filename, line-1) // -1 since comment applies to next line + } + } } - S.next() // always make progress } -var prefix = []byte("line ") - -func (S *Scanner) scanComment(pos token.Position) { - // first '/' already consumed +func (S *Scanner) scanComment() { + // initial '/' already consumed; S.ch == '/' || S.ch == '*' + offs := S.offset - 1 // position of initial '/' if S.ch == '/' { //-style comment - for S.ch >= 0 { + S.next() + for S.ch != '\n' && S.ch >= 0 { S.next() - if S.ch == '\n' { - // '\n' is not part of the comment for purposes of scanning - // (the comment ends on the same line where it started) - if pos.Column == 1 { - text := S.src[pos.Offset+2 : S.pos.Offset] - if bytes.HasPrefix(text, prefix) { - // comment starts at beginning of line with "//line "; - // get filename and line number, if any - i := bytes.Index(text, []byte{':'}) - if i >= 0 { - if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 { - // valid //line filename:line comment; - // update scanner position - S.pos.Filename = string(text[len(prefix):i]) - S.pos.Line = line - 1 // -1 since the '\n' has not been consumed yet - } - } - } - } - return - } } + if offs == S.lineOffset { + // comment starts at the beginning of the current line + S.interpretLineComment(S.src[offs:S.offset]) + } + return + } - } else { - /*-style comment */ - S.expect('*') - for S.ch >= 0 { - ch := S.ch + /*-style comment */ + S.next() + for S.ch >= 0 { + ch := S.ch + S.next() + if ch == '*' && S.ch == '/' { S.next() - if ch == '*' && S.ch == '/' { - S.next() - return - } + return } } - S.error(pos, "comment not terminated") + S.error(offs, "comment not terminated") } -func (S *Scanner) findNewline(pos token.Position) bool { - // first '/' already consumed; assume S.ch == '/' || S.ch == '*' +func (S *Scanner) findLineEnd() bool { + // initial '/' already consumed + + defer func(offs int) { + // reset scanner state to where it was upon calling findLineEnd + S.ch = '/' + S.offset = offs + S.rdOffset = offs + 1 + S.next() // consume initial '/' again + }(S.offset - 1) - // read ahead until a newline or non-comment token is found - newline := false - for pos1 := pos; S.ch >= 0; { + // read ahead until a newline, EOF, or non-comment token is found + for S.ch 
== '/' || S.ch == '*' { if S.ch == '/' { //-style comment always contains a newline - newline = true - break + return true } - S.scanComment(pos1) - if pos1.Line < S.pos.Line { - /*-style comment contained a newline */ - newline = true - break + /*-style comment: look for newline */ + S.next() + for S.ch >= 0 { + ch := S.ch + if ch == '\n' { + return true + } + S.next() + if ch == '*' && S.ch == '/' { + S.next() + break + } } S.skipWhitespace() // S.insertSemi is set - if S.ch == '\n' { - newline = true - break + if S.ch < 0 || S.ch == '\n' { + return true } if S.ch != '/' { // non-comment token - break - } - pos1 = S.pos - S.next() - if S.ch != '/' && S.ch != '*' { - // non-comment token - break + return false } + S.next() // consume '/' } - // reset position to where it was upon calling findNewline - S.pos = pos - S.offset = pos.Offset + 1 - S.next() - - return newline + return false } @@ -250,11 +278,11 @@ func isDigit(ch int) bool { func (S *Scanner) scanIdentifier() token.Token { - pos := S.pos.Offset + offs := S.offset for isLetter(S.ch) || isDigit(S.ch) { S.next() } - return token.Lookup(S.src[pos:S.pos.Offset]) + return token.Lookup(S.src[offs:S.offset]) } @@ -278,7 +306,7 @@ func (S *Scanner) scanMantissa(base int) { } -func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.Token { +func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token { // digitVal(S.ch) < 10 tok := token.INT @@ -290,6 +318,7 @@ func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.To if S.ch == '0' { // int or float + offs := S.offset S.next() if S.ch == 'x' || S.ch == 'X' { // hexadecimal int @@ -309,7 +338,7 @@ func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.To } // octal int if seenDecimalDigit { - S.error(pos, "illegal octal number") + S.error(offs, "illegal octal number") } } goto exit @@ -346,7 +375,7 @@ exit: func (S *Scanner) scanEscape(quote int) { - pos := S.pos + offs := S.offset var i, base, max uint32 switch S.ch { @@ -366,28 +395,33 @@ func (S *Scanner) scanEscape(quote int) { i, base, max = 8, 16, unicode.MaxRune default: S.next() // always make progress - S.error(pos, "unknown escape sequence") + S.error(offs, "unknown escape sequence") return } var x uint32 - for ; i > 0; i-- { + for ; i > 0 && S.ch != quote && S.ch >= 0; i-- { d := uint32(digitVal(S.ch)) - if d > base { - S.error(S.pos, "illegal character in escape sequence") - return + if d >= base { + S.error(S.offset, "illegal character in escape sequence") + break } x = x*base + d S.next() } + // in case of an error, consume remaining chars + for ; i > 0 && S.ch != quote && S.ch >= 0; i-- { + S.next() + } if x > max || 0xd800 <= x && x < 0xe000 { - S.error(pos, "escape sequence is invalid Unicode code point") + S.error(offs, "escape sequence is invalid Unicode code point") } } -func (S *Scanner) scanChar(pos token.Position) { - // '\'' already consumed +func (S *Scanner) scanChar() { + // '\'' opening already consumed + offs := S.offset - 1 n := 0 for S.ch != '\'' { @@ -395,7 +429,7 @@ func (S *Scanner) scanChar(pos token.Position) { n++ S.next() if ch == '\n' || ch < 0 { - S.error(pos, "character literal not terminated") + S.error(offs, "character literal not terminated") n = 1 break } @@ -407,19 +441,20 @@ func (S *Scanner) scanChar(pos token.Position) { S.next() if n != 1 { - S.error(pos, "illegal character literal") + S.error(offs, "illegal character literal") } } -func (S *Scanner) scanString(pos token.Position) { - // '"' already consumed +func (S 
*Scanner) scanString() { + // '"' opening already consumed + offs := S.offset - 1 for S.ch != '"' { ch := S.ch S.next() if ch == '\n' || ch < 0 { - S.error(pos, "string not terminated") + S.error(offs, "string not terminated") break } if ch == '\\' { @@ -431,14 +466,15 @@ func (S *Scanner) scanString(pos token.Position) { } -func (S *Scanner) scanRawString(pos token.Position) { - // '`' already consumed +func (S *Scanner) scanRawString() { + // '`' opening already consumed + offs := S.offset - 1 for S.ch != '`' { ch := S.ch S.next() if ch < 0 { - S.error(pos, "string not terminated") + S.error(offs, "string not terminated") break } } @@ -499,12 +535,17 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke } -var semicolon = []byte{';'} +var newline = []byte{'\n'} // Scan scans the next token and returns the token position pos, // the token tok, and the literal text lit corresponding to the // token. The source end is indicated by token.EOF. // +// If the returned token is token.SEMICOLON, the corresponding +// literal value is ";" if the semicolon was present in the source, +// and "\n" if the semicolon was inserted because of a newline or +// at EOF. +// // For more tolerant parsing, Scan will return a valid token if // possible even if a syntax error was encountered. Thus, even // if the resulting token sequence contains no illegal tokens, @@ -512,13 +553,18 @@ var semicolon = []byte{';'} // must check the scanner's ErrorCount or the number of calls // of the error handler, if there was one installed. // -func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) { +// Scan adds line information to the file added to the file +// set with Init. Token positions are relative to that file +// and thus relative to the file set. +// +func (S *Scanner) Scan() (token.Pos, token.Token, []byte) { scanAgain: S.skipWhitespace() // current token start insertSemi := false - pos, tok = S.pos, token.ILLEGAL + offs := S.offset + tok := token.ILLEGAL // determine token value switch ch := S.ch; { @@ -530,36 +576,40 @@ scanAgain: } case digitVal(ch) < 10: insertSemi = true - tok = S.scanNumber(pos, false) + tok = S.scanNumber(false) default: S.next() // always make progress switch ch { case -1: + if S.insertSemi { + S.insertSemi = false // EOF consumed + return S.file.Pos(offs), token.SEMICOLON, newline + } tok = token.EOF case '\n': // we only reach here if S.insertSemi was // set in the first place and exited early // from S.skipWhitespace() S.insertSemi = false // newline consumed - return pos, token.SEMICOLON, semicolon + return S.file.Pos(offs), token.SEMICOLON, newline case '"': insertSemi = true tok = token.STRING - S.scanString(pos) + S.scanString() case '\'': insertSemi = true tok = token.CHAR - S.scanChar(pos) + S.scanChar() case '`': insertSemi = true tok = token.STRING - S.scanRawString(pos) + S.scanRawString() case ':': tok = S.switch2(token.COLON, token.DEFINE) case '.': if digitVal(S.ch) < 10 { insertSemi = true - tok = S.scanNumber(pos, true) + tok = S.scanNumber(true) } else if S.ch == '.' { S.next() if S.ch == '.' 
{ @@ -603,15 +653,15 @@ scanAgain: case '/': if S.ch == '/' || S.ch == '*' { // comment - if S.insertSemi && S.findNewline(pos) { + if S.insertSemi && S.findLineEnd() { // reset position to the beginning of the comment - S.pos = pos - S.offset = pos.Offset + 1 S.ch = '/' + S.offset = offs + S.rdOffset = offs + 1 S.insertSemi = false // newline consumed - return pos, token.SEMICOLON, semicolon + return S.file.Pos(offs), token.SEMICOLON, newline } - S.scanComment(pos) + S.scanComment() if S.mode&ScanComments == 0 { // skip comment S.insertSemi = false // newline consumed @@ -649,7 +699,7 @@ scanAgain: tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR) default: if S.mode&AllowIllegalChars == 0 { - S.error(pos, "illegal character "+charString(ch)) + S.error(offs, "illegal character "+charString(ch)) } insertSemi = S.insertSemi // preserve insertSemi info } @@ -658,21 +708,5 @@ scanAgain: if S.mode&InsertSemis != 0 { S.insertSemi = insertSemi } - return pos, tok, S.src[pos.Offset:S.pos.Offset] -} - - -// Tokenize calls a function f with the token position, token value, and token -// text for each token in the source src. The other parameters have the same -// meaning as for the Init function. Tokenize keeps scanning until f returns -// false (usually when the token value is token.EOF). The result is the number -// of errors encountered. -// -func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func(pos token.Position, tok token.Token, lit []byte) bool) int { - var s Scanner - s.Init(filename, src, err, mode) - for f(s.Scan()) { - // action happens in f - } - return s.ErrorCount + return S.file.Pos(offs), tok, S.src[offs:S.offset] } diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index 002a81dd9..b1004f89d 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -11,6 +11,9 @@ import ( ) +var fset = token.NewFileSet() + + const /* class */ ( special = iota literal @@ -41,136 +44,136 @@ type elt struct { var tokens = [...]elt{ // Special tokens - elt{token.COMMENT, "/* a comment */", special}, - elt{token.COMMENT, "// a comment \n", special}, + {token.COMMENT, "/* a comment */", special}, + {token.COMMENT, "// a comment \n", special}, // Identifiers and basic type literals - elt{token.IDENT, "foobar", literal}, - elt{token.IDENT, "aÛ°Û±Û¸", literal}, - elt{token.IDENT, "foo६४", literal}, - elt{token.IDENT, "bar9876", literal}, - elt{token.INT, "0", literal}, - elt{token.INT, "1", literal}, - elt{token.INT, "123456789012345678890", literal}, - elt{token.INT, "01234567", literal}, - elt{token.INT, "0xcafebabe", literal}, - elt{token.FLOAT, "0.", literal}, - elt{token.FLOAT, ".0", literal}, - elt{token.FLOAT, "3.14159265", literal}, - elt{token.FLOAT, "1e0", literal}, - elt{token.FLOAT, "1e+100", literal}, - elt{token.FLOAT, "1e-100", literal}, - elt{token.FLOAT, "2.71828e-1000", literal}, - elt{token.IMAG, "0i", literal}, - elt{token.IMAG, "1i", literal}, - elt{token.IMAG, "012345678901234567889i", literal}, - elt{token.IMAG, "123456789012345678890i", literal}, - elt{token.IMAG, "0.i", literal}, - elt{token.IMAG, ".0i", literal}, - elt{token.IMAG, "3.14159265i", literal}, - elt{token.IMAG, "1e0i", literal}, - elt{token.IMAG, "1e+100i", literal}, - elt{token.IMAG, "1e-100i", literal}, - elt{token.IMAG, "2.71828e-1000i", literal}, - elt{token.CHAR, "'a'", literal}, - elt{token.CHAR, "'\\000'", literal}, - elt{token.CHAR, "'\\xFF'", literal}, - elt{token.CHAR, "'\\uff16'", literal}, - elt{token.CHAR, 
"'\\U0000ff16'", literal}, - elt{token.STRING, "`foobar`", literal}, - elt{token.STRING, "`" + `foo + {token.IDENT, "foobar", literal}, + {token.IDENT, "aÛ°Û±Û¸", literal}, + {token.IDENT, "foo६४", literal}, + {token.IDENT, "bar9876", literal}, + {token.INT, "0", literal}, + {token.INT, "1", literal}, + {token.INT, "123456789012345678890", literal}, + {token.INT, "01234567", literal}, + {token.INT, "0xcafebabe", literal}, + {token.FLOAT, "0.", literal}, + {token.FLOAT, ".0", literal}, + {token.FLOAT, "3.14159265", literal}, + {token.FLOAT, "1e0", literal}, + {token.FLOAT, "1e+100", literal}, + {token.FLOAT, "1e-100", literal}, + {token.FLOAT, "2.71828e-1000", literal}, + {token.IMAG, "0i", literal}, + {token.IMAG, "1i", literal}, + {token.IMAG, "012345678901234567889i", literal}, + {token.IMAG, "123456789012345678890i", literal}, + {token.IMAG, "0.i", literal}, + {token.IMAG, ".0i", literal}, + {token.IMAG, "3.14159265i", literal}, + {token.IMAG, "1e0i", literal}, + {token.IMAG, "1e+100i", literal}, + {token.IMAG, "1e-100i", literal}, + {token.IMAG, "2.71828e-1000i", literal}, + {token.CHAR, "'a'", literal}, + {token.CHAR, "'\\000'", literal}, + {token.CHAR, "'\\xFF'", literal}, + {token.CHAR, "'\\uff16'", literal}, + {token.CHAR, "'\\U0000ff16'", literal}, + {token.STRING, "`foobar`", literal}, + {token.STRING, "`" + `foo bar` + "`", literal, }, // Operators and delimitors - elt{token.ADD, "+", operator}, - elt{token.SUB, "-", operator}, - elt{token.MUL, "*", operator}, - elt{token.QUO, "/", operator}, - elt{token.REM, "%", operator}, - - elt{token.AND, "&", operator}, - elt{token.OR, "|", operator}, - elt{token.XOR, "^", operator}, - elt{token.SHL, "<<", operator}, - elt{token.SHR, ">>", operator}, - elt{token.AND_NOT, "&^", operator}, - - elt{token.ADD_ASSIGN, "+=", operator}, - elt{token.SUB_ASSIGN, "-=", operator}, - elt{token.MUL_ASSIGN, "*=", operator}, - elt{token.QUO_ASSIGN, "/=", operator}, - elt{token.REM_ASSIGN, "%=", operator}, - - elt{token.AND_ASSIGN, "&=", operator}, - elt{token.OR_ASSIGN, "|=", operator}, - elt{token.XOR_ASSIGN, "^=", operator}, - elt{token.SHL_ASSIGN, "<<=", operator}, - elt{token.SHR_ASSIGN, ">>=", operator}, - elt{token.AND_NOT_ASSIGN, "&^=", operator}, - - elt{token.LAND, "&&", operator}, - elt{token.LOR, "||", operator}, - elt{token.ARROW, "<-", operator}, - elt{token.INC, "++", operator}, - elt{token.DEC, "--", operator}, - - elt{token.EQL, "==", operator}, - elt{token.LSS, "<", operator}, - elt{token.GTR, ">", operator}, - elt{token.ASSIGN, "=", operator}, - elt{token.NOT, "!", operator}, - - elt{token.NEQ, "!=", operator}, - elt{token.LEQ, "<=", operator}, - elt{token.GEQ, ">=", operator}, - elt{token.DEFINE, ":=", operator}, - elt{token.ELLIPSIS, "...", operator}, - - elt{token.LPAREN, "(", operator}, - elt{token.LBRACK, "[", operator}, - elt{token.LBRACE, "{", operator}, - elt{token.COMMA, ",", operator}, - elt{token.PERIOD, ".", operator}, - - elt{token.RPAREN, ")", operator}, - elt{token.RBRACK, "]", operator}, - elt{token.RBRACE, "}", operator}, - elt{token.SEMICOLON, ";", operator}, - elt{token.COLON, ":", operator}, + {token.ADD, "+", operator}, + {token.SUB, "-", operator}, + {token.MUL, "*", operator}, + {token.QUO, "/", operator}, + {token.REM, "%", operator}, + + {token.AND, "&", operator}, + {token.OR, "|", operator}, + {token.XOR, "^", operator}, + {token.SHL, "<<", operator}, + {token.SHR, ">>", operator}, + {token.AND_NOT, "&^", operator}, + + {token.ADD_ASSIGN, "+=", operator}, + {token.SUB_ASSIGN, "-=", operator}, + 
{token.MUL_ASSIGN, "*=", operator}, + {token.QUO_ASSIGN, "/=", operator}, + {token.REM_ASSIGN, "%=", operator}, + + {token.AND_ASSIGN, "&=", operator}, + {token.OR_ASSIGN, "|=", operator}, + {token.XOR_ASSIGN, "^=", operator}, + {token.SHL_ASSIGN, "<<=", operator}, + {token.SHR_ASSIGN, ">>=", operator}, + {token.AND_NOT_ASSIGN, "&^=", operator}, + + {token.LAND, "&&", operator}, + {token.LOR, "||", operator}, + {token.ARROW, "<-", operator}, + {token.INC, "++", operator}, + {token.DEC, "--", operator}, + + {token.EQL, "==", operator}, + {token.LSS, "<", operator}, + {token.GTR, ">", operator}, + {token.ASSIGN, "=", operator}, + {token.NOT, "!", operator}, + + {token.NEQ, "!=", operator}, + {token.LEQ, "<=", operator}, + {token.GEQ, ">=", operator}, + {token.DEFINE, ":=", operator}, + {token.ELLIPSIS, "...", operator}, + + {token.LPAREN, "(", operator}, + {token.LBRACK, "[", operator}, + {token.LBRACE, "{", operator}, + {token.COMMA, ",", operator}, + {token.PERIOD, ".", operator}, + + {token.RPAREN, ")", operator}, + {token.RBRACK, "]", operator}, + {token.RBRACE, "}", operator}, + {token.SEMICOLON, ";", operator}, + {token.COLON, ":", operator}, // Keywords - elt{token.BREAK, "break", keyword}, - elt{token.CASE, "case", keyword}, - elt{token.CHAN, "chan", keyword}, - elt{token.CONST, "const", keyword}, - elt{token.CONTINUE, "continue", keyword}, - - elt{token.DEFAULT, "default", keyword}, - elt{token.DEFER, "defer", keyword}, - elt{token.ELSE, "else", keyword}, - elt{token.FALLTHROUGH, "fallthrough", keyword}, - elt{token.FOR, "for", keyword}, - - elt{token.FUNC, "func", keyword}, - elt{token.GO, "go", keyword}, - elt{token.GOTO, "goto", keyword}, - elt{token.IF, "if", keyword}, - elt{token.IMPORT, "import", keyword}, - - elt{token.INTERFACE, "interface", keyword}, - elt{token.MAP, "map", keyword}, - elt{token.PACKAGE, "package", keyword}, - elt{token.RANGE, "range", keyword}, - elt{token.RETURN, "return", keyword}, - - elt{token.SELECT, "select", keyword}, - elt{token.STRUCT, "struct", keyword}, - elt{token.SWITCH, "switch", keyword}, - elt{token.TYPE, "type", keyword}, - elt{token.VAR, "var", keyword}, + {token.BREAK, "break", keyword}, + {token.CASE, "case", keyword}, + {token.CHAN, "chan", keyword}, + {token.CONST, "const", keyword}, + {token.CONTINUE, "continue", keyword}, + + {token.DEFAULT, "default", keyword}, + {token.DEFER, "defer", keyword}, + {token.ELSE, "else", keyword}, + {token.FALLTHROUGH, "fallthrough", keyword}, + {token.FOR, "for", keyword}, + + {token.FUNC, "func", keyword}, + {token.GO, "go", keyword}, + {token.GOTO, "goto", keyword}, + {token.IF, "if", keyword}, + {token.IMPORT, "import", keyword}, + + {token.INTERFACE, "interface", keyword}, + {token.MAP, "map", keyword}, + {token.PACKAGE, "package", keyword}, + {token.RANGE, "range", keyword}, + {token.RETURN, "return", keyword}, + + {token.SELECT, "select", keyword}, + {token.STRUCT, "struct", keyword}, + {token.SWITCH, "switch", keyword}, + {token.TYPE, "type", keyword}, + {token.VAR, "var", keyword}, } @@ -196,18 +199,19 @@ func newlineCount(s string) int { } -func checkPos(t *testing.T, lit string, pos, expected token.Position) { +func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) { + pos := fset.Position(p) if pos.Filename != expected.Filename { - t.Errorf("bad filename for %s: got %s, expected %s", lit, pos.Filename, expected.Filename) + t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) } if pos.Offset != expected.Offset { - t.Errorf("bad 
position for %s: got %d, expected %d", lit, pos.Offset, expected.Offset) + t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) } if pos.Line != expected.Line { - t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, expected.Line) + t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) } if pos.Column != expected.Column { - t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, expected.Column) + t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) } } @@ -219,66 +223,76 @@ func TestScan(t *testing.T) { for _, e := range tokens { src += e.lit + whitespace } - src_linecount := newlineCount(src) + src_linecount := newlineCount(src) + 1 whitespace_linecount := newlineCount(whitespace) // verify scan + var s Scanner + s.Init(fset, "", []byte(src), &testErrorHandler{t}, ScanComments) index := 0 epos := token.Position{"", 0, 1, 1} // expected position - nerrors := Tokenize("", []byte(src), &testErrorHandler{t}, ScanComments, - func(pos token.Position, tok token.Token, litb []byte) bool { - e := elt{token.EOF, "", special} - if index < len(tokens) { - e = tokens[index] - } - lit := string(litb) - if tok == token.EOF { - lit = "<EOF>" - epos.Line = src_linecount - epos.Column = 1 - } - checkPos(t, lit, pos, epos) - if tok != e.tok { - t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String()) - } - if e.tok.IsLiteral() && lit != e.lit { - t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit) - } - if tokenclass(tok) != e.class { - t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) - } - epos.Offset += len(lit) + len(whitespace) - epos.Line += newlineCount(lit) + whitespace_linecount - if tok == token.COMMENT && litb[1] == '/' { - // correct for unaccounted '/n' in //-style comment - epos.Offset++ - epos.Line++ - } - index++ - return tok != token.EOF - }) - if nerrors != 0 { - t.Errorf("found %d errors", nerrors) + for { + pos, tok, litb := s.Scan() + e := elt{token.EOF, "", special} + if index < len(tokens) { + e = tokens[index] + } + lit := string(litb) + if tok == token.EOF { + lit = "<EOF>" + epos.Line = src_linecount + epos.Column = 1 + } + checkPos(t, lit, pos, epos) + if tok != e.tok { + t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String()) + } + if e.tok.IsLiteral() && lit != e.lit { + t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit) + } + if tokenclass(tok) != e.class { + t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) + } + epos.Offset += len(lit) + len(whitespace) + epos.Line += newlineCount(lit) + whitespace_linecount + if tok == token.COMMENT && litb[1] == '/' { + // correct for unaccounted '/n' in //-style comment + epos.Offset++ + epos.Line++ + } + index++ + if tok == token.EOF { + break + } + } + if s.ErrorCount != 0 { + t.Errorf("found %d errors", s.ErrorCount) } } func checkSemi(t *testing.T, line string, mode uint) { var S Scanner - S.Init("TestSemis", []byte(line), nil, mode) + file := S.Init(fset, "TestSemis", []byte(line), nil, mode) pos, tok, lit := S.Scan() for tok != token.EOF { if tok == token.ILLEGAL { + // the illegal token literal indicates what + // kind of semicolon literal to expect + semiLit := "\n" + if lit[0] == '#' { + semiLit = ";" + } // next token must be a semicolon - offs := pos.Offset + 1 + semiPos := file.Position(pos) + semiPos.Offset++ + semiPos.Column++ pos, tok, lit = S.Scan() if tok == 
token.SEMICOLON { - if pos.Offset != offs { - t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, offs) - } - if string(lit) != ";" { - t.Errorf(`bad literal for %q: got %q, expected ";"`, line, lit) + if string(lit) != semiLit { + t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit) } + checkPos(t, line, pos, semiPos) } else { t.Errorf("bad token for %q: got %s, expected ;", line, tok.String()) } @@ -291,9 +305,10 @@ func checkSemi(t *testing.T, line string, mode uint) { var lines = []string{ - // the $ character indicates where a semicolon is expected + // # indicates a semicolon present in the source + // $ indicates an automatically inserted semicolon "", - "$;", + "#;", "foo$\n", "123$\n", "1.2$\n", @@ -354,7 +369,7 @@ var lines = []string{ ")$\n", "]$\n", "}$\n", - "$;\n", + "#;\n", ":\n", "break$\n", @@ -388,57 +403,66 @@ var lines = []string{ "var\n", "foo$//comment\n", + "foo$//comment", "foo$/*comment*/\n", "foo$/*\n*/", "foo$/*comment*/ \n", "foo$/*\n*/ ", + "foo $// comment\n", + "foo $// comment", "foo $/*comment*/\n", "foo $/*\n*/", - - "foo $/*comment*/\n", + "foo $/* */ /* \n */ bar$/**/\n", "foo $/*0*/ /*1*/ /*2*/\n", + "foo $/*comment*/ \n", "foo $/*0*/ /*1*/ /*2*/ \n", - "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa", + "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n", + "foo $/* an EOF terminates a line */", + "foo $/* an EOF terminates a line */ /*", + "foo $/* an EOF terminates a line */ //", "package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n", + "package main$", } func TestSemis(t *testing.T) { for _, line := range lines { checkSemi(t, line, AllowIllegalChars|InsertSemis) - } - for _, line := range lines { checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments) + + // if the input ended in newlines, the input must tokenize the + // same with or without those newlines + for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { + checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis) + checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis|ScanComments) + } } } -type seg struct { +var segments = []struct { srcline string // a line of source text filename string // filename for current token line int // line number for current token -} - - -var segments = []seg{ +}{ // exactly one token per line since the test consumes one token per segment - seg{" line1", "TestLineComments", 1}, - seg{"\nline2", "TestLineComments", 2}, - seg{"\nline3 //line File1.go:100", "TestLineComments", 3}, // bad line comment, ignored - seg{"\nline4", "TestLineComments", 4}, - seg{"\n//line File1.go:100\n line100", "File1.go", 100}, - seg{"\n//line File2.go:200\n line200", "File2.go", 200}, - seg{"\n//line :1\n line1", "", 1}, - seg{"\n//line foo:42\n line42", "foo", 42}, - seg{"\n //line foo:42\n line44", "foo", 44}, // bad line comment, ignored - seg{"\n//line foo 42\n line46", "foo", 46}, // bad line comment, ignored - seg{"\n//line foo:42 extra text\n line48", "foo", 48}, // bad line comment, ignored - seg{"\n//line foo:42\n line42", "foo", 42}, - seg{"\n//line foo:42\n line42", "foo", 42}, - seg{"\n//line File1.go:100\n line100", "File1.go", 100}, + {" line1", "dir/TestLineComments", 1}, + {"\nline2", "dir/TestLineComments", 2}, + {"\nline3 //line File1.go:100", "dir/TestLineComments", 3}, // bad line comment, ignored + {"\nline4", "dir/TestLineComments", 4}, + {"\n//line File1.go:100\n line100", "dir/File1.go", 100}, + {"\n//line File2.go:200\n line200", "dir/File2.go", 200}, + {"\n//line :1\n line1", "dir", 1}, + 
{"\n//line foo:42\n line42", "dir/foo", 42}, + {"\n //line foo:42\n line44", "dir/foo", 44}, // bad line comment, ignored + {"\n//line foo 42\n line46", "dir/foo", 46}, // bad line comment, ignored + {"\n//line foo:42 extra text\n line48", "dir/foo", 48}, // bad line comment, ignored + {"\n//line /bar:42\n line42", "/bar", 42}, + {"\n//line ./foo:42\n line42", "dir/foo", 42}, + {"\n//line a/b/c/File1.go:100\n line100", "dir/a/b/c/File1.go", 100}, } @@ -452,10 +476,11 @@ func TestLineComments(t *testing.T) { // verify scan var S Scanner - S.Init("TestLineComments", []byte(src), nil, 0) + file := S.Init(fset, "dir/TestLineComments", []byte(src), nil, 0) for _, s := range segments { - pos, _, lit := S.Scan() - checkPos(t, string(lit), pos, token.Position{s.filename, pos.Offset, s.line, pos.Column}) + p, _, lit := S.Scan() + pos := file.Position(p) + checkPos(t, string(lit), p, token.Position{s.filename, pos.Offset, s.line, pos.Column}) } if S.ErrorCount != 0 { @@ -469,7 +494,11 @@ func TestInit(t *testing.T) { var s Scanner // 1st init - s.Init("", []byte("if true { }"), nil, 0) + src1 := "if true { }" + f1 := s.Init(fset, "", []byte(src1), nil, 0) + if f1.Size() != len(src1) { + t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) + } s.Scan() // if s.Scan() // true _, tok, _ := s.Scan() // { @@ -478,7 +507,11 @@ func TestInit(t *testing.T) { } // 2nd init - s.Init("", []byte("go true { ]"), nil, 0) + src2 := "go true { ]" + f2 := s.Init(fset, "", []byte(src2), nil, 0) + if f2.Size() != len(src2) { + t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) + } _, tok, _ = s.Scan() // go if tok != token.GO { t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO) @@ -494,11 +527,11 @@ func TestIllegalChars(t *testing.T) { var s Scanner const src = "*?*$*@*" - s.Init("", []byte(src), &testErrorHandler{t}, AllowIllegalChars) + file := s.Init(fset, "", []byte(src), &testErrorHandler{t}, AllowIllegalChars) for offs, ch := range src { pos, tok, lit := s.Scan() - if pos.Offset != offs { - t.Errorf("bad position for %s: got %d, expected %d", string(lit), pos.Offset, offs) + if poffs := file.Offset(pos); poffs != offs { + t.Errorf("bad position for %s: got %d, expected %d", string(lit), poffs, offs) } if tok == token.ILLEGAL && string(lit) != string(ch) { t.Errorf("bad token: got %s, expected %s", string(lit), string(ch)) @@ -522,10 +555,13 @@ func TestStdErrorHander(t *testing.T) { "@ @ @" // original file, line 1 again v := new(ErrorVector) - nerrors := Tokenize("File1", []byte(src), v, 0, - func(pos token.Position, tok token.Token, litb []byte) bool { - return tok != token.EOF - }) + var s Scanner + s.Init(fset, "File1", []byte(src), v, 0) + for { + if _, tok, _ := s.Scan(); tok == token.EOF { + break + } + } list := v.GetErrorList(Raw) if len(list) != 9 { @@ -545,8 +581,8 @@ func TestStdErrorHander(t *testing.T) { PrintError(os.Stderr, list) } - if v.ErrorCount() != nerrors { - t.Errorf("found %d errors, expected %d", v.ErrorCount(), nerrors) + if v.ErrorCount() != s.ErrorCount { + t.Errorf("found %d errors, expected %d", v.ErrorCount(), s.ErrorCount) } } @@ -568,7 +604,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) { func checkError(t *testing.T, src string, tok token.Token, pos int, err string) { var s Scanner var h errorCollector - s.Init("", []byte(src), &h, ScanComments) + s.Init(fset, "", []byte(src), &h, ScanComments) _, tok0, _ := s.Scan() _, tok1, _ := s.Scan() if tok0 != tok { @@ -593,28 +629,34 @@ func checkError(t *testing.T, 
src string, tok token.Token, pos int, err string) } -type srcerr struct { +var errors = []struct { src string tok token.Token pos int err string -} - -var errors = []srcerr{ - srcerr{"\"\"", token.STRING, 0, ""}, - srcerr{"\"", token.STRING, 0, "string not terminated"}, - srcerr{"/**/", token.COMMENT, 0, ""}, - srcerr{"/*", token.COMMENT, 0, "comment not terminated"}, - srcerr{"//\n", token.COMMENT, 0, ""}, - srcerr{"//", token.COMMENT, 0, "comment not terminated"}, - srcerr{"077", token.INT, 0, ""}, - srcerr{"078.", token.FLOAT, 0, ""}, - srcerr{"07801234567.", token.FLOAT, 0, ""}, - srcerr{"078e0", token.FLOAT, 0, ""}, - srcerr{"078", token.INT, 0, "illegal octal number"}, - srcerr{"07800000009", token.INT, 0, "illegal octal number"}, - srcerr{"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"}, - srcerr{"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"}, +}{ + {`#`, token.ILLEGAL, 0, "illegal character '#' (U+23)"}, + {`' '`, token.CHAR, 0, ""}, + {`''`, token.CHAR, 0, "illegal character literal"}, + {`'\8'`, token.CHAR, 2, "unknown escape sequence"}, + {`'\08'`, token.CHAR, 3, "illegal character in escape sequence"}, + {`'\x0g'`, token.CHAR, 4, "illegal character in escape sequence"}, + {`'\Uffffffff'`, token.CHAR, 2, "escape sequence is invalid Unicode code point"}, + {`'`, token.CHAR, 0, "character literal not terminated"}, + {`""`, token.STRING, 0, ""}, + {`"`, token.STRING, 0, "string not terminated"}, + {"``", token.STRING, 0, ""}, + {"`", token.STRING, 0, "string not terminated"}, + {"/**/", token.COMMENT, 0, ""}, + {"/*", token.COMMENT, 0, "comment not terminated"}, + {"077", token.INT, 0, ""}, + {"078.", token.FLOAT, 0, ""}, + {"07801234567.", token.FLOAT, 0, ""}, + {"078e0", token.FLOAT, 0, ""}, + {"078", token.INT, 0, "illegal octal number"}, + {"07800000009", token.INT, 0, "illegal octal number"}, + {"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"}, + {"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"}, } diff --git a/src/pkg/go/token/Makefile b/src/pkg/go/token/Makefile index 629196c5d..4a4e64dc8 100644 --- a/src/pkg/go/token/Makefile +++ b/src/pkg/go/token/Makefile @@ -2,10 +2,11 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -include ../../../Make.$(GOARCH) +include ../../../Make.inc TARG=go/token GOFILES=\ + position.go\ token.go\ include ../../../Make.pkg diff --git a/src/pkg/go/token/position.go b/src/pkg/go/token/position.go new file mode 100644 index 000000000..0044a0ed7 --- /dev/null +++ b/src/pkg/go/token/position.go @@ -0,0 +1,409 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO(gri) consider making this a separate package outside the go directory. + +package token + +import ( + "fmt" + "sort" + "sync" +) + + +// Position describes an arbitrary source position +// including the file, line, and column location. +// A Position is valid if the line number is > 0. +// +type Position struct { + Filename string // filename, if any + Offset int // offset, starting at 0 + Line int // line number, starting at 1 + Column int // column number, starting at 1 (character count) +} + + +// IsValid returns true if the position is valid. 
+func (pos *Position) IsValid() bool { return pos.Line > 0 } + + +// String returns a string in one of several forms: +// +// file:line:column valid position with file name +// line:column valid position without file name +// file invalid position with file name +// - invalid position without file name +// +func (pos Position) String() string { + s := pos.Filename + if pos.IsValid() { + if s != "" { + s += ":" + } + s += fmt.Sprintf("%d:%d", pos.Line, pos.Column) + } + if s == "" { + s = "-" + } + return s +} + + +// Pos is a compact encoding of a source position within a file set. +// It can be converted into a Position for a more convenient, but much +// larger, representation. +// +// The Pos value for a given file is a number in the range [base, base+size], +// where base and size are specified when adding the file to the file set via +// AddFile. +// +// To create the Pos value for a specific source offset, first add +// the respective file to the current file set (via FileSet.AddFile) +// and then call File.Pos(offset) for that file. Given a Pos value p +// for a specific file set fset, the corresponding Position value is +// obtained by calling fset.Position(p). +// +// Pos values can be compared directly with the usual comparison operators: +// If two Pos values p and q are in the same file, comparing p and q is +// equivalent to comparing the respective source file offsets. If p and q +// are in different files, p < q is true if the file implied by p was added +// to the respective file set before the file implied by q. +// +type Pos int + + +// The zero value for Pos is NoPos; there is no file and line information +// associated with it, and NoPos().IsValid() is false. NoPos is always +// smaller than any other Pos value. The corresponding Position value +// for NoPos is the zero value for Position. +// +const NoPos Pos = 0 + + +// IsValid returns true if the position is valid. +func (p Pos) IsValid() bool { + return p != NoPos +} + + +func searchFiles(a []*File, x int) int { + return sort.Search(len(a), func(i int) bool { return a[i].base > x }) - 1 +} + + +func (s *FileSet) file(p Pos) *File { + if i := searchFiles(s.files, int(p)); i >= 0 { + f := s.files[i] + // f.base <= int(p) by definition of searchFiles + if int(p) <= f.base+f.size { + return f + } + } + return nil +} + + +// File returns the file which contains the position p. +// If no such file is found (for instance for p == NoPos), +// the result is nil. +// +func (s *FileSet) File(p Pos) (f *File) { + if p != NoPos { + s.mutex.RLock() + f = s.file(p) + s.mutex.RUnlock() + } + return +} + + +func (f *File) position(p Pos) (pos Position) { + offset := int(p) - f.base + pos.Offset = offset + pos.Filename, pos.Line, pos.Column = f.info(offset) + return +} + + +// Position converts a Pos in the fileset into a general Position. +func (s *FileSet) Position(p Pos) (pos Position) { + if p != NoPos { + // TODO(gri) consider optimizing the case where p + // is in the last file addded, or perhaps + // looked at - will eliminate one level + // of search + s.mutex.RLock() + if f := s.file(p); f != nil { + pos = f.position(p) + } + s.mutex.RUnlock() + } + return +} + + +type lineInfo struct { + offset int + filename string + line int +} + + +// AddLineInfo adds alternative file and line number information for +// a given file offset. The offset must be larger than the offset for +// the previously added alternative line info and not larger than the +// file size; otherwise the information is ignored. 
+// +// AddLineInfo is typically used to register alternative position +// information for //line filename:line comments in source files. +// +func (f *File) AddLineInfo(offset int, filename string, line int) { + f.set.mutex.Lock() + if i := len(f.infos); i == 0 || f.infos[i-1].offset < offset && offset <= f.size { + f.infos = append(f.infos, lineInfo{offset, filename, line}) + } + f.set.mutex.Unlock() +} + + +// A File is a handle for a file belonging to a FileSet. +// A File has a name, size, and line offset table. +// +type File struct { + set *FileSet + name string // file name as provided to AddFile + base int // Pos value range for this file is [base...base+size] + size int // file size as provided to AddFile + + // lines and infos are protected by set.mutex + lines []int + infos []lineInfo +} + + +// Name returns the file name of file f as registered with AddFile. +func (f *File) Name() string { + return f.name +} + + +// Base returns the base offset of file f as registered with AddFile. +func (f *File) Base() int { + return f.base +} + + +// Size returns the size of file f as registered with AddFile. +func (f *File) Size() int { + return f.size +} + + +// LineCount returns the number of lines in file f. +func (f *File) LineCount() int { + f.set.mutex.RLock() + n := len(f.lines) + f.set.mutex.RUnlock() + return n +} + + +// AddLine adds the line offset for a new line. +// The line offset must be larger than the offset for the previous line +// and not larger than the file size; otherwise the line offset is ignored. +// +func (f *File) AddLine(offset int) { + f.set.mutex.Lock() + if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset <= f.size { + f.lines = append(f.lines, offset) + } + f.set.mutex.Unlock() +} + + +// SetLines sets all line offsets for a file and returns true if successful. +// Each line offset must be larger than the offset for the previous line +// and not larger than the file size; otherwise the SetLines fails and returns +// false. +// +func (f *File) SetLines(lines []int) bool { + // verify validity of lines table + size := f.size + for i, offset := range lines { + if i > 0 && offset <= lines[i-1] || size < offset { + return false + } + } + + // set lines table + f.set.mutex.Lock() + f.lines = lines + f.set.mutex.Unlock() + return true +} + + +// Pos returns the Pos value for the given file offset; +// the offset must be <= f.Size(). +// f.Pos(f.Offset(p)) == p. +// +func (f *File) Pos(offset int) Pos { + if offset > f.size { + panic("illegal file offset") + } + return Pos(f.base + offset) +} + + +// Offset returns the offset for the given file position p; +// p must be a valid Pos value in that file. +// f.Offset(f.Pos(offset)) == offset. +// +func (f *File) Offset(p Pos) int { + if int(p) < f.base || int(p) > f.base+f.size { + panic("illegal Pos value") + } + return int(p) - f.base +} + + +// Line returns the line number for the given file position p; +// p must be a Pos value in that file or NoPos. +// +func (f *File) Line(p Pos) int { + // TODO(gri) this can be implemented much more efficiently + return f.Position(p).Line +} + + +// Position returns the Position value for the given file position p; +// p must be a Pos value in that file or NoPos. 
+// +func (f *File) Position(p Pos) (pos Position) { + if p != NoPos { + if int(p) < f.base || int(p) > f.base+f.size { + panic("illegal Pos value") + } + pos = f.position(p) + } + return +} + + +func searchUints(a []int, x int) int { + return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1 +} + + +func searchLineInfos(a []lineInfo, x int) int { + return sort.Search(len(a), func(i int) bool { return a[i].offset > x }) - 1 +} + + +// info returns the file name, line, and column number for a file offset. +func (f *File) info(offset int) (filename string, line, column int) { + filename = f.name + if i := searchUints(f.lines, offset); i >= 0 { + line, column = i+1, offset-f.lines[i]+1 + } + if i := searchLineInfos(f.infos, offset); i >= 0 { + alt := &f.infos[i] + filename = alt.filename + if i := searchUints(f.lines, alt.offset); i >= 0 { + line += alt.line - i - 1 + } + } + return +} + + +// A FileSet represents a set of source files. +// Methods of file sets are synchronized; multiple goroutines +// may invoke them concurrently. +// +type FileSet struct { + mutex sync.RWMutex // protects the file set + base int // base offset for the next file + files []*File // list of files in the order added to the set + index map[*File]int // file -> files index for quick lookup +} + + +// NewFileSet creates a new file set. +func NewFileSet() *FileSet { + s := new(FileSet) + s.base = 1 // 0 == NoPos + s.index = make(map[*File]int) + return s +} + + +// Base returns the minimum base offset that must be provided to +// AddFile when adding the next file. +// +func (s *FileSet) Base() int { + s.mutex.RLock() + b := s.base + s.mutex.RUnlock() + return b + +} + + +// AddFile adds a new file with a given filename, base offset, and file size +// to the file set s and returns the file. Multiple files may have the same +// name. The base offset must not be smaller than the FileSet's Base(), and +// size must not be negative. +// +// Adding the file will set the file set's Base() value to base + size + 1 +// as the minimum base value for the next file. The following relationship +// exists between a Pos value p for a given file offset offs: +// +// int(p) = base + offs +// +// with offs in the range [0, size] and thus p in the range [base, base+size]. +// For convenience, File.Pos may be used to create file-specific position +// values from a file offset. +// +func (s *FileSet) AddFile(filename string, base, size int) *File { + s.mutex.Lock() + defer s.mutex.Unlock() + if base < s.base || size < 0 { + panic("illegal base or size") + } + // base >= s.base && size >= 0 + f := &File{s, filename, base, size, []int{0}, nil} + base += size + 1 // +1 because EOF also has a position + if base < 0 { + panic("token.Pos offset overflow (> 2G of source code in file set)") + } + // add the file to the file set + s.base = base + s.index[f] = len(s.files) + s.files = append(s.files, f) + return f +} + + +// Files returns the files added to the file set. +func (s *FileSet) Files() <-chan *File { + ch := make(chan *File) + go func() { + for i := 0; ; i++ { + var f *File + s.mutex.RLock() + if i < len(s.files) { + f = s.files[i] + } + s.mutex.RUnlock() + if f == nil { + break + } + ch <- f + } + close(ch) + }() + return ch +} diff --git a/src/pkg/go/token/position_test.go b/src/pkg/go/token/position_test.go new file mode 100644 index 000000000..1cffcc3c2 --- /dev/null +++ b/src/pkg/go/token/position_test.go @@ -0,0 +1,158 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package token + +import ( + "fmt" + "testing" +) + + +func checkPos(t *testing.T, msg string, p, q Position) { + if p.Filename != q.Filename { + t.Errorf("%s: expected filename = %q; got %q", msg, q.Filename, p.Filename) + } + if p.Offset != q.Offset { + t.Errorf("%s: expected offset = %d; got %d", msg, q.Offset, p.Offset) + } + if p.Line != q.Line { + t.Errorf("%s: expected line = %d; got %d", msg, q.Line, p.Line) + } + if p.Column != q.Column { + t.Errorf("%s: expected column = %d; got %d", msg, q.Column, p.Column) + } +} + + +func TestNoPos(t *testing.T) { + if NoPos.IsValid() { + t.Errorf("NoPos should not be valid") + } + var fset *FileSet + checkPos(t, "nil NoPos", fset.Position(NoPos), Position{}) + fset = NewFileSet() + checkPos(t, "fset NoPos", fset.Position(NoPos), Position{}) +} + + +var tests = []struct { + filename string + size int + lines []int +}{ + {"a", 0, []int{}}, + {"b", 5, []int{0}}, + {"c", 10, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + {"d", 100, []int{0, 5, 10, 20, 30, 70, 71, 72, 80, 85, 90, 99}}, + {"e", 777, []int{0, 80, 100, 120, 130, 180, 267, 455, 500, 567, 620}}, +} + + +func linecol(lines []int, offs int) (int, int) { + prevLineOffs := 0 + for line, lineOffs := range lines { + if offs < lineOffs { + return line, offs - prevLineOffs + 1 + } + prevLineOffs = lineOffs + } + return len(lines), offs - prevLineOffs + 1 +} + + +func verifyPositions(t *testing.T, fset *FileSet, f *File, lines []int) { + for offs := 0; offs < f.Size(); offs++ { + p := f.Pos(offs) + offs2 := f.Offset(p) + if offs2 != offs { + t.Errorf("%s, Offset: expected offset %d; got %d", f.Name(), offs, offs2) + } + line, col := linecol(lines, offs) + msg := fmt.Sprintf("%s (offs = %d, p = %d)", f.Name(), offs, p) + checkPos(t, msg, f.Position(f.Pos(offs)), Position{f.Name(), offs, line, col}) + checkPos(t, msg, fset.Position(p), Position{f.Name(), offs, line, col}) + } +} + + +func TestPositions(t *testing.T) { + const delta = 7 // a non-zero base offset increment + fset := NewFileSet() + for _, test := range tests { + // add file and verify name and size + f := fset.AddFile(test.filename, fset.Base()+delta, test.size) + if f.Name() != test.filename { + t.Errorf("expected filename %q; got %q", test.filename, f.Name()) + } + if f.Size() != test.size { + t.Errorf("%s: expected file size %d; got %d", f.Name(), test.size, f.Size()) + } + if fset.File(f.Pos(0)) != f { + t.Errorf("%s: f.Pos(0) was not found in f", f.Name()) + } + + // add lines individually and verify all positions + for i, offset := range test.lines { + f.AddLine(offset) + if f.LineCount() != i+1 { + t.Errorf("%s, AddLine: expected line count %d; got %d", f.Name(), i+1, f.LineCount()) + } + // adding the same offset again should be ignored + f.AddLine(offset) + if f.LineCount() != i+1 { + t.Errorf("%s, AddLine: expected unchanged line count %d; got %d", f.Name(), i+1, f.LineCount()) + } + verifyPositions(t, fset, f, test.lines[0:i+1]) + } + + // add lines at once and verify all positions + ok := f.SetLines(test.lines) + if !ok { + t.Errorf("%s: SetLines failed", f.Name()) + } + if f.LineCount() != len(test.lines) { + t.Errorf("%s, SetLines: expected line count %d; got %d", f.Name(), len(test.lines), f.LineCount()) + } + verifyPositions(t, fset, f, test.lines) + } +} + + +func TestLineInfo(t *testing.T) { + fset := NewFileSet() + f := fset.AddFile("foo", fset.Base(), 500) + lines := []int{0, 42, 77, 100, 210, 220, 277, 300, 333, 
401} + // add lines individually and provide alternative line information + for _, offs := range lines { + f.AddLine(offs) + f.AddLineInfo(offs, "bar", 42) + } + // verify positions for all offsets + for offs := 0; offs <= f.Size(); offs++ { + p := f.Pos(offs) + _, col := linecol(lines, offs) + msg := fmt.Sprintf("%s (offs = %d, p = %d)", f.Name(), offs, p) + checkPos(t, msg, f.Position(f.Pos(offs)), Position{"bar", offs, 42, col}) + checkPos(t, msg, fset.Position(p), Position{"bar", offs, 42, col}) + } +} + + +func TestFiles(t *testing.T) { + fset := NewFileSet() + for i, test := range tests { + fset.AddFile(test.filename, fset.Base(), test.size) + j := 0 + for g := range fset.Files() { + if g.Name() != tests[j].filename { + t.Errorf("expected filename = %s; got %s", tests[j].filename, g.Name()) + } + j++ + } + if j != i+1 { + t.Errorf("expected %d files; got %d", i+1, j) + } + } +} diff --git a/src/pkg/go/token/token.go b/src/pkg/go/token/token.go index 70c2501e9..1bd81c1b1 100644 --- a/src/pkg/go/token/token.go +++ b/src/pkg/go/token/token.go @@ -8,10 +8,7 @@ // package token -import ( - "fmt" - "strconv" -) +import "strconv" // Token is the set of lexical tokens of the Go programming language. @@ -321,39 +318,3 @@ func (tok Token) IsOperator() bool { return operator_beg < tok && tok < operator // returns false otherwise. // func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end } - - -// Token source positions are represented by a Position value. -// A Position is valid if the line number is > 0. -// -type Position struct { - Filename string // filename, if any - Offset int // byte offset, starting at 0 - Line int // line number, starting at 1 - Column int // column number, starting at 1 (character count) -} - - -// Pos is an accessor method for anonymous Position fields. -// It returns its receiver. -// -func (pos *Position) Pos() Position { return *pos } - - -// IsValid returns true if the position is valid. -func (pos *Position) IsValid() bool { return pos.Line > 0 } - - -func (pos Position) String() string { - s := pos.Filename - if pos.IsValid() { - if s != "" { - s += ":" - } - s += fmt.Sprintf("%d:%d", pos.Line, pos.Column) - } - if s == "" { - s = "???" - } - return s -} diff --git a/src/pkg/go/typechecker/Makefile b/src/pkg/go/typechecker/Makefile new file mode 100644 index 000000000..62b2aa7fe --- /dev/null +++ b/src/pkg/go/typechecker/Makefile @@ -0,0 +1,13 @@ +# Copyright 2010 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/typechecker +GOFILES=\ + scope.go\ + typechecker.go\ + universe.go\ + +include ../../../Make.pkg diff --git a/src/pkg/go/typechecker/scope.go b/src/pkg/go/typechecker/scope.go new file mode 100644 index 000000000..114c93ea8 --- /dev/null +++ b/src/pkg/go/typechecker/scope.go @@ -0,0 +1,119 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements scope support functions. + +package typechecker + +import ( + "fmt" + "go/ast" + "go/token" +) + + +func (tc *typechecker) openScope() *ast.Scope { + tc.topScope = ast.NewScope(tc.topScope) + return tc.topScope +} + + +func (tc *typechecker) closeScope() { + tc.topScope = tc.topScope.Outer +} + + +// objPos computes the source position of the declaration of an object name. 
+// Only required for error reporting, so doesn't have to be fast. +func objPos(obj *ast.Object) (pos token.Pos) { + switch d := obj.Decl.(type) { + case *ast.Field: + for _, n := range d.Names { + if n.Name == obj.Name { + return n.Pos() + } + } + case *ast.ValueSpec: + for _, n := range d.Names { + if n.Name == obj.Name { + return n.Pos() + } + } + case *ast.TypeSpec: + return d.Name.Pos() + case *ast.FuncDecl: + return d.Name.Pos() + } + if debug { + fmt.Printf("decl = %T\n", obj.Decl) + } + panic("unreachable") +} + + +// declInScope declares an object of a given kind and name in scope and sets the object's Decl and N fields. +// It returns the newly allocated object. If an object with the same name already exists in scope, an error +// is reported and the object is not inserted. +// (Objects with _ name are always inserted into a scope without errors, but they cannot be found.) +func (tc *typechecker) declInScope(scope *ast.Scope, kind ast.Kind, name *ast.Ident, decl interface{}, n int) *ast.Object { + obj := ast.NewObj(kind, name.Name) + obj.Decl = decl + obj.N = n + name.Obj = obj + if alt := scope.Insert(obj); alt != obj { + tc.Errorf(name.Pos(), "%s already declared at %s", name.Name, objPos(alt)) + } + return obj +} + + +// decl is the same as declInScope(tc.topScope, ...) +func (tc *typechecker) decl(kind ast.Kind, name *ast.Ident, decl interface{}, n int) *ast.Object { + return tc.declInScope(tc.topScope, kind, name, decl, n) +} + + +// find returns the object with the given name if visible in the current scope hierarchy. +// If no such object is found, an error is reported and a bad object is returned instead. +func (tc *typechecker) find(name *ast.Ident) (obj *ast.Object) { + for s := tc.topScope; s != nil && obj == nil; s = s.Outer { + obj = s.Lookup(name.Name) + } + if obj == nil { + tc.Errorf(name.Pos(), "%s not declared", name.Name) + obj = ast.NewObj(ast.Bad, name.Name) + } + name.Obj = obj + return +} + + +// findField returns the object with the given name if visible in the type's scope. +// If no such object is found, an error is reported and a bad object is returned instead. +func (tc *typechecker) findField(typ *ast.Type, name *ast.Ident) (obj *ast.Object) { + // TODO(gri) This is simplistic at the moment and ignores anonymous fields. + obj = typ.Scope.Lookup(name.Name) + if obj == nil { + tc.Errorf(name.Pos(), "%s not declared", name.Name) + obj = ast.NewObj(ast.Bad, name.Name) + } + return +} + + +// printScope prints the objects in a scope. +func printScope(scope *ast.Scope) { + fmt.Printf("scope %p {", scope) + if scope != nil && len(scope.Objects) > 0 { + fmt.Println() + for _, obj := range scope.Objects { + form := "void" + if obj.Type != nil { + form = obj.Type.Form.String() + } + fmt.Printf("\t%s\t%s\n", obj.Name, form) + } + } + fmt.Printf("}\n") +} diff --git a/src/pkg/go/typechecker/testdata/test0.go b/src/pkg/go/typechecker/testdata/test0.go new file mode 100644 index 000000000..4e317f214 --- /dev/null +++ b/src/pkg/go/typechecker/testdata/test0.go @@ -0,0 +1,94 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
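+//
+// The /* ERROR "rx" */ comments below mark tokens at which the test
+// harness (see typechecker_test.go further down in this change) expects
+// the typechecker to report an error matching the regular expression rx.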
+ +// type declarations + +package P0 + +type ( + B bool + I int32 + A [10]P + T struct { + x, y P + } + P *T + R *R + F func(A) I + Y interface { + f(A) I + } + S []P + M map[I]F + C chan<- I +) + +type ( + a/* ERROR "illegal cycle" */ a + a/* ERROR "already declared" */ int + + b/* ERROR "illegal cycle" */ c + c d + d e + e b /* ERROR "not a type" */ + + t *t + + U V + V W + W *U + + P1 *S2 + P2 P1 + + S1 struct { + a, b, c int + u, v, a/* ERROR "already declared" */ float + } + S2/* ERROR "illegal cycle" */ struct { + x S2 + } + + L1 []L1 + L2 []int + + A1 [10]int + A2/* ERROR "illegal cycle" */ [10]A2 + A3/* ERROR "illegal cycle" */ [10]struct { + x A4 + } + A4 [10]A3 + + F1 func() + F2 func(x, y, z float) + F3 func(x, y, x /* ERROR "already declared" */ float) + F4 func() (x, y, x /* ERROR "already declared" */ float) + F5 func(x int) (x /* ERROR "already declared" */ float) + + I1 interface{} + I2 interface { + m1() + } + I3 interface { + m1() + m1 /* ERROR "already declared" */ () + } + I4 interface { + m1(x, y, x /* ERROR "already declared" */ float) + m2() (x, y, x /* ERROR "already declared" */ float) + m3(x int) (x /* ERROR "already declared" */ float) + } + I5 interface { + m1(I5) + } + + C1 chan int + C2 <-chan int + C3 chan<- C3 + + M1 map[Last]string + M2 map[string]M2 + + Last int +) diff --git a/src/pkg/go/typechecker/testdata/test1.go b/src/pkg/go/typechecker/testdata/test1.go new file mode 100644 index 000000000..b0808ee7a --- /dev/null +++ b/src/pkg/go/typechecker/testdata/test1.go @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// const and var declarations + +package P1 + +const ( + c1 /* ERROR "missing initializer" */ + c2 int = 0 + c3, c4 = 0 +) diff --git a/src/pkg/go/typechecker/testdata/test3.go b/src/pkg/go/typechecker/testdata/test3.go new file mode 100644 index 000000000..ea35808a0 --- /dev/null +++ b/src/pkg/go/typechecker/testdata/test3.go @@ -0,0 +1,38 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package P3 + +// function and method signatures + +func _() {} +func _() {} +func _(x, x /* ERROR "already declared" */ int) {} + +func f() {} +func f /* ERROR "already declared" */ () {} + +func (*foo /* ERROR "invalid receiver" */ ) m() {} +func (bar /* ERROR "not a type" */ ) m() {} + +func f1(x, _, _ int) (_, _ float) {} +func f2(x, y, x /* ERROR "already declared" */ int) {} +func f3(x, y int) (a, b, x /* ERROR "already declared" */ int) {} + +func (x *T) m1() {} +func (x *T) m1 /* ERROR "already declared" */ () {} +func (x T) m1 /* ERROR "already declared" */ () {} +func (T) m1 /* ERROR "already declared" */ () {} + +func (x *T) m2(u, x /* ERROR "already declared" */ int) {} +func (x *T) m3(a, b, c int) (u, x /* ERROR "already declared" */ int) {} +func (T) _(x, x /* ERROR "already declared" */ int) {} +func (T) _() (x, x /* ERROR "already declared" */ int) {} + +//func (PT) _() {} + +var bar int + +type T struct{} +type PT (T) diff --git a/src/pkg/go/typechecker/testdata/test4.go b/src/pkg/go/typechecker/testdata/test4.go new file mode 100644 index 000000000..bb9aee3ad --- /dev/null +++ b/src/pkg/go/typechecker/testdata/test4.go @@ -0,0 +1,11 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// Constant declarations + +package P4 + +const ( + c0 /* ERROR "missing initializer" */ +) diff --git a/src/pkg/go/typechecker/typechecker.go b/src/pkg/go/typechecker/typechecker.go new file mode 100644 index 000000000..e9aefa240 --- /dev/null +++ b/src/pkg/go/typechecker/typechecker.go @@ -0,0 +1,484 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// INCOMPLETE PACKAGE. +// This package implements typechecking of a Go AST. +// The result of the typecheck is an augmented AST +// with object and type information for each identifier. +// +package typechecker + +import ( + "fmt" + "go/ast" + "go/token" + "go/scanner" + "os" +) + + +// TODO(gri) don't report errors for objects/types that are marked as bad. + + +const debug = true // set for debugging output + + +// An importer takes an import path and returns the data describing the +// respective package's exported interface. The data format is TBD. +// +type Importer func(path string) ([]byte, os.Error) + + +// CheckPackage typechecks a package and augments the AST by setting +// *ast.Object, *ast.Type, and *ast.Scope fields accordingly. If an +// importer is provided, it is used to handle imports, otherwise they +// are ignored (likely leading to typechecking errors). +// +// If errors are reported, the AST may be incompletely augmented (fields +// may be nil) or contain incomplete object, type, or scope information. +// +func CheckPackage(fset *token.FileSet, pkg *ast.Package, importer Importer) os.Error { + var tc typechecker + tc.fset = fset + tc.importer = importer + tc.checkPackage(pkg) + return tc.GetError(scanner.Sorted) +} + + +// CheckFile typechecks a single file, but otherwise behaves like +// CheckPackage. If the complete package consists of more than just +// one file, the file may not typecheck without errors. 
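+//
+// A hypothetical caller (illustrative only; the parser calls shown here
+// are not part of this change) might use it as follows:
+//
+//	fset := token.NewFileSet()
+//	file, err := parser.ParseFile(fset, filename, nil, 0)
+//	if err == nil {
+//		err = CheckFile(fset, file, nil)
+//	}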
+//
+func CheckFile(fset *token.FileSet, file *ast.File, importer Importer) os.Error {
+	// create a single-file dummy package
+	pkg := &ast.Package{file.Name.Name, nil, map[string]*ast.File{fset.Position(file.Name.NamePos).Filename: file}}
+	return CheckPackage(fset, pkg, importer)
+}
+
+
+// ----------------------------------------------------------------------------
+// Typechecker state
+
+type typechecker struct {
+	fset *token.FileSet
+	scanner.ErrorVector
+	importer Importer
+	topScope *ast.Scope // current top-most scope
+	cyclemap map[*ast.Object]bool // for cycle detection
+	iota int // current value of iota
+}
+
+
+func (tc *typechecker) Errorf(pos token.Pos, format string, args ...interface{}) {
+	tc.Error(tc.fset.Position(pos), fmt.Sprintf(format, args...))
+}
+
+
+func assert(pred bool) {
+	if !pred {
+		panic("internal error")
+	}
+}
+
+
+/*
+Typechecking is done in several phases:
+
+phase 1: declare all global objects; also collect all function and method declarations
+	- all objects have kind, name, decl fields; the decl field permits
+	  quick lookup of an object's declaration
+	- constant objects have an iota value
+	- type objects have unresolved types with empty scopes, all others have nil types
+	- report global double declarations
+
+phase 2: bind methods to their receiver base types
+	- receiver base types must be declared in the package, thus for
+	  each method a corresponding (unresolved) type must exist
+	- report method double declarations and errors with base types
+
+phase 3: resolve all global objects
+	- sequentially iterate through all objects in the global scope
+	- resolve types for all unresolved types and assign types to
+	  all attached methods
+	- assign types to all other objects, possibly by evaluating
+	  constant and initializer expressions
+	- resolution may recurse; a cyclemap is used to detect cycles
+	- report global typing errors
+
+phase 4: sequentially typecheck function and method bodies
+	- all global objects are declared and have types and values;
+	  all methods have types
+	- sequentially process statements in each body; any object
+	  referred to must be fully defined at this point
+	- report local typing errors
+*/
+
+func (tc *typechecker) checkPackage(pkg *ast.Package) {
+	// setup package scope
+	tc.topScope = Universe
+	tc.openScope()
+	defer tc.closeScope()
+
+	// TODO(gri) there's no file scope at the moment since we ignore imports
+
+	// phase 1: declare all global objects; also collect all function and method declarations
+	var funcs []*ast.FuncDecl
+	for _, file := range pkg.Files {
+		for _, decl := range file.Decls {
+			tc.declGlobal(decl)
+			if f, isFunc := decl.(*ast.FuncDecl); isFunc {
+				funcs = append(funcs, f)
+			}
+		}
+	}
+
+	// phase 2: bind methods to their receiver base types
+	for _, m := range funcs {
+		if m.Recv != nil {
+			tc.bindMethod(m)
+		}
+	}
+
+	// phase 3: resolve all global objects
+	// (note that objects with _ name are also in the scope)
+	tc.cyclemap = make(map[*ast.Object]bool)
+	for _, obj := range tc.topScope.Objects {
+		tc.resolve(obj)
+	}
+	assert(len(tc.cyclemap) == 0)
+
+	// phase 4: sequentially typecheck function and method bodies
+	for _, f := range funcs {
+		tc.checkBlock(f.Body.List, f.Name.Obj.Type)
+	}
+
+	pkg.Scope = tc.topScope
+}
+
+
+func (tc *typechecker) declGlobal(global ast.Decl) {
+	switch d := global.(type) {
+	case *ast.BadDecl:
+		// ignore
+
+	case *ast.GenDecl:
+		iota := 0
+		var prev *ast.ValueSpec
+		for _, spec := range d.Specs {
+			switch s := spec.(type) {
+			case *ast.ImportSpec:
+				// TODO(gri) imports go into file scope
+			case *ast.ValueSpec:
+				switch d.Tok {
+				case token.CONST:
+					if s.Values == nil {
+						// create a new spec with type and values from the previous one
+						if prev != nil {
+							s = &ast.ValueSpec{s.Doc, s.Names, prev.Type, prev.Values, s.Comment}
+						} else {
+							// TODO(gri) this should probably go into the const decl code
+							tc.Errorf(s.Pos(), "missing initializer for const %s", s.Names[0].Name)
+						}
+					}
+					for _, name := range s.Names {
+						tc.decl(ast.Con, name, s, iota)
+					}
+				case token.VAR:
+					for _, name := range s.Names {
+						tc.decl(ast.Var, name, s, 0)
+					}
+				default:
+					panic("unreachable")
+				}
+				prev = s
+				iota++
+			case *ast.TypeSpec:
+				obj := tc.decl(ast.Typ, s.Name, s, 0)
+				// give all type objects an unresolved type so
+				// that we can collect methods in the type scope
+				typ := ast.NewType(ast.Unresolved)
+				obj.Type = typ
+				typ.Obj = obj
+			default:
+				panic("unreachable")
+			}
+		}
+
+	case *ast.FuncDecl:
+		if d.Recv == nil {
+			tc.decl(ast.Fun, d.Name, d, 0)
+		}
+
+	default:
+		panic("unreachable")
+	}
+}
+
+
+// If x is of the form *T, deref returns T, otherwise it returns x.
+func deref(x ast.Expr) ast.Expr {
+	if p, isPtr := x.(*ast.StarExpr); isPtr {
+		x = p.X
+	}
+	return x
+}
+
+
+func (tc *typechecker) bindMethod(method *ast.FuncDecl) {
+	// a method is declared in the receiver base type's scope
+	var scope *ast.Scope
+	base := deref(method.Recv.List[0].Type)
+	if name, isIdent := base.(*ast.Ident); isIdent {
+		// if base is not an *ast.Ident, we had a syntax
+		// error and the parser reported an error already
+		obj := tc.topScope.Lookup(name.Name)
+		if obj == nil {
+			tc.Errorf(name.Pos(), "invalid receiver: %s is not declared in this package", name.Name)
+		} else if obj.Kind != ast.Typ {
+			tc.Errorf(name.Pos(), "invalid receiver: %s is not a type", name.Name)
+		} else {
+			typ := obj.Type
+			assert(typ.Form == ast.Unresolved)
+			scope = typ.Scope
+		}
+	}
+	if scope == nil {
+		// no receiver type found; use a dummy scope
+		// (we still want to type-check the method
+		// body, so make sure there is a name object
+		// and type)
+		// TODO(gri) should we record the scope so
+		// that we don't lose the receiver for type-
+		// checking of the method body?
+ scope = ast.NewScope(nil) + } + tc.declInScope(scope, ast.Fun, method.Name, method, 0) +} + + +func (tc *typechecker) resolve(obj *ast.Object) { + // check for declaration cycles + if tc.cyclemap[obj] { + tc.Errorf(objPos(obj), "illegal cycle in declaration of %s", obj.Name) + obj.Kind = ast.Bad + return + } + tc.cyclemap[obj] = true + defer func() { + tc.cyclemap[obj] = false, false + }() + + // resolve non-type objects + typ := obj.Type + if typ == nil { + switch obj.Kind { + case ast.Bad: + // ignore + + case ast.Con: + tc.declConst(obj) + + case ast.Var: + tc.declVar(obj) + //obj.Type = tc.typeFor(nil, obj.Decl.(*ast.ValueSpec).Type, false) + + case ast.Fun: + obj.Type = ast.NewType(ast.Function) + t := obj.Decl.(*ast.FuncDecl).Type + tc.declSignature(obj.Type, nil, t.Params, t.Results) + + default: + // type objects have non-nil types when resolve is called + if debug { + fmt.Printf("kind = %s\n", obj.Kind) + } + panic("unreachable") + } + return + } + + // resolve type objects + if typ.Form == ast.Unresolved { + tc.typeFor(typ, typ.Obj.Decl.(*ast.TypeSpec).Type, false) + + // provide types for all methods + for _, obj := range typ.Scope.Objects { + if obj.Kind == ast.Fun { + assert(obj.Type == nil) + obj.Type = ast.NewType(ast.Method) + f := obj.Decl.(*ast.FuncDecl) + t := f.Type + tc.declSignature(obj.Type, f.Recv, t.Params, t.Results) + } + } + } +} + + +func (tc *typechecker) checkBlock(body []ast.Stmt, ftype *ast.Type) { + tc.openScope() + defer tc.closeScope() + + // inject function/method parameters into block scope, if any + if ftype != nil { + for _, par := range ftype.Params.Objects { + obj := tc.topScope.Insert(par) + assert(obj == par) // ftype has no double declarations + } + } + + for _, stmt := range body { + tc.checkStmt(stmt) + } +} + + +// ---------------------------------------------------------------------------- +// Types + +// unparen removes parentheses around x, if any. 
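+// For example, unparen applied to ((x)) yields x; parentheses are
+// removed recursively.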
+func unparen(x ast.Expr) ast.Expr { + if ux, hasParens := x.(*ast.ParenExpr); hasParens { + return unparen(ux.X) + } + return x +} + + +func (tc *typechecker) declFields(scope *ast.Scope, fields *ast.FieldList, ref bool) (n uint) { + if fields != nil { + for _, f := range fields.List { + typ := tc.typeFor(nil, f.Type, ref) + for _, name := range f.Names { + fld := tc.declInScope(scope, ast.Var, name, f, 0) + fld.Type = typ + n++ + } + } + } + return n +} + + +func (tc *typechecker) declSignature(typ *ast.Type, recv, params, results *ast.FieldList) { + assert((typ.Form == ast.Method) == (recv != nil)) + typ.Params = ast.NewScope(nil) + tc.declFields(typ.Params, recv, true) + tc.declFields(typ.Params, params, true) + typ.N = tc.declFields(typ.Params, results, true) +} + + +func (tc *typechecker) typeFor(def *ast.Type, x ast.Expr, ref bool) (typ *ast.Type) { + x = unparen(x) + + // type name + if t, isIdent := x.(*ast.Ident); isIdent { + obj := tc.find(t) + + if obj.Kind != ast.Typ { + tc.Errorf(t.Pos(), "%s is not a type", t.Name) + if def == nil { + typ = ast.NewType(ast.BadType) + } else { + typ = def + typ.Form = ast.BadType + } + typ.Expr = x + return + } + + if !ref { + tc.resolve(obj) // check for cycles even if type resolved + } + typ = obj.Type + + if def != nil { + // new type declaration: copy type structure + def.Form = typ.Form + def.N = typ.N + def.Key, def.Elt = typ.Key, typ.Elt + def.Params = typ.Params + def.Expr = x + typ = def + } + return + } + + // type literal + typ = def + if typ == nil { + typ = ast.NewType(ast.BadType) + } + typ.Expr = x + + switch t := x.(type) { + case *ast.SelectorExpr: + if debug { + fmt.Println("qualified identifier unimplemented") + } + typ.Form = ast.BadType + + case *ast.StarExpr: + typ.Form = ast.Pointer + typ.Elt = tc.typeFor(nil, t.X, true) + + case *ast.ArrayType: + if t.Len != nil { + typ.Form = ast.Array + // TODO(gri) compute the real length + // (this may call resolve recursively) + (*typ).N = 42 + } else { + typ.Form = ast.Slice + } + typ.Elt = tc.typeFor(nil, t.Elt, t.Len == nil) + + case *ast.StructType: + typ.Form = ast.Struct + tc.declFields(typ.Scope, t.Fields, false) + + case *ast.FuncType: + typ.Form = ast.Function + tc.declSignature(typ, nil, t.Params, t.Results) + + case *ast.InterfaceType: + typ.Form = ast.Interface + tc.declFields(typ.Scope, t.Methods, true) + + case *ast.MapType: + typ.Form = ast.Map + typ.Key = tc.typeFor(nil, t.Key, true) + typ.Elt = tc.typeFor(nil, t.Value, true) + + case *ast.ChanType: + typ.Form = ast.Channel + typ.N = uint(t.Dir) + typ.Elt = tc.typeFor(nil, t.Value, true) + + default: + if debug { + fmt.Printf("x is %T\n", x) + } + panic("unreachable") + } + + return +} + + +// ---------------------------------------------------------------------------- +// TODO(gri) implement these place holders + +func (tc *typechecker) declConst(*ast.Object) { +} + + +func (tc *typechecker) declVar(*ast.Object) { +} + + +func (tc *typechecker) checkStmt(ast.Stmt) { +} diff --git a/src/pkg/go/typechecker/typechecker_test.go b/src/pkg/go/typechecker/typechecker_test.go new file mode 100644 index 000000000..9c5b52e41 --- /dev/null +++ b/src/pkg/go/typechecker/typechecker_test.go @@ -0,0 +1,167 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements a simple typechecker test harness. Packages found +// in the testDir directory are typechecked. 
Error messages reported by +// the typechecker are compared against the error messages expected for +// the test files. +// +// Expected errors are indicated in the test files by putting a comment +// of the form /* ERROR "rx" */ immediately following an offending token. +// The harness will verify that an error matching the regular expression +// rx is reported at that source position. Consecutive comments may be +// used to indicate multiple errors for the same token position. +// +// For instance, the following test file indicates that a "not declared" +// error should be reported for the undeclared variable x: +// +// package P0 +// func f() { +// _ = x /* ERROR "not declared" */ + 1 +// } +// +// If the -pkg flag is set, only packages with package names matching +// the regular expression provided via the flag value are tested. + +package typechecker + +import ( + "flag" + "fmt" + "go/ast" + "go/parser" + "go/scanner" + "go/token" + "io/ioutil" + "os" + "regexp" + "sort" + "strings" + "testing" +) + + +const testDir = "./testdata" // location of test packages + +var fset = token.NewFileSet() + +var ( + pkgPat = flag.String("pkg", ".*", "regular expression to select test packages by package name") + trace = flag.Bool("trace", false, "print package names") +) + + +// ERROR comments must be of the form /* ERROR "rx" */ and rx is +// a regular expression that matches the expected error message. +var errRx = regexp.MustCompile(`^/\* *ERROR *"([^"]*)" *\*/$`) + +// expectedErrors collects the regular expressions of ERROR comments +// found in the package files of pkg and returns them in sorted order +// (by filename and position). +func expectedErrors(t *testing.T, pkg *ast.Package) (list scanner.ErrorList) { + // scan all package files + for filename := range pkg.Files { + src, err := ioutil.ReadFile(filename) + if err != nil { + t.Fatalf("expectedErrors(%s): %v", pkg.Name, err) + } + + var s scanner.Scanner + s.Init(fset, filename, src, nil, scanner.ScanComments) + var prev token.Pos // position of last non-comment token + loop: + for { + pos, tok, lit := s.Scan() + switch tok { + case token.EOF: + break loop + case token.COMMENT: + s := errRx.FindSubmatch(lit) + if len(s) == 2 { + list = append(list, &scanner.Error{fset.Position(prev), string(s[1])}) + } + default: + prev = pos + } + } + } + sort.Sort(list) // multiple files may not be sorted + return +} + + +func testFilter(f *os.FileInfo) bool { + return strings.HasSuffix(f.Name, ".go") && f.Name[0] != '.' 
+}
+
+
+func checkError(t *testing.T, expected, found *scanner.Error) {
+	rx, err := regexp.Compile(expected.Msg)
+	if err != nil {
+		t.Errorf("%s: %v", expected.Pos, err)
+		return
+	}
+
+	match := rx.MatchString(found.Msg)
+
+	if expected.Pos.Offset != found.Pos.Offset {
+		if match {
+			t.Errorf("%s: expected error should have been at %s", expected.Pos, found.Pos)
+		} else {
+			t.Errorf("%s: error matching %q expected", expected.Pos, expected.Msg)
+			return
+		}
+	}
+
+	if !match {
+		t.Errorf("%s: %q does not match %q", expected.Pos, expected.Msg, found.Msg)
+	}
+}
+
+
+func TestTypeCheck(t *testing.T) {
+	flag.Parse()
+	pkgRx, err := regexp.Compile(*pkgPat)
+	if err != nil {
+		t.Fatalf("illegal flag value %q: %s", *pkgPat, err)
+	}
+
+	pkgs, err := parser.ParseDir(fset, testDir, testFilter, 0)
+	if err != nil {
+		scanner.PrintError(os.Stderr, err)
+		t.Fatalf("packages in %s contain syntax errors", testDir)
+	}
+
+	for _, pkg := range pkgs {
+		if !pkgRx.MatchString(pkg.Name) {
+			continue // only test selected packages
+		}
+
+		if *trace {
+			fmt.Println(pkg.Name)
+		}
+
+		xlist := expectedErrors(t, pkg)
+		err := CheckPackage(fset, pkg, nil)
+		if err != nil {
+			if elist, ok := err.(scanner.ErrorList); ok {
+				// verify that errors match
+				for i := 0; i < len(xlist) && i < len(elist); i++ {
+					checkError(t, xlist[i], elist[i])
+				}
+				// the correct number of errors must have been found
+				if len(xlist) != len(elist) {
+					fmt.Fprintf(os.Stderr, "%s\n", pkg.Name)
+					scanner.PrintError(os.Stderr, elist)
+					fmt.Fprintln(os.Stderr)
+					t.Errorf("TypeCheck(%s): %d errors expected but %d reported", pkg.Name, len(xlist), len(elist))
+				}
+			} else {
+				t.Errorf("TypeCheck(%s): %v", pkg.Name, err)
+			}
+		} else if len(xlist) > 0 {
+			t.Errorf("TypeCheck(%s): %d errors expected but 0 reported", pkg.Name, len(xlist))
+		}
+	}
+}
diff --git a/src/pkg/go/typechecker/universe.go b/src/pkg/go/typechecker/universe.go new file mode 100644 index 000000000..db950737f --- /dev/null +++ b/src/pkg/go/typechecker/universe.go @@ -0,0 +1,38 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package typechecker
+
+import "go/ast"
+
+// TODO(gri) should this be in package ast?
+
+// The Universe scope contains all predeclared identifiers.
+var Universe *ast.Scope
+
+
+func def(obj *ast.Object) {
+	alt := Universe.Insert(obj)
+	if alt != obj {
+		panic("object declared twice")
+	}
+}
+
+
+func init() {
+	Universe = ast.NewScope(nil)
+
+	// basic types
+	for n, name := range ast.BasicTypes {
+		typ := ast.NewType(ast.Basic)
+		typ.N = n
+		obj := ast.NewObj(ast.Typ, name)
+		obj.Type = typ
+		typ.Obj = obj
+		def(obj)
+	}
+
+	// built-in functions
+	// TODO(gri) implement this
+}
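A minimal end-to-end sketch of how the FileSet-based API and the new typechecker fit together (illustrative only, not part of the change above; it assumes the 2010-era toolchain used throughout this diff, in particular os.Error results and the fset-first go/parser entry points):

	package main

	import (
		"fmt"
		"go/parser"
		"go/token"
		"go/typechecker"
		"os"
	)

	func main() {
		// A single FileSet is shared by the parser and the typechecker so
		// that token.Pos values can be mapped back to file positions.
		fset := token.NewFileSet()
		pkgs, err := parser.ParseDir(fset, "./testdata", nil, 0)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			return
		}
		// A nil Importer makes CheckPackage ignore imports, which may
		// itself lead to typechecking errors (see its doc comment above).
		for _, pkg := range pkgs {
			if err := typechecker.CheckPackage(fset, pkg, nil); err != nil {
				fmt.Fprintln(os.Stderr, err)
			}
		}
	}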