author    | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200
committer | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200
commit    | 5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch)
tree      | c0650497e988f47be9c6f2324fa692a52dea82e1 /src/pkg/go
parent    | 80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff)
download  | golang-upstream/60.tar.gz
Imported Upstream version 60 (tag: upstream/60)
Diffstat (limited to 'src/pkg/go')
80 files changed, 23610 insertions, 0 deletions
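The files below add the go/ast package: AST node types (ast.go), export and name filtering (filter.go), debug printing (print.go), identifier resolution (resolve.go), scopes and objects (scope.go), and tree walking (walk.go). As a point of reference only — not part of the imported diff — here is a minimal sketch of how this package is typically driven from go/parser and go/token. It is written against the current API (using `error` rather than the `os.Error` type that appears throughout this r60 snapshot), and the file name and source text are invented for illustration.

```go
package main

import (
	"fmt"
	"go/ast"
	"go/parser"
	"go/token"
)

func main() {
	// Hypothetical input; any parsable Go source works.
	src := `package demo

func Exported() int { return 42 }
func unexported()   {}
`
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "demo.go", src, 0)
	if err != nil {
		panic(err)
	}

	// Traverse the tree (see walk.go below) and report each function
	// declaration, using Ident.IsExported from ast.go.
	ast.Inspect(f, func(n ast.Node) bool {
		if fn, ok := n.(*ast.FuncDecl); ok {
			fmt.Printf("%s: func %s (exported: %v)\n",
				fset.Position(fn.Pos()), fn.Name.Name, fn.Name.IsExported())
		}
		return true
	})

	// ast.Print(fset, f) would dump the entire tree via print.go's Fprint.
}
```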
diff --git a/src/pkg/go/ast/Makefile b/src/pkg/go/ast/Makefile new file mode 100644 index 000000000..40be10208 --- /dev/null +++ b/src/pkg/go/ast/Makefile @@ -0,0 +1,16 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/ast +GOFILES=\ + ast.go\ + filter.go\ + print.go\ + resolve.go\ + scope.go\ + walk.go\ + +include ../../../Make.pkg diff --git a/src/pkg/go/ast/ast.go b/src/pkg/go/ast/ast.go new file mode 100644 index 000000000..22bd5ee22 --- /dev/null +++ b/src/pkg/go/ast/ast.go @@ -0,0 +1,914 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package ast declares the types used to represent syntax trees for Go +// packages. +// +package ast + +import ( + "go/token" + "unicode" + "utf8" +) + +// ---------------------------------------------------------------------------- +// Interfaces +// +// There are 3 main classes of nodes: Expressions and type nodes, +// statement nodes, and declaration nodes. The node names usually +// match the corresponding Go spec production names to which they +// correspond. The node fields correspond to the individual parts +// of the respective productions. +// +// All nodes contain position information marking the beginning of +// the corresponding source text segment; it is accessible via the +// Pos accessor method. Nodes may contain additional position info +// for language constructs where comments may be found between parts +// of the construct (typically any larger, parenthesized subpart). +// That position information is needed to properly position comments +// when printing the construct. + +// All node types implement the Node interface. +type Node interface { + Pos() token.Pos // position of first character belonging to the node + End() token.Pos // position of first character immediately after the node +} + +// All expression nodes implement the Expr interface. +type Expr interface { + Node + exprNode() +} + +// All statement nodes implement the Stmt interface. +type Stmt interface { + Node + stmtNode() +} + +// All declaration nodes implement the Decl interface. +type Decl interface { + Node + declNode() +} + +// ---------------------------------------------------------------------------- +// Comments + +// A Comment node represents a single //-style or /*-style comment. +type Comment struct { + Slash token.Pos // position of "/" starting the comment + Text string // comment text (excluding '\n' for //-style comments) +} + +func (c *Comment) Pos() token.Pos { return c.Slash } +func (c *Comment) End() token.Pos { return token.Pos(int(c.Slash) + len(c.Text)) } + +// A CommentGroup represents a sequence of comments +// with no other tokens and no empty lines between. +// +type CommentGroup struct { + List []*Comment // len(List) > 0 +} + +func (g *CommentGroup) Pos() token.Pos { return g.List[0].Pos() } +func (g *CommentGroup) End() token.Pos { return g.List[len(g.List)-1].End() } + +// ---------------------------------------------------------------------------- +// Expressions and types + +// A Field represents a Field declaration list in a struct type, +// a method list in an interface type, or a parameter/result declaration +// in a signature. 
+// +type Field struct { + Doc *CommentGroup // associated documentation; or nil + Names []*Ident // field/method/parameter names; or nil if anonymous field + Type Expr // field/method/parameter type + Tag *BasicLit // field tag; or nil + Comment *CommentGroup // line comments; or nil +} + +func (f *Field) Pos() token.Pos { + if len(f.Names) > 0 { + return f.Names[0].Pos() + } + return f.Type.Pos() +} + +func (f *Field) End() token.Pos { + if f.Tag != nil { + return f.Tag.End() + } + return f.Type.End() +} + +// A FieldList represents a list of Fields, enclosed by parentheses or braces. +type FieldList struct { + Opening token.Pos // position of opening parenthesis/brace, if any + List []*Field // field list; or nil + Closing token.Pos // position of closing parenthesis/brace, if any +} + +func (f *FieldList) Pos() token.Pos { + if f.Opening.IsValid() { + return f.Opening + } + // the list should not be empty in this case; + // be conservative and guard against bad ASTs + if len(f.List) > 0 { + return f.List[0].Pos() + } + return token.NoPos +} + +func (f *FieldList) End() token.Pos { + if f.Closing.IsValid() { + return f.Closing + 1 + } + // the list should not be empty in this case; + // be conservative and guard against bad ASTs + if n := len(f.List); n > 0 { + return f.List[n-1].End() + } + return token.NoPos +} + +// NumFields returns the number of (named and anonymous fields) in a FieldList. +func (f *FieldList) NumFields() int { + n := 0 + if f != nil { + for _, g := range f.List { + m := len(g.Names) + if m == 0 { + m = 1 // anonymous field + } + n += m + } + } + return n +} + +// An expression is represented by a tree consisting of one +// or more of the following concrete expression nodes. +// +type ( + // A BadExpr node is a placeholder for expressions containing + // syntax errors for which no correct expression nodes can be + // created. + // + BadExpr struct { + From, To token.Pos // position range of bad expression + } + + // An Ident node represents an identifier. + Ident struct { + NamePos token.Pos // identifier position + Name string // identifier name + Obj *Object // denoted object; or nil + } + + // An Ellipsis node stands for the "..." type in a + // parameter list or the "..." length in an array type. + // + Ellipsis struct { + Ellipsis token.Pos // position of "..." + Elt Expr // ellipsis element type (parameter lists only); or nil + } + + // A BasicLit node represents a literal of basic type. + BasicLit struct { + ValuePos token.Pos // literal position + Kind token.Token // token.INT, token.FLOAT, token.IMAG, token.CHAR, or token.STRING + Value string // literal string; e.g. 42, 0x7f, 3.14, 1e-9, 2.4i, 'a', '\x7f', "foo" or `\m\n\o` + } + + // A FuncLit node represents a function literal. + FuncLit struct { + Type *FuncType // function type + Body *BlockStmt // function body + } + + // A CompositeLit node represents a composite literal. + CompositeLit struct { + Type Expr // literal type; or nil + Lbrace token.Pos // position of "{" + Elts []Expr // list of composite elements; or nil + Rbrace token.Pos // position of "}" + } + + // A ParenExpr node represents a parenthesized expression. + ParenExpr struct { + Lparen token.Pos // position of "(" + X Expr // parenthesized expression + Rparen token.Pos // position of ")" + } + + // A SelectorExpr node represents an expression followed by a selector. + SelectorExpr struct { + X Expr // expression + Sel *Ident // field selector + } + + // An IndexExpr node represents an expression followed by an index. 
+ IndexExpr struct { + X Expr // expression + Lbrack token.Pos // position of "[" + Index Expr // index expression + Rbrack token.Pos // position of "]" + } + + // An SliceExpr node represents an expression followed by slice indices. + SliceExpr struct { + X Expr // expression + Lbrack token.Pos // position of "[" + Low Expr // begin of slice range; or nil + High Expr // end of slice range; or nil + Rbrack token.Pos // position of "]" + } + + // A TypeAssertExpr node represents an expression followed by a + // type assertion. + // + TypeAssertExpr struct { + X Expr // expression + Type Expr // asserted type; nil means type switch X.(type) + } + + // A CallExpr node represents an expression followed by an argument list. + CallExpr struct { + Fun Expr // function expression + Lparen token.Pos // position of "(" + Args []Expr // function arguments; or nil + Ellipsis token.Pos // position of "...", if any + Rparen token.Pos // position of ")" + } + + // A StarExpr node represents an expression of the form "*" Expression. + // Semantically it could be a unary "*" expression, or a pointer type. + // + StarExpr struct { + Star token.Pos // position of "*" + X Expr // operand + } + + // A UnaryExpr node represents a unary expression. + // Unary "*" expressions are represented via StarExpr nodes. + // + UnaryExpr struct { + OpPos token.Pos // position of Op + Op token.Token // operator + X Expr // operand + } + + // A BinaryExpr node represents a binary expression. + BinaryExpr struct { + X Expr // left operand + OpPos token.Pos // position of Op + Op token.Token // operator + Y Expr // right operand + } + + // A KeyValueExpr node represents (key : value) pairs + // in composite literals. + // + KeyValueExpr struct { + Key Expr + Colon token.Pos // position of ":" + Value Expr + } +) + +// The direction of a channel type is indicated by one +// of the following constants. +// +type ChanDir int + +const ( + SEND ChanDir = 1 << iota + RECV +) + +// A type is represented by a tree consisting of one +// or more of the following type-specific expression +// nodes. +// +type ( + // An ArrayType node represents an array or slice type. + ArrayType struct { + Lbrack token.Pos // position of "[" + Len Expr // Ellipsis node for [...]T array types, nil for slice types + Elt Expr // element type + } + + // A StructType node represents a struct type. + StructType struct { + Struct token.Pos // position of "struct" keyword + Fields *FieldList // list of field declarations + Incomplete bool // true if (source) fields are missing in the Fields list + } + + // Pointer types are represented via StarExpr nodes. + + // A FuncType node represents a function type. + FuncType struct { + Func token.Pos // position of "func" keyword + Params *FieldList // (incoming) parameters; or nil + Results *FieldList // (outgoing) results; or nil + } + + // An InterfaceType node represents an interface type. + InterfaceType struct { + Interface token.Pos // position of "interface" keyword + Methods *FieldList // list of methods + Incomplete bool // true if (source) methods are missing in the Methods list + } + + // A MapType node represents a map type. + MapType struct { + Map token.Pos // position of "map" keyword + Key Expr + Value Expr + } + + // A ChanType node represents a channel type. + ChanType struct { + Begin token.Pos // position of "chan" keyword or "<-" (whichever comes first) + Dir ChanDir // channel direction + Value Expr // value type + } +) + +// Pos and End implementations for expression/type nodes. 
+// +func (x *BadExpr) Pos() token.Pos { return x.From } +func (x *Ident) Pos() token.Pos { return x.NamePos } +func (x *Ellipsis) Pos() token.Pos { return x.Ellipsis } +func (x *BasicLit) Pos() token.Pos { return x.ValuePos } +func (x *FuncLit) Pos() token.Pos { return x.Type.Pos() } +func (x *CompositeLit) Pos() token.Pos { + if x.Type != nil { + return x.Type.Pos() + } + return x.Lbrace +} +func (x *ParenExpr) Pos() token.Pos { return x.Lparen } +func (x *SelectorExpr) Pos() token.Pos { return x.X.Pos() } +func (x *IndexExpr) Pos() token.Pos { return x.X.Pos() } +func (x *SliceExpr) Pos() token.Pos { return x.X.Pos() } +func (x *TypeAssertExpr) Pos() token.Pos { return x.X.Pos() } +func (x *CallExpr) Pos() token.Pos { return x.Fun.Pos() } +func (x *StarExpr) Pos() token.Pos { return x.Star } +func (x *UnaryExpr) Pos() token.Pos { return x.OpPos } +func (x *BinaryExpr) Pos() token.Pos { return x.X.Pos() } +func (x *KeyValueExpr) Pos() token.Pos { return x.Key.Pos() } +func (x *ArrayType) Pos() token.Pos { return x.Lbrack } +func (x *StructType) Pos() token.Pos { return x.Struct } +func (x *FuncType) Pos() token.Pos { return x.Func } +func (x *InterfaceType) Pos() token.Pos { return x.Interface } +func (x *MapType) Pos() token.Pos { return x.Map } +func (x *ChanType) Pos() token.Pos { return x.Begin } + +func (x *BadExpr) End() token.Pos { return x.To } +func (x *Ident) End() token.Pos { return token.Pos(int(x.NamePos) + len(x.Name)) } +func (x *Ellipsis) End() token.Pos { + if x.Elt != nil { + return x.Elt.End() + } + return x.Ellipsis + 3 // len("...") +} +func (x *BasicLit) End() token.Pos { return token.Pos(int(x.ValuePos) + len(x.Value)) } +func (x *FuncLit) End() token.Pos { return x.Body.End() } +func (x *CompositeLit) End() token.Pos { return x.Rbrace + 1 } +func (x *ParenExpr) End() token.Pos { return x.Rparen + 1 } +func (x *SelectorExpr) End() token.Pos { return x.Sel.End() } +func (x *IndexExpr) End() token.Pos { return x.Rbrack + 1 } +func (x *SliceExpr) End() token.Pos { return x.Rbrack + 1 } +func (x *TypeAssertExpr) End() token.Pos { + if x.Type != nil { + return x.Type.End() + } + return x.X.End() +} +func (x *CallExpr) End() token.Pos { return x.Rparen + 1 } +func (x *StarExpr) End() token.Pos { return x.X.End() } +func (x *UnaryExpr) End() token.Pos { return x.X.End() } +func (x *BinaryExpr) End() token.Pos { return x.Y.End() } +func (x *KeyValueExpr) End() token.Pos { return x.Value.End() } +func (x *ArrayType) End() token.Pos { return x.Elt.End() } +func (x *StructType) End() token.Pos { return x.Fields.End() } +func (x *FuncType) End() token.Pos { + if x.Results != nil { + return x.Results.End() + } + return x.Params.End() +} +func (x *InterfaceType) End() token.Pos { return x.Methods.End() } +func (x *MapType) End() token.Pos { return x.Value.End() } +func (x *ChanType) End() token.Pos { return x.Value.End() } + +// exprNode() ensures that only expression/type nodes can be +// assigned to an ExprNode. 
+// +func (x *BadExpr) exprNode() {} +func (x *Ident) exprNode() {} +func (x *Ellipsis) exprNode() {} +func (x *BasicLit) exprNode() {} +func (x *FuncLit) exprNode() {} +func (x *CompositeLit) exprNode() {} +func (x *ParenExpr) exprNode() {} +func (x *SelectorExpr) exprNode() {} +func (x *IndexExpr) exprNode() {} +func (x *SliceExpr) exprNode() {} +func (x *TypeAssertExpr) exprNode() {} +func (x *CallExpr) exprNode() {} +func (x *StarExpr) exprNode() {} +func (x *UnaryExpr) exprNode() {} +func (x *BinaryExpr) exprNode() {} +func (x *KeyValueExpr) exprNode() {} + +func (x *ArrayType) exprNode() {} +func (x *StructType) exprNode() {} +func (x *FuncType) exprNode() {} +func (x *InterfaceType) exprNode() {} +func (x *MapType) exprNode() {} +func (x *ChanType) exprNode() {} + +// ---------------------------------------------------------------------------- +// Convenience functions for Idents + +var noPos token.Pos + +// NewIdent creates a new Ident without position. +// Useful for ASTs generated by code other than the Go parser. +// +func NewIdent(name string) *Ident { return &Ident{noPos, name, nil} } + +// IsExported returns whether name is an exported Go symbol +// (i.e., whether it begins with an uppercase letter). +// +func IsExported(name string) bool { + ch, _ := utf8.DecodeRuneInString(name) + return unicode.IsUpper(ch) +} + +// IsExported returns whether id is an exported Go symbol +// (i.e., whether it begins with an uppercase letter). +// +func (id *Ident) IsExported() bool { return IsExported(id.Name) } + +func (id *Ident) String() string { + if id != nil { + return id.Name + } + return "<nil>" +} + +// ---------------------------------------------------------------------------- +// Statements + +// A statement is represented by a tree consisting of one +// or more of the following concrete statement nodes. +// +type ( + // A BadStmt node is a placeholder for statements containing + // syntax errors for which no correct statement nodes can be + // created. + // + BadStmt struct { + From, To token.Pos // position range of bad statement + } + + // A DeclStmt node represents a declaration in a statement list. + DeclStmt struct { + Decl Decl + } + + // An EmptyStmt node represents an empty statement. + // The "position" of the empty statement is the position + // of the immediately preceding semicolon. + // + EmptyStmt struct { + Semicolon token.Pos // position of preceding ";" + } + + // A LabeledStmt node represents a labeled statement. + LabeledStmt struct { + Label *Ident + Colon token.Pos // position of ":" + Stmt Stmt + } + + // An ExprStmt node represents a (stand-alone) expression + // in a statement list. + // + ExprStmt struct { + X Expr // expression + } + + // A SendStmt node represents a send statement. + SendStmt struct { + Chan Expr + Arrow token.Pos // position of "<-" + Value Expr + } + + // An IncDecStmt node represents an increment or decrement statement. + IncDecStmt struct { + X Expr + TokPos token.Pos // position of Tok + Tok token.Token // INC or DEC + } + + // An AssignStmt node represents an assignment or + // a short variable declaration. + // + AssignStmt struct { + Lhs []Expr + TokPos token.Pos // position of Tok + Tok token.Token // assignment token, DEFINE + Rhs []Expr + } + + // A GoStmt node represents a go statement. + GoStmt struct { + Go token.Pos // position of "go" keyword + Call *CallExpr + } + + // A DeferStmt node represents a defer statement. 
+ DeferStmt struct { + Defer token.Pos // position of "defer" keyword + Call *CallExpr + } + + // A ReturnStmt node represents a return statement. + ReturnStmt struct { + Return token.Pos // position of "return" keyword + Results []Expr // result expressions; or nil + } + + // A BranchStmt node represents a break, continue, goto, + // or fallthrough statement. + // + BranchStmt struct { + TokPos token.Pos // position of Tok + Tok token.Token // keyword token (BREAK, CONTINUE, GOTO, FALLTHROUGH) + Label *Ident // label name; or nil + } + + // A BlockStmt node represents a braced statement list. + BlockStmt struct { + Lbrace token.Pos // position of "{" + List []Stmt + Rbrace token.Pos // position of "}" + } + + // An IfStmt node represents an if statement. + IfStmt struct { + If token.Pos // position of "if" keyword + Init Stmt // initialization statement; or nil + Cond Expr // condition + Body *BlockStmt + Else Stmt // else branch; or nil + } + + // A CaseClause represents a case of an expression or type switch statement. + CaseClause struct { + Case token.Pos // position of "case" or "default" keyword + List []Expr // list of expressions or types; nil means default case + Colon token.Pos // position of ":" + Body []Stmt // statement list; or nil + } + + // A SwitchStmt node represents an expression switch statement. + SwitchStmt struct { + Switch token.Pos // position of "switch" keyword + Init Stmt // initialization statement; or nil + Tag Expr // tag expression; or nil + Body *BlockStmt // CaseClauses only + } + + // An TypeSwitchStmt node represents a type switch statement. + TypeSwitchStmt struct { + Switch token.Pos // position of "switch" keyword + Init Stmt // initialization statement; or nil + Assign Stmt // x := y.(type) or y.(type) + Body *BlockStmt // CaseClauses only + } + + // A CommClause node represents a case of a select statement. + CommClause struct { + Case token.Pos // position of "case" or "default" keyword + Comm Stmt // send or receive statement; nil means default case + Colon token.Pos // position of ":" + Body []Stmt // statement list; or nil + } + + // An SelectStmt node represents a select statement. + SelectStmt struct { + Select token.Pos // position of "select" keyword + Body *BlockStmt // CommClauses only + } + + // A ForStmt represents a for statement. + ForStmt struct { + For token.Pos // position of "for" keyword + Init Stmt // initialization statement; or nil + Cond Expr // condition; or nil + Post Stmt // post iteration statement; or nil + Body *BlockStmt + } + + // A RangeStmt represents a for statement with a range clause. + RangeStmt struct { + For token.Pos // position of "for" keyword + Key, Value Expr // Value may be nil + TokPos token.Pos // position of Tok + Tok token.Token // ASSIGN, DEFINE + X Expr // value to range over + Body *BlockStmt + } +) + +// Pos and End implementations for statement nodes. 
+// +func (s *BadStmt) Pos() token.Pos { return s.From } +func (s *DeclStmt) Pos() token.Pos { return s.Decl.Pos() } +func (s *EmptyStmt) Pos() token.Pos { return s.Semicolon } +func (s *LabeledStmt) Pos() token.Pos { return s.Label.Pos() } +func (s *ExprStmt) Pos() token.Pos { return s.X.Pos() } +func (s *SendStmt) Pos() token.Pos { return s.Chan.Pos() } +func (s *IncDecStmt) Pos() token.Pos { return s.X.Pos() } +func (s *AssignStmt) Pos() token.Pos { return s.Lhs[0].Pos() } +func (s *GoStmt) Pos() token.Pos { return s.Go } +func (s *DeferStmt) Pos() token.Pos { return s.Defer } +func (s *ReturnStmt) Pos() token.Pos { return s.Return } +func (s *BranchStmt) Pos() token.Pos { return s.TokPos } +func (s *BlockStmt) Pos() token.Pos { return s.Lbrace } +func (s *IfStmt) Pos() token.Pos { return s.If } +func (s *CaseClause) Pos() token.Pos { return s.Case } +func (s *SwitchStmt) Pos() token.Pos { return s.Switch } +func (s *TypeSwitchStmt) Pos() token.Pos { return s.Switch } +func (s *CommClause) Pos() token.Pos { return s.Case } +func (s *SelectStmt) Pos() token.Pos { return s.Select } +func (s *ForStmt) Pos() token.Pos { return s.For } +func (s *RangeStmt) Pos() token.Pos { return s.For } + +func (s *BadStmt) End() token.Pos { return s.To } +func (s *DeclStmt) End() token.Pos { return s.Decl.End() } +func (s *EmptyStmt) End() token.Pos { + return s.Semicolon + 1 /* len(";") */ +} +func (s *LabeledStmt) End() token.Pos { return s.Stmt.End() } +func (s *ExprStmt) End() token.Pos { return s.X.End() } +func (s *SendStmt) End() token.Pos { return s.Value.End() } +func (s *IncDecStmt) End() token.Pos { + return s.TokPos + 2 /* len("++") */ +} +func (s *AssignStmt) End() token.Pos { return s.Rhs[len(s.Rhs)-1].End() } +func (s *GoStmt) End() token.Pos { return s.Call.End() } +func (s *DeferStmt) End() token.Pos { return s.Call.End() } +func (s *ReturnStmt) End() token.Pos { + if n := len(s.Results); n > 0 { + return s.Results[n-1].End() + } + return s.Return + 6 // len("return") +} +func (s *BranchStmt) End() token.Pos { + if s.Label != nil { + return s.Label.End() + } + return token.Pos(int(s.TokPos) + len(s.Tok.String())) +} +func (s *BlockStmt) End() token.Pos { return s.Rbrace + 1 } +func (s *IfStmt) End() token.Pos { + if s.Else != nil { + return s.Else.End() + } + return s.Body.End() +} +func (s *CaseClause) End() token.Pos { + if n := len(s.Body); n > 0 { + return s.Body[n-1].End() + } + return s.Colon + 1 +} +func (s *SwitchStmt) End() token.Pos { return s.Body.End() } +func (s *TypeSwitchStmt) End() token.Pos { return s.Body.End() } +func (s *CommClause) End() token.Pos { + if n := len(s.Body); n > 0 { + return s.Body[n-1].End() + } + return s.Colon + 1 +} +func (s *SelectStmt) End() token.Pos { return s.Body.End() } +func (s *ForStmt) End() token.Pos { return s.Body.End() } +func (s *RangeStmt) End() token.Pos { return s.Body.End() } + +// stmtNode() ensures that only statement nodes can be +// assigned to a StmtNode. 
+// +func (s *BadStmt) stmtNode() {} +func (s *DeclStmt) stmtNode() {} +func (s *EmptyStmt) stmtNode() {} +func (s *LabeledStmt) stmtNode() {} +func (s *ExprStmt) stmtNode() {} +func (s *SendStmt) stmtNode() {} +func (s *IncDecStmt) stmtNode() {} +func (s *AssignStmt) stmtNode() {} +func (s *GoStmt) stmtNode() {} +func (s *DeferStmt) stmtNode() {} +func (s *ReturnStmt) stmtNode() {} +func (s *BranchStmt) stmtNode() {} +func (s *BlockStmt) stmtNode() {} +func (s *IfStmt) stmtNode() {} +func (s *CaseClause) stmtNode() {} +func (s *SwitchStmt) stmtNode() {} +func (s *TypeSwitchStmt) stmtNode() {} +func (s *CommClause) stmtNode() {} +func (s *SelectStmt) stmtNode() {} +func (s *ForStmt) stmtNode() {} +func (s *RangeStmt) stmtNode() {} + +// ---------------------------------------------------------------------------- +// Declarations + +// A Spec node represents a single (non-parenthesized) import, +// constant, type, or variable declaration. +// +type ( + // The Spec type stands for any of *ImportSpec, *ValueSpec, and *TypeSpec. + Spec interface { + Node + specNode() + } + + // An ImportSpec node represents a single package import. + ImportSpec struct { + Doc *CommentGroup // associated documentation; or nil + Name *Ident // local package name (including "."); or nil + Path *BasicLit // import path + Comment *CommentGroup // line comments; or nil + } + + // A ValueSpec node represents a constant or variable declaration + // (ConstSpec or VarSpec production). + // + ValueSpec struct { + Doc *CommentGroup // associated documentation; or nil + Names []*Ident // value names (len(Names) > 0) + Type Expr // value type; or nil + Values []Expr // initial values; or nil + Comment *CommentGroup // line comments; or nil + } + + // A TypeSpec node represents a type declaration (TypeSpec production). + TypeSpec struct { + Doc *CommentGroup // associated documentation; or nil + Name *Ident // type name + Type Expr // *Ident, *ParenExpr, *SelectorExpr, *StarExpr, or any of the *XxxTypes + Comment *CommentGroup // line comments; or nil + } +) + +// Pos and End implementations for spec nodes. +// +func (s *ImportSpec) Pos() token.Pos { + if s.Name != nil { + return s.Name.Pos() + } + return s.Path.Pos() +} +func (s *ValueSpec) Pos() token.Pos { return s.Names[0].Pos() } +func (s *TypeSpec) Pos() token.Pos { return s.Name.Pos() } + +func (s *ImportSpec) End() token.Pos { return s.Path.End() } +func (s *ValueSpec) End() token.Pos { + if n := len(s.Values); n > 0 { + return s.Values[n-1].End() + } + if s.Type != nil { + return s.Type.End() + } + return s.Names[len(s.Names)-1].End() +} +func (s *TypeSpec) End() token.Pos { return s.Type.End() } + +// specNode() ensures that only spec nodes can be +// assigned to a Spec. +// +func (s *ImportSpec) specNode() {} +func (s *ValueSpec) specNode() {} +func (s *TypeSpec) specNode() {} + +// A declaration is represented by one of the following declaration nodes. +// +type ( + // A BadDecl node is a placeholder for declarations containing + // syntax errors for which no correct declaration nodes can be + // created. + // + BadDecl struct { + From, To token.Pos // position range of bad declaration + } + + // A GenDecl node (generic declaration node) represents an import, + // constant, type or variable declaration. A valid Lparen position + // (Lparen.Line > 0) indicates a parenthesized declaration. 
+ // + // Relationship between Tok value and Specs element type: + // + // token.IMPORT *ImportSpec + // token.CONST *ValueSpec + // token.TYPE *TypeSpec + // token.VAR *ValueSpec + // + GenDecl struct { + Doc *CommentGroup // associated documentation; or nil + TokPos token.Pos // position of Tok + Tok token.Token // IMPORT, CONST, TYPE, VAR + Lparen token.Pos // position of '(', if any + Specs []Spec + Rparen token.Pos // position of ')', if any + } + + // A FuncDecl node represents a function declaration. + FuncDecl struct { + Doc *CommentGroup // associated documentation; or nil + Recv *FieldList // receiver (methods); or nil (functions) + Name *Ident // function/method name + Type *FuncType // position of Func keyword, parameters and results + Body *BlockStmt // function body; or nil (forward declaration) + } +) + +// Pos and End implementations for declaration nodes. +// +func (d *BadDecl) Pos() token.Pos { return d.From } +func (d *GenDecl) Pos() token.Pos { return d.TokPos } +func (d *FuncDecl) Pos() token.Pos { return d.Type.Pos() } + +func (d *BadDecl) End() token.Pos { return d.To } +func (d *GenDecl) End() token.Pos { + if d.Rparen.IsValid() { + return d.Rparen + 1 + } + return d.Specs[0].End() +} +func (d *FuncDecl) End() token.Pos { + if d.Body != nil { + return d.Body.End() + } + return d.Type.End() +} + +// declNode() ensures that only declaration nodes can be +// assigned to a DeclNode. +// +func (d *BadDecl) declNode() {} +func (d *GenDecl) declNode() {} +func (d *FuncDecl) declNode() {} + +// ---------------------------------------------------------------------------- +// Files and packages + +// A File node represents a Go source file. +// +// The Comments list contains all comments in the source file in order of +// appearance, including the comments that are pointed to from other nodes +// via Doc and Comment fields. +// +type File struct { + Doc *CommentGroup // associated documentation; or nil + Package token.Pos // position of "package" keyword + Name *Ident // package name + Decls []Decl // top-level declarations; or nil + Scope *Scope // package scope (this file only) + Imports []*ImportSpec // imports in this file + Unresolved []*Ident // unresolved identifiers in this file + Comments []*CommentGroup // list of all comments in the source file +} + +func (f *File) Pos() token.Pos { return f.Package } +func (f *File) End() token.Pos { + if n := len(f.Decls); n > 0 { + return f.Decls[n-1].End() + } + return f.Name.End() +} + +// A Package node represents a set of source files +// collectively building a Go package. +// +type Package struct { + Name string // package name + Scope *Scope // package scope across all files + Imports map[string]*Object // map of package id -> package object + Files map[string]*File // Go source files by filename +} + +func (p *Package) Pos() token.Pos { return token.NoPos } +func (p *Package) End() token.Pos { return token.NoPos } diff --git a/src/pkg/go/ast/filter.go b/src/pkg/go/ast/filter.go new file mode 100644 index 000000000..26733430d --- /dev/null +++ b/src/pkg/go/ast/filter.go @@ -0,0 +1,475 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package ast + +import "go/token" + +// ---------------------------------------------------------------------------- +// Export filtering + +func identListExports(list []*Ident) []*Ident { + j := 0 + for _, x := range list { + if x.IsExported() { + list[j] = x + j++ + } + } + return list[0:j] +} + +// fieldName assumes that x is the type of an anonymous field and +// returns the corresponding field name. If x is not an acceptable +// anonymous field, the result is nil. +// +func fieldName(x Expr) *Ident { + switch t := x.(type) { + case *Ident: + return t + case *SelectorExpr: + if _, ok := t.X.(*Ident); ok { + return t.Sel + } + case *StarExpr: + return fieldName(t.X) + } + return nil +} + +func fieldListExports(fields *FieldList) (removedFields bool) { + if fields == nil { + return + } + list := fields.List + j := 0 + for _, f := range list { + exported := false + if len(f.Names) == 0 { + // anonymous field + // (Note that a non-exported anonymous field + // may still refer to a type with exported + // fields, so this is not absolutely correct. + // However, this cannot be done w/o complete + // type information.) + name := fieldName(f.Type) + exported = name != nil && name.IsExported() + } else { + n := len(f.Names) + f.Names = identListExports(f.Names) + if len(f.Names) < n { + removedFields = true + } + exported = len(f.Names) > 0 + } + if exported { + typeExports(f.Type) + list[j] = f + j++ + } + } + if j < len(list) { + removedFields = true + } + fields.List = list[0:j] + return +} + +func paramListExports(fields *FieldList) { + if fields == nil { + return + } + for _, f := range fields.List { + typeExports(f.Type) + } +} + +func typeExports(typ Expr) { + switch t := typ.(type) { + case *ArrayType: + typeExports(t.Elt) + case *StructType: + if fieldListExports(t.Fields) { + t.Incomplete = true + } + case *FuncType: + paramListExports(t.Params) + paramListExports(t.Results) + case *InterfaceType: + if fieldListExports(t.Methods) { + t.Incomplete = true + } + case *MapType: + typeExports(t.Key) + typeExports(t.Value) + case *ChanType: + typeExports(t.Value) + } +} + +func specExports(spec Spec) bool { + switch s := spec.(type) { + case *ValueSpec: + s.Names = identListExports(s.Names) + if len(s.Names) > 0 { + typeExports(s.Type) + return true + } + case *TypeSpec: + if s.Name.IsExported() { + typeExports(s.Type) + return true + } + } + return false +} + +func specListExports(list []Spec) []Spec { + j := 0 + for _, s := range list { + if specExports(s) { + list[j] = s + j++ + } + } + return list[0:j] +} + +func declExports(decl Decl) bool { + switch d := decl.(type) { + case *GenDecl: + d.Specs = specListExports(d.Specs) + return len(d.Specs) > 0 + case *FuncDecl: + d.Body = nil // strip body + return d.Name.IsExported() + } + return false +} + +// FileExports trims the AST for a Go source file in place such that only +// exported nodes remain: all top-level identifiers which are not exported +// and their associated information (such as type, initial value, or function +// body) are removed. Non-exported fields and methods of exported types are +// stripped, and the function bodies of exported functions are set to nil. +// The File.comments list is not changed. +// +// FileExports returns true if there is an exported declaration; it returns +// false otherwise. 
+// +func FileExports(src *File) bool { + j := 0 + for _, d := range src.Decls { + if declExports(d) { + src.Decls[j] = d + j++ + } + } + src.Decls = src.Decls[0:j] + return j > 0 +} + +// PackageExports trims the AST for a Go package in place such that only +// exported nodes remain. The pkg.Files list is not changed, so that file +// names and top-level package comments don't get lost. +// +// PackageExports returns true if there is an exported declaration; it +// returns false otherwise. +// +func PackageExports(pkg *Package) bool { + hasExports := false + for _, f := range pkg.Files { + if FileExports(f) { + hasExports = true + } + } + return hasExports +} + +// ---------------------------------------------------------------------------- +// General filtering + +type Filter func(string) bool + +func filterIdentList(list []*Ident, f Filter) []*Ident { + j := 0 + for _, x := range list { + if f(x.Name) { + list[j] = x + j++ + } + } + return list[0:j] +} + +func filterFieldList(fields *FieldList, filter Filter) (removedFields bool) { + if fields == nil { + return false + } + list := fields.List + j := 0 + for _, f := range list { + keepField := false + if len(f.Names) == 0 { + // anonymous field + name := fieldName(f.Type) + keepField = name != nil && filter(name.Name) + } else { + n := len(f.Names) + f.Names = filterIdentList(f.Names, filter) + if len(f.Names) < n { + removedFields = true + } + keepField = len(f.Names) > 0 + } + if keepField { + list[j] = f + j++ + } + } + if j < len(list) { + removedFields = true + } + fields.List = list[0:j] + return +} + +func filterSpec(spec Spec, f Filter) bool { + switch s := spec.(type) { + case *ValueSpec: + s.Names = filterIdentList(s.Names, f) + return len(s.Names) > 0 + case *TypeSpec: + if f(s.Name.Name) { + return true + } + switch t := s.Type.(type) { + case *StructType: + if filterFieldList(t.Fields, f) { + t.Incomplete = true + } + return len(t.Fields.List) > 0 + case *InterfaceType: + if filterFieldList(t.Methods, f) { + t.Incomplete = true + } + return len(t.Methods.List) > 0 + } + } + return false +} + +func filterSpecList(list []Spec, f Filter) []Spec { + j := 0 + for _, s := range list { + if filterSpec(s, f) { + list[j] = s + j++ + } + } + return list[0:j] +} + +// FilterDecl trims the AST for a Go declaration in place by removing +// all names (including struct field and interface method names, but +// not from parameter lists) that don't pass through the filter f. +// +// FilterDecl returns true if there are any declared names left after +// filtering; it returns false otherwise. +// +func FilterDecl(decl Decl, f Filter) bool { + switch d := decl.(type) { + case *GenDecl: + d.Specs = filterSpecList(d.Specs, f) + return len(d.Specs) > 0 + case *FuncDecl: + return f(d.Name.Name) + } + return false +} + +// FilterFile trims the AST for a Go file in place by removing all +// names from top-level declarations (including struct field and +// interface method names, but not from parameter lists) that don't +// pass through the filter f. If the declaration is empty afterwards, +// the declaration is removed from the AST. +// The File.comments list is not changed. +// +// FilterFile returns true if there are any top-level declarations +// left after filtering; it returns false otherwise. 
+// +func FilterFile(src *File, f Filter) bool { + j := 0 + for _, d := range src.Decls { + if FilterDecl(d, f) { + src.Decls[j] = d + j++ + } + } + src.Decls = src.Decls[0:j] + return j > 0 +} + +// FilterPackage trims the AST for a Go package in place by removing all +// names from top-level declarations (including struct field and +// interface method names, but not from parameter lists) that don't +// pass through the filter f. If the declaration is empty afterwards, +// the declaration is removed from the AST. +// The pkg.Files list is not changed, so that file names and top-level +// package comments don't get lost. +// +// FilterPackage returns true if there are any top-level declarations +// left after filtering; it returns false otherwise. +// +func FilterPackage(pkg *Package, f Filter) bool { + hasDecls := false + for _, src := range pkg.Files { + if FilterFile(src, f) { + hasDecls = true + } + } + return hasDecls +} + +// ---------------------------------------------------------------------------- +// Merging of package files + +// The MergeMode flags control the behavior of MergePackageFiles. +type MergeMode uint + +const ( + // If set, duplicate function declarations are excluded. + FilterFuncDuplicates MergeMode = 1 << iota + // If set, comments that are not associated with a specific + // AST node (as Doc or Comment) are excluded. + FilterUnassociatedComments +) + +// separator is an empty //-style comment that is interspersed between +// different comment groups when they are concatenated into a single group +// +var separator = &Comment{noPos, "//"} + +// MergePackageFiles creates a file AST by merging the ASTs of the +// files belonging to a package. The mode flags control merging behavior. +// +func MergePackageFiles(pkg *Package, mode MergeMode) *File { + // Count the number of package docs, comments and declarations across + // all package files. + ndocs := 0 + ncomments := 0 + ndecls := 0 + for _, f := range pkg.Files { + if f.Doc != nil { + ndocs += len(f.Doc.List) + 1 // +1 for separator + } + ncomments += len(f.Comments) + ndecls += len(f.Decls) + } + + // Collect package comments from all package files into a single + // CommentGroup - the collected package documentation. The order + // is unspecified. In general there should be only one file with + // a package comment; but it's better to collect extra comments + // than drop them on the floor. + var doc *CommentGroup + var pos token.Pos + if ndocs > 0 { + list := make([]*Comment, ndocs-1) // -1: no separator before first group + i := 0 + for _, f := range pkg.Files { + if f.Doc != nil { + if i > 0 { + // not the first group - add separator + list[i] = separator + i++ + } + for _, c := range f.Doc.List { + list[i] = c + i++ + } + if f.Package > pos { + // Keep the maximum package clause position as + // position for the package clause of the merged + // files. + pos = f.Package + } + } + } + doc = &CommentGroup{list} + } + + // Collect declarations from all package files. + var decls []Decl + if ndecls > 0 { + decls = make([]Decl, ndecls) + funcs := make(map[string]int) // map of global function name -> decls index + i := 0 // current index + n := 0 // number of filtered entries + for _, f := range pkg.Files { + for _, d := range f.Decls { + if mode&FilterFuncDuplicates != 0 { + // A language entity may be declared multiple + // times in different package files; only at + // build time declarations must be unique. + // For now, exclude multiple declarations of + // functions - keep the one with documentation. 
+ // + // TODO(gri): Expand this filtering to other + // entities (const, type, vars) if + // multiple declarations are common. + if f, isFun := d.(*FuncDecl); isFun { + name := f.Name.Name + if j, exists := funcs[name]; exists { + // function declared already + if decls[j] != nil && decls[j].(*FuncDecl).Doc == nil { + // existing declaration has no documentation; + // ignore the existing declaration + decls[j] = nil + } else { + // ignore the new declaration + d = nil + } + n++ // filtered an entry + } else { + funcs[name] = i + } + } + } + decls[i] = d + i++ + } + } + + // Eliminate nil entries from the decls list if entries were + // filtered. We do this using a 2nd pass in order to not disturb + // the original declaration order in the source (otherwise, this + // would also invalidate the monotonically increasing position + // info within a single file). + if n > 0 { + i = 0 + for _, d := range decls { + if d != nil { + decls[i] = d + i++ + } + } + decls = decls[0:i] + } + } + + // Collect comments from all package files. + var comments []*CommentGroup + if mode&FilterUnassociatedComments == 0 { + comments = make([]*CommentGroup, ncomments) + i := 0 + for _, f := range pkg.Files { + i += copy(comments[i:], f.Comments) + } + } + + // TODO(gri) need to compute pkgScope and unresolved identifiers! + // TODO(gri) need to compute imports! + return &File{doc, pos, NewIdent(pkg.Name), decls, nil, nil, nil, comments} +} diff --git a/src/pkg/go/ast/print.go b/src/pkg/go/ast/print.go new file mode 100644 index 000000000..62a30481d --- /dev/null +++ b/src/pkg/go/ast/print.go @@ -0,0 +1,224 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains printing suppport for ASTs. + +package ast + +import ( + "fmt" + "go/token" + "io" + "os" + "reflect" +) + +// A FieldFilter may be provided to Fprint to control the output. +type FieldFilter func(name string, value reflect.Value) bool + +// NotNilFilter returns true for field values that are not nil; +// it returns false otherwise. +func NotNilFilter(_ string, v reflect.Value) bool { + switch v.Kind() { + case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice: + return !v.IsNil() + } + return true +} + +// Fprint prints the (sub-)tree starting at AST node x to w. +// If fset != nil, position information is interpreted relative +// to that file set. Otherwise positions are printed as integer +// values (file set specific offsets). +// +// A non-nil FieldFilter f may be provided to control the output: +// struct fields for which f(fieldname, fieldvalue) is true are +// are printed; all others are filtered from the output. +// +func Fprint(w io.Writer, fset *token.FileSet, x interface{}, f FieldFilter) (n int, err os.Error) { + // setup printer + p := printer{ + output: w, + fset: fset, + filter: f, + ptrmap: make(map[interface{}]int), + last: '\n', // force printing of line number on first line + } + + // install error handler + defer func() { + n = p.written + if e := recover(); e != nil { + err = e.(localError).err // re-panics if it's not a localError + } + }() + + // print x + if x == nil { + p.printf("nil\n") + return + } + p.print(reflect.ValueOf(x)) + p.printf("\n") + + return +} + +// Print prints x to standard output, skipping nil fields. +// Print(fset, x) is the same as Fprint(os.Stdout, fset, x, NotNilFilter). 
+func Print(fset *token.FileSet, x interface{}) (int, os.Error) { + return Fprint(os.Stdout, fset, x, NotNilFilter) +} + +type printer struct { + output io.Writer + fset *token.FileSet + filter FieldFilter + ptrmap map[interface{}]int // *T -> line number + written int // number of bytes written to output + indent int // current indentation level + last byte // the last byte processed by Write + line int // current line number +} + +var indent = []byte(". ") + +func (p *printer) Write(data []byte) (n int, err os.Error) { + var m int + for i, b := range data { + // invariant: data[0:n] has been written + if b == '\n' { + m, err = p.output.Write(data[n : i+1]) + n += m + if err != nil { + return + } + p.line++ + } else if p.last == '\n' { + _, err = fmt.Fprintf(p.output, "%6d ", p.line) + if err != nil { + return + } + for j := p.indent; j > 0; j-- { + _, err = p.output.Write(indent) + if err != nil { + return + } + } + } + p.last = b + } + m, err = p.output.Write(data[n:]) + n += m + return +} + +// localError wraps locally caught os.Errors so we can distinguish +// them from genuine panics which we don't want to return as errors. +type localError struct { + err os.Error +} + +// printf is a convenience wrapper that takes care of print errors. +func (p *printer) printf(format string, args ...interface{}) { + n, err := fmt.Fprintf(p, format, args...) + p.written += n + if err != nil { + panic(localError{err}) + } +} + +// Implementation note: Print is written for AST nodes but could be +// used to print arbitrary data structures; such a version should +// probably be in a different package. +// +// Note: This code detects (some) cycles created via pointers but +// not cycles that are created via slices or maps containing the +// same slice or map. Code for general data structures probably +// should catch those as well. 
+ +func (p *printer) print(x reflect.Value) { + if !NotNilFilter("", x) { + p.printf("nil") + return + } + + switch x.Kind() { + case reflect.Interface: + p.print(x.Elem()) + + case reflect.Map: + p.printf("%s (len = %d) {\n", x.Type().String(), x.Len()) + p.indent++ + for _, key := range x.MapKeys() { + p.print(key) + p.printf(": ") + p.print(x.MapIndex(key)) + p.printf("\n") + } + p.indent-- + p.printf("}") + + case reflect.Ptr: + p.printf("*") + // type-checked ASTs may contain cycles - use ptrmap + // to keep track of objects that have been printed + // already and print the respective line number instead + ptr := x.Interface() + if line, exists := p.ptrmap[ptr]; exists { + p.printf("(obj @ %d)", line) + } else { + p.ptrmap[ptr] = p.line + p.print(x.Elem()) + } + + case reflect.Slice: + if s, ok := x.Interface().([]byte); ok { + p.printf("%#q", s) + return + } + p.printf("%s (len = %d) {\n", x.Type().String(), x.Len()) + p.indent++ + for i, n := 0, x.Len(); i < n; i++ { + p.printf("%d: ", i) + p.print(x.Index(i)) + p.printf("\n") + } + p.indent-- + p.printf("}") + + case reflect.Struct: + p.printf("%s {\n", x.Type().String()) + p.indent++ + t := x.Type() + for i, n := 0, t.NumField(); i < n; i++ { + name := t.Field(i).Name + value := x.Field(i) + if p.filter == nil || p.filter(name, value) { + p.printf("%s: ", name) + p.print(value) + p.printf("\n") + } + } + p.indent-- + p.printf("}") + + default: + v := x.Interface() + switch v := v.(type) { + case string: + // print strings in quotes + p.printf("%q", v) + return + case token.Pos: + // position values can be printed nicely if we have a file set + if p.fset != nil { + p.printf("%s", p.fset.Position(v)) + return + } + } + // default + p.printf("%v", v) + } +} diff --git a/src/pkg/go/ast/print_test.go b/src/pkg/go/ast/print_test.go new file mode 100644 index 000000000..f4e8f7a78 --- /dev/null +++ b/src/pkg/go/ast/print_test.go @@ -0,0 +1,77 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ast + +import ( + "bytes" + "strings" + "testing" +) + +var tests = []struct { + x interface{} // x is printed as s + s string +}{ + // basic types + {nil, "0 nil"}, + {true, "0 true"}, + {42, "0 42"}, + {3.14, "0 3.14"}, + {1 + 2.718i, "0 (1+2.718i)"}, + {"foobar", "0 \"foobar\""}, + + // maps + {map[string]int{"a": 1, "b": 2}, + `0 map[string] int (len = 2) { + 1 . "a": 1 + 2 . "b": 2 + 3 }`}, + + // pointers + {new(int), "0 *0"}, + + // slices + {[]int{1, 2, 3}, + `0 []int (len = 3) { + 1 . 0: 1 + 2 . 1: 2 + 3 . 2: 3 + 4 }`}, + + // structs + {struct{ x, y int }{42, 991}, + `0 struct { x int; y int } { + 1 . x: 42 + 2 . y: 991 + 3 }`}, +} + +// Split s into lines, trim whitespace from all lines, and return +// the concatenated non-empty lines. 
+func trim(s string) string { + lines := strings.Split(s, "\n") + i := 0 + for _, line := range lines { + line = strings.TrimSpace(line) + if line != "" { + lines[i] = line + i++ + } + } + return strings.Join(lines[0:i], "\n") +} + +func TestPrint(t *testing.T) { + var buf bytes.Buffer + for _, test := range tests { + buf.Reset() + if _, err := Fprint(&buf, nil, test.x, nil); err != nil { + t.Errorf("Fprint failed: %s", err) + } + if s, ts := trim(buf.String()), trim(test.s); s != ts { + t.Errorf("got:\n%s\nexpected:\n%s\n", s, ts) + } + } +} diff --git a/src/pkg/go/ast/resolve.go b/src/pkg/go/ast/resolve.go new file mode 100644 index 000000000..3927a799e --- /dev/null +++ b/src/pkg/go/ast/resolve.go @@ -0,0 +1,174 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements NewPackage. + +package ast + +import ( + "fmt" + "go/scanner" + "go/token" + "os" + "strconv" +) + +type pkgBuilder struct { + scanner.ErrorVector + fset *token.FileSet +} + +func (p *pkgBuilder) error(pos token.Pos, msg string) { + p.Error(p.fset.Position(pos), msg) +} + +func (p *pkgBuilder) errorf(pos token.Pos, format string, args ...interface{}) { + p.error(pos, fmt.Sprintf(format, args...)) +} + +func (p *pkgBuilder) declare(scope, altScope *Scope, obj *Object) { + alt := scope.Insert(obj) + if alt == nil && altScope != nil { + // see if there is a conflicting declaration in altScope + alt = altScope.Lookup(obj.Name) + } + if alt != nil { + prevDecl := "" + if pos := alt.Pos(); pos.IsValid() { + prevDecl = fmt.Sprintf("\n\tprevious declaration at %s", p.fset.Position(pos)) + } + p.error(obj.Pos(), fmt.Sprintf("%s redeclared in this block%s", obj.Name, prevDecl)) + } +} + +func resolve(scope *Scope, ident *Ident) bool { + for ; scope != nil; scope = scope.Outer { + if obj := scope.Lookup(ident.Name); obj != nil { + ident.Obj = obj + return true + } + } + return false +} + +// An Importer resolves import paths to package Objects. +// The imports map records the packages already imported, +// indexed by package id (canonical import path). +// An Importer must determine the canonical import path and +// check the map to see if it is already present in the imports map. +// If so, the Importer can return the map entry. Otherwise, the +// Importer should load the package data for the given path into +// a new *Object (pkg), record pkg in the imports map, and then +// return pkg. +type Importer func(imports map[string]*Object, path string) (pkg *Object, err os.Error) + +// NewPackage creates a new Package node from a set of File nodes. It resolves +// unresolved identifiers across files and updates each file's Unresolved list +// accordingly. If a non-nil importer and universe scope are provided, they are +// used to resolve identifiers not declared in any of the package files. Any +// remaining unresolved identifiers are reported as undeclared. If the files +// belong to different packages, one package name is selected and files with +// different package names are reported and then ignored. +// The result is a package node and a scanner.ErrorList if there were errors. 
+// +func NewPackage(fset *token.FileSet, files map[string]*File, importer Importer, universe *Scope) (*Package, os.Error) { + var p pkgBuilder + p.fset = fset + + // complete package scope + pkgName := "" + pkgScope := NewScope(universe) + for _, file := range files { + // package names must match + switch name := file.Name.Name; { + case pkgName == "": + pkgName = name + case name != pkgName: + p.errorf(file.Package, "package %s; expected %s", name, pkgName) + continue // ignore this file + } + + // collect top-level file objects in package scope + for _, obj := range file.Scope.Objects { + p.declare(pkgScope, nil, obj) + } + } + + // package global mapping of imported package ids to package objects + imports := make(map[string]*Object) + + // complete file scopes with imports and resolve identifiers + for _, file := range files { + // ignore file if it belongs to a different package + // (error has already been reported) + if file.Name.Name != pkgName { + continue + } + + // build file scope by processing all imports + importErrors := false + fileScope := NewScope(pkgScope) + for _, spec := range file.Imports { + if importer == nil { + importErrors = true + continue + } + path, _ := strconv.Unquote(string(spec.Path.Value)) + pkg, err := importer(imports, path) + if err != nil { + p.errorf(spec.Path.Pos(), "could not import %s (%s)", path, err) + importErrors = true + continue + } + // TODO(gri) If a local package name != "." is provided, + // global identifier resolution could proceed even if the + // import failed. Consider adjusting the logic here a bit. + + // local name overrides imported package name + name := pkg.Name + if spec.Name != nil { + name = spec.Name.Name + } + + // add import to file scope + if name == "." { + // merge imported scope with file scope + for _, obj := range pkg.Data.(*Scope).Objects { + p.declare(fileScope, pkgScope, obj) + } + } else { + // declare imported package object in file scope + // (do not re-use pkg in the file scope but create + // a new object instead; the Decl field is different + // for different files) + obj := NewObj(Pkg, name) + obj.Decl = spec + obj.Data = pkg.Data + p.declare(fileScope, pkgScope, obj) + } + } + + // resolve identifiers + if importErrors { + // don't use the universe scope without correct imports + // (objects in the universe may be shadowed by imports; + // with missing imports, identifiers might get resolved + // incorrectly to universe objects) + pkgScope.Outer = nil + } + i := 0 + for _, ident := range file.Unresolved { + if !resolve(fileScope, ident) { + p.errorf(ident.Pos(), "undeclared name: %s", ident.Name) + file.Unresolved[i] = ident + i++ + } + + } + file.Unresolved = file.Unresolved[0:i] + pkgScope.Outer = universe // reset universe scope + } + + return &Package{pkgName, pkgScope, imports, files}, p.GetError(scanner.Sorted) +} diff --git a/src/pkg/go/ast/scope.go b/src/pkg/go/ast/scope.go new file mode 100644 index 000000000..92e366980 --- /dev/null +++ b/src/pkg/go/ast/scope.go @@ -0,0 +1,156 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements scopes and the objects they contain. + +package ast + +import ( + "bytes" + "fmt" + "go/token" +) + +// A Scope maintains the set of named language entities declared +// in the scope and a link to the immediately surrounding (outer) +// scope. 
+// +type Scope struct { + Outer *Scope + Objects map[string]*Object +} + +// NewScope creates a new scope nested in the outer scope. +func NewScope(outer *Scope) *Scope { + const n = 4 // initial scope capacity + return &Scope{outer, make(map[string]*Object, n)} +} + +// Lookup returns the object with the given name if it is +// found in scope s, otherwise it returns nil. Outer scopes +// are ignored. +// +func (s *Scope) Lookup(name string) *Object { + return s.Objects[name] +} + +// Insert attempts to insert a named object obj into the scope s. +// If the scope already contains an object alt with the same name, +// Insert leaves the scope unchanged and returns alt. Otherwise +// it inserts obj and returns nil." +// +func (s *Scope) Insert(obj *Object) (alt *Object) { + if alt = s.Objects[obj.Name]; alt == nil { + s.Objects[obj.Name] = obj + } + return +} + +// Debugging support +func (s *Scope) String() string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "scope %p {", s) + if s != nil && len(s.Objects) > 0 { + fmt.Fprintln(&buf) + for _, obj := range s.Objects { + fmt.Fprintf(&buf, "\t%s %s\n", obj.Kind, obj.Name) + } + } + fmt.Fprintf(&buf, "}\n") + return buf.String() +} + +// ---------------------------------------------------------------------------- +// Objects + +// TODO(gri) Consider replacing the Object struct with an interface +// and a corresponding set of object implementations. + +// An Object describes a named language entity such as a package, +// constant, type, variable, function (incl. methods), or label. +// +// The Data fields contains object-specific data: +// +// Kind Data type Data value +// Pkg *Scope package scope +// Con int iota for the respective declaration +// Con != nil constant value +// +type Object struct { + Kind ObjKind + Name string // declared name + Decl interface{} // corresponding Field, XxxSpec, FuncDecl, or LabeledStmt; or nil + Data interface{} // object-specific data; or nil + Type interface{} // place holder for type information; may be nil +} + +// NewObj creates a new object of a given kind and name. +func NewObj(kind ObjKind, name string) *Object { + return &Object{Kind: kind, Name: name} +} + +// Pos computes the source position of the declaration of an object name. +// The result may be an invalid position if it cannot be computed +// (obj.Decl may be nil or not correct). +func (obj *Object) Pos() token.Pos { + name := obj.Name + switch d := obj.Decl.(type) { + case *Field: + for _, n := range d.Names { + if n.Name == name { + return n.Pos() + } + } + case *ImportSpec: + if d.Name != nil && d.Name.Name == name { + return d.Name.Pos() + } + return d.Path.Pos() + case *ValueSpec: + for _, n := range d.Names { + if n.Name == name { + return n.Pos() + } + } + case *TypeSpec: + if d.Name.Name == name { + return d.Name.Pos() + } + case *FuncDecl: + if d.Name.Name == name { + return d.Name.Pos() + } + case *LabeledStmt: + if d.Label.Name == name { + return d.Label.Pos() + } + } + return token.NoPos +} + +// ObKind describes what an object represents. +type ObjKind int + +// The list of possible Object kinds. 
+const ( + Bad ObjKind = iota // for error handling + Pkg // package + Con // constant + Typ // type + Var // variable + Fun // function or method + Lbl // label +) + +var objKindStrings = [...]string{ + Bad: "bad", + Pkg: "package", + Con: "const", + Typ: "type", + Var: "var", + Fun: "func", + Lbl: "label", +} + +func (kind ObjKind) String() string { return objKindStrings[kind] } diff --git a/src/pkg/go/ast/walk.go b/src/pkg/go/ast/walk.go new file mode 100644 index 000000000..181cfd149 --- /dev/null +++ b/src/pkg/go/ast/walk.go @@ -0,0 +1,382 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ast + +import "fmt" + +// A Visitor's Visit method is invoked for each node encountered by Walk. +// If the result visitor w is not nil, Walk visits each of the children +// of node with the visitor w, followed by a call of w.Visit(nil). +type Visitor interface { + Visit(node Node) (w Visitor) +} + +// Helper functions for common node lists. They may be empty. + +func walkIdentList(v Visitor, list []*Ident) { + for _, x := range list { + Walk(v, x) + } +} + +func walkExprList(v Visitor, list []Expr) { + for _, x := range list { + Walk(v, x) + } +} + +func walkStmtList(v Visitor, list []Stmt) { + for _, x := range list { + Walk(v, x) + } +} + +func walkDeclList(v Visitor, list []Decl) { + for _, x := range list { + Walk(v, x) + } +} + +// TODO(gri): Investigate if providing a closure to Walk leads to +// simpler use (and may help eliminate Inspect in turn). + +// Walk traverses an AST in depth-first order: It starts by calling +// v.Visit(node); node must not be nil. If the visitor w returned by +// v.Visit(node) is not nil, Walk is invoked recursively with visitor +// w for each of the non-nil children of node, followed by a call of +// w.Visit(nil). 
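Before the implementation, a minimal Visitor sketch: returning a non-nil Visitor from Visit is what makes Walk descend into a node's children, and a final Visit(nil) call closes out each node that was descended into:

    package main

    import (
        "fmt"
        "go/ast"
        "go/parser"
        "go/token"
    )

    // counter counts the nodes Walk reaches.
    type counter int

    func (c *counter) Visit(n ast.Node) ast.Visitor {
        if n == nil {
            return nil // end-of-children signal for a node we descended into
        }
        (*c)++
        return c // non-nil: keep descending
    }

    func main() {
        fset := token.NewFileSet()
        f, err := parser.ParseFile(fset, "x.go", "package p\nfunc F() { println(1) }", 0)
        if err != nil {
            fmt.Println(err)
            return
        }
        var c counter
        ast.Walk(&c, f)
        fmt.Println("nodes visited:", c)
    }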
+// +func Walk(v Visitor, node Node) { + if v = v.Visit(node); v == nil { + return + } + + // walk children + // (the order of the cases matches the order + // of the corresponding node types in ast.go) + switch n := node.(type) { + // Comments and fields + case *Comment: + // nothing to do + + case *CommentGroup: + for _, c := range n.List { + Walk(v, c) + } + + case *Field: + if n.Doc != nil { + Walk(v, n.Doc) + } + walkIdentList(v, n.Names) + Walk(v, n.Type) + if n.Tag != nil { + Walk(v, n.Tag) + } + if n.Comment != nil { + Walk(v, n.Comment) + } + + case *FieldList: + for _, f := range n.List { + Walk(v, f) + } + + // Expressions + case *BadExpr, *Ident, *BasicLit: + // nothing to do + + case *Ellipsis: + if n.Elt != nil { + Walk(v, n.Elt) + } + + case *FuncLit: + Walk(v, n.Type) + Walk(v, n.Body) + + case *CompositeLit: + if n.Type != nil { + Walk(v, n.Type) + } + walkExprList(v, n.Elts) + + case *ParenExpr: + Walk(v, n.X) + + case *SelectorExpr: + Walk(v, n.X) + Walk(v, n.Sel) + + case *IndexExpr: + Walk(v, n.X) + Walk(v, n.Index) + + case *SliceExpr: + Walk(v, n.X) + if n.Low != nil { + Walk(v, n.Low) + } + if n.High != nil { + Walk(v, n.High) + } + + case *TypeAssertExpr: + Walk(v, n.X) + if n.Type != nil { + Walk(v, n.Type) + } + + case *CallExpr: + Walk(v, n.Fun) + walkExprList(v, n.Args) + + case *StarExpr: + Walk(v, n.X) + + case *UnaryExpr: + Walk(v, n.X) + + case *BinaryExpr: + Walk(v, n.X) + Walk(v, n.Y) + + case *KeyValueExpr: + Walk(v, n.Key) + Walk(v, n.Value) + + // Types + case *ArrayType: + if n.Len != nil { + Walk(v, n.Len) + } + Walk(v, n.Elt) + + case *StructType: + Walk(v, n.Fields) + + case *FuncType: + Walk(v, n.Params) + if n.Results != nil { + Walk(v, n.Results) + } + + case *InterfaceType: + Walk(v, n.Methods) + + case *MapType: + Walk(v, n.Key) + Walk(v, n.Value) + + case *ChanType: + Walk(v, n.Value) + + // Statements + case *BadStmt: + // nothing to do + + case *DeclStmt: + Walk(v, n.Decl) + + case *EmptyStmt: + // nothing to do + + case *LabeledStmt: + Walk(v, n.Label) + Walk(v, n.Stmt) + + case *ExprStmt: + Walk(v, n.X) + + case *SendStmt: + Walk(v, n.Chan) + Walk(v, n.Value) + + case *IncDecStmt: + Walk(v, n.X) + + case *AssignStmt: + walkExprList(v, n.Lhs) + walkExprList(v, n.Rhs) + + case *GoStmt: + Walk(v, n.Call) + + case *DeferStmt: + Walk(v, n.Call) + + case *ReturnStmt: + walkExprList(v, n.Results) + + case *BranchStmt: + if n.Label != nil { + Walk(v, n.Label) + } + + case *BlockStmt: + walkStmtList(v, n.List) + + case *IfStmt: + if n.Init != nil { + Walk(v, n.Init) + } + Walk(v, n.Cond) + Walk(v, n.Body) + if n.Else != nil { + Walk(v, n.Else) + } + + case *CaseClause: + walkExprList(v, n.List) + walkStmtList(v, n.Body) + + case *SwitchStmt: + if n.Init != nil { + Walk(v, n.Init) + } + if n.Tag != nil { + Walk(v, n.Tag) + } + Walk(v, n.Body) + + case *TypeSwitchStmt: + if n.Init != nil { + Walk(v, n.Init) + } + Walk(v, n.Assign) + Walk(v, n.Body) + + case *CommClause: + if n.Comm != nil { + Walk(v, n.Comm) + } + walkStmtList(v, n.Body) + + case *SelectStmt: + Walk(v, n.Body) + + case *ForStmt: + if n.Init != nil { + Walk(v, n.Init) + } + if n.Cond != nil { + Walk(v, n.Cond) + } + if n.Post != nil { + Walk(v, n.Post) + } + Walk(v, n.Body) + + case *RangeStmt: + Walk(v, n.Key) + if n.Value != nil { + Walk(v, n.Value) + } + Walk(v, n.X) + Walk(v, n.Body) + + // Declarations + case *ImportSpec: + if n.Doc != nil { + Walk(v, n.Doc) + } + if n.Name != nil { + Walk(v, n.Name) + } + Walk(v, n.Path) + if n.Comment != nil { + Walk(v, n.Comment) + } + + case 
*ValueSpec: + if n.Doc != nil { + Walk(v, n.Doc) + } + walkIdentList(v, n.Names) + if n.Type != nil { + Walk(v, n.Type) + } + walkExprList(v, n.Values) + if n.Comment != nil { + Walk(v, n.Comment) + } + + case *TypeSpec: + if n.Doc != nil { + Walk(v, n.Doc) + } + Walk(v, n.Name) + Walk(v, n.Type) + if n.Comment != nil { + Walk(v, n.Comment) + } + + case *BadDecl: + // nothing to do + + case *GenDecl: + if n.Doc != nil { + Walk(v, n.Doc) + } + for _, s := range n.Specs { + Walk(v, s) + } + + case *FuncDecl: + if n.Doc != nil { + Walk(v, n.Doc) + } + if n.Recv != nil { + Walk(v, n.Recv) + } + Walk(v, n.Name) + Walk(v, n.Type) + if n.Body != nil { + Walk(v, n.Body) + } + + // Files and packages + case *File: + if n.Doc != nil { + Walk(v, n.Doc) + } + Walk(v, n.Name) + walkDeclList(v, n.Decls) + for _, g := range n.Comments { + Walk(v, g) + } + // don't walk n.Comments - they have been + // visited already through the individual + // nodes + + case *Package: + for _, f := range n.Files { + Walk(v, f) + } + + default: + fmt.Printf("ast.Walk: unexpected node type %T", n) + panic("ast.Walk") + } + + v.Visit(nil) +} + +type inspector func(Node) bool + +func (f inspector) Visit(node Node) Visitor { + if f(node) { + return f + } + return nil +} + +// Inspect traverses an AST in depth-first order: It starts by calling +// f(node); node must not be nil. If f returns true, Inspect invokes f +// for all the non-nil children of node, recursively. +// +func Inspect(node Node, f func(Node) bool) { + Walk(inspector(f), node) +} diff --git a/src/pkg/go/build/Makefile b/src/pkg/go/build/Makefile new file mode 100644 index 000000000..349e00e80 --- /dev/null +++ b/src/pkg/go/build/Makefile @@ -0,0 +1,22 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/build +GOFILES=\ + build.go\ + dir.go\ + path.go\ + syslist.go\ + +CLEANFILES+=syslist.go pkgtest/_obj cmdtest/_obj cgotest/_obj + +include ../../../Make.pkg + +syslist.go: ../../../Make.inc Makefile + echo '// Generated automatically by make.' >$@ + echo 'package build' >>$@ + echo 'const goosList = "$(GOOS_LIST)"' >>$@ + echo 'const goarchList = "$(GOARCH_LIST)"' >>$@ diff --git a/src/pkg/go/build/build.go b/src/pkg/go/build/build.go new file mode 100644 index 000000000..97f92bfb6 --- /dev/null +++ b/src/pkg/go/build/build.go @@ -0,0 +1,444 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package build provides tools for building Go packages. +package build + +import ( + "bytes" + "exec" + "fmt" + "os" + "path/filepath" + "regexp" + "runtime" + "strings" +) + +// Build produces a build Script for the given package. +func Build(tree *Tree, pkg string, info *DirInfo) (*Script, os.Error) { + s := &Script{} + b := &build{ + script: s, + path: filepath.Join(tree.SrcDir(), pkg), + } + b.obj = b.abs("_obj") + string(filepath.Separator) + + b.goarch = runtime.GOARCH + if g := os.Getenv("GOARCH"); g != "" { + b.goarch = g + } + var err os.Error + b.arch, err = ArchChar(b.goarch) + if err != nil { + return nil, err + } + + // add import object files to list of Inputs + for _, pkg := range info.Imports { + t, p, err := FindTree(pkg) + if err != nil && err != ErrNotFound { + // FindTree should always be able to suggest an import + // path and tree. 
The path must be malformed + // (for example, an absolute or relative path). + return nil, os.NewError("build: invalid import: " + pkg) + } + s.addInput(filepath.Join(t.PkgDir(), p+".a")) + } + + // .go files to be built with gc + gofiles := b.abss(info.GoFiles...) + s.addInput(gofiles...) + + var ofiles []string // object files to be linked or packed + + // make build directory + b.mkdir(b.obj) + s.addIntermediate(b.obj) + + // cgo + if len(info.CgoFiles) > 0 { + cgoFiles := b.abss(info.CgoFiles...) + s.addInput(cgoFiles...) + cgoCFiles := b.abss(info.CFiles...) + s.addInput(cgoCFiles...) + outGo, outObj := b.cgo(cgoFiles, cgoCFiles) + gofiles = append(gofiles, outGo...) + ofiles = append(ofiles, outObj...) + s.addIntermediate(outGo...) + s.addIntermediate(outObj...) + } + + // compile + if len(gofiles) > 0 { + ofile := b.obj + "_go_." + b.arch + b.gc(ofile, gofiles...) + ofiles = append(ofiles, ofile) + s.addIntermediate(ofile) + } + + // assemble + for _, sfile := range info.SFiles { + ofile := b.obj + sfile[:len(sfile)-1] + b.arch + sfile = b.abs(sfile) + s.addInput(sfile) + b.asm(ofile, sfile) + ofiles = append(ofiles, ofile) + s.addIntermediate(ofile) + } + + if len(ofiles) == 0 { + return nil, os.NewError("make: no object files to build") + } + + // choose target file + var targ string + if info.IsCommand() { + // use the last part of the import path as binary name + _, bin := filepath.Split(pkg) + if runtime.GOOS == "windows" { + bin += ".exe" + } + targ = filepath.Join(tree.BinDir(), bin) + } else { + targ = filepath.Join(tree.PkgDir(), pkg+".a") + } + + // make target directory + targDir, _ := filepath.Split(targ) + b.mkdir(targDir) + + // link binary or pack object + if info.IsCommand() { + b.ld(targ, ofiles...) + } else { + b.gopack(targ, ofiles...) + } + s.Output = append(s.Output, targ) + + return b.script, nil +} + +// A Script describes the build process for a Go package. +// The Input, Intermediate, and Output fields are lists of absolute paths. +type Script struct { + Cmd []*Cmd + Input []string + Intermediate []string + Output []string +} + +func (s *Script) addInput(file ...string) { + s.Input = append(s.Input, file...) +} + +func (s *Script) addIntermediate(file ...string) { + s.Intermediate = append(s.Intermediate, file...) +} + +// Run runs the Script's Cmds in order. +func (s *Script) Run() os.Error { + for _, c := range s.Cmd { + if err := c.Run(); err != nil { + return err + } + } + return nil +} + +// Stale returns true if the build's inputs are newer than its outputs. +func (s *Script) Stale() bool { + var latest int64 + // get latest mtime of outputs + for _, file := range s.Output { + fi, err := os.Stat(file) + if err != nil { + // any error reading output files means stale + return true + } + if m := fi.Mtime_ns; m > latest { + latest = m + } + } + for _, file := range s.Input { + fi, err := os.Stat(file) + if err != nil || fi.Mtime_ns > latest { + // any error reading input files means stale + // (attempt to rebuild to figure out why) + return true + } + } + return false +} + +// Clean removes the Script's Intermediate files. +// It tries to remove every file and returns the first error it encounters. +func (s *Script) Clean() (err os.Error) { + // Reverse order so that directories get removed after the files they contain. + for i := len(s.Intermediate) - 1; i >= 0; i-- { + if e := os.Remove(s.Intermediate[i]); err == nil { + err = e + } + } + return +} + +// Nuke removes the Script's Intermediate and Output files. 
+// It tries to remove every file and returns the first error it encounters. +func (s *Script) Nuke() (err os.Error) { + // Reverse order so that directories get removed after the files they contain. + for i := len(s.Output) - 1; i >= 0; i-- { + if e := os.Remove(s.Output[i]); err == nil { + err = e + } + } + if e := s.Clean(); err == nil { + err = e + } + return +} + +// A Cmd describes an individual build command. +type Cmd struct { + Args []string // command-line + Stdout string // write standard output to this file, "" is passthrough + Dir string // working directory + Env []string // environment + Input []string // file paths (dependencies) + Output []string // file paths +} + +func (c *Cmd) String() string { + return strings.Join(c.Args, " ") +} + +// Run executes the Cmd. +func (c *Cmd) Run() os.Error { + if c.Args[0] == "mkdir" { + for _, p := range c.Output { + if err := os.MkdirAll(p, 0777); err != nil { + return fmt.Errorf("command %q: %v", c, err) + } + } + return nil + } + out := new(bytes.Buffer) + cmd := exec.Command(c.Args[0], c.Args[1:]...) + cmd.Dir = c.Dir + cmd.Env = c.Env + cmd.Stdout = out + cmd.Stderr = out + if c.Stdout != "" { + f, err := os.Create(c.Stdout) + if err != nil { + return err + } + defer f.Close() + cmd.Stdout = f + } + if err := cmd.Run(); err != nil { + return fmt.Errorf("command %q: %v\n%v", c, err, out) + } + return nil +} + +// ArchChar returns the architecture character for the given goarch. +// For example, ArchChar("amd64") returns "6". +func ArchChar(goarch string) (string, os.Error) { + switch goarch { + case "386": + return "8", nil + case "amd64": + return "6", nil + case "arm": + return "5", nil + } + return "", os.NewError("unsupported GOARCH " + goarch) +} + +type build struct { + script *Script + path string + obj string + goarch string + arch string +} + +func (b *build) abs(file string) string { + if filepath.IsAbs(file) { + return file + } + return filepath.Join(b.path, file) +} + +func (b *build) abss(file ...string) []string { + s := make([]string, len(file)) + for i, f := range file { + s[i] = b.abs(f) + } + return s +} + +func (b *build) add(c Cmd) { + b.script.Cmd = append(b.script.Cmd, &c) +} + +func (b *build) mkdir(name string) { + b.add(Cmd{ + Args: []string{"mkdir", "-p", name}, + Output: []string{name}, + }) +} + +func (b *build) gc(ofile string, gofiles ...string) { + gc := b.arch + "g" + args := append([]string{gc, "-o", ofile}, gcImportArgs...) + args = append(args, gofiles...) + b.add(Cmd{ + Args: args, + Input: gofiles, + Output: []string{ofile}, + }) +} + +func (b *build) asm(ofile string, sfile string) { + asm := b.arch + "a" + b.add(Cmd{ + Args: []string{asm, "-o", ofile, sfile}, + Input: []string{sfile}, + Output: []string{ofile}, + }) +} + +func (b *build) ld(targ string, ofiles ...string) { + ld := b.arch + "l" + args := append([]string{ld, "-o", targ}, ldImportArgs...) + args = append(args, ofiles...) 
+ b.add(Cmd{ + Args: args, + Input: ofiles, + Output: []string{targ}, + }) +} + +func (b *build) gopack(targ string, ofiles ...string) { + b.add(Cmd{ + Args: append([]string{"gopack", "grc", targ}, ofiles...), + Input: ofiles, + Output: []string{targ}, + }) +} + +func (b *build) cc(ofile string, cfiles ...string) { + cc := b.arch + "c" + dir := fmt.Sprintf("%s_%s", runtime.GOOS, runtime.GOARCH) + inc := filepath.Join(runtime.GOROOT(), "pkg", dir) + args := []string{cc, "-FVw", "-I", inc, "-o", ofile} + b.add(Cmd{ + Args: append(args, cfiles...), + Input: cfiles, + Output: []string{ofile}, + }) +} + +func (b *build) gccCompile(ofile, cfile string) { + b.add(Cmd{ + Args: b.gccArgs("-o", ofile, "-c", cfile), + Input: []string{cfile}, + Output: []string{ofile}, + }) +} + +func (b *build) gccLink(ofile string, ofiles ...string) { + b.add(Cmd{ + Args: append(b.gccArgs("-o", ofile), ofiles...), + Input: ofiles, + Output: []string{ofile}, + }) +} + +func (b *build) gccArgs(args ...string) []string { + // TODO(adg): HOST_CC + a := []string{"gcc", "-I", b.path, "-g", "-fPIC", "-O2"} + switch b.arch { + case "8": + a = append(a, "-m32") + case "6": + a = append(a, "-m64") + } + return append(a, args...) +} + +var cgoRe = regexp.MustCompile(`[/\\:]`) + +func (b *build) cgo(cgofiles, cgocfiles []string) (outGo, outObj []string) { + // cgo + // TODO(adg): CGOPKGPATH + // TODO(adg): CGO_FLAGS + gofiles := []string{b.obj + "_cgo_gotypes.go"} + cfiles := []string{b.obj + "_cgo_main.c", b.obj + "_cgo_export.c"} + for _, fn := range cgofiles { + f := b.obj + cgoRe.ReplaceAllString(fn[:len(fn)-2], "_") + gofiles = append(gofiles, f+"cgo1.go") + cfiles = append(cfiles, f+"cgo2.c") + } + defunC := b.obj + "_cgo_defun.c" + output := append([]string{defunC}, cfiles...) + output = append(output, gofiles...) + b.add(Cmd{ + Args: append([]string{"cgo", "--"}, cgofiles...), + Dir: b.path, + Env: append(os.Environ(), "GOARCH="+b.goarch), + Input: cgofiles, + Output: output, + }) + outGo = append(outGo, gofiles...) + exportH := filepath.Join(b.path, "_cgo_export.h") + b.script.addIntermediate(defunC, exportH, b.obj+"_cgo_flags") + b.script.addIntermediate(cfiles...) + + // cc _cgo_defun.c + defunObj := b.obj + "_cgo_defun." + b.arch + b.cc(defunObj, defunC) + outObj = append(outObj, defunObj) + + // gcc + linkobj := make([]string, 0, len(cfiles)) + for _, cfile := range cfiles { + ofile := cfile[:len(cfile)-1] + "o" + b.gccCompile(ofile, cfile) + linkobj = append(linkobj, ofile) + if !strings.HasSuffix(ofile, "_cgo_main.o") { + outObj = append(outObj, ofile) + } else { + b.script.addIntermediate(ofile) + } + } + for _, cfile := range cgocfiles { + ofile := b.obj + cgoRe.ReplaceAllString(cfile[:len(cfile)-1], "_") + "o" + b.gccCompile(ofile, cfile) + linkobj = append(linkobj, ofile) + outObj = append(outObj, ofile) + } + dynObj := b.obj + "_cgo_.o" + b.gccLink(dynObj, linkobj...) + b.script.addIntermediate(dynObj) + + // cgo -dynimport + importC := b.obj + "_cgo_import.c" + b.add(Cmd{ + Args: []string{"cgo", "-dynimport", dynObj}, + Stdout: importC, + Input: []string{dynObj}, + Output: []string{importC}, + }) + b.script.addIntermediate(importC) + + // cc _cgo_import.ARCH + importObj := b.obj + "_cgo_import." + b.arch + b.cc(importObj, importC) + outObj = append(outObj, importObj) + + return +} diff --git a/src/pkg/go/build/build_test.go b/src/pkg/go/build/build_test.go new file mode 100644 index 000000000..e59d87672 --- /dev/null +++ b/src/pkg/go/build/build_test.go @@ -0,0 +1,61 @@ +// Copyright 2011 The Go Authors. 
All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package build
+
+import (
+    "exec"
+    "path/filepath"
+    "testing"
+)
+
+var buildPkgs = []string{
+    "go/build/pkgtest",
+    "go/build/cmdtest",
+    "go/build/cgotest",
+}
+
+const cmdtestOutput = "3"
+
+func TestBuild(t *testing.T) {
+    for _, pkg := range buildPkgs {
+        tree := Path[0] // Goroot
+        dir := filepath.Join(tree.SrcDir(), pkg)
+
+        info, err := ScanDir(dir, true)
+        if err != nil {
+            t.Error("ScanDir:", err)
+            continue
+        }
+
+        s, err := Build(tree, pkg, info)
+        if err != nil {
+            t.Error("Build:", err)
+            continue
+        }
+
+        if err := s.Run(); err != nil {
+            t.Error("Run:", err)
+            continue
+        }
+
+        if pkg == "go/build/cmdtest" {
+            bin := s.Output[0]
+            b, err := exec.Command(bin).CombinedOutput()
+            if err != nil {
+                t.Errorf("exec: %s: %v", bin, err)
+                continue
+            }
+            if string(b) != cmdtestOutput {
+                t.Errorf("cmdtest output: %s want: %s", b, cmdtestOutput)
+            }
+        }
+
+        defer func(s *Script) {
+            if err := s.Nuke(); err != nil {
+                t.Errorf("nuking: %v", err)
+            }
+        }(s)
+    }
+}
diff --git a/src/pkg/go/build/cgotest/cgotest.c b/src/pkg/go/build/cgotest/cgotest.c
new file mode 100644
index 000000000..b13acb227
--- /dev/null
+++ b/src/pkg/go/build/cgotest/cgotest.c
@@ -0,0 +1,9 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+int
+Add(int x, int y, int *sum)
+{
+    *sum = x + y;
+}
diff --git a/src/pkg/go/build/cgotest/cgotest.go b/src/pkg/go/build/cgotest/cgotest.go
new file mode 100644
index 000000000..93bbf0688
--- /dev/null
+++ b/src/pkg/go/build/cgotest/cgotest.go
@@ -0,0 +1,19 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cgotest
+
+/*
+char* greeting = "hello, world";
+*/
+// #include "cgotest.h"
+import "C"
+import "unsafe"
+
+var Greeting = C.GoString(C.greeting)
+
+func DoAdd(x, y int) (sum int) {
+    C.Add(C.int(x), C.int(y), (*C.int)(unsafe.Pointer(&sum)))
+    return
+}
diff --git a/src/pkg/go/build/cgotest/cgotest.h b/src/pkg/go/build/cgotest/cgotest.h
new file mode 100644
index 000000000..9c73643b6
--- /dev/null
+++ b/src/pkg/go/build/cgotest/cgotest.h
@@ -0,0 +1,5 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+extern int Add(int, int, int *);
diff --git a/src/pkg/go/build/cmdtest/main.go b/src/pkg/go/build/cmdtest/main.go
new file mode 100644
index 000000000..bed4f485a
--- /dev/null
+++ b/src/pkg/go/build/cmdtest/main.go
@@ -0,0 +1,12 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "go/build/pkgtest"
+
+func main() {
+    pkgtest.Foo()
+    print(int(pkgtest.Sqrt(9)))
+}
diff --git a/src/pkg/go/build/dir.go b/src/pkg/go/build/dir.go
new file mode 100644
index 000000000..e0000b534
--- /dev/null
+++ b/src/pkg/go/build/dir.go
@@ -0,0 +1,172 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+ +package build + +import ( + "go/parser" + "go/token" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "runtime" +) + +type DirInfo struct { + GoFiles []string // .go files in dir (excluding CgoFiles) + CgoFiles []string // .go files that import "C" + CFiles []string // .c files in dir + SFiles []string // .s files in dir + Imports []string // All packages imported by goFiles + PkgName string // Name of package in dir +} + +func (d *DirInfo) IsCommand() bool { + return d.PkgName == "main" +} + +// ScanDir returns a structure with details about the Go content found +// in the given directory. The file lists exclude: +// +// - files in package main (unless allowMain is true) +// - files in package documentation +// - files ending in _test.go +// - files starting with _ or . +// +// Only files that satisfy the goodOSArch function are included. +func ScanDir(dir string, allowMain bool) (info *DirInfo, err os.Error) { + f, err := os.Open(dir) + if err != nil { + return nil, err + } + dirs, err := f.Readdir(-1) + f.Close() + if err != nil { + return nil, err + } + + var di DirInfo + imported := make(map[string]bool) + fset := token.NewFileSet() + for i := range dirs { + d := &dirs[i] + if strings.HasPrefix(d.Name, "_") || + strings.HasPrefix(d.Name, ".") { + continue + } + if !goodOSArch(d.Name) { + continue + } + + switch filepath.Ext(d.Name) { + case ".go": + if strings.HasSuffix(d.Name, "_test.go") { + continue + } + case ".c": + di.CFiles = append(di.CFiles, d.Name) + continue + case ".s": + di.SFiles = append(di.SFiles, d.Name) + continue + default: + continue + } + + filename := filepath.Join(dir, d.Name) + pf, err := parser.ParseFile(fset, filename, nil, parser.ImportsOnly) + if err != nil { + return nil, err + } + s := string(pf.Name.Name) + if s == "main" && !allowMain { + continue + } + if s == "documentation" { + continue + } + if di.PkgName == "" { + di.PkgName = s + } else if di.PkgName != s { + // Only if all files in the directory are in package main + // do we return PkgName=="main". + // A mix of main and another package reverts + // to the original (allowMain=false) behaviour. + if s == "main" || di.PkgName == "main" { + return ScanDir(dir, false) + } + return nil, os.NewError("multiple package names in " + dir) + } + isCgo := false + for _, spec := range pf.Imports { + quoted := string(spec.Path.Value) + path, err := strconv.Unquote(quoted) + if err != nil { + log.Panicf("%s: parser returned invalid quoted string: <%s>", filename, quoted) + } + imported[path] = true + if path == "C" { + isCgo = true + } + } + if isCgo { + di.CgoFiles = append(di.CgoFiles, d.Name) + } else { + di.GoFiles = append(di.GoFiles, d.Name) + } + } + di.Imports = make([]string, len(imported)) + i := 0 + for p := range imported { + di.Imports[i] = p + i++ + } + return &di, nil +} + +// goodOSArch returns false if the filename contains a $GOOS or $GOARCH +// suffix which does not match the current system. 
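A sketch of driving ScanDir, defined above, from client code; the directory path here is hypothetical:

    package main

    import (
        "fmt"
        "go/build"
    )

    func main() {
        // ScanDir skips _test.go files and, with allowMain == false,
        // files in package main.
        info, err := build.ScanDir("/home/user/src/mypkg", false)
        if err != nil {
            fmt.Println(err)
            return
        }
        fmt.Println("package:", info.PkgName)
        fmt.Println("go files:", info.GoFiles)
        fmt.Println("imports:", info.Imports)
    }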
+// The recognized filename formats are: +// +// name_$(GOOS).* +// name_$(GOARCH).* +// name_$(GOOS)_$(GOARCH).* +// +func goodOSArch(filename string) bool { + if dot := strings.Index(filename, "."); dot != -1 { + filename = filename[:dot] + } + l := strings.Split(filename, "_") + n := len(l) + if n == 0 { + return true + } + if good, known := goodOS[l[n-1]]; known { + return good + } + if good, known := goodArch[l[n-1]]; known { + if !good || n < 2 { + return false + } + good, known = goodOS[l[n-2]] + return good || !known + } + return true +} + +var goodOS = make(map[string]bool) +var goodArch = make(map[string]bool) + +func init() { + goodOS = make(map[string]bool) + goodArch = make(map[string]bool) + for _, v := range strings.Fields(goosList) { + goodOS[v] = v == runtime.GOOS + } + for _, v := range strings.Fields(goarchList) { + goodArch[v] = v == runtime.GOARCH + } +} diff --git a/src/pkg/go/build/path.go b/src/pkg/go/build/path.go new file mode 100644 index 000000000..e39b5f8fa --- /dev/null +++ b/src/pkg/go/build/path.go @@ -0,0 +1,182 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package build + +import ( + "fmt" + "log" + "os" + "path/filepath" + "runtime" +) + +// Path is a validated list of Trees derived from $GOROOT and $GOPATH at init. +var Path []*Tree + +// Tree describes a Go source tree, either $GOROOT or one from $GOPATH. +type Tree struct { + Path string + Goroot bool +} + +func newTree(p string) (*Tree, os.Error) { + if !filepath.IsAbs(p) { + return nil, os.NewError("must be absolute") + } + ep, err := filepath.EvalSymlinks(p) + if err != nil { + return nil, err + } + return &Tree{Path: ep}, nil +} + +// SrcDir returns the tree's package source directory. +func (t *Tree) SrcDir() string { + if t.Goroot { + return filepath.Join(t.Path, "src", "pkg") + } + return filepath.Join(t.Path, "src") +} + +// PkgDir returns the tree's package object directory. +func (t *Tree) PkgDir() string { + goos, goarch := runtime.GOOS, runtime.GOARCH + if e := os.Getenv("GOOS"); e != "" { + goos = e + } + if e := os.Getenv("GOARCH"); e != "" { + goarch = e + } + return filepath.Join(t.Path, "pkg", goos+"_"+goarch) +} + +// BinDir returns the tree's binary executable directory. +func (t *Tree) BinDir() string { + if t.Goroot { + if gobin := os.Getenv("GOBIN"); gobin != "" { + return gobin + } + } + return filepath.Join(t.Path, "bin") +} + +// HasSrc returns whether the given package's +// source can be found inside this Tree. +func (t *Tree) HasSrc(pkg string) bool { + fi, err := os.Stat(filepath.Join(t.SrcDir(), pkg)) + if err != nil { + return false + } + return fi.IsDirectory() +} + +// HasPkg returns whether the given package's +// object file can be found inside this Tree. +func (t *Tree) HasPkg(pkg string) bool { + fi, err := os.Stat(filepath.Join(t.PkgDir(), pkg+".a")) + if err != nil { + return false + } + return fi.IsRegular() + // TODO(adg): check object version is consistent +} + +var ( + ErrNotFound = os.NewError("go/build: package could not be found locally") + ErrTreeNotFound = os.NewError("go/build: no valid GOROOT or GOPATH could be found") +) + +// FindTree takes an import or filesystem path and returns the +// tree where the package source should be and the package import path. 
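A sketch of FindTree with an import path; it assumes a conventional installation in which the go/ast sources are present under $GOROOT:

    package main

    import (
        "fmt"
        "go/build"
    )

    func main() {
        tree, pkg, err := build.FindTree("go/ast")
        if err != nil {
            fmt.Println(err)
            return
        }
        fmt.Println("src:", tree.SrcDir(), "pkg:", pkg, "goroot:", tree.Goroot)
    }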
+func FindTree(path string) (tree *Tree, pkg string, err os.Error) { + if isLocalPath(path) { + if path, err = filepath.Abs(path); err != nil { + return + } + if path, err = filepath.EvalSymlinks(path); err != nil { + return + } + for _, t := range Path { + tpath := t.SrcDir() + string(filepath.Separator) + if !filepath.HasPrefix(path, tpath) { + continue + } + tree = t + pkg = path[len(tpath):] + return + } + err = fmt.Errorf("path %q not inside a GOPATH", path) + return + } + tree = defaultTree + pkg = path + for _, t := range Path { + if t.HasSrc(pkg) { + tree = t + return + } + } + if tree == nil { + err = ErrTreeNotFound + } else { + err = ErrNotFound + } + return +} + +// isLocalPath returns whether the given path is local (/foo ./foo ../foo . ..) +// Windows paths that starts with drive letter (c:\foo c:foo) are considered local. +func isLocalPath(s string) bool { + const sep = string(filepath.Separator) + return s == "." || s == ".." || + filepath.HasPrefix(s, sep) || + filepath.HasPrefix(s, "."+sep) || filepath.HasPrefix(s, ".."+sep) || + filepath.VolumeName(s) != "" +} + +var ( + // argument lists used by the build's gc and ld methods + gcImportArgs []string + ldImportArgs []string + + // default tree for remote packages + defaultTree *Tree +) + +// set up Path: parse and validate GOROOT and GOPATH variables +func init() { + root := runtime.GOROOT() + t, err := newTree(root) + if err != nil { + log.Printf("go/build: invalid GOROOT %q: %v", root, err) + } else { + t.Goroot = true + Path = []*Tree{t} + } + + for _, p := range filepath.SplitList(os.Getenv("GOPATH")) { + if p == "" { + continue + } + t, err := newTree(p) + if err != nil { + log.Printf("go/build: invalid GOPATH %q: %v", p, err) + continue + } + Path = append(Path, t) + gcImportArgs = append(gcImportArgs, "-I", t.PkgDir()) + ldImportArgs = append(ldImportArgs, "-L", t.PkgDir()) + + // select first GOPATH entry as default + if defaultTree == nil { + defaultTree = t + } + } + + // use GOROOT if no valid GOPATH specified + if defaultTree == nil && len(Path) > 0 { + defaultTree = Path[0] + } +} diff --git a/src/pkg/go/build/pkgtest/pkgtest.go b/src/pkg/go/build/pkgtest/pkgtest.go new file mode 100644 index 000000000..9322f5ebd --- /dev/null +++ b/src/pkg/go/build/pkgtest/pkgtest.go @@ -0,0 +1,9 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgtest + +func Foo() {} + +func Sqrt(x float64) float64 diff --git a/src/pkg/go/build/pkgtest/sqrt_386.s b/src/pkg/go/build/pkgtest/sqrt_386.s new file mode 100644 index 000000000..d0a428d52 --- /dev/null +++ b/src/pkg/go/build/pkgtest/sqrt_386.s @@ -0,0 +1,10 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// func Sqrt(x float64) float64 +TEXT ·Sqrt(SB),7,$0 + FMOVD x+0(FP),F0 + FSQRT + FMOVDP F0,r+8(FP) + RET diff --git a/src/pkg/go/build/pkgtest/sqrt_amd64.s b/src/pkg/go/build/pkgtest/sqrt_amd64.s new file mode 100644 index 000000000..f5b329e70 --- /dev/null +++ b/src/pkg/go/build/pkgtest/sqrt_amd64.s @@ -0,0 +1,9 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// func Sqrt(x float64) float64 +TEXT ·Sqrt(SB),7,$0 + SQRTSD x+0(FP), X0 + MOVSD X0, r+8(FP) + RET diff --git a/src/pkg/go/build/pkgtest/sqrt_arm.s b/src/pkg/go/build/pkgtest/sqrt_arm.s new file mode 100644 index 000000000..befbb8a89 --- /dev/null +++ b/src/pkg/go/build/pkgtest/sqrt_arm.s @@ -0,0 +1,10 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// func Sqrt(x float64) float64 +TEXT ·Sqrt(SB),7,$0 + MOVD x+0(FP),F0 + SQRTD F0,F0 + MOVD F0,r+8(FP) + RET diff --git a/src/pkg/go/build/syslist_test.go b/src/pkg/go/build/syslist_test.go new file mode 100644 index 000000000..eb0e5dcb6 --- /dev/null +++ b/src/pkg/go/build/syslist_test.go @@ -0,0 +1,62 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package build + +import ( + "runtime" + "testing" +) + +var ( + thisOS = runtime.GOOS + thisArch = runtime.GOARCH + otherOS = anotherOS() + otherArch = anotherArch() +) + +func anotherOS() string { + if thisOS != "darwin" { + return "darwin" + } + return "linux" +} + +func anotherArch() string { + if thisArch != "amd64" { + return "amd64" + } + return "386" +} + +type GoodFileTest struct { + name string + result bool +} + +var tests = []GoodFileTest{ + {"file.go", true}, + {"file.c", true}, + {"file_foo.go", true}, + {"file_" + thisArch + ".go", true}, + {"file_" + otherArch + ".go", false}, + {"file_" + thisOS + ".go", true}, + {"file_" + otherOS + ".go", false}, + {"file_" + thisOS + "_" + thisArch + ".go", true}, + {"file_" + otherOS + "_" + thisArch + ".go", false}, + {"file_" + thisOS + "_" + otherArch + ".go", false}, + {"file_" + otherOS + "_" + otherArch + ".go", false}, + {"file_foo_" + thisArch + ".go", true}, + {"file_foo_" + otherArch + ".go", false}, + {"file_" + thisOS + ".c", true}, + {"file_" + otherOS + ".c", false}, +} + +func TestGoodOSArch(t *testing.T) { + for _, test := range tests { + if goodOSArch(test.name) != test.result { + t.Fatalf("goodOSArch(%q) != %v", test.name, test.result) + } + } +} diff --git a/src/pkg/go/doc/Makefile b/src/pkg/go/doc/Makefile new file mode 100644 index 000000000..a5152c793 --- /dev/null +++ b/src/pkg/go/doc/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/doc +GOFILES=\ + comment.go\ + doc.go\ + +include ../../../Make.pkg diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go new file mode 100644 index 000000000..e1989226b --- /dev/null +++ b/src/pkg/go/doc/comment.go @@ -0,0 +1,345 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Godoc comment extraction and comment -> HTML formatting. + +package doc + +import ( + "go/ast" + "io" + "regexp" + "strings" + "template" // for HTMLEscape +) + +func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } + +func stripTrailingWhitespace(s string) string { + i := len(s) + for i > 0 && isWhitespace(s[i-1]) { + i-- + } + return s[0:i] +} + +// CommentText returns the text of comment, +// with the comment markers - //, /*, and */ - removed. 
+func CommentText(comment *ast.CommentGroup) string { + if comment == nil { + return "" + } + comments := make([]string, len(comment.List)) + for i, c := range comment.List { + comments[i] = string(c.Text) + } + + lines := make([]string, 0, 10) // most comments are less than 10 lines + for _, c := range comments { + // Remove comment markers. + // The parser has given us exactly the comment text. + switch c[1] { + case '/': + //-style comment + c = c[2:] + // Remove leading space after //, if there is one. + // TODO(gri) This appears to be necessary in isolated + // cases (bignum.RatFromString) - why? + if len(c) > 0 && c[0] == ' ' { + c = c[1:] + } + case '*': + /*-style comment */ + c = c[2 : len(c)-2] + } + + // Split on newlines. + cl := strings.Split(c, "\n") + + // Walk lines, stripping trailing white space and adding to list. + for _, l := range cl { + lines = append(lines, stripTrailingWhitespace(l)) + } + } + + // Remove leading blank lines; convert runs of + // interior blank lines to a single blank line. + n := 0 + for _, line := range lines { + if line != "" || n > 0 && lines[n-1] != "" { + lines[n] = line + n++ + } + } + lines = lines[0:n] + + // Add final "" entry to get trailing newline from Join. + if n > 0 && lines[n-1] != "" { + lines = append(lines, "") + } + + return strings.Join(lines, "\n") +} + +// Split bytes into lines. +func split(text []byte) [][]byte { + // count lines + n := 0 + last := 0 + for i, c := range text { + if c == '\n' { + last = i + 1 + n++ + } + } + if last < len(text) { + n++ + } + + // split + out := make([][]byte, n) + last = 0 + n = 0 + for i, c := range text { + if c == '\n' { + out[n] = text[last : i+1] + last = i + 1 + n++ + } + } + if last < len(text) { + out[n] = text[last:] + } + + return out +} + +var ( + ldquo = []byte("“") + rdquo = []byte("”") +) + +// Escape comment text for HTML. If nice is set, +// also turn `` into “ and '' into ”. +func commentEscape(w io.Writer, s []byte, nice bool) { + last := 0 + if nice { + for i := 0; i < len(s)-1; i++ { + ch := s[i] + if ch == s[i+1] && (ch == '`' || ch == '\'') { + template.HTMLEscape(w, s[last:i]) + last = i + 2 + switch ch { + case '`': + w.Write(ldquo) + case '\'': + w.Write(rdquo) + } + i++ // loop will add one more + } + } + } + template.HTMLEscape(w, s[last:]) +} + +const ( + // Regexp for Go identifiers + identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this + + // Regexp for URLs + protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):` + hostPart = `[a-zA-Z0-9_@\-]+` + filePart = `[a-zA-Z0-9_?%#~&/\-+=]+` + urlRx = protocol + `//` + // http:// + hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ + filePart + `([:.,]` + filePart + `)*` +) + +var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`) + +var ( + html_a = []byte(`<a href="`) + html_aq = []byte(`">`) + html_enda = []byte("</a>") + html_i = []byte("<i>") + html_endi = []byte("</i>") + html_p = []byte("<p>\n") + html_endp = []byte("</p>\n") + html_pre = []byte("<pre>") + html_endpre = []byte("</pre>\n") +) + +// Emphasize and escape a line of text for HTML. URLs are converted into links; +// if the URL also appears in the words map, the link is taken from the map (if +// the corresponding map value is the empty string, the URL is not converted +// into a link). Go identifiers that appear in the words map are italicized; if +// the corresponding map value is not the empty string, it is considered a URL +// and the word is converted into a link. 
If nice is set, the remaining text's +// appearance is improved where it makes sense (e.g., `` is turned into “ +// and '' into ”). +func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) { + for { + m := matchRx.FindSubmatchIndex(line) + if m == nil { + break + } + // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx) + + // write text before match + commentEscape(w, line[0:m[0]], nice) + + // analyze match + match := line[m[0]:m[1]] + url := "" + italics := false + if words != nil { + url, italics = words[string(match)] + } + if m[2] < 0 { + // didn't match against first parenthesized sub-regexp; must be match against urlRx + if !italics { + // no alternative URL in words list, use match instead + url = string(match) + } + italics = false // don't italicize URLs + } + + // write match + if len(url) > 0 { + w.Write(html_a) + template.HTMLEscape(w, []byte(url)) + w.Write(html_aq) + } + if italics { + w.Write(html_i) + } + commentEscape(w, match, nice) + if italics { + w.Write(html_endi) + } + if len(url) > 0 { + w.Write(html_enda) + } + + // advance + line = line[m[1]:] + } + commentEscape(w, line, nice) +} + +func indentLen(s []byte) int { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return i +} + +func isBlank(s []byte) bool { return len(s) == 0 || (len(s) == 1 && s[0] == '\n') } + +func commonPrefix(a, b []byte) []byte { + i := 0 + for i < len(a) && i < len(b) && a[i] == b[i] { + i++ + } + return a[0:i] +} + +func unindent(block [][]byte) { + if len(block) == 0 { + return + } + + // compute maximum common white prefix + prefix := block[0][0:indentLen(block[0])] + for _, line := range block { + if !isBlank(line) { + prefix = commonPrefix(prefix, line[0:indentLen(line)]) + } + } + n := len(prefix) + + // remove + for i, line := range block { + if !isBlank(line) { + block[i] = line[n:] + } + } +} + +// Convert comment text to formatted HTML. +// The comment was prepared by DocReader, +// so it is known not to have leading, trailing blank lines +// nor to have trailing spaces at the end of lines. +// The comment markers have already been removed. +// +// Turn each run of multiple \n into </p><p>. +// Turn each run of indented lines into a <pre> block without indent. +// +// URLs in the comment text are converted into links; if the URL also appears +// in the words map, the link is taken from the map (if the corresponding map +// value is the empty string, the URL is not converted into a link). +// +// Go identifiers that appear in the words map are italicized; if the corresponding +// map value is not the empty string, it is considered a URL and the word is converted +// into a link. 
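Before the implementation, a sketch of calling the exported entry point ToHTML, which wraps this machinery; with a nil words map no identifiers are italicized or linked:

    package main

    import (
        "go/doc"
        "os"
    )

    func main() {
        text := "Add returns the sum of its arguments.\n\n" +
            "\tsum := Add(1, 2)\n\n" +
            "See the ``math'' package for more.\n"
        // Paragraphs become <p>, the indented line becomes a <pre>
        // block, and `` / '' become typographic quotes.
        doc.ToHTML(os.Stdout, []byte(text), nil)
    }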
+func ToHTML(w io.Writer, s []byte, words map[string]string) { + inpara := false + + close := func() { + if inpara { + w.Write(html_endp) + inpara = false + } + } + open := func() { + if !inpara { + w.Write(html_p) + inpara = true + } + } + + lines := split(s) + unindent(lines) + for i := 0; i < len(lines); { + line := lines[i] + if isBlank(line) { + // close paragraph + close() + i++ + continue + } + if indentLen(line) > 0 { + // close paragraph + close() + + // count indented or blank lines + j := i + 1 + for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { + j++ + } + // but not trailing blank lines + for j > i && isBlank(lines[j-1]) { + j-- + } + block := lines[i:j] + i = j + + unindent(block) + + // put those lines in a pre block + w.Write(html_pre) + for _, line := range block { + emphasize(w, line, nil, false) // no nice text formatting + } + w.Write(html_endpre) + continue + } + // open paragraph + open() + emphasize(w, lines[i], words, true) // nice text formatting + i++ + } + close() +} diff --git a/src/pkg/go/doc/doc.go b/src/pkg/go/doc/doc.go new file mode 100644 index 000000000..c7fed9784 --- /dev/null +++ b/src/pkg/go/doc/doc.go @@ -0,0 +1,641 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package doc extracts source code documentation from a Go AST. +package doc + +import ( + "go/ast" + "go/token" + "regexp" + "sort" +) + +// ---------------------------------------------------------------------------- + +type typeDoc struct { + // len(decl.Specs) == 1, and the element type is *ast.TypeSpec + // if the type declaration hasn't been seen yet, decl is nil + decl *ast.GenDecl + // values, factory functions, and methods associated with the type + values []*ast.GenDecl // consts and vars + factories map[string]*ast.FuncDecl + methods map[string]*ast.FuncDecl +} + +// docReader accumulates documentation for a single package. +// It modifies the AST: Comments (declaration documentation) +// that have been collected by the DocReader are set to nil +// in the respective AST nodes so that they are not printed +// twice (once when printing the documentation and once when +// printing the corresponding AST node). +// +type docReader struct { + doc *ast.CommentGroup // package documentation, if any + pkgName string + values []*ast.GenDecl // consts and vars + types map[string]*typeDoc + funcs map[string]*ast.FuncDecl + bugs []*ast.CommentGroup +} + +func (doc *docReader) init(pkgName string) { + doc.pkgName = pkgName + doc.types = make(map[string]*typeDoc) + doc.funcs = make(map[string]*ast.FuncDecl) +} + +func (doc *docReader) addDoc(comments *ast.CommentGroup) { + if doc.doc == nil { + // common case: just one package comment + doc.doc = comments + return + } + + // More than one package comment: Usually there will be only + // one file with a package comment, but it's better to collect + // all comments than drop them on the floor. + // (This code isn't particularly clever - no amortized doubling is + // used - but this situation occurs rarely and is not time-critical.) 
+ n1 := len(doc.doc.List) + n2 := len(comments.List) + list := make([]*ast.Comment, n1+1+n2) // + 1 for separator line + copy(list, doc.doc.List) + list[n1] = &ast.Comment{token.NoPos, "//"} // separator line + copy(list[n1+1:], comments.List) + doc.doc = &ast.CommentGroup{list} +} + +func (doc *docReader) addType(decl *ast.GenDecl) { + spec := decl.Specs[0].(*ast.TypeSpec) + typ := doc.lookupTypeDoc(spec.Name.Name) + // typ should always be != nil since declared types + // are always named - be conservative and check + if typ != nil { + // a type should be added at most once, so typ.decl + // should be nil - if it isn't, simply overwrite it + typ.decl = decl + } +} + +func (doc *docReader) lookupTypeDoc(name string) *typeDoc { + if name == "" { + return nil // no type docs for anonymous types + } + if tdoc, found := doc.types[name]; found { + return tdoc + } + // type wasn't found - add one without declaration + tdoc := &typeDoc{nil, nil, make(map[string]*ast.FuncDecl), make(map[string]*ast.FuncDecl)} + doc.types[name] = tdoc + return tdoc +} + +func baseTypeName(typ ast.Expr) string { + switch t := typ.(type) { + case *ast.Ident: + // if the type is not exported, the effect to + // a client is as if there were no type name + if t.IsExported() { + return t.Name + } + case *ast.StarExpr: + return baseTypeName(t.X) + } + return "" +} + +func (doc *docReader) addValue(decl *ast.GenDecl) { + // determine if decl should be associated with a type + // Heuristic: For each typed entry, determine the type name, if any. + // If there is exactly one type name that is sufficiently + // frequent, associate the decl with the respective type. + domName := "" + domFreq := 0 + prev := "" + for _, s := range decl.Specs { + if v, ok := s.(*ast.ValueSpec); ok { + name := "" + switch { + case v.Type != nil: + // a type is present; determine its name + name = baseTypeName(v.Type) + case decl.Tok == token.CONST: + // no type is present but we have a constant declaration; + // use the previous type name (w/o more type information + // we cannot handle the case of unnamed variables with + // initializer expressions except for some trivial cases) + name = prev + } + if name != "" { + // entry has a named type + if domName != "" && domName != name { + // more than one type name - do not associate + // with any type + domName = "" + break + } + domName = name + domFreq++ + } + prev = name + } + } + + // determine values list + const threshold = 0.75 + values := &doc.values + if domName != "" && domFreq >= int(float64(len(decl.Specs))*threshold) { + // typed entries are sufficiently frequent + typ := doc.lookupTypeDoc(domName) + if typ != nil { + values = &typ.values // associate with that type + } + } + + *values = append(*values, decl) +} + +// Helper function to set the table entry for function f. Makes sure that +// at least one f with associated documentation is stored in table, if there +// are multiple f's with the same name. 
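Back in addValue, the threshold is easiest to see on an invented declaration in which every spec resolves to the same type name, so the whole const block is attached to that type's documentation rather than to the package-level values:

    package colors

    type Color int

    const (
        Red   Color = iota // explicit type: baseTypeName yields "Color"
        Green              // untyped const spec: inherits "Color" from the previous spec
        Blue
    )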
+func setFunc(table map[string]*ast.FuncDecl, f *ast.FuncDecl) { + name := f.Name.Name + if g, exists := table[name]; exists && g.Doc != nil { + // a function with the same name has already been registered; + // since it has documentation, assume f is simply another + // implementation and ignore it + // TODO(gri) consider collecting all functions, or at least + // all comments + return + } + // function doesn't exist or has no documentation; use f + table[name] = f +} + +func (doc *docReader) addFunc(fun *ast.FuncDecl) { + name := fun.Name.Name + + // determine if it should be associated with a type + if fun.Recv != nil { + // method + typ := doc.lookupTypeDoc(baseTypeName(fun.Recv.List[0].Type)) + if typ != nil { + // exported receiver type + setFunc(typ.methods, fun) + } + // otherwise don't show the method + // TODO(gri): There may be exported methods of non-exported types + // that can be called because of exported values (consts, vars, or + // function results) of that type. Could determine if that is the + // case and then show those methods in an appropriate section. + return + } + + // perhaps a factory function + // determine result type, if any + if fun.Type.Results.NumFields() >= 1 { + res := fun.Type.Results.List[0] + if len(res.Names) <= 1 { + // exactly one (named or anonymous) result associated + // with the first type in result signature (there may + // be more than one result) + tname := baseTypeName(res.Type) + typ := doc.lookupTypeDoc(tname) + if typ != nil { + // named and exported result type + + // Work-around for failure of heuristic: In package os + // too many functions are considered factory functions + // for the Error type. Eliminate manually for now as + // this appears to be the only important case in the + // current library where the heuristic fails. + if doc.pkgName == "os" && tname == "Error" && + name != "NewError" && name != "NewSyscallError" { + // not a factory function for os.Error + setFunc(doc.funcs, fun) // treat as ordinary function + return + } + + setFunc(typ.factories, fun) + return + } + } + } + + // ordinary function + setFunc(doc.funcs, fun) +} + +func (doc *docReader) addDecl(decl ast.Decl) { + switch d := decl.(type) { + case *ast.GenDecl: + if len(d.Specs) > 0 { + switch d.Tok { + case token.CONST, token.VAR: + // constants and variables are always handled as a group + doc.addValue(d) + case token.TYPE: + // types are handled individually + for _, spec := range d.Specs { + // make a (fake) GenDecl node for this TypeSpec + // (we need to do this here - as opposed to just + // for printing - so we don't lose the GenDecl + // documentation) + // + // TODO(gri): Consider just collecting the TypeSpec + // node (and copy in the GenDecl.doc if there is no + // doc in the TypeSpec - this is currently done in + // makeTypeDocs below). Simpler data structures, but + // would lose GenDecl documentation if the TypeSpec + // has documentation as well. + doc.addType(&ast.GenDecl{d.Doc, d.Pos(), token.TYPE, token.NoPos, []ast.Spec{spec}, token.NoPos}) + // A new GenDecl node is created, no need to nil out d.Doc. + } + } + } + case *ast.FuncDecl: + doc.addFunc(d) + } +} + +func copyCommentList(list []*ast.Comment) []*ast.Comment { + return append([]*ast.Comment(nil), list...) +} + +var ( + bug_markers = regexp.MustCompile("^/[/*][ \t]*BUG\\(.*\\):[ \t]*") // BUG(uid): + bug_content = regexp.MustCompile("[^ \n\r\t]+") // at least one non-whitespace char +) + +// addFile adds the AST for a source file to the docReader. 
+// Adding the same AST multiple times is a no-op. +// +func (doc *docReader) addFile(src *ast.File) { + // add package documentation + if src.Doc != nil { + doc.addDoc(src.Doc) + src.Doc = nil // doc consumed - remove from ast.File node + } + + // add all declarations + for _, decl := range src.Decls { + doc.addDecl(decl) + } + + // collect BUG(...) comments + for _, c := range src.Comments { + text := c.List[0].Text + if m := bug_markers.FindStringIndex(text); m != nil { + // found a BUG comment; maybe empty + if btxt := text[m[1]:]; bug_content.MatchString(btxt) { + // non-empty BUG comment; collect comment without BUG prefix + list := copyCommentList(c.List) + list[0].Text = text[m[1]:] + doc.bugs = append(doc.bugs, &ast.CommentGroup{list}) + } + } + } + src.Comments = nil // consumed unassociated comments - remove from ast.File node +} + +func NewFileDoc(file *ast.File) *PackageDoc { + var r docReader + r.init(file.Name.Name) + r.addFile(file) + return r.newDoc("", nil) +} + +func NewPackageDoc(pkg *ast.Package, importpath string) *PackageDoc { + var r docReader + r.init(pkg.Name) + filenames := make([]string, len(pkg.Files)) + i := 0 + for filename, f := range pkg.Files { + r.addFile(f) + filenames[i] = filename + i++ + } + return r.newDoc(importpath, filenames) +} + +// ---------------------------------------------------------------------------- +// Conversion to external representation + +// ValueDoc is the documentation for a group of declared +// values, either vars or consts. +// +type ValueDoc struct { + Doc string + Decl *ast.GenDecl + order int +} + +type sortValueDoc []*ValueDoc + +func (p sortValueDoc) Len() int { return len(p) } +func (p sortValueDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func declName(d *ast.GenDecl) string { + if len(d.Specs) != 1 { + return "" + } + + switch v := d.Specs[0].(type) { + case *ast.ValueSpec: + return v.Names[0].Name + case *ast.TypeSpec: + return v.Name.Name + } + + return "" +} + +func (p sortValueDoc) Less(i, j int) bool { + // sort by name + // pull blocks (name = "") up to top + // in original order + if ni, nj := declName(p[i].Decl), declName(p[j].Decl); ni != nj { + return ni < nj + } + return p[i].order < p[j].order +} + +func makeValueDocs(list []*ast.GenDecl, tok token.Token) []*ValueDoc { + d := make([]*ValueDoc, len(list)) // big enough in any case + n := 0 + for i, decl := range list { + if decl.Tok == tok { + d[n] = &ValueDoc{CommentText(decl.Doc), decl, i} + n++ + decl.Doc = nil // doc consumed - removed from AST + } + } + d = d[0:n] + sort.Sort(sortValueDoc(d)) + return d +} + +// FuncDoc is the documentation for a func declaration, +// either a top-level function or a method function. +// +type FuncDoc struct { + Doc string + Recv ast.Expr // TODO(rsc): Would like string here + Name string + Decl *ast.FuncDecl +} + +type sortFuncDoc []*FuncDoc + +func (p sortFuncDoc) Len() int { return len(p) } +func (p sortFuncDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +func (p sortFuncDoc) Less(i, j int) bool { return p[i].Name < p[j].Name } + +func makeFuncDocs(m map[string]*ast.FuncDecl) []*FuncDoc { + d := make([]*FuncDoc, len(m)) + i := 0 + for _, f := range m { + doc := new(FuncDoc) + doc.Doc = CommentText(f.Doc) + f.Doc = nil // doc consumed - remove from ast.FuncDecl node + if f.Recv != nil { + doc.Recv = f.Recv.List[0].Type + } + doc.Name = f.Name.Name + doc.Decl = f + d[i] = doc + i++ + } + sort.Sort(sortFuncDoc(d)) + return d +} + +// TypeDoc is the documentation for a declared type. 
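A sketch of producing a PackageDoc from a single parsed file via NewFileDoc, defined above; the source text is invented:

    package main

    import (
        "fmt"
        "go/doc"
        "go/parser"
        "go/token"
    )

    func main() {
        fset := token.NewFileSet()
        src := "// Package demo is an invented example.\n" +
            "package demo\n\n" +
            "// Answer is the answer.\n" +
            "var Answer = 42\n"
        f, err := parser.ParseFile(fset, "demo.go", src, parser.ParseComments)
        if err != nil {
            fmt.Println(err)
            return
        }
        d := doc.NewFileDoc(f)
        fmt.Println(d.PackageName) // demo
        fmt.Print(d.Doc)           // Package demo is an invented example.
        fmt.Println(len(d.Vars))   // 1
    }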
+// Consts and Vars are sorted lists of constants and variables of (mostly) that type. +// Factories is a sorted list of factory functions that return that type. +// Methods is a sorted list of method functions on that type. +type TypeDoc struct { + Doc string + Type *ast.TypeSpec + Consts []*ValueDoc + Vars []*ValueDoc + Factories []*FuncDoc + Methods []*FuncDoc + Decl *ast.GenDecl + order int +} + +type sortTypeDoc []*TypeDoc + +func (p sortTypeDoc) Len() int { return len(p) } +func (p sortTypeDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +func (p sortTypeDoc) Less(i, j int) bool { + // sort by name + // pull blocks (name = "") up to top + // in original order + if ni, nj := p[i].Type.Name.Name, p[j].Type.Name.Name; ni != nj { + return ni < nj + } + return p[i].order < p[j].order +} + +// NOTE(rsc): This would appear not to be correct for type ( ) +// blocks, but the doc extractor above has split them into +// individual declarations. +func (doc *docReader) makeTypeDocs(m map[string]*typeDoc) []*TypeDoc { + d := make([]*TypeDoc, len(m)) + i := 0 + for _, old := range m { + // all typeDocs should have a declaration associated with + // them after processing an entire package - be conservative + // and check + if decl := old.decl; decl != nil { + typespec := decl.Specs[0].(*ast.TypeSpec) + t := new(TypeDoc) + doc := typespec.Doc + typespec.Doc = nil // doc consumed - remove from ast.TypeSpec node + if doc == nil { + // no doc associated with the spec, use the declaration doc, if any + doc = decl.Doc + } + decl.Doc = nil // doc consumed - remove from ast.Decl node + t.Doc = CommentText(doc) + t.Type = typespec + t.Consts = makeValueDocs(old.values, token.CONST) + t.Vars = makeValueDocs(old.values, token.VAR) + t.Factories = makeFuncDocs(old.factories) + t.Methods = makeFuncDocs(old.methods) + t.Decl = old.decl + t.order = i + d[i] = t + i++ + } else { + // no corresponding type declaration found - move any associated + // values, factory functions, and methods back to the top-level + // so that they are not lost (this should only happen if a package + // file containing the explicit type declaration is missing or if + // an unqualified type name was used after a "." import) + // 1) move values + doc.values = append(doc.values, old.values...) + // 2) move factory functions + for name, f := range old.factories { + doc.funcs[name] = f + } + // 3) move methods + for name, f := range old.methods { + // don't overwrite functions with the same name + if _, found := doc.funcs[name]; !found { + doc.funcs[name] = f + } + } + } + } + d = d[0:i] // some types may have been ignored + sort.Sort(sortTypeDoc(d)) + return d +} + +func makeBugDocs(list []*ast.CommentGroup) []string { + d := make([]string, len(list)) + for i, g := range list { + d[i] = CommentText(g) + } + return d +} + +// PackageDoc is the documentation for an entire package. +// +type PackageDoc struct { + PackageName string + ImportPath string + Filenames []string + Doc string + Consts []*ValueDoc + Types []*TypeDoc + Vars []*ValueDoc + Funcs []*FuncDoc + Bugs []string +} + +// newDoc returns the accumulated documentation for the package. 
+// +func (doc *docReader) newDoc(importpath string, filenames []string) *PackageDoc { + p := new(PackageDoc) + p.PackageName = doc.pkgName + p.ImportPath = importpath + sort.Strings(filenames) + p.Filenames = filenames + p.Doc = CommentText(doc.doc) + // makeTypeDocs may extend the list of doc.values and + // doc.funcs and thus must be called before any other + // function consuming those lists + p.Types = doc.makeTypeDocs(doc.types) + p.Consts = makeValueDocs(doc.values, token.CONST) + p.Vars = makeValueDocs(doc.values, token.VAR) + p.Funcs = makeFuncDocs(doc.funcs) + p.Bugs = makeBugDocs(doc.bugs) + return p +} + +// ---------------------------------------------------------------------------- +// Filtering by name + +type Filter func(string) bool + +func matchFields(fields *ast.FieldList, f Filter) bool { + if fields != nil { + for _, field := range fields.List { + for _, name := range field.Names { + if f(name.Name) { + return true + } + } + } + } + return false +} + +func matchDecl(d *ast.GenDecl, f Filter) bool { + for _, d := range d.Specs { + switch v := d.(type) { + case *ast.ValueSpec: + for _, name := range v.Names { + if f(name.Name) { + return true + } + } + case *ast.TypeSpec: + if f(v.Name.Name) { + return true + } + switch t := v.Type.(type) { + case *ast.StructType: + if matchFields(t.Fields, f) { + return true + } + case *ast.InterfaceType: + if matchFields(t.Methods, f) { + return true + } + } + } + } + return false +} + +func filterValueDocs(a []*ValueDoc, f Filter) []*ValueDoc { + w := 0 + for _, vd := range a { + if matchDecl(vd.Decl, f) { + a[w] = vd + w++ + } + } + return a[0:w] +} + +func filterFuncDocs(a []*FuncDoc, f Filter) []*FuncDoc { + w := 0 + for _, fd := range a { + if f(fd.Name) { + a[w] = fd + w++ + } + } + return a[0:w] +} + +func filterTypeDocs(a []*TypeDoc, f Filter) []*TypeDoc { + w := 0 + for _, td := range a { + n := 0 // number of matches + if matchDecl(td.Decl, f) { + n = 1 + } else { + // type name doesn't match, but we may have matching consts, vars, factories or methods + td.Consts = filterValueDocs(td.Consts, f) + td.Vars = filterValueDocs(td.Vars, f) + td.Factories = filterFuncDocs(td.Factories, f) + td.Methods = filterFuncDocs(td.Methods, f) + n += len(td.Consts) + len(td.Vars) + len(td.Factories) + len(td.Methods) + } + if n > 0 { + a[w] = td + w++ + } + } + return a[0:w] +} + +// Filter eliminates documentation for names that don't pass through the filter f. +// TODO: Recognize "Type.Method" as a name. +// +func (p *PackageDoc) Filter(f Filter) { + p.Consts = filterValueDocs(p.Consts, f) + p.Vars = filterValueDocs(p.Vars, f) + p.Types = filterTypeDocs(p.Types, f) + p.Funcs = filterFuncDocs(p.Funcs, f) + p.Doc = "" // don't show top-level package doc +} diff --git a/src/pkg/go/parser/Makefile b/src/pkg/go/parser/Makefile new file mode 100644 index 000000000..d301f41eb --- /dev/null +++ b/src/pkg/go/parser/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/parser +GOFILES=\ + interface.go\ + parser.go\ + +include ../../../Make.pkg diff --git a/src/pkg/go/parser/interface.go b/src/pkg/go/parser/interface.go new file mode 100644 index 000000000..4f980fc65 --- /dev/null +++ b/src/pkg/go/parser/interface.go @@ -0,0 +1,214 @@ +// Copyright 2009 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains the exported entry points for invoking the parser. + +package parser + +import ( + "bytes" + "go/ast" + "go/scanner" + "go/token" + "io" + "io/ioutil" + "os" + "path/filepath" +) + +// If src != nil, readSource converts src to a []byte if possible; +// otherwise it returns an error. If src == nil, readSource returns +// the result of reading the file specified by filename. +// +func readSource(filename string, src interface{}) ([]byte, os.Error) { + if src != nil { + switch s := src.(type) { + case string: + return []byte(s), nil + case []byte: + return s, nil + case *bytes.Buffer: + // is io.Reader, but src is already available in []byte form + if s != nil { + return s.Bytes(), nil + } + case io.Reader: + var buf bytes.Buffer + _, err := io.Copy(&buf, s) + if err != nil { + return nil, err + } + return buf.Bytes(), nil + default: + return nil, os.NewError("invalid source") + } + } + + return ioutil.ReadFile(filename) +} + +func (p *parser) errors() os.Error { + mode := scanner.Sorted + if p.mode&SpuriousErrors == 0 { + mode = scanner.NoMultiples + } + return p.GetError(mode) +} + +// ParseExpr parses a Go expression and returns the corresponding +// AST node. The fset, filename, and src arguments have the same interpretation +// as for ParseFile. If there is an error, the result expression +// may be nil or contain a partial AST. +// +func ParseExpr(fset *token.FileSet, filename string, src interface{}) (ast.Expr, os.Error) { + data, err := readSource(filename, src) + if err != nil { + return nil, err + } + + var p parser + p.init(fset, filename, data, 0) + x := p.parseRhs() + if p.tok == token.SEMICOLON { + p.next() // consume automatically inserted semicolon, if any + } + p.expect(token.EOF) + + return x, p.errors() +} + +// ParseStmtList parses a list of Go statements and returns the list +// of corresponding AST nodes. The fset, filename, and src arguments have the same +// interpretation as for ParseFile. If there is an error, the node +// list may be nil or contain partial ASTs. +// +func ParseStmtList(fset *token.FileSet, filename string, src interface{}) ([]ast.Stmt, os.Error) { + data, err := readSource(filename, src) + if err != nil { + return nil, err + } + + var p parser + p.init(fset, filename, data, 0) + list := p.parseStmtList() + p.expect(token.EOF) + + return list, p.errors() +} + +// ParseDeclList parses a list of Go declarations and returns the list +// of corresponding AST nodes. The fset, filename, and src arguments have the same +// interpretation as for ParseFile. If there is an error, the node +// list may be nil or contain partial ASTs. +// +func ParseDeclList(fset *token.FileSet, filename string, src interface{}) ([]ast.Decl, os.Error) { + data, err := readSource(filename, src) + if err != nil { + return nil, err + } + + var p parser + p.init(fset, filename, data, 0) + list := p.parseDeclList() + p.expect(token.EOF) + + return list, p.errors() +} + +// ParseFile parses the source code of a single Go source file and returns +// the corresponding ast.File node. The source code may be provided via +// the filename of the source file, or via the src parameter. +// +// If src != nil, ParseFile parses the source from src and the filename is +// only used when recording position information. The type of the argument +// for the src parameter must be string, []byte, or io.Reader. 
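+// For example (an illustrative call, not part of this package; the file
+// name "hello.go" is made up and used only for position information):
+//
+//	fset := token.NewFileSet()
+//	f, err := ParseFile(fset, "hello.go", "package main", 0)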
+// +// If src == nil, ParseFile parses the file specified by filename. +// +// The mode parameter controls the amount of source text parsed and other +// optional parser functionality. Position information is recorded in the +// file set fset. +// +// If the source couldn't be read, the returned AST is nil and the error +// indicates the specific failure. If the source was read but syntax +// errors were found, the result is a partial AST (with ast.BadX nodes +// representing the fragments of erroneous source code). Multiple errors +// are returned via a scanner.ErrorList which is sorted by file position. +// +func ParseFile(fset *token.FileSet, filename string, src interface{}, mode uint) (*ast.File, os.Error) { + data, err := readSource(filename, src) + if err != nil { + return nil, err + } + + var p parser + p.init(fset, filename, data, mode) + file := p.parseFile() // parseFile reads to EOF + + return file, p.errors() +} + +// ParseFiles calls ParseFile for each file in the filenames list and returns +// a map of package name -> package AST with all the packages found. The mode +// bits are passed to ParseFile unchanged. Position information is recorded +// in the file set fset. +// +// Files with parse errors are ignored. In this case the map of packages may +// be incomplete (missing packages and/or incomplete packages) and the first +// error encountered is returned. +// +func ParseFiles(fset *token.FileSet, filenames []string, mode uint) (pkgs map[string]*ast.Package, first os.Error) { + pkgs = make(map[string]*ast.Package) + for _, filename := range filenames { + if src, err := ParseFile(fset, filename, nil, mode); err == nil { + name := src.Name.Name + pkg, found := pkgs[name] + if !found { + // TODO(gri) Use NewPackage here; reconsider ParseFiles API. + pkg = &ast.Package{name, nil, nil, make(map[string]*ast.File)} + pkgs[name] = pkg + } + pkg.Files[filename] = src + } else if first == nil { + first = err + } + } + return +} + +// ParseDir calls ParseFile for the files in the directory specified by path and +// returns a map of package name -> package AST with all the packages found. If +// filter != nil, only the files with os.FileInfo entries passing through the filter +// are considered. The mode bits are passed to ParseFile unchanged. Position +// information is recorded in the file set fset. +// +// If the directory couldn't be read, a nil map and the respective error are +// returned. If a parse error occurred, a non-nil but incomplete map and the +// error are returned. +// +func ParseDir(fset *token.FileSet, path string, filter func(*os.FileInfo) bool, mode uint) (map[string]*ast.Package, os.Error) { + fd, err := os.Open(path) + if err != nil { + return nil, err + } + defer fd.Close() + + list, err := fd.Readdir(-1) + if err != nil { + return nil, err + } + + filenames := make([]string, len(list)) + n := 0 + for i := 0; i < len(list); i++ { + d := &list[i] + if filter == nil || filter(d) { + filenames[n] = filepath.Join(path, d.Name) + n++ + } + } + filenames = filenames[0:n] + + return ParseFiles(fset, filenames, mode) +} diff --git a/src/pkg/go/parser/parser.go b/src/pkg/go/parser/parser.go new file mode 100644 index 000000000..9c14d1667 --- /dev/null +++ b/src/pkg/go/parser/parser.go @@ -0,0 +1,2161 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package parser implements a parser for Go source files. 
Input may be +// provided in a variety of forms (see the various Parse* functions); the +// output is an abstract syntax tree (AST) representing the Go source. The +// parser is invoked through one of the Parse* functions. +// +package parser + +import ( + "fmt" + "go/ast" + "go/scanner" + "go/token" +) + +// The mode parameter to the Parse* functions is a set of flags (or 0). +// They control the amount of source code parsed and other optional +// parser functionality. +// +const ( + PackageClauseOnly uint = 1 << iota // parsing stops after package clause + ImportsOnly // parsing stops after import declarations + ParseComments // parse comments and add them to AST + Trace // print a trace of parsed productions + DeclarationErrors // report declaration errors + SpuriousErrors // report all (not just the first) errors per line +) + +// The parser structure holds the parser's internal state. +type parser struct { + file *token.File + scanner.ErrorVector + scanner scanner.Scanner + + // Tracing/debugging + mode uint // parsing mode + trace bool // == (mode & Trace != 0) + indent uint // indentation used for tracing output + + // Comments + comments []*ast.CommentGroup + leadComment *ast.CommentGroup // last lead comment + lineComment *ast.CommentGroup // last line comment + + // Next token + pos token.Pos // token position + tok token.Token // one token look-ahead + lit string // token literal + + // Non-syntactic parser control + exprLev int // < 0: in control clause, >= 0: in expression + + // Ordinary identifier scopes + pkgScope *ast.Scope // pkgScope.Outer == nil + topScope *ast.Scope // top-most scope; may be pkgScope + unresolved []*ast.Ident // unresolved identifiers + imports []*ast.ImportSpec // list of imports + + // Label scope + // (maintained by open/close LabelScope) + labelScope *ast.Scope // label scope for current function + targetStack [][]*ast.Ident // stack of unresolved labels +} + +// scannerMode returns the scanner mode bits given the parser's mode bits. +func scannerMode(mode uint) uint { + var m uint = scanner.InsertSemis + if mode&ParseComments != 0 { + m |= scanner.ScanComments + } + return m +} + +func (p *parser) init(fset *token.FileSet, filename string, src []byte, mode uint) { + p.file = fset.AddFile(filename, fset.Base(), len(src)) + p.scanner.Init(p.file, src, p, scannerMode(mode)) + + p.mode = mode + p.trace = mode&Trace != 0 // for convenience (p.trace is used frequently) + + p.next() + + // set up the pkgScope here (as opposed to in parseFile) because + // there are other parser entry points (ParseExpr, etc.) 
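+	// (p.pkgScope is recorded separately right after the scope is
+	// opened so that top-level declarations, e.g. in parseFuncDecl,
+	// can be entered into the package scope directly)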
+ p.openScope() + p.pkgScope = p.topScope + + // for the same reason, set up a label scope + p.openLabelScope() +} + +// ---------------------------------------------------------------------------- +// Scoping support + +func (p *parser) openScope() { + p.topScope = ast.NewScope(p.topScope) +} + +func (p *parser) closeScope() { + p.topScope = p.topScope.Outer +} + +func (p *parser) openLabelScope() { + p.labelScope = ast.NewScope(p.labelScope) + p.targetStack = append(p.targetStack, nil) +} + +func (p *parser) closeLabelScope() { + // resolve labels + n := len(p.targetStack) - 1 + scope := p.labelScope + for _, ident := range p.targetStack[n] { + ident.Obj = scope.Lookup(ident.Name) + if ident.Obj == nil && p.mode&DeclarationErrors != 0 { + p.error(ident.Pos(), fmt.Sprintf("label %s undefined", ident.Name)) + } + } + // pop label scope + p.targetStack = p.targetStack[0:n] + p.labelScope = p.labelScope.Outer +} + +func (p *parser) declare(decl, data interface{}, scope *ast.Scope, kind ast.ObjKind, idents ...*ast.Ident) { + for _, ident := range idents { + assert(ident.Obj == nil, "identifier already declared or resolved") + obj := ast.NewObj(kind, ident.Name) + // remember the corresponding declaration for redeclaration + // errors and global variable resolution/typechecking phase + obj.Decl = decl + obj.Data = data + ident.Obj = obj + if ident.Name != "_" { + if alt := scope.Insert(obj); alt != nil && p.mode&DeclarationErrors != 0 { + prevDecl := "" + if pos := alt.Pos(); pos.IsValid() { + prevDecl = fmt.Sprintf("\n\tprevious declaration at %s", p.file.Position(pos)) + } + p.error(ident.Pos(), fmt.Sprintf("%s redeclared in this block%s", ident.Name, prevDecl)) + } + } + } +} + +func (p *parser) shortVarDecl(idents []*ast.Ident) { + // Go spec: A short variable declaration may redeclare variables + // provided they were originally declared in the same block with + // the same type, and at least one of the non-blank variables is new. + n := 0 // number of new variables + for _, ident := range idents { + assert(ident.Obj == nil, "identifier already declared or resolved") + obj := ast.NewObj(ast.Var, ident.Name) + // short var declarations cannot have redeclaration errors + // and are not global => no need to remember the respective + // declaration + ident.Obj = obj + if ident.Name != "_" { + if alt := p.topScope.Insert(obj); alt != nil { + ident.Obj = alt // redeclaration + } else { + n++ // new declaration + } + } + } + if n == 0 && p.mode&DeclarationErrors != 0 { + p.error(idents[0].Pos(), "no new variables on left side of :=") + } +} + +// The unresolved object is a sentinel to mark identifiers that have been added +// to the list of unresolved identifiers. The sentinel is only used for verifying +// internal consistency. 
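+// (resolve below sets ident.Obj = unresolved just before appending the
+// identifier to p.unresolved; a later, separate resolution phase is
+// expected to overwrite the sentinel with the actual object)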
+var unresolved = new(ast.Object)
+
+func (p *parser) resolve(x ast.Expr) {
+	// nothing to do if x is not an identifier or the blank identifier
+	ident, _ := x.(*ast.Ident)
+	if ident == nil {
+		return
+	}
+	assert(ident.Obj == nil, "identifier already declared or resolved")
+	if ident.Name == "_" {
+		return
+	}
+	// try to resolve the identifier
+	for s := p.topScope; s != nil; s = s.Outer {
+		if obj := s.Lookup(ident.Name); obj != nil {
+			ident.Obj = obj
+			return
+		}
+	}
+	// all local scopes are known, so any unresolved identifier
+	// must be found either in the file scope, package scope
+	// (perhaps in another file), or universe scope --- collect
+	// them so that they can be resolved later
+	ident.Obj = unresolved
+	p.unresolved = append(p.unresolved, ident)
+}
+
+// ----------------------------------------------------------------------------
+// Parsing support
+
+func (p *parser) printTrace(a ...interface{}) {
+	const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " +
+		". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
+	const n = uint(len(dots))
+	pos := p.file.Position(p.pos)
+	fmt.Printf("%5d:%3d: ", pos.Line, pos.Column)
+	i := 2 * p.indent
+	for ; i > n; i -= n {
+		fmt.Print(dots)
+	}
+	fmt.Print(dots[0:i])
+	fmt.Println(a...)
+}
+
+func trace(p *parser, msg string) *parser {
+	p.printTrace(msg, "(")
+	p.indent++
+	return p
+}
+
+// Usage pattern: defer un(trace(p, "..."));
+func un(p *parser) {
+	p.indent--
+	p.printTrace(")")
+}
+
+// Advance to the next token.
+func (p *parser) next0() {
+	// Because of one-token look-ahead, print the previous token
+	// when tracing as it provides a more readable output. The
+	// very first token (!p.pos.IsValid()) is not initialized
+	// (it is token.ILLEGAL), so don't print it.
+	if p.trace && p.pos.IsValid() {
+		s := p.tok.String()
+		switch {
+		case p.tok.IsLiteral():
+			p.printTrace(s, p.lit)
+		case p.tok.IsOperator(), p.tok.IsKeyword():
+			p.printTrace("\"" + s + "\"")
+		default:
+			p.printTrace(s)
+		}
+	}
+
+	p.pos, p.tok, p.lit = p.scanner.Scan()
+}
+
+// Consume a comment and return it and the line on which it ends.
+func (p *parser) consumeComment() (comment *ast.Comment, endline int) {
+	// /*-style comments may end on a different line than where they start.
+	// Scan the comment for '\n' chars and adjust endline accordingly.
+	endline = p.file.Line(p.pos)
+	if p.lit[1] == '*' {
+		// don't use range here - no need to decode Unicode code points
+		for i := 0; i < len(p.lit); i++ {
+			if p.lit[i] == '\n' {
+				endline++
+			}
+		}
+	}
+
+	comment = &ast.Comment{p.pos, p.lit}
+	p.next0()
+
+	return
+}
+
+// Consume a group of adjacent comments, add it to the parser's
+// comments list, and return it together with the line at which
+// the last comment in the group ends. An empty line or non-comment
+// token terminates a comment group.
+//
+func (p *parser) consumeCommentGroup() (comments *ast.CommentGroup, endline int) {
+	var list []*ast.Comment
+	endline = p.file.Line(p.pos)
+	for p.tok == token.COMMENT && endline+1 >= p.file.Line(p.pos) {
+		var comment *ast.Comment
+		comment, endline = p.consumeComment()
+		list = append(list, comment)
+	}
+
+	// add comment group to the comments list
+	comments = &ast.CommentGroup{list}
+	p.comments = append(p.comments, comments)
+
+	return
+}
+
+// Advance to the next non-comment token. In the process, collect
+// any comment groups encountered, and remember the last lead
+// and line comments.
+// +// A lead comment is a comment group that starts and ends in a +// line without any other tokens and that is followed by a non-comment +// token on the line immediately after the comment group. +// +// A line comment is a comment group that follows a non-comment +// token on the same line, and that has no tokens after it on the line +// where it ends. +// +// Lead and line comments may be considered documentation that is +// stored in the AST. +// +func (p *parser) next() { + p.leadComment = nil + p.lineComment = nil + line := p.file.Line(p.pos) // current line + p.next0() + + if p.tok == token.COMMENT { + var comment *ast.CommentGroup + var endline int + + if p.file.Line(p.pos) == line { + // The comment is on same line as the previous token; it + // cannot be a lead comment but may be a line comment. + comment, endline = p.consumeCommentGroup() + if p.file.Line(p.pos) != endline { + // The next token is on a different line, thus + // the last comment group is a line comment. + p.lineComment = comment + } + } + + // consume successor comments, if any + endline = -1 + for p.tok == token.COMMENT { + comment, endline = p.consumeCommentGroup() + } + + if endline+1 == p.file.Line(p.pos) { + // The next token is following on the line immediately after the + // comment group, thus the last comment group is a lead comment. + p.leadComment = comment + } + } +} + +func (p *parser) error(pos token.Pos, msg string) { + p.Error(p.file.Position(pos), msg) +} + +func (p *parser) errorExpected(pos token.Pos, msg string) { + msg = "expected " + msg + if pos == p.pos { + // the error happened at the current position; + // make the error message more specific + if p.tok == token.SEMICOLON && p.lit[0] == '\n' { + msg += ", found newline" + } else { + msg += ", found '" + p.tok.String() + "'" + if p.tok.IsLiteral() { + msg += " " + p.lit + } + } + } + p.error(pos, msg) +} + +func (p *parser) expect(tok token.Token) token.Pos { + pos := p.pos + if p.tok != tok { + p.errorExpected(pos, "'"+tok.String()+"'") + } + p.next() // make progress + return pos +} + +func (p *parser) expectSemi() { + if p.tok != token.RPAREN && p.tok != token.RBRACE { + p.expect(token.SEMICOLON) + } +} + +func assert(cond bool, msg string) { + if !cond { + panic("go/parser internal error: " + msg) + } +} + +// ---------------------------------------------------------------------------- +// Identifiers + +func (p *parser) parseIdent() *ast.Ident { + pos := p.pos + name := "_" + if p.tok == token.IDENT { + name = p.lit + p.next() + } else { + p.expect(token.IDENT) // use expect() error handling + } + return &ast.Ident{pos, name, nil} +} + +func (p *parser) parseIdentList() (list []*ast.Ident) { + if p.trace { + defer un(trace(p, "IdentList")) + } + + list = append(list, p.parseIdent()) + for p.tok == token.COMMA { + p.next() + list = append(list, p.parseIdent()) + } + + return +} + +// ---------------------------------------------------------------------------- +// Common productions + +// If lhs is set, result list elements which are identifiers are not resolved. 
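+// (on the left-hand side of ':=' they may denote new variables, which
+// must not be bound to declarations in outer scopes; see parseLhsList)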
+func (p *parser) parseExprList(lhs bool) (list []ast.Expr) { + if p.trace { + defer un(trace(p, "ExpressionList")) + } + + list = append(list, p.checkExpr(p.parseExpr(lhs))) + for p.tok == token.COMMA { + p.next() + list = append(list, p.checkExpr(p.parseExpr(lhs))) + } + + return +} + +func (p *parser) parseLhsList() []ast.Expr { + list := p.parseExprList(true) + switch p.tok { + case token.DEFINE: + // lhs of a short variable declaration + p.shortVarDecl(p.makeIdentList(list)) + case token.COLON: + // lhs of a label declaration or a communication clause of a select + // statement (parseLhsList is not called when parsing the case clause + // of a switch statement): + // - labels are declared by the caller of parseLhsList + // - for communication clauses, if there is a stand-alone identifier + // followed by a colon, we have a syntax error; there is no need + // to resolve the identifier in that case + default: + // identifiers must be declared elsewhere + for _, x := range list { + p.resolve(x) + } + } + return list +} + +func (p *parser) parseRhsList() []ast.Expr { + return p.parseExprList(false) +} + +// ---------------------------------------------------------------------------- +// Types + +func (p *parser) parseType() ast.Expr { + if p.trace { + defer un(trace(p, "Type")) + } + + typ := p.tryType() + + if typ == nil { + pos := p.pos + p.errorExpected(pos, "type") + p.next() // make progress + return &ast.BadExpr{pos, p.pos} + } + + return typ +} + +// If the result is an identifier, it is not resolved. +func (p *parser) parseTypeName() ast.Expr { + if p.trace { + defer un(trace(p, "TypeName")) + } + + ident := p.parseIdent() + // don't resolve ident yet - it may be a parameter or field name + + if p.tok == token.PERIOD { + // ident is a package name + p.next() + p.resolve(ident) + sel := p.parseIdent() + return &ast.SelectorExpr{ident, sel} + } + + return ident +} + +func (p *parser) parseArrayType(ellipsisOk bool) ast.Expr { + if p.trace { + defer un(trace(p, "ArrayType")) + } + + lbrack := p.expect(token.LBRACK) + var len ast.Expr + if ellipsisOk && p.tok == token.ELLIPSIS { + len = &ast.Ellipsis{p.pos, nil} + p.next() + } else if p.tok != token.RBRACK { + len = p.parseRhs() + } + p.expect(token.RBRACK) + elt := p.parseType() + + return &ast.ArrayType{lbrack, len, elt} +} + +func (p *parser) makeIdentList(list []ast.Expr) []*ast.Ident { + idents := make([]*ast.Ident, len(list)) + for i, x := range list { + ident, isIdent := x.(*ast.Ident) + if !isIdent { + pos := x.(ast.Expr).Pos() + p.errorExpected(pos, "identifier") + ident = &ast.Ident{pos, "_", nil} + } + idents[i] = ident + } + return idents +} + +func (p *parser) parseFieldDecl(scope *ast.Scope) *ast.Field { + if p.trace { + defer un(trace(p, "FieldDecl")) + } + + doc := p.leadComment + + // fields + list, typ := p.parseVarList(false) + + // optional tag + var tag *ast.BasicLit + if p.tok == token.STRING { + tag = &ast.BasicLit{p.pos, p.tok, p.lit} + p.next() + } + + // analyze case + var idents []*ast.Ident + if typ != nil { + // IdentifierList Type + idents = p.makeIdentList(list) + } else { + // ["*"] TypeName (AnonymousField) + typ = list[0] // we always have at least one element + p.resolve(typ) + if n := len(list); n > 1 || !isTypeName(deref(typ)) { + pos := typ.Pos() + p.errorExpected(pos, "anonymous field") + typ = &ast.BadExpr{pos, list[n-1].End()} + } + } + + p.expectSemi() // call before accessing p.linecomment + + field := &ast.Field{doc, idents, typ, tag, p.lineComment} + p.declare(field, nil, scope, ast.Var, 
idents...) + + return field +} + +func (p *parser) parseStructType() *ast.StructType { + if p.trace { + defer un(trace(p, "StructType")) + } + + pos := p.expect(token.STRUCT) + lbrace := p.expect(token.LBRACE) + scope := ast.NewScope(nil) // struct scope + var list []*ast.Field + for p.tok == token.IDENT || p.tok == token.MUL || p.tok == token.LPAREN { + // a field declaration cannot start with a '(' but we accept + // it here for more robust parsing and better error messages + // (parseFieldDecl will check and complain if necessary) + list = append(list, p.parseFieldDecl(scope)) + } + rbrace := p.expect(token.RBRACE) + + // TODO(gri): store struct scope in AST + return &ast.StructType{pos, &ast.FieldList{lbrace, list, rbrace}, false} +} + +func (p *parser) parsePointerType() *ast.StarExpr { + if p.trace { + defer un(trace(p, "PointerType")) + } + + star := p.expect(token.MUL) + base := p.parseType() + + return &ast.StarExpr{star, base} +} + +func (p *parser) tryVarType(isParam bool) ast.Expr { + if isParam && p.tok == token.ELLIPSIS { + pos := p.pos + p.next() + typ := p.tryIdentOrType(isParam) // don't use parseType so we can provide better error message + if typ == nil { + p.error(pos, "'...' parameter is missing type") + typ = &ast.BadExpr{pos, p.pos} + } + if p.tok != token.RPAREN { + p.error(pos, "can use '...' with last parameter type only") + } + return &ast.Ellipsis{pos, typ} + } + return p.tryIdentOrType(false) +} + +func (p *parser) parseVarType(isParam bool) ast.Expr { + typ := p.tryVarType(isParam) + if typ == nil { + pos := p.pos + p.errorExpected(pos, "type") + p.next() // make progress + typ = &ast.BadExpr{pos, p.pos} + } + return typ +} + +func (p *parser) parseVarList(isParam bool) (list []ast.Expr, typ ast.Expr) { + if p.trace { + defer un(trace(p, "VarList")) + } + + // a list of identifiers looks like a list of type names + for { + // parseVarType accepts any type (including parenthesized ones) + // even though the syntax does not permit them here: we + // accept them all for more robust parsing and complain + // afterwards + list = append(list, p.parseVarType(isParam)) + if p.tok != token.COMMA { + break + } + p.next() + } + + // if we had a list of identifiers, it must be followed by a type + typ = p.tryVarType(isParam) + if typ != nil { + p.resolve(typ) + } + + return +} + +func (p *parser) parseParameterList(scope *ast.Scope, ellipsisOk bool) (params []*ast.Field) { + if p.trace { + defer un(trace(p, "ParameterList")) + } + + list, typ := p.parseVarList(ellipsisOk) + if typ != nil { + // IdentifierList Type + idents := p.makeIdentList(list) + field := &ast.Field{nil, idents, typ, nil, nil} + params = append(params, field) + // Go spec: The scope of an identifier denoting a function + // parameter or result variable is the function body. + p.declare(field, nil, scope, ast.Var, idents...) + if p.tok == token.COMMA { + p.next() + } + + for p.tok != token.RPAREN && p.tok != token.EOF { + idents := p.parseIdentList() + typ := p.parseVarType(ellipsisOk) + field := &ast.Field{nil, idents, typ, nil, nil} + params = append(params, field) + // Go spec: The scope of an identifier denoting a function + // parameter or result variable is the function body. + p.declare(field, nil, scope, ast.Var, idents...) 
+ if p.tok != token.COMMA { + break + } + p.next() + } + + } else { + // Type { "," Type } (anonymous parameters) + params = make([]*ast.Field, len(list)) + for i, x := range list { + p.resolve(x) + params[i] = &ast.Field{Type: x} + } + } + + return +} + +func (p *parser) parseParameters(scope *ast.Scope, ellipsisOk bool) *ast.FieldList { + if p.trace { + defer un(trace(p, "Parameters")) + } + + var params []*ast.Field + lparen := p.expect(token.LPAREN) + if p.tok != token.RPAREN { + params = p.parseParameterList(scope, ellipsisOk) + } + rparen := p.expect(token.RPAREN) + + return &ast.FieldList{lparen, params, rparen} +} + +func (p *parser) parseResult(scope *ast.Scope) *ast.FieldList { + if p.trace { + defer un(trace(p, "Result")) + } + + if p.tok == token.LPAREN { + return p.parseParameters(scope, false) + } + + typ := p.tryType() + if typ != nil { + list := make([]*ast.Field, 1) + list[0] = &ast.Field{Type: typ} + return &ast.FieldList{List: list} + } + + return nil +} + +func (p *parser) parseSignature(scope *ast.Scope) (params, results *ast.FieldList) { + if p.trace { + defer un(trace(p, "Signature")) + } + + params = p.parseParameters(scope, true) + results = p.parseResult(scope) + + return +} + +func (p *parser) parseFuncType() (*ast.FuncType, *ast.Scope) { + if p.trace { + defer un(trace(p, "FuncType")) + } + + pos := p.expect(token.FUNC) + scope := ast.NewScope(p.topScope) // function scope + params, results := p.parseSignature(scope) + + return &ast.FuncType{pos, params, results}, scope +} + +func (p *parser) parseMethodSpec(scope *ast.Scope) *ast.Field { + if p.trace { + defer un(trace(p, "MethodSpec")) + } + + doc := p.leadComment + var idents []*ast.Ident + var typ ast.Expr + x := p.parseTypeName() + if ident, isIdent := x.(*ast.Ident); isIdent && p.tok == token.LPAREN { + // method + idents = []*ast.Ident{ident} + scope := ast.NewScope(nil) // method scope + params, results := p.parseSignature(scope) + typ = &ast.FuncType{token.NoPos, params, results} + } else { + // embedded interface + typ = x + p.resolve(typ) + } + p.expectSemi() // call before accessing p.linecomment + + spec := &ast.Field{doc, idents, typ, nil, p.lineComment} + p.declare(spec, nil, scope, ast.Fun, idents...) + + return spec +} + +func (p *parser) parseInterfaceType() *ast.InterfaceType { + if p.trace { + defer un(trace(p, "InterfaceType")) + } + + pos := p.expect(token.INTERFACE) + lbrace := p.expect(token.LBRACE) + scope := ast.NewScope(nil) // interface scope + var list []*ast.Field + for p.tok == token.IDENT { + list = append(list, p.parseMethodSpec(scope)) + } + rbrace := p.expect(token.RBRACE) + + // TODO(gri): store interface scope in AST + return &ast.InterfaceType{pos, &ast.FieldList{lbrace, list, rbrace}, false} +} + +func (p *parser) parseMapType() *ast.MapType { + if p.trace { + defer un(trace(p, "MapType")) + } + + pos := p.expect(token.MAP) + p.expect(token.LBRACK) + key := p.parseType() + p.expect(token.RBRACK) + value := p.parseType() + + return &ast.MapType{pos, key, value} +} + +func (p *parser) parseChanType() *ast.ChanType { + if p.trace { + defer un(trace(p, "ChanType")) + } + + pos := p.pos + dir := ast.SEND | ast.RECV + if p.tok == token.CHAN { + p.next() + if p.tok == token.ARROW { + p.next() + dir = ast.SEND + } + } else { + p.expect(token.ARROW) + p.expect(token.CHAN) + dir = ast.RECV + } + value := p.parseType() + + return &ast.ChanType{pos, dir, value} +} + +// If the result is an identifier, it is not resolved. 
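+// (the caller decides whether and when to resolve it; compare tryType
+// below, which does resolve its result)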
+func (p *parser) tryIdentOrType(ellipsisOk bool) ast.Expr {
+	switch p.tok {
+	case token.IDENT:
+		return p.parseTypeName()
+	case token.LBRACK:
+		return p.parseArrayType(ellipsisOk)
+	case token.STRUCT:
+		return p.parseStructType()
+	case token.MUL:
+		return p.parsePointerType()
+	case token.FUNC:
+		typ, _ := p.parseFuncType()
+		return typ
+	case token.INTERFACE:
+		return p.parseInterfaceType()
+	case token.MAP:
+		return p.parseMapType()
+	case token.CHAN, token.ARROW:
+		return p.parseChanType()
+	case token.LPAREN:
+		lparen := p.pos
+		p.next()
+		typ := p.parseType()
+		rparen := p.expect(token.RPAREN)
+		return &ast.ParenExpr{lparen, typ, rparen}
+	}
+
+	// no type found
+	return nil
+}
+
+func (p *parser) tryType() ast.Expr {
+	typ := p.tryIdentOrType(false)
+	if typ != nil {
+		p.resolve(typ)
+	}
+	return typ
+}
+
+// ----------------------------------------------------------------------------
+// Blocks
+
+func (p *parser) parseStmtList() (list []ast.Stmt) {
+	if p.trace {
+		defer un(trace(p, "StatementList"))
+	}
+
+	for p.tok != token.CASE && p.tok != token.DEFAULT && p.tok != token.RBRACE && p.tok != token.EOF {
+		list = append(list, p.parseStmt())
+	}
+
+	return
+}
+
+func (p *parser) parseBody(scope *ast.Scope) *ast.BlockStmt {
+	if p.trace {
+		defer un(trace(p, "Body"))
+	}
+
+	lbrace := p.expect(token.LBRACE)
+	p.topScope = scope // open function scope
+	p.openLabelScope()
+	list := p.parseStmtList()
+	p.closeLabelScope()
+	p.closeScope()
+	rbrace := p.expect(token.RBRACE)
+
+	return &ast.BlockStmt{lbrace, list, rbrace}
+}
+
+func (p *parser) parseBlockStmt() *ast.BlockStmt {
+	if p.trace {
+		defer un(trace(p, "BlockStmt"))
+	}
+
+	lbrace := p.expect(token.LBRACE)
+	p.openScope()
+	list := p.parseStmtList()
+	p.closeScope()
+	rbrace := p.expect(token.RBRACE)
+
+	return &ast.BlockStmt{lbrace, list, rbrace}
+}
+
+// ----------------------------------------------------------------------------
+// Expressions
+
+func (p *parser) parseFuncTypeOrLit() ast.Expr {
+	if p.trace {
+		defer un(trace(p, "FuncTypeOrLit"))
+	}
+
+	typ, scope := p.parseFuncType()
+	if p.tok != token.LBRACE {
+		// function type only
+		return typ
+	}
+
+	p.exprLev++
+	body := p.parseBody(scope)
+	p.exprLev--
+
+	return &ast.FuncLit{typ, body}
+}
+
+// parseOperand may return an expression or a raw type (incl. array
+// types of the form [...]T). Callers must verify the result.
+// If lhs is set and the result is an identifier, it is not resolved.
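+// (for instance, for []int{1, 2} parseOperand returns the raw []int
+// type; parsePrimaryExpr then recognizes the '{' and hands the type to
+// parseLiteralValue to build the composite literal)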
+// +func (p *parser) parseOperand(lhs bool) ast.Expr { + if p.trace { + defer un(trace(p, "Operand")) + } + + switch p.tok { + case token.IDENT: + x := p.parseIdent() + if !lhs { + p.resolve(x) + } + return x + + case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING: + x := &ast.BasicLit{p.pos, p.tok, p.lit} + p.next() + return x + + case token.LPAREN: + lparen := p.pos + p.next() + p.exprLev++ + x := p.parseRhsOrType() // types may be parenthesized: (some type) + p.exprLev-- + rparen := p.expect(token.RPAREN) + return &ast.ParenExpr{lparen, x, rparen} + + case token.FUNC: + return p.parseFuncTypeOrLit() + + default: + if typ := p.tryIdentOrType(true); typ != nil { + // could be type for composite literal or conversion + _, isIdent := typ.(*ast.Ident) + assert(!isIdent, "type cannot be identifier") + return typ + } + } + + pos := p.pos + p.errorExpected(pos, "operand") + p.next() // make progress + return &ast.BadExpr{pos, p.pos} +} + +func (p *parser) parseSelector(x ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "Selector")) + } + + sel := p.parseIdent() + + return &ast.SelectorExpr{x, sel} +} + +func (p *parser) parseTypeAssertion(x ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "TypeAssertion")) + } + + p.expect(token.LPAREN) + var typ ast.Expr + if p.tok == token.TYPE { + // type switch: typ == nil + p.next() + } else { + typ = p.parseType() + } + p.expect(token.RPAREN) + + return &ast.TypeAssertExpr{x, typ} +} + +func (p *parser) parseIndexOrSlice(x ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "IndexOrSlice")) + } + + lbrack := p.expect(token.LBRACK) + p.exprLev++ + var low, high ast.Expr + isSlice := false + if p.tok != token.COLON { + low = p.parseRhs() + } + if p.tok == token.COLON { + isSlice = true + p.next() + if p.tok != token.RBRACK { + high = p.parseRhs() + } + } + p.exprLev-- + rbrack := p.expect(token.RBRACK) + + if isSlice { + return &ast.SliceExpr{x, lbrack, low, high, rbrack} + } + return &ast.IndexExpr{x, lbrack, low, rbrack} +} + +func (p *parser) parseCallOrConversion(fun ast.Expr) *ast.CallExpr { + if p.trace { + defer un(trace(p, "CallOrConversion")) + } + + lparen := p.expect(token.LPAREN) + p.exprLev++ + var list []ast.Expr + var ellipsis token.Pos + for p.tok != token.RPAREN && p.tok != token.EOF && !ellipsis.IsValid() { + list = append(list, p.parseRhsOrType()) // builtins may expect a type: make(some type, ...) 
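+		// (a trailing "..." may follow the last argument, as in f(list...);
+		// only its position is recorded here)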
+		if p.tok == token.ELLIPSIS {
+			ellipsis = p.pos
+			p.next()
+		}
+		if p.tok != token.COMMA {
+			break
+		}
+		p.next()
+	}
+	p.exprLev--
+	rparen := p.expect(token.RPAREN)
+
+	return &ast.CallExpr{fun, lparen, list, ellipsis, rparen}
+}
+
+func (p *parser) parseElement(keyOk bool) ast.Expr {
+	if p.trace {
+		defer un(trace(p, "Element"))
+	}
+
+	if p.tok == token.LBRACE {
+		return p.parseLiteralValue(nil)
+	}
+
+	x := p.checkExpr(p.parseExpr(keyOk)) // don't resolve if map key
+	if keyOk {
+		if p.tok == token.COLON {
+			colon := p.pos
+			p.next()
+			return &ast.KeyValueExpr{x, colon, p.parseElement(false)}
+		}
+		p.resolve(x) // not a map key
+	}
+
+	return x
+}
+
+func (p *parser) parseElementList() (list []ast.Expr) {
+	if p.trace {
+		defer un(trace(p, "ElementList"))
+	}
+
+	for p.tok != token.RBRACE && p.tok != token.EOF {
+		list = append(list, p.parseElement(true))
+		if p.tok != token.COMMA {
+			break
+		}
+		p.next()
+	}
+
+	return
+}
+
+func (p *parser) parseLiteralValue(typ ast.Expr) ast.Expr {
+	if p.trace {
+		defer un(trace(p, "LiteralValue"))
+	}
+
+	lbrace := p.expect(token.LBRACE)
+	var elts []ast.Expr
+	p.exprLev++
+	if p.tok != token.RBRACE {
+		elts = p.parseElementList()
+	}
+	p.exprLev--
+	rbrace := p.expect(token.RBRACE)
+	return &ast.CompositeLit{typ, lbrace, elts, rbrace}
+}
+
+// checkExpr checks that x is an expression (and not a type).
+func (p *parser) checkExpr(x ast.Expr) ast.Expr {
+	switch t := unparen(x).(type) {
+	case *ast.BadExpr:
+	case *ast.Ident:
+	case *ast.BasicLit:
+	case *ast.FuncLit:
+	case *ast.CompositeLit:
+	case *ast.ParenExpr:
+		panic("unreachable")
+	case *ast.SelectorExpr:
+	case *ast.IndexExpr:
+	case *ast.SliceExpr:
+	case *ast.TypeAssertExpr:
+		// If t.Type == nil we have a type assertion of the form
+		// y.(type), which is only allowed in type switch expressions.
+		// It's hard to exclude those here except when we are parsing
+		// a type switch; instead, be lenient and test this in the
+		// type checker.
+	case *ast.CallExpr:
+	case *ast.StarExpr:
+	case *ast.UnaryExpr:
+	case *ast.BinaryExpr:
+	default:
+		// all other nodes are not proper expressions
+		p.errorExpected(x.Pos(), "expression")
+		x = &ast.BadExpr{x.Pos(), x.End()}
+	}
+	return x
+}
+
+// isTypeName returns true iff x is a (qualified) TypeName.
+func isTypeName(x ast.Expr) bool {
+	switch t := x.(type) {
+	case *ast.BadExpr:
+	case *ast.Ident:
+	case *ast.SelectorExpr:
+		_, isIdent := t.X.(*ast.Ident)
+		return isIdent
+	default:
+		return false // all other nodes are not type names
+	}
+	return true
+}
+
+// isLiteralType returns true iff x is a legal composite literal type.
+func isLiteralType(x ast.Expr) bool {
+	switch t := x.(type) {
+	case *ast.BadExpr:
+	case *ast.Ident:
+	case *ast.SelectorExpr:
+		_, isIdent := t.X.(*ast.Ident)
+		return isIdent
+	case *ast.ArrayType:
+	case *ast.StructType:
+	case *ast.MapType:
+	default:
+		return false // all other nodes are not legal composite literal types
+	}
+	return true
+}
+
+// If x is of the form *T, deref returns T, otherwise it returns x.
+func deref(x ast.Expr) ast.Expr {
+	if p, isPtr := x.(*ast.StarExpr); isPtr {
+		x = p.X
+	}
+	return x
+}
+
+// If x is of the form (T), unparen returns unparen(T), otherwise it returns x.
+func unparen(x ast.Expr) ast.Expr {
+	if p, isParen := x.(*ast.ParenExpr); isParen {
+		x = unparen(p.X)
+	}
+	return x
+}
+
+// checkExprOrType checks that x is an expression or a type
+// (and not a raw type such as [...]T).
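+// (for example, [...]int is legal only as part of a composite literal
+// such as [...]int{1, 2, 3}; as a stand-alone operand it is rejected
+// below with "expected array length, found '...'")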
+// +func (p *parser) checkExprOrType(x ast.Expr) ast.Expr { + switch t := unparen(x).(type) { + case *ast.ParenExpr: + panic("unreachable") + case *ast.UnaryExpr: + case *ast.ArrayType: + if len, isEllipsis := t.Len.(*ast.Ellipsis); isEllipsis { + p.error(len.Pos(), "expected array length, found '...'") + x = &ast.BadExpr{x.Pos(), x.End()} + } + } + + // all other nodes are expressions or types + return x +} + +// If lhs is set and the result is an identifier, it is not resolved. +func (p *parser) parsePrimaryExpr(lhs bool) ast.Expr { + if p.trace { + defer un(trace(p, "PrimaryExpr")) + } + + x := p.parseOperand(lhs) +L: + for { + switch p.tok { + case token.PERIOD: + p.next() + if lhs { + p.resolve(x) + } + switch p.tok { + case token.IDENT: + x = p.parseSelector(p.checkExpr(x)) + case token.LPAREN: + x = p.parseTypeAssertion(p.checkExpr(x)) + default: + pos := p.pos + p.next() // make progress + p.errorExpected(pos, "selector or type assertion") + x = &ast.BadExpr{pos, p.pos} + } + case token.LBRACK: + if lhs { + p.resolve(x) + } + x = p.parseIndexOrSlice(p.checkExpr(x)) + case token.LPAREN: + if lhs { + p.resolve(x) + } + x = p.parseCallOrConversion(p.checkExprOrType(x)) + case token.LBRACE: + if isLiteralType(x) && (p.exprLev >= 0 || !isTypeName(x)) { + if lhs { + p.resolve(x) + } + x = p.parseLiteralValue(x) + } else { + break L + } + default: + break L + } + lhs = false // no need to try to resolve again + } + + return x +} + +// If lhs is set and the result is an identifier, it is not resolved. +func (p *parser) parseUnaryExpr(lhs bool) ast.Expr { + if p.trace { + defer un(trace(p, "UnaryExpr")) + } + + switch p.tok { + case token.ADD, token.SUB, token.NOT, token.XOR, token.AND: + pos, op := p.pos, p.tok + p.next() + x := p.parseUnaryExpr(false) + return &ast.UnaryExpr{pos, op, p.checkExpr(x)} + + case token.ARROW: + // channel type or receive expression + pos := p.pos + p.next() + if p.tok == token.CHAN { + p.next() + value := p.parseType() + return &ast.ChanType{pos, ast.RECV, value} + } + + x := p.parseUnaryExpr(false) + return &ast.UnaryExpr{pos, token.ARROW, p.checkExpr(x)} + + case token.MUL: + // pointer type or unary "*" expression + pos := p.pos + p.next() + x := p.parseUnaryExpr(false) + return &ast.StarExpr{pos, p.checkExprOrType(x)} + } + + return p.parsePrimaryExpr(lhs) +} + +// If lhs is set and the result is an identifier, it is not resolved. +func (p *parser) parseBinaryExpr(lhs bool, prec1 int) ast.Expr { + if p.trace { + defer un(trace(p, "BinaryExpr")) + } + + x := p.parseUnaryExpr(lhs) + for prec := p.tok.Precedence(); prec >= prec1; prec-- { + for p.tok.Precedence() == prec { + pos, op := p.pos, p.tok + p.next() + if lhs { + p.resolve(x) + lhs = false + } + y := p.parseBinaryExpr(false, prec+1) + x = &ast.BinaryExpr{p.checkExpr(x), pos, op, p.checkExpr(y)} + } + } + + return x +} + +// If lhs is set and the result is an identifier, it is not resolved. +// The result may be a type or even a raw type ([...]int). Callers must +// check the result (using checkExpr or checkExprOrType), depending on +// context. 
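+// (parseBinaryExpr implements precedence climbing: in 1+2*3, the
+// recursive call made at the precedence level of '*' consumes 2*3
+// before the '+' expression is completed)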
+func (p *parser) parseExpr(lhs bool) ast.Expr { + if p.trace { + defer un(trace(p, "Expression")) + } + + return p.parseBinaryExpr(lhs, token.LowestPrec+1) +} + +func (p *parser) parseRhs() ast.Expr { + return p.checkExpr(p.parseExpr(false)) +} + +func (p *parser) parseRhsOrType() ast.Expr { + return p.checkExprOrType(p.parseExpr(false)) +} + +// ---------------------------------------------------------------------------- +// Statements + +// Parsing modes for parseSimpleStmt. +const ( + basic = iota + labelOk + rangeOk +) + +// parseSimpleStmt returns true as 2nd result if it parsed the assignment +// of a range clause (with mode == rangeOk). The returned statement is an +// assignment with a right-hand side that is a single unary expression of +// the form "range x". No guarantees are given for the left-hand side. +func (p *parser) parseSimpleStmt(mode int) (ast.Stmt, bool) { + if p.trace { + defer un(trace(p, "SimpleStmt")) + } + + x := p.parseLhsList() + + switch p.tok { + case + token.DEFINE, token.ASSIGN, token.ADD_ASSIGN, + token.SUB_ASSIGN, token.MUL_ASSIGN, token.QUO_ASSIGN, + token.REM_ASSIGN, token.AND_ASSIGN, token.OR_ASSIGN, + token.XOR_ASSIGN, token.SHL_ASSIGN, token.SHR_ASSIGN, token.AND_NOT_ASSIGN: + // assignment statement, possibly part of a range clause + pos, tok := p.pos, p.tok + p.next() + var y []ast.Expr + isRange := false + if mode == rangeOk && p.tok == token.RANGE && (tok == token.DEFINE || tok == token.ASSIGN) { + pos := p.pos + p.next() + y = []ast.Expr{&ast.UnaryExpr{pos, token.RANGE, p.parseRhs()}} + isRange = true + } else { + y = p.parseRhsList() + } + return &ast.AssignStmt{x, pos, tok, y}, isRange + } + + if len(x) > 1 { + p.errorExpected(x[0].Pos(), "1 expression") + // continue with first expression + } + + switch p.tok { + case token.COLON: + // labeled statement + colon := p.pos + p.next() + if label, isIdent := x[0].(*ast.Ident); mode == labelOk && isIdent { + // Go spec: The scope of a label is the body of the function + // in which it is declared and excludes the body of any nested + // function. + stmt := &ast.LabeledStmt{label, colon, p.parseStmt()} + p.declare(stmt, nil, p.labelScope, ast.Lbl, label) + return stmt, false + } + // The label declaration typically starts at x[0].Pos(), but the label + // declaration may be erroneous due to a token after that position (and + // before the ':'). If SpuriousErrors is not set, the (only) error re- + // ported for the line is the illegal label error instead of the token + // before the ':' that caused the problem. Thus, use the (latest) colon + // position for error reporting. 
+ p.error(colon, "illegal label declaration") + return &ast.BadStmt{x[0].Pos(), colon + 1}, false + + case token.ARROW: + // send statement + arrow := p.pos + p.next() // consume "<-" + y := p.parseRhs() + return &ast.SendStmt{x[0], arrow, y}, false + + case token.INC, token.DEC: + // increment or decrement + s := &ast.IncDecStmt{x[0], p.pos, p.tok} + p.next() // consume "++" or "--" + return s, false + } + + // expression + return &ast.ExprStmt{x[0]}, false +} + +func (p *parser) parseCallExpr() *ast.CallExpr { + x := p.parseRhsOrType() // could be a conversion: (some type)(x) + if call, isCall := x.(*ast.CallExpr); isCall { + return call + } + p.errorExpected(x.Pos(), "function/method call") + return nil +} + +func (p *parser) parseGoStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "GoStmt")) + } + + pos := p.expect(token.GO) + call := p.parseCallExpr() + p.expectSemi() + if call == nil { + return &ast.BadStmt{pos, pos + 2} // len("go") + } + + return &ast.GoStmt{pos, call} +} + +func (p *parser) parseDeferStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "DeferStmt")) + } + + pos := p.expect(token.DEFER) + call := p.parseCallExpr() + p.expectSemi() + if call == nil { + return &ast.BadStmt{pos, pos + 5} // len("defer") + } + + return &ast.DeferStmt{pos, call} +} + +func (p *parser) parseReturnStmt() *ast.ReturnStmt { + if p.trace { + defer un(trace(p, "ReturnStmt")) + } + + pos := p.pos + p.expect(token.RETURN) + var x []ast.Expr + if p.tok != token.SEMICOLON && p.tok != token.RBRACE { + x = p.parseRhsList() + } + p.expectSemi() + + return &ast.ReturnStmt{pos, x} +} + +func (p *parser) parseBranchStmt(tok token.Token) *ast.BranchStmt { + if p.trace { + defer un(trace(p, "BranchStmt")) + } + + pos := p.expect(tok) + var label *ast.Ident + if tok != token.FALLTHROUGH && p.tok == token.IDENT { + label = p.parseIdent() + // add to list of unresolved targets + n := len(p.targetStack) - 1 + p.targetStack[n] = append(p.targetStack[n], label) + } + p.expectSemi() + + return &ast.BranchStmt{pos, tok, label} +} + +func (p *parser) makeExpr(s ast.Stmt) ast.Expr { + if s == nil { + return nil + } + if es, isExpr := s.(*ast.ExprStmt); isExpr { + return p.checkExpr(es.X) + } + p.error(s.Pos(), "expected condition, found simple statement") + return &ast.BadExpr{s.Pos(), s.End()} +} + +func (p *parser) parseIfStmt() *ast.IfStmt { + if p.trace { + defer un(trace(p, "IfStmt")) + } + + pos := p.expect(token.IF) + p.openScope() + defer p.closeScope() + + var s ast.Stmt + var x ast.Expr + { + prevLev := p.exprLev + p.exprLev = -1 + if p.tok == token.SEMICOLON { + p.next() + x = p.parseRhs() + } else { + s, _ = p.parseSimpleStmt(basic) + if p.tok == token.SEMICOLON { + p.next() + x = p.parseRhs() + } else { + x = p.makeExpr(s) + s = nil + } + } + p.exprLev = prevLev + } + + body := p.parseBlockStmt() + var else_ ast.Stmt + if p.tok == token.ELSE { + p.next() + else_ = p.parseStmt() + } else { + p.expectSemi() + } + + return &ast.IfStmt{pos, s, x, body, else_} +} + +func (p *parser) parseTypeList() (list []ast.Expr) { + if p.trace { + defer un(trace(p, "TypeList")) + } + + list = append(list, p.parseType()) + for p.tok == token.COMMA { + p.next() + list = append(list, p.parseType()) + } + + return +} + +func (p *parser) parseCaseClause(exprSwitch bool) *ast.CaseClause { + if p.trace { + defer un(trace(p, "CaseClause")) + } + + pos := p.pos + var list []ast.Expr + if p.tok == token.CASE { + p.next() + if exprSwitch { + list = p.parseRhsList() + } else { + list = p.parseTypeList() + } + } else { + 
p.expect(token.DEFAULT) + } + + colon := p.expect(token.COLON) + p.openScope() + body := p.parseStmtList() + p.closeScope() + + return &ast.CaseClause{pos, list, colon, body} +} + +func isExprSwitch(s ast.Stmt) bool { + if s == nil { + return true + } + if e, ok := s.(*ast.ExprStmt); ok { + if a, ok := e.X.(*ast.TypeAssertExpr); ok { + return a.Type != nil // regular type assertion + } + return true + } + return false +} + +func (p *parser) parseSwitchStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "SwitchStmt")) + } + + pos := p.expect(token.SWITCH) + p.openScope() + defer p.closeScope() + + var s1, s2 ast.Stmt + if p.tok != token.LBRACE { + prevLev := p.exprLev + p.exprLev = -1 + if p.tok != token.SEMICOLON { + s2, _ = p.parseSimpleStmt(basic) + } + if p.tok == token.SEMICOLON { + p.next() + s1 = s2 + s2 = nil + if p.tok != token.LBRACE { + s2, _ = p.parseSimpleStmt(basic) + } + } + p.exprLev = prevLev + } + + exprSwitch := isExprSwitch(s2) + lbrace := p.expect(token.LBRACE) + var list []ast.Stmt + for p.tok == token.CASE || p.tok == token.DEFAULT { + list = append(list, p.parseCaseClause(exprSwitch)) + } + rbrace := p.expect(token.RBRACE) + p.expectSemi() + body := &ast.BlockStmt{lbrace, list, rbrace} + + if exprSwitch { + return &ast.SwitchStmt{pos, s1, p.makeExpr(s2), body} + } + // type switch + // TODO(gri): do all the checks! + return &ast.TypeSwitchStmt{pos, s1, s2, body} +} + +func (p *parser) parseCommClause() *ast.CommClause { + if p.trace { + defer un(trace(p, "CommClause")) + } + + p.openScope() + pos := p.pos + var comm ast.Stmt + if p.tok == token.CASE { + p.next() + lhs := p.parseLhsList() + if p.tok == token.ARROW { + // SendStmt + if len(lhs) > 1 { + p.errorExpected(lhs[0].Pos(), "1 expression") + // continue with first expression + } + arrow := p.pos + p.next() + rhs := p.parseRhs() + comm = &ast.SendStmt{lhs[0], arrow, rhs} + } else { + // RecvStmt + pos := p.pos + tok := p.tok + var rhs ast.Expr + if tok == token.ASSIGN || tok == token.DEFINE { + // RecvStmt with assignment + if len(lhs) > 2 { + p.errorExpected(lhs[0].Pos(), "1 or 2 expressions") + // continue with first two expressions + lhs = lhs[0:2] + } + p.next() + rhs = p.parseRhs() + } else { + // rhs must be single receive operation + if len(lhs) > 1 { + p.errorExpected(lhs[0].Pos(), "1 expression") + // continue with first expression + } + rhs = lhs[0] + lhs = nil // there is no lhs + } + if lhs != nil { + comm = &ast.AssignStmt{lhs, pos, tok, []ast.Expr{rhs}} + } else { + comm = &ast.ExprStmt{rhs} + } + } + } else { + p.expect(token.DEFAULT) + } + + colon := p.expect(token.COLON) + body := p.parseStmtList() + p.closeScope() + + return &ast.CommClause{pos, comm, colon, body} +} + +func (p *parser) parseSelectStmt() *ast.SelectStmt { + if p.trace { + defer un(trace(p, "SelectStmt")) + } + + pos := p.expect(token.SELECT) + lbrace := p.expect(token.LBRACE) + var list []ast.Stmt + for p.tok == token.CASE || p.tok == token.DEFAULT { + list = append(list, p.parseCommClause()) + } + rbrace := p.expect(token.RBRACE) + p.expectSemi() + body := &ast.BlockStmt{lbrace, list, rbrace} + + return &ast.SelectStmt{pos, body} +} + +func (p *parser) parseForStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "ForStmt")) + } + + pos := p.expect(token.FOR) + p.openScope() + defer p.closeScope() + + var s1, s2, s3 ast.Stmt + var isRange bool + if p.tok != token.LBRACE { + prevLev := p.exprLev + p.exprLev = -1 + if p.tok != token.SEMICOLON { + s2, isRange = p.parseSimpleStmt(rangeOk) + } + if !isRange && p.tok == 
token.SEMICOLON { + p.next() + s1 = s2 + s2 = nil + if p.tok != token.SEMICOLON { + s2, _ = p.parseSimpleStmt(basic) + } + p.expectSemi() + if p.tok != token.LBRACE { + s3, _ = p.parseSimpleStmt(basic) + } + } + p.exprLev = prevLev + } + + body := p.parseBlockStmt() + p.expectSemi() + + if isRange { + as := s2.(*ast.AssignStmt) + // check lhs + var key, value ast.Expr + switch len(as.Lhs) { + case 2: + key, value = as.Lhs[0], as.Lhs[1] + case 1: + key = as.Lhs[0] + default: + p.errorExpected(as.Lhs[0].Pos(), "1 or 2 expressions") + return &ast.BadStmt{pos, body.End()} + } + // parseSimpleStmt returned a right-hand side that + // is a single unary expression of the form "range x" + x := as.Rhs[0].(*ast.UnaryExpr).X + return &ast.RangeStmt{pos, key, value, as.TokPos, as.Tok, x, body} + } + + // regular for statement + return &ast.ForStmt{pos, s1, p.makeExpr(s2), s3, body} +} + +func (p *parser) parseStmt() (s ast.Stmt) { + if p.trace { + defer un(trace(p, "Statement")) + } + + switch p.tok { + case token.CONST, token.TYPE, token.VAR: + s = &ast.DeclStmt{p.parseDecl()} + case + // tokens that may start a top-level expression + token.IDENT, token.INT, token.FLOAT, token.CHAR, token.STRING, token.FUNC, token.LPAREN, // operand + token.LBRACK, token.STRUCT, // composite type + token.MUL, token.AND, token.ARROW, token.ADD, token.SUB, token.XOR: // unary operators + s, _ = p.parseSimpleStmt(labelOk) + // because of the required look-ahead, labeled statements are + // parsed by parseSimpleStmt - don't expect a semicolon after + // them + if _, isLabeledStmt := s.(*ast.LabeledStmt); !isLabeledStmt { + p.expectSemi() + } + case token.GO: + s = p.parseGoStmt() + case token.DEFER: + s = p.parseDeferStmt() + case token.RETURN: + s = p.parseReturnStmt() + case token.BREAK, token.CONTINUE, token.GOTO, token.FALLTHROUGH: + s = p.parseBranchStmt(p.tok) + case token.LBRACE: + s = p.parseBlockStmt() + p.expectSemi() + case token.IF: + s = p.parseIfStmt() + case token.SWITCH: + s = p.parseSwitchStmt() + case token.SELECT: + s = p.parseSelectStmt() + case token.FOR: + s = p.parseForStmt() + case token.SEMICOLON: + s = &ast.EmptyStmt{p.pos} + p.next() + case token.RBRACE: + // a semicolon may be omitted before a closing "}" + s = &ast.EmptyStmt{p.pos} + default: + // no statement found + pos := p.pos + p.errorExpected(pos, "statement") + p.next() // make progress + s = &ast.BadStmt{pos, p.pos} + } + + return +} + +// ---------------------------------------------------------------------------- +// Declarations + +type parseSpecFunction func(p *parser, doc *ast.CommentGroup, iota int) ast.Spec + +func parseImportSpec(p *parser, doc *ast.CommentGroup, _ int) ast.Spec { + if p.trace { + defer un(trace(p, "ImportSpec")) + } + + var ident *ast.Ident + switch p.tok { + case token.PERIOD: + ident = &ast.Ident{p.pos, ".", nil} + p.next() + case token.IDENT: + ident = p.parseIdent() + } + + var path *ast.BasicLit + if p.tok == token.STRING { + path = &ast.BasicLit{p.pos, p.tok, p.lit} + p.next() + } else { + p.expect(token.STRING) // use expect() error handling + } + p.expectSemi() // call before accessing p.linecomment + + // collect imports + spec := &ast.ImportSpec{doc, ident, path, p.lineComment} + p.imports = append(p.imports, spec) + + return spec +} + +func parseConstSpec(p *parser, doc *ast.CommentGroup, iota int) ast.Spec { + if p.trace { + defer un(trace(p, "ConstSpec")) + } + + idents := p.parseIdentList() + typ := p.tryType() + var values []ast.Expr + if typ != nil || p.tok == token.ASSIGN || iota == 0 { + 
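+		// (the first ConstSpec in a group must provide values; later
+		// specs may omit both type and values and implicitly repeat
+		// the previous list, hence the iota == 0 test above)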
p.expect(token.ASSIGN) + values = p.parseRhsList() + } + p.expectSemi() // call before accessing p.lineComment + + // Go spec: The scope of a constant or variable identifier declared inside + // a function begins at the end of the ConstSpec or VarSpec and ends at + // the end of the innermost containing block. + // (Global identifiers are resolved in a separate phase after parsing.) + spec := &ast.ValueSpec{doc, idents, typ, values, p.lineComment} + p.declare(spec, iota, p.topScope, ast.Con, idents...) + + return spec +} + +func parseTypeSpec(p *parser, doc *ast.CommentGroup, _ int) ast.Spec { + if p.trace { + defer un(trace(p, "TypeSpec")) + } + + ident := p.parseIdent() + + // Go spec: The scope of a type identifier declared inside a function begins + // at the identifier in the TypeSpec and ends at the end of the innermost + // containing block. + // (Global identifiers are resolved in a separate phase after parsing.) + spec := &ast.TypeSpec{doc, ident, nil, nil} + p.declare(spec, nil, p.topScope, ast.Typ, ident) + + spec.Type = p.parseType() + p.expectSemi() // call before accessing p.lineComment + spec.Comment = p.lineComment + + return spec +} + +func parseVarSpec(p *parser, doc *ast.CommentGroup, _ int) ast.Spec { + if p.trace { + defer un(trace(p, "VarSpec")) + } + + idents := p.parseIdentList() + typ := p.tryType() + var values []ast.Expr + if typ == nil || p.tok == token.ASSIGN { + p.expect(token.ASSIGN) + values = p.parseRhsList() + } + p.expectSemi() // call before accessing p.lineComment + + // Go spec: The scope of a constant or variable identifier declared inside + // a function begins at the end of the ConstSpec or VarSpec and ends at + // the end of the innermost containing block. + // (Global identifiers are resolved in a separate phase after parsing.) + spec := &ast.ValueSpec{doc, idents, typ, values, p.lineComment} + p.declare(spec, nil, p.topScope, ast.Var, idents...) 
+ + return spec +} + +func (p *parser) parseGenDecl(keyword token.Token, f parseSpecFunction) *ast.GenDecl { + if p.trace { + defer un(trace(p, "GenDecl("+keyword.String()+")")) + } + + doc := p.leadComment + pos := p.expect(keyword) + var lparen, rparen token.Pos + var list []ast.Spec + if p.tok == token.LPAREN { + lparen = p.pos + p.next() + for iota := 0; p.tok != token.RPAREN && p.tok != token.EOF; iota++ { + list = append(list, f(p, p.leadComment, iota)) + } + rparen = p.expect(token.RPAREN) + p.expectSemi() + } else { + list = append(list, f(p, nil, 0)) + } + + return &ast.GenDecl{doc, pos, keyword, lparen, list, rparen} +} + +func (p *parser) parseReceiver(scope *ast.Scope) *ast.FieldList { + if p.trace { + defer un(trace(p, "Receiver")) + } + + pos := p.pos + par := p.parseParameters(scope, false) + + // must have exactly one receiver + if par.NumFields() != 1 { + p.errorExpected(pos, "exactly one receiver") + // TODO determine a better range for BadExpr below + par.List = []*ast.Field{&ast.Field{Type: &ast.BadExpr{pos, pos}}} + return par + } + + // recv type must be of the form ["*"] identifier + recv := par.List[0] + base := deref(recv.Type) + if _, isIdent := base.(*ast.Ident); !isIdent { + p.errorExpected(base.Pos(), "(unqualified) identifier") + par.List = []*ast.Field{&ast.Field{Type: &ast.BadExpr{recv.Pos(), recv.End()}}} + } + + return par +} + +func (p *parser) parseFuncDecl() *ast.FuncDecl { + if p.trace { + defer un(trace(p, "FunctionDecl")) + } + + doc := p.leadComment + pos := p.expect(token.FUNC) + scope := ast.NewScope(p.topScope) // function scope + + var recv *ast.FieldList + if p.tok == token.LPAREN { + recv = p.parseReceiver(scope) + } + + ident := p.parseIdent() + + params, results := p.parseSignature(scope) + + var body *ast.BlockStmt + if p.tok == token.LBRACE { + body = p.parseBody(scope) + } + p.expectSemi() + + decl := &ast.FuncDecl{doc, recv, ident, &ast.FuncType{pos, params, results}, body} + if recv == nil { + // Go spec: The scope of an identifier denoting a constant, type, + // variable, or function (but not method) declared at top level + // (outside any function) is the package block. + // + // init() functions cannot be referred to and there may + // be more than one - don't put them in the pkgScope + if ident.Name != "init" { + p.declare(decl, nil, p.pkgScope, ast.Fun, ident) + } + } + + return decl +} + +func (p *parser) parseDecl() ast.Decl { + if p.trace { + defer un(trace(p, "Declaration")) + } + + var f parseSpecFunction + switch p.tok { + case token.CONST: + f = parseConstSpec + + case token.TYPE: + f = parseTypeSpec + + case token.VAR: + f = parseVarSpec + + case token.FUNC: + return p.parseFuncDecl() + + default: + pos := p.pos + p.errorExpected(pos, "declaration") + p.next() // make progress + decl := &ast.BadDecl{pos, p.pos} + return decl + } + + return p.parseGenDecl(p.tok, f) +} + +func (p *parser) parseDeclList() (list []ast.Decl) { + if p.trace { + defer un(trace(p, "DeclList")) + } + + for p.tok != token.EOF { + list = append(list, p.parseDecl()) + } + + return +} + +// ---------------------------------------------------------------------------- +// Source files + +func (p *parser) parseFile() *ast.File { + if p.trace { + defer un(trace(p, "File")) + } + + // package clause + doc := p.leadComment + pos := p.expect(token.PACKAGE) + // Go spec: The package clause is not a declaration; + // the package name does not appear in any scope. 
+ ident := p.parseIdent() + if ident.Name == "_" { + p.error(p.pos, "invalid package name _") + } + p.expectSemi() + + var decls []ast.Decl + + // Don't bother parsing the rest if we had errors already. + // Likely not a Go source file at all. + + if p.ErrorCount() == 0 && p.mode&PackageClauseOnly == 0 { + // import decls + for p.tok == token.IMPORT { + decls = append(decls, p.parseGenDecl(token.IMPORT, parseImportSpec)) + } + + if p.mode&ImportsOnly == 0 { + // rest of package body + for p.tok != token.EOF { + decls = append(decls, p.parseDecl()) + } + } + } + + assert(p.topScope == p.pkgScope, "imbalanced scopes") + + // resolve global identifiers within the same file + i := 0 + for _, ident := range p.unresolved { + // i <= index for current ident + assert(ident.Obj == unresolved, "object already resolved") + ident.Obj = p.pkgScope.Lookup(ident.Name) // also removes unresolved sentinel + if ident.Obj == nil { + p.unresolved[i] = ident + i++ + } + } + + return &ast.File{doc, pos, ident, decls, p.pkgScope, p.imports, p.unresolved[0:i], p.comments} +} diff --git a/src/pkg/go/parser/parser_test.go b/src/pkg/go/parser/parser_test.go new file mode 100644 index 000000000..39a78e515 --- /dev/null +++ b/src/pkg/go/parser/parser_test.go @@ -0,0 +1,130 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package parser + +import ( + "go/token" + "os" + "testing" +) + +var fset = token.NewFileSet() + +var illegalInputs = []interface{}{ + nil, + 3.14, + []byte(nil), + "foo!", + `package p; func f() { if /* should have condition */ {} };`, + `package p; func f() { if ; /* should have condition */ {} };`, + `package p; func f() { if f(); /* should have condition */ {} };`, + `package p; const c; /* should have constant value */`, + `package p; func f() { if _ = range x; true {} };`, + `package p; func f() { switch _ = range x; true {} };`, + `package p; func f() { for _ = range x ; ; {} };`, + `package p; func f() { for ; ; _ = range x {} };`, + `package p; func f() { for ; _ = range x ; {} };`, + `package p; var a = [1]int; /* illegal expression */`, + `package p; var a = [...]int; /* illegal expression */`, + `package p; var a = struct{} /* illegal expression */`, + `package p; var a = func(); /* illegal expression */`, + `package p; var a = interface{} /* illegal expression */`, + `package p; var a = []int /* illegal expression */`, + `package p; var a = map[int]int /* illegal expression */`, + `package p; var a = chan int; /* illegal expression */`, + `package p; var a = []int{[]int}; /* illegal expression */`, + `package p; var a = ([]int); /* illegal expression */`, + `package p; var a = a[[]int:[]int]; /* illegal expression */`, + `package p; var a = <- chan int; /* illegal expression */`, + `package p; func f() { select { case _ <- chan int: } };`, +} + +func TestParseIllegalInputs(t *testing.T) { + for _, src := range illegalInputs { + _, err := ParseFile(fset, "", src, 0) + if err == nil { + t.Errorf("ParseFile(%v) should have failed", src) + } + } +} + +var validPrograms = []interface{}{ + "package p\n", + `package p;`, + `package p; import "fmt"; func f() { fmt.Println("Hello, World!") };`, + `package p; func f() { if f(T{}) {} };`, + `package p; func f() { _ = (<-chan int)(x) };`, + `package p; func f() { _ = (<-chan <-chan int)(x) };`, + `package p; func f(func() func() func());`, + `package p; func f(...T);`, + `package p; func f(float, ...int);`, + `package p; func f(x 
int, a ...int) { f(0, a...); f(1, a...,) };`, + `package p; type T []int; var a []bool; func f() { if a[T{42}[0]] {} };`, + `package p; type T []int; func g(int) bool { return true }; func f() { if g(T{42}[0]) {} };`, + `package p; type T []int; func f() { for _ = range []int{T{42}[0]} {} };`, + `package p; var a = T{{1, 2}, {3, 4}}`, + `package p; func f() { select { case <- c: case c <- d: case c <- <- d: case <-c <- d: } };`, + `package p; func f() { select { case x := (<-c): } };`, + `package p; func f() { if ; true {} };`, + `package p; func f() { switch ; {} };`, + `package p; func f() { for _ = range "foo" + "bar" {} };`, +} + +func TestParseValidPrograms(t *testing.T) { + for _, src := range validPrograms { + _, err := ParseFile(fset, "", src, 0) + if err != nil { + t.Errorf("ParseFile(%q): %v", src, err) + } + } +} + +var validFiles = []string{ + "parser.go", + "parser_test.go", +} + +func TestParse3(t *testing.T) { + for _, filename := range validFiles { + _, err := ParseFile(fset, filename, nil, DeclarationErrors) + if err != nil { + t.Errorf("ParseFile(%s): %v", filename, err) + } + } +} + +func nameFilter(filename string) bool { + switch filename { + case "parser.go": + case "interface.go": + case "parser_test.go": + default: + return false + } + return true +} + +func dirFilter(f *os.FileInfo) bool { return nameFilter(f.Name) } + +func TestParse4(t *testing.T) { + path := "." + pkgs, err := ParseDir(fset, path, dirFilter, 0) + if err != nil { + t.Fatalf("ParseDir(%s): %v", path, err) + } + if len(pkgs) != 1 { + t.Errorf("incorrect number of packages: %d", len(pkgs)) + } + pkg := pkgs["parser"] + if pkg == nil { + t.Errorf(`package "parser" not found`) + return + } + for filename := range pkg.Files { + if !nameFilter(filename) { + t.Errorf("unexpected package file: %s", filename) + } + } +} diff --git a/src/pkg/go/printer/Makefile b/src/pkg/go/printer/Makefile new file mode 100644 index 000000000..6a71efc93 --- /dev/null +++ b/src/pkg/go/printer/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/printer +GOFILES=\ + printer.go\ + nodes.go\ + +include ../../../Make.pkg diff --git a/src/pkg/go/printer/nodes.go b/src/pkg/go/printer/nodes.go new file mode 100644 index 000000000..9cd975ec1 --- /dev/null +++ b/src/pkg/go/printer/nodes.go @@ -0,0 +1,1510 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements printing of AST nodes; specifically +// expressions, statements, declarations, and files. It uses +// the print functionality implemented in printer.go. + +package printer + +import ( + "bytes" + "go/ast" + "go/token" +) + +// Other formatting issues: +// - better comment formatting for /*-style comments at the end of a line (e.g. a declaration) +// when the comment spans multiple lines; if such a comment is just two lines, formatting is +// not idempotent +// - formatting of expression lists +// - should use blank instead of tab to separate one-line function bodies from +// the function header unless there is a group of consecutive one-liners + +// ---------------------------------------------------------------------------- +// Common AST nodes. + +// Print as many newlines as necessary (but at least min newlines) to get to +// the current line. 
ws is printed before the first line break. If newSection +// is set, the first line break is printed as formfeed. Returns true if any +// line break was printed; returns false otherwise. +// +// TODO(gri): linebreak may add too many lines if the next statement at "line" +// is preceded by comments because the computation of n assumes +// the current position before the comment and the target position +// after the comment. Thus, after interspersing such comments, the +// space taken up by them is not considered to reduce the number of +// linebreaks. At the moment there is no easy way to know about +// future (not yet interspersed) comments in this function. +// +func (p *printer) linebreak(line, min int, ws whiteSpace, newSection bool) (printedBreak bool) { + n := p.nlines(line-p.pos.Line, min) + if n > 0 { + p.print(ws) + if newSection { + p.print(formfeed) + n-- + } + for ; n > 0; n-- { + p.print(newline) + } + printedBreak = true + } + return +} + +// setComment sets g as the next comment if g != nil and if node comments +// are enabled - this mode is used when printing source code fragments such +// as exports only. It assumes that there are no other pending comments to +// intersperse. +func (p *printer) setComment(g *ast.CommentGroup) { + if g == nil || !p.useNodeComments { + return + } + if p.comments == nil { + // initialize p.comments lazily + p.comments = make([]*ast.CommentGroup, 1) + } else if p.cindex < len(p.comments) { + // for some reason there are pending comments; this + // should never happen - handle gracefully and flush + // all comments up to g, ignore anything after that + p.flush(p.fset.Position(g.List[0].Pos()), token.ILLEGAL) + } + p.comments[0] = g + p.cindex = 0 +} + +type exprListMode uint + +const ( + blankStart exprListMode = 1 << iota // print a blank before a non-empty list + blankEnd // print a blank after a non-empty list + commaSep // elements are separated by commas + commaTerm // list is optionally terminated by a comma + noIndent // no extra indentation in multi-line lists + periodSep // elements are separated by periods +) + +// Sets multiLine to true if the identifier list spans multiple lines. +// If indent is set, a multi-line identifier list is indented after the +// first linebreak encountered. +func (p *printer) identList(list []*ast.Ident, indent bool, multiLine *bool) { + // convert into an expression list so we can re-use exprList formatting + xlist := make([]ast.Expr, len(list)) + for i, x := range list { + xlist[i] = x + } + mode := commaSep + if !indent { + mode |= noIndent + } + p.exprList(token.NoPos, xlist, 1, mode, multiLine, token.NoPos) +} + +// Print a list of expressions. If the list spans multiple +// source lines, the original line breaks are respected between +// expressions. Sets multiLine to true if the list spans multiple +// lines. +// +// TODO(gri) Consider rewriting this to be independent of []ast.Expr +// so that we can use the algorithm for any kind of list +// (e.g., pass list via a channel over which to range). 
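+// For example (an illustrative case): in a list written across several
+// source lines, such as
+//
+//	[]int{
+//		1, 2,
+//		3,
+//	}
+//
+// the original break after "2" is respected rather than reflowing all
+// elements onto a single line.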
+func (p *printer) exprList(prev0 token.Pos, list []ast.Expr, depth int, mode exprListMode, multiLine *bool, next0 token.Pos) { + if len(list) == 0 { + return + } + + if mode&blankStart != 0 { + p.print(blank) + } + + prev := p.fset.Position(prev0) + next := p.fset.Position(next0) + line := p.fset.Position(list[0].Pos()).Line + endLine := p.fset.Position(list[len(list)-1].End()).Line + + if prev.IsValid() && prev.Line == line && line == endLine { + // all list entries on a single line + for i, x := range list { + if i > 0 { + if mode&commaSep != 0 { + p.print(token.COMMA) + } + p.print(blank) + } + p.expr0(x, depth, multiLine) + } + if mode&blankEnd != 0 { + p.print(blank) + } + return + } + + // list entries span multiple lines; + // use source code positions to guide line breaks + + // don't add extra indentation if noIndent is set; + // i.e., pretend that the first line is already indented + ws := ignore + if mode&noIndent == 0 { + ws = indent + } + + // the first linebreak is always a formfeed since this section must not + // depend on any previous formatting + prevBreak := -1 // index of last expression that was followed by a linebreak + if prev.IsValid() && prev.Line < line && p.linebreak(line, 0, ws, true) { + ws = ignore + *multiLine = true + prevBreak = 0 + } + + // initialize expression/key size: a zero value indicates expr/key doesn't fit on a single line + size := 0 + + // print all list elements + for i, x := range list { + prevLine := line + line = p.fset.Position(x.Pos()).Line + + // determine if the next linebreak, if any, needs to use formfeed: + // in general, use the entire node size to make the decision; for + // key:value expressions, use the key size + // TODO(gri) for a better result, should probably incorporate both + // the key and the node size into the decision process + useFF := true + + // determine element size: all bets are off if we don't have + // position information for the previous and next token (likely + // generated code - simply ignore the size in this case by setting + // it to 0) + prevSize := size + const infinity = 1e6 // larger than any source line + size = p.nodeSize(x, infinity) + pair, isPair := x.(*ast.KeyValueExpr) + if size <= infinity && prev.IsValid() && next.IsValid() { + // x fits on a single line + if isPair { + size = p.nodeSize(pair.Key, infinity) // size <= infinity + } + } else { + // size too large or we don't have good layout information + size = 0 + } + + // if the previous line and the current line had single- + // line-expressions and the key sizes are small or the + // ratio between the key sizes does not exceed a + // threshold, align columns and do not use formfeed + if prevSize > 0 && size > 0 { + const smallSize = 20 + if prevSize <= smallSize && size <= smallSize { + useFF = false + } else { + const r = 4 // threshold + ratio := float64(size) / float64(prevSize) + useFF = ratio <= 1/r || r <= ratio + } + } + + if i > 0 { + switch { + case mode&commaSep != 0: + p.print(token.COMMA) + case mode&periodSep != 0: + p.print(token.PERIOD) + } + needsBlank := mode&periodSep == 0 // period-separated list elements don't need a blank + if prevLine < line && prevLine > 0 && line > 0 { + // lines are broken using newlines so comments remain aligned + // unless forceFF is set or there are multiple expressions on + // the same line in which case formfeed is used + if p.linebreak(line, 0, ws, useFF || prevBreak+1 < i) { + ws = ignore + *multiLine = true + prevBreak = i + needsBlank = false // we got a line break instead + } + } + 
if needsBlank { + p.print(blank) + } + } + + if isPair && size > 0 && len(list) > 1 { + // we have a key:value expression that fits onto one line and + // is in a list with more than one entry: use a column for the + // key such that consecutive entries can align if possible + p.expr(pair.Key, multiLine) + p.print(pair.Colon, token.COLON, vtab) + p.expr(pair.Value, multiLine) + } else { + p.expr0(x, depth, multiLine) + } + } + + if mode&commaTerm != 0 && next.IsValid() && p.pos.Line < next.Line { + // print a terminating comma if the next token is on a new line + p.print(token.COMMA) + if ws == ignore && mode&noIndent == 0 { + // unindent if we indented + p.print(unindent) + } + p.print(formfeed) // terminating comma needs a line break to look good + return + } + + if mode&blankEnd != 0 { + p.print(blank) + } + + if ws == ignore && mode&noIndent == 0 { + // unindent if we indented + p.print(unindent) + } +} + +// Sets multiLine to true if the parameter list spans multiple lines. +func (p *printer) parameters(fields *ast.FieldList, multiLine *bool) { + p.print(fields.Opening, token.LPAREN) + if len(fields.List) > 0 { + var prevLine, line int + for i, par := range fields.List { + if i > 0 { + p.print(token.COMMA) + if len(par.Names) > 0 { + line = p.fset.Position(par.Names[0].Pos()).Line + } else { + line = p.fset.Position(par.Type.Pos()).Line + } + if 0 < prevLine && prevLine < line && p.linebreak(line, 0, ignore, true) { + *multiLine = true + } else { + p.print(blank) + } + } + if len(par.Names) > 0 { + p.identList(par.Names, false, multiLine) + p.print(blank) + } + p.expr(par.Type, multiLine) + prevLine = p.fset.Position(par.Type.Pos()).Line + } + } + p.print(fields.Closing, token.RPAREN) +} + +// Sets multiLine to true if the signature spans multiple lines. 
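+// For instance (illustrative): a single anonymous result is printed
+// without parentheses, as in "func(x int) int", while named or multiple
+// results keep them, as in "func(x int) (n int, err os.Error)".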
+func (p *printer) signature(params, result *ast.FieldList, multiLine *bool) { + p.parameters(params, multiLine) + n := result.NumFields() + if n > 0 { + p.print(blank) + if n == 1 && result.List[0].Names == nil { + // single anonymous result; no ()'s + p.expr(result.List[0].Type, multiLine) + return + } + p.parameters(result, multiLine) + } +} + +func identListSize(list []*ast.Ident, maxSize int) (size int) { + for i, x := range list { + if i > 0 { + size += 2 // ", " + } + size += len(x.Name) + if size >= maxSize { + break + } + } + return +} + +func (p *printer) isOneLineFieldList(list []*ast.Field) bool { + if len(list) != 1 { + return false // allow only one field + } + f := list[0] + if f.Tag != nil || f.Comment != nil { + return false // don't allow tags or comments + } + // only name(s) and type + const maxSize = 30 // adjust as appropriate, this is an approximate value + namesSize := identListSize(f.Names, maxSize) + if namesSize > 0 { + namesSize = 1 // blank between names and types + } + typeSize := p.nodeSize(f.Type, maxSize) + return namesSize+typeSize <= maxSize +} + +func (p *printer) setLineComment(text string) { + p.setComment(&ast.CommentGroup{[]*ast.Comment{&ast.Comment{token.NoPos, text}}}) +} + +func (p *printer) fieldList(fields *ast.FieldList, isStruct, isIncomplete bool) { + lbrace := fields.Opening + list := fields.List + rbrace := fields.Closing + srcIsOneLine := lbrace.IsValid() && rbrace.IsValid() && p.fset.Position(lbrace).Line == p.fset.Position(rbrace).Line + + if !isIncomplete && !p.commentBefore(p.fset.Position(rbrace)) && srcIsOneLine { + // possibly a one-line struct/interface + if len(list) == 0 { + // no blank between keyword and {} in this case + p.print(lbrace, token.LBRACE, rbrace, token.RBRACE) + return + } else if isStruct && p.isOneLineFieldList(list) { // for now ignore interfaces + // small enough - print on one line + // (don't use identList and ignore source line breaks) + p.print(lbrace, token.LBRACE, blank) + f := list[0] + for i, x := range f.Names { + if i > 0 { + p.print(token.COMMA, blank) + } + p.expr(x, ignoreMultiLine) + } + if len(f.Names) > 0 { + p.print(blank) + } + p.expr(f.Type, ignoreMultiLine) + p.print(blank, rbrace, token.RBRACE) + return + } + } + + // at least one entry or incomplete + p.print(blank, lbrace, token.LBRACE, indent, formfeed) + if isStruct { + + sep := vtab + if len(list) == 1 { + sep = blank + } + var ml bool + for i, f := range list { + if i > 0 { + p.linebreak(p.fset.Position(f.Pos()).Line, 1, ignore, ml) + } + ml = false + extraTabs := 0 + p.setComment(f.Doc) + if len(f.Names) > 0 { + // named fields + p.identList(f.Names, false, &ml) + p.print(sep) + p.expr(f.Type, &ml) + extraTabs = 1 + } else { + // anonymous field + p.expr(f.Type, &ml) + extraTabs = 2 + } + if f.Tag != nil { + if len(f.Names) > 0 && sep == vtab { + p.print(sep) + } + p.print(sep) + p.expr(f.Tag, &ml) + extraTabs = 0 + } + if f.Comment != nil { + for ; extraTabs > 0; extraTabs-- { + p.print(sep) + } + p.setComment(f.Comment) + } + } + if isIncomplete { + if len(list) > 0 { + p.print(formfeed) + } + p.flush(p.fset.Position(rbrace), token.RBRACE) // make sure we don't lose the last line comment + p.setLineComment("// contains filtered or unexported fields") + } + + } else { // interface + + var ml bool + for i, f := range list { + if i > 0 { + p.linebreak(p.fset.Position(f.Pos()).Line, 1, ignore, ml) + } + ml = false + p.setComment(f.Doc) + if ftyp, isFtyp := f.Type.(*ast.FuncType); isFtyp { + // method + p.expr(f.Names[0], &ml) + 
p.signature(ftyp.Params, ftyp.Results, &ml) + } else { + // embedded interface + p.expr(f.Type, &ml) + } + p.setComment(f.Comment) + } + if isIncomplete { + if len(list) > 0 { + p.print(formfeed) + } + p.flush(p.fset.Position(rbrace), token.RBRACE) // make sure we don't lose the last line comment + p.setLineComment("// contains filtered or unexported methods") + } + + } + p.print(unindent, formfeed, rbrace, token.RBRACE) +} + +// ---------------------------------------------------------------------------- +// Expressions + +func walkBinary(e *ast.BinaryExpr) (has4, has5 bool, maxProblem int) { + switch e.Op.Precedence() { + case 4: + has4 = true + case 5: + has5 = true + } + + switch l := e.X.(type) { + case *ast.BinaryExpr: + if l.Op.Precedence() < e.Op.Precedence() { + // parens will be inserted. + // pretend this is an *ast.ParenExpr and do nothing. + break + } + h4, h5, mp := walkBinary(l) + has4 = has4 || h4 + has5 = has5 || h5 + if maxProblem < mp { + maxProblem = mp + } + } + + switch r := e.Y.(type) { + case *ast.BinaryExpr: + if r.Op.Precedence() <= e.Op.Precedence() { + // parens will be inserted. + // pretend this is an *ast.ParenExpr and do nothing. + break + } + h4, h5, mp := walkBinary(r) + has4 = has4 || h4 + has5 = has5 || h5 + if maxProblem < mp { + maxProblem = mp + } + + case *ast.StarExpr: + if e.Op == token.QUO { // `*/` + maxProblem = 5 + } + + case *ast.UnaryExpr: + switch e.Op.String() + r.Op.String() { + case "/*", "&&", "&^": + maxProblem = 5 + case "++", "--": + if maxProblem < 4 { + maxProblem = 4 + } + } + } + return +} + +func cutoff(e *ast.BinaryExpr, depth int) int { + has4, has5, maxProblem := walkBinary(e) + if maxProblem > 0 { + return maxProblem + 1 + } + if has4 && has5 { + if depth == 1 { + return 5 + } + return 4 + } + if depth == 1 { + return 6 + } + return 4 +} + +func diffPrec(expr ast.Expr, prec int) int { + x, ok := expr.(*ast.BinaryExpr) + if !ok || prec != x.Op.Precedence() { + return 1 + } + return 0 +} + +func reduceDepth(depth int) int { + depth-- + if depth < 1 { + depth = 1 + } + return depth +} + +// Format the binary expression: decide the cutoff and then format. +// Let's call depth == 1 Normal mode, and depth > 1 Compact mode. +// (Algorithm suggestion by Russ Cox.) +// +// The precedences are: +// 5 * / % << >> & &^ +// 4 + - | ^ +// 3 == != < <= > >= +// 2 && +// 1 || +// +// The only decision is whether there will be spaces around levels 4 and 5. +// There are never spaces at level 6 (unary), and always spaces at levels 3 and below. +// +// To choose the cutoff, look at the whole expression but excluding primary +// expressions (function calls, parenthesized exprs), and apply these rules: +// +// 1) If there is a binary operator with a right side unary operand +// that would clash without a space, the cutoff must be (in order): +// +// /* 6 +// && 6 +// &^ 6 +// ++ 5 +// -- 5 +// +// (Comparison operators always have spaces around them.) +// +// 2) If there is a mix of level 5 and level 4 operators, then the cutoff +// is 5 (use spaces to distinguish precedence) in Normal mode +// and 4 (never use spaces) in Compact mode. +// +// 3) If there are no level 4 operators or no level 5 operators, then the +// cutoff is 6 (always use spaces) in Normal mode +// and 4 (never use spaces) in Compact mode. +// +// Sets multiLine to true if the binary expression spans multiple lines. 
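+// Worked example (illustrative): "a + b*c" mixes a level-4 operator (+)
+// with a level-5 operator (*), so in Normal mode the cutoff is 5; the
+// level-4 "+" is printed with surrounding blanks while the level-5 "*"
+// is not, yielding "a + b*c".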
+func (p *printer) binaryExpr(x *ast.BinaryExpr, prec1, cutoff, depth int, multiLine *bool) { + prec := x.Op.Precedence() + if prec < prec1 { + // parenthesis needed + // Note: The parser inserts an ast.ParenExpr node; thus this case + // can only occur if the AST is created in a different way. + p.print(token.LPAREN) + p.expr0(x, reduceDepth(depth), multiLine) // parentheses undo one level of depth + p.print(token.RPAREN) + return + } + + printBlank := prec < cutoff + + ws := indent + p.expr1(x.X, prec, depth+diffPrec(x.X, prec), multiLine) + if printBlank { + p.print(blank) + } + xline := p.pos.Line // before the operator (it may be on the next line!) + yline := p.fset.Position(x.Y.Pos()).Line + p.print(x.OpPos, x.Op) + if xline != yline && xline > 0 && yline > 0 { + // at least one line break, but respect an extra empty line + // in the source + if p.linebreak(yline, 1, ws, true) { + ws = ignore + *multiLine = true + printBlank = false // no blank after line break + } + } + if printBlank { + p.print(blank) + } + p.expr1(x.Y, prec+1, depth+1, multiLine) + if ws == ignore { + p.print(unindent) + } +} + +func isBinary(expr ast.Expr) bool { + _, ok := expr.(*ast.BinaryExpr) + return ok +} + +// If the expression contains one or more selector expressions, splits it into +// two expressions at the rightmost period. Writes entire expr to suffix when +// selector isn't found. Rewrites AST nodes for calls, index expressions and +// type assertions, all of which may be found in selector chains, to make them +// parts of the chain. +func splitSelector(expr ast.Expr) (body, suffix ast.Expr) { + switch x := expr.(type) { + case *ast.SelectorExpr: + body, suffix = x.X, x.Sel + return + case *ast.CallExpr: + body, suffix = splitSelector(x.Fun) + if body != nil { + suffix = &ast.CallExpr{suffix, x.Lparen, x.Args, x.Ellipsis, x.Rparen} + return + } + case *ast.IndexExpr: + body, suffix = splitSelector(x.X) + if body != nil { + suffix = &ast.IndexExpr{suffix, x.Lbrack, x.Index, x.Rbrack} + return + } + case *ast.SliceExpr: + body, suffix = splitSelector(x.X) + if body != nil { + suffix = &ast.SliceExpr{suffix, x.Lbrack, x.Low, x.High, x.Rbrack} + return + } + case *ast.TypeAssertExpr: + body, suffix = splitSelector(x.X) + if body != nil { + suffix = &ast.TypeAssertExpr{suffix, x.Type} + return + } + } + suffix = expr + return +} + +// Convert an expression into an expression list split at the periods of +// selector expressions. +func selectorExprList(expr ast.Expr) (list []ast.Expr) { + // split expression + for expr != nil { + var suffix ast.Expr + expr, suffix = splitSelector(expr) + list = append(list, suffix) + } + + // reverse list + for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 { + list[i], list[j] = list[j], list[i] + } + + return +} + +// Sets multiLine to true if the expression spans multiple lines. 
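+// For example (illustrative): "a.b.c()" is split into the parts "a", "b",
+// and "c()", which are then printed as a period-separated expression list.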
+func (p *printer) expr1(expr ast.Expr, prec1, depth int, multiLine *bool) { + p.print(expr.Pos()) + + switch x := expr.(type) { + case *ast.BadExpr: + p.print("BadExpr") + + case *ast.Ident: + p.print(x) + + case *ast.BinaryExpr: + if depth < 1 { + p.internalError("depth < 1:", depth) + depth = 1 + } + p.binaryExpr(x, prec1, cutoff(x, depth), depth, multiLine) + + case *ast.KeyValueExpr: + p.expr(x.Key, multiLine) + p.print(x.Colon, token.COLON, blank) + p.expr(x.Value, multiLine) + + case *ast.StarExpr: + const prec = token.UnaryPrec + if prec < prec1 { + // parenthesis needed + p.print(token.LPAREN) + p.print(token.MUL) + p.expr(x.X, multiLine) + p.print(token.RPAREN) + } else { + // no parenthesis needed + p.print(token.MUL) + p.expr(x.X, multiLine) + } + + case *ast.UnaryExpr: + const prec = token.UnaryPrec + if prec < prec1 { + // parenthesis needed + p.print(token.LPAREN) + p.expr(x, multiLine) + p.print(token.RPAREN) + } else { + // no parenthesis needed + p.print(x.Op) + if x.Op == token.RANGE { + // TODO(gri) Remove this code if it cannot be reached. + p.print(blank) + } + p.expr1(x.X, prec, depth, multiLine) + } + + case *ast.BasicLit: + p.print(x) + + case *ast.FuncLit: + p.expr(x.Type, multiLine) + p.funcBody(x.Body, p.distance(x.Type.Pos(), p.pos), true, multiLine) + + case *ast.ParenExpr: + if _, hasParens := x.X.(*ast.ParenExpr); hasParens { + // don't print parentheses around an already parenthesized expression + // TODO(gri) consider making this more general and incorporate precedence levels + p.expr0(x.X, reduceDepth(depth), multiLine) // parentheses undo one level of depth + } else { + p.print(token.LPAREN) + p.expr0(x.X, reduceDepth(depth), multiLine) // parentheses undo one level of depth + p.print(x.Rparen, token.RPAREN) + } + + case *ast.SelectorExpr: + parts := selectorExprList(expr) + p.exprList(token.NoPos, parts, depth, periodSep, multiLine, token.NoPos) + + case *ast.TypeAssertExpr: + p.expr1(x.X, token.HighestPrec, depth, multiLine) + p.print(token.PERIOD, token.LPAREN) + if x.Type != nil { + p.expr(x.Type, multiLine) + } else { + p.print(token.TYPE) + } + p.print(token.RPAREN) + + case *ast.IndexExpr: + // TODO(gri): should treat [] like parentheses and undo one level of depth + p.expr1(x.X, token.HighestPrec, 1, multiLine) + p.print(x.Lbrack, token.LBRACK) + p.expr0(x.Index, depth+1, multiLine) + p.print(x.Rbrack, token.RBRACK) + + case *ast.SliceExpr: + // TODO(gri): should treat [] like parentheses and undo one level of depth + p.expr1(x.X, token.HighestPrec, 1, multiLine) + p.print(x.Lbrack, token.LBRACK) + if x.Low != nil { + p.expr0(x.Low, depth+1, multiLine) + } + // blanks around ":" if both sides exist and either side is a binary expression + if depth <= 1 && x.Low != nil && x.High != nil && (isBinary(x.Low) || isBinary(x.High)) { + p.print(blank, token.COLON, blank) + } else { + p.print(token.COLON) + } + if x.High != nil { + p.expr0(x.High, depth+1, multiLine) + } + p.print(x.Rbrack, token.RBRACK) + + case *ast.CallExpr: + if len(x.Args) > 1 { + depth++ + } + p.expr1(x.Fun, token.HighestPrec, depth, multiLine) + p.print(x.Lparen, token.LPAREN) + p.exprList(x.Lparen, x.Args, depth, commaSep|commaTerm, multiLine, x.Rparen) + if x.Ellipsis.IsValid() { + p.print(x.Ellipsis, token.ELLIPSIS) + } + p.print(x.Rparen, token.RPAREN) + + case *ast.CompositeLit: + // composite literal elements that are composite literals themselves may have the type omitted + if x.Type != nil { + p.expr1(x.Type, token.HighestPrec, depth, multiLine) + } + p.print(x.Lbrace, 
token.LBRACE) + p.exprList(x.Lbrace, x.Elts, 1, commaSep|commaTerm, multiLine, x.Rbrace) + // do not insert extra line breaks because of comments before + // the closing '}' as it might break the code if there is no + // trailing ',' + p.print(noExtraLinebreak, x.Rbrace, token.RBRACE, noExtraLinebreak) + + case *ast.Ellipsis: + p.print(token.ELLIPSIS) + if x.Elt != nil { + p.expr(x.Elt, multiLine) + } + + case *ast.ArrayType: + p.print(token.LBRACK) + if x.Len != nil { + p.expr(x.Len, multiLine) + } + p.print(token.RBRACK) + p.expr(x.Elt, multiLine) + + case *ast.StructType: + p.print(token.STRUCT) + p.fieldList(x.Fields, true, x.Incomplete) + + case *ast.FuncType: + p.print(token.FUNC) + p.signature(x.Params, x.Results, multiLine) + + case *ast.InterfaceType: + p.print(token.INTERFACE) + p.fieldList(x.Methods, false, x.Incomplete) + + case *ast.MapType: + p.print(token.MAP, token.LBRACK) + p.expr(x.Key, multiLine) + p.print(token.RBRACK) + p.expr(x.Value, multiLine) + + case *ast.ChanType: + switch x.Dir { + case ast.SEND | ast.RECV: + p.print(token.CHAN) + case ast.RECV: + p.print(token.ARROW, token.CHAN) + case ast.SEND: + p.print(token.CHAN, token.ARROW) + } + p.print(blank) + p.expr(x.Value, multiLine) + + default: + panic("unreachable") + } + + return +} + +func (p *printer) expr0(x ast.Expr, depth int, multiLine *bool) { + p.expr1(x, token.LowestPrec, depth, multiLine) +} + +// Sets multiLine to true if the expression spans multiple lines. +func (p *printer) expr(x ast.Expr, multiLine *bool) { + const depth = 1 + p.expr1(x, token.LowestPrec, depth, multiLine) +} + +// ---------------------------------------------------------------------------- +// Statements + +// Print the statement list indented, but without a newline after the last statement. +// Extra line breaks between statements in the source are respected but at most one +// empty line is printed between statements. +func (p *printer) stmtList(list []ast.Stmt, _indent int, nextIsRBrace bool) { + // TODO(gri): fix _indent code + if _indent > 0 { + p.print(indent) + } + var multiLine bool + for i, s := range list { + // _indent == 0 only for lists of switch/select case clauses; + // in those cases each clause is a new section + p.linebreak(p.fset.Position(s.Pos()).Line, 1, ignore, i == 0 || _indent == 0 || multiLine) + multiLine = false + p.stmt(s, nextIsRBrace && i == len(list)-1, &multiLine) + } + if _indent > 0 { + p.print(unindent) + } +} + +// block prints an *ast.BlockStmt; it always spans at least two lines. 
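+// As an illustrative consequence, even an empty block is printed with its
+// opening and closing brace on two separate lines.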
+func (p *printer) block(s *ast.BlockStmt, indent int) { + p.print(s.Pos(), token.LBRACE) + p.stmtList(s.List, indent, true) + p.linebreak(p.fset.Position(s.Rbrace).Line, 1, ignore, true) + p.print(s.Rbrace, token.RBRACE) +} + +func isTypeName(x ast.Expr) bool { + switch t := x.(type) { + case *ast.Ident: + return true + case *ast.SelectorExpr: + return isTypeName(t.X) + } + return false +} + +func stripParens(x ast.Expr) ast.Expr { + if px, strip := x.(*ast.ParenExpr); strip { + // parentheses must not be stripped if there are any + // unparenthesized composite literals starting with + // a type name + ast.Inspect(px.X, func(node ast.Node) bool { + switch x := node.(type) { + case *ast.ParenExpr: + // parentheses protect enclosed composite literals + return false + case *ast.CompositeLit: + if isTypeName(x.Type) { + strip = false // do not strip parentheses + } + return false + } + // in all other cases, keep inspecting + return true + }) + if strip { + return stripParens(px.X) + } + } + return x +} + +func (p *printer) controlClause(isForStmt bool, init ast.Stmt, expr ast.Expr, post ast.Stmt) { + p.print(blank) + needsBlank := false + if init == nil && post == nil { + // no semicolons required + if expr != nil { + p.expr(stripParens(expr), ignoreMultiLine) + needsBlank = true + } + } else { + // all semicolons required + // (they are not separators, print them explicitly) + if init != nil { + p.stmt(init, false, ignoreMultiLine) + } + p.print(token.SEMICOLON, blank) + if expr != nil { + p.expr(stripParens(expr), ignoreMultiLine) + needsBlank = true + } + if isForStmt { + p.print(token.SEMICOLON, blank) + needsBlank = false + if post != nil { + p.stmt(post, false, ignoreMultiLine) + needsBlank = true + } + } + } + if needsBlank { + p.print(blank) + } +} + +// Sets multiLine to true if the statement spans multiple lines. 
+func (p *printer) stmt(stmt ast.Stmt, nextIsRBrace bool, multiLine *bool) { + p.print(stmt.Pos()) + + switch s := stmt.(type) { + case *ast.BadStmt: + p.print("BadStmt") + + case *ast.DeclStmt: + p.decl(s.Decl, multiLine) + + case *ast.EmptyStmt: + // nothing to do + + case *ast.LabeledStmt: + // a "correcting" unindent immediately following a line break + // is applied before the line break if there is no comment + // between (see writeWhitespace) + p.print(unindent) + p.expr(s.Label, multiLine) + p.print(s.Colon, token.COLON, indent) + if e, isEmpty := s.Stmt.(*ast.EmptyStmt); isEmpty { + if !nextIsRBrace { + p.print(newline, e.Pos(), token.SEMICOLON) + break + } + } else { + p.linebreak(p.fset.Position(s.Stmt.Pos()).Line, 1, ignore, true) + } + p.stmt(s.Stmt, nextIsRBrace, multiLine) + + case *ast.ExprStmt: + const depth = 1 + p.expr0(s.X, depth, multiLine) + + case *ast.SendStmt: + const depth = 1 + p.expr0(s.Chan, depth, multiLine) + p.print(blank, s.Arrow, token.ARROW, blank) + p.expr0(s.Value, depth, multiLine) + + case *ast.IncDecStmt: + const depth = 1 + p.expr0(s.X, depth+1, multiLine) + p.print(s.TokPos, s.Tok) + + case *ast.AssignStmt: + var depth = 1 + if len(s.Lhs) > 1 && len(s.Rhs) > 1 { + depth++ + } + p.exprList(s.Pos(), s.Lhs, depth, commaSep, multiLine, s.TokPos) + p.print(blank, s.TokPos, s.Tok) + p.exprList(s.TokPos, s.Rhs, depth, blankStart|commaSep, multiLine, token.NoPos) + + case *ast.GoStmt: + p.print(token.GO, blank) + p.expr(s.Call, multiLine) + + case *ast.DeferStmt: + p.print(token.DEFER, blank) + p.expr(s.Call, multiLine) + + case *ast.ReturnStmt: + p.print(token.RETURN) + if s.Results != nil { + p.exprList(s.Pos(), s.Results, 1, blankStart|commaSep, multiLine, token.NoPos) + } + + case *ast.BranchStmt: + p.print(s.Tok) + if s.Label != nil { + p.print(blank) + p.expr(s.Label, multiLine) + } + + case *ast.BlockStmt: + p.block(s, 1) + *multiLine = true + + case *ast.IfStmt: + p.print(token.IF) + p.controlClause(false, s.Init, s.Cond, nil) + p.block(s.Body, 1) + *multiLine = true + if s.Else != nil { + p.print(blank, token.ELSE, blank) + switch s.Else.(type) { + case *ast.BlockStmt, *ast.IfStmt: + p.stmt(s.Else, nextIsRBrace, ignoreMultiLine) + default: + p.print(token.LBRACE, indent, formfeed) + p.stmt(s.Else, true, ignoreMultiLine) + p.print(unindent, formfeed, token.RBRACE) + } + } + + case *ast.CaseClause: + if s.List != nil { + p.print(token.CASE) + p.exprList(s.Pos(), s.List, 1, blankStart|commaSep, multiLine, s.Colon) + } else { + p.print(token.DEFAULT) + } + p.print(s.Colon, token.COLON) + p.stmtList(s.Body, 1, nextIsRBrace) + + case *ast.SwitchStmt: + p.print(token.SWITCH) + p.controlClause(false, s.Init, s.Tag, nil) + p.block(s.Body, 0) + *multiLine = true + + case *ast.TypeSwitchStmt: + p.print(token.SWITCH) + if s.Init != nil { + p.print(blank) + p.stmt(s.Init, false, ignoreMultiLine) + p.print(token.SEMICOLON) + } + p.print(blank) + p.stmt(s.Assign, false, ignoreMultiLine) + p.print(blank) + p.block(s.Body, 0) + *multiLine = true + + case *ast.CommClause: + if s.Comm != nil { + p.print(token.CASE, blank) + p.stmt(s.Comm, false, ignoreMultiLine) + } else { + p.print(token.DEFAULT) + } + p.print(s.Colon, token.COLON) + p.stmtList(s.Body, 1, nextIsRBrace) + + case *ast.SelectStmt: + p.print(token.SELECT, blank) + body := s.Body + if len(body.List) == 0 && !p.commentBefore(p.fset.Position(body.Rbrace)) { + // print empty select statement w/o comments on one line + p.print(body.Lbrace, token.LBRACE, body.Rbrace, token.RBRACE) + } else { + p.block(body, 
0) + *multiLine = true + } + + case *ast.ForStmt: + p.print(token.FOR) + p.controlClause(true, s.Init, s.Cond, s.Post) + p.block(s.Body, 1) + *multiLine = true + + case *ast.RangeStmt: + p.print(token.FOR, blank) + p.expr(s.Key, multiLine) + if s.Value != nil { + p.print(token.COMMA, blank) + p.expr(s.Value, multiLine) + } + p.print(blank, s.TokPos, s.Tok, blank, token.RANGE, blank) + p.expr(stripParens(s.X), multiLine) + p.print(blank) + p.block(s.Body, 1) + *multiLine = true + + default: + panic("unreachable") + } + + return +} + +// ---------------------------------------------------------------------------- +// Declarations + +// The keepTypeColumn function determines if the type column of a series of +// consecutive const or var declarations must be kept, or if initialization +// values (V) can be placed in the type column (T) instead. The i'th entry +// in the result slice is true if the type column in spec[i] must be kept. +// +// For example, the declaration: +// +// const ( +// foobar int = 42 // comment +// x = 7 // comment +// foo +// bar = 991 +// ) +// +// leads to the type/values matrix below. A run of value columns (V) can +// be moved into the type column if there is no type for any of the values +// in that column (we only move entire columns so that they align properly). +// +// matrix formatted result +// matrix +// T V -> T V -> true there is a T and so the type +// - V - V true column must be kept +// - - - - false +// - V V - false V is moved into T column +// +func keepTypeColumn(specs []ast.Spec) []bool { + m := make([]bool, len(specs)) + + populate := func(i, j int, keepType bool) { + if keepType { + for ; i < j; i++ { + m[i] = true + } + } + } + + i0 := -1 // if i0 >= 0 we are in a run and i0 is the start of the run + var keepType bool + for i, s := range specs { + t := s.(*ast.ValueSpec) + if t.Values != nil { + if i0 < 0 { + // start of a run of ValueSpecs with non-nil Values + i0 = i + keepType = false + } + } else { + if i0 >= 0 { + // end of a run + populate(i0, i, keepType) + i0 = -1 + } + } + if t.Type != nil { + keepType = true + } + } + if i0 >= 0 { + // end of a run + populate(i0, len(specs), keepType) + } + + return m +} + +func (p *printer) valueSpec(s *ast.ValueSpec, keepType, doIndent bool, multiLine *bool) { + p.setComment(s.Doc) + p.identList(s.Names, doIndent, multiLine) // always present + extraTabs := 3 + if s.Type != nil || keepType { + p.print(vtab) + extraTabs-- + } + if s.Type != nil { + p.expr(s.Type, multiLine) + } + if s.Values != nil { + p.print(vtab, token.ASSIGN) + p.exprList(token.NoPos, s.Values, 1, blankStart|commaSep, multiLine, token.NoPos) + extraTabs-- + } + if s.Comment != nil { + for ; extraTabs > 0; extraTabs-- { + p.print(vtab) + } + p.setComment(s.Comment) + } +} + +// The parameter n is the number of specs in the group. If doIndent is set, +// multi-line identifier lists in the spec are indented when the first +// linebreak is encountered. +// Sets multiLine to true if the spec spans multiple lines. 
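+// Illustrative note: in a grouped declaration (n > 1) the name and type
+// of a TypeSpec are separated by a vtab so that the specs of the group
+// align in columns; a single spec (n == 1) uses a plain blank instead.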
+// +func (p *printer) spec(spec ast.Spec, n int, doIndent bool, multiLine *bool) { + switch s := spec.(type) { + case *ast.ImportSpec: + p.setComment(s.Doc) + if s.Name != nil { + p.expr(s.Name, multiLine) + p.print(blank) + } + p.expr(s.Path, multiLine) + p.setComment(s.Comment) + + case *ast.ValueSpec: + if n != 1 { + p.internalError("expected n = 1; got", n) + } + p.setComment(s.Doc) + p.identList(s.Names, doIndent, multiLine) // always present + if s.Type != nil { + p.print(blank) + p.expr(s.Type, multiLine) + } + if s.Values != nil { + p.print(blank, token.ASSIGN) + p.exprList(token.NoPos, s.Values, 1, blankStart|commaSep, multiLine, token.NoPos) + } + p.setComment(s.Comment) + + case *ast.TypeSpec: + p.setComment(s.Doc) + p.expr(s.Name, multiLine) + if n == 1 { + p.print(blank) + } else { + p.print(vtab) + } + p.expr(s.Type, multiLine) + p.setComment(s.Comment) + + default: + panic("unreachable") + } +} + +// Sets multiLine to true if the declaration spans multiple lines. +func (p *printer) genDecl(d *ast.GenDecl, multiLine *bool) { + p.setComment(d.Doc) + p.print(d.Pos(), d.Tok, blank) + + if d.Lparen.IsValid() { + // group of parenthesized declarations + p.print(d.Lparen, token.LPAREN) + if n := len(d.Specs); n > 0 { + p.print(indent, formfeed) + if n > 1 && (d.Tok == token.CONST || d.Tok == token.VAR) { + // two or more grouped const/var declarations: + // determine if the type column must be kept + keepType := keepTypeColumn(d.Specs) + var ml bool + for i, s := range d.Specs { + if i > 0 { + p.linebreak(p.fset.Position(s.Pos()).Line, 1, ignore, ml) + } + ml = false + p.valueSpec(s.(*ast.ValueSpec), keepType[i], false, &ml) + } + } else { + var ml bool + for i, s := range d.Specs { + if i > 0 { + p.linebreak(p.fset.Position(s.Pos()).Line, 1, ignore, ml) + } + ml = false + p.spec(s, n, false, &ml) + } + } + p.print(unindent, formfeed) + *multiLine = true + } + p.print(d.Rparen, token.RPAREN) + + } else { + // single declaration + p.spec(d.Specs[0], 1, true, multiLine) + } +} + +// nodeSize determines the size of n in chars after formatting. +// The result is <= maxSize if the node fits on one line with at +// most maxSize chars and the formatted output doesn't contain +// any control chars. Otherwise, the result is > maxSize. +// +func (p *printer) nodeSize(n ast.Node, maxSize int) (size int) { + // nodeSize invokes the printer, which may invoke nodeSize + // recursively. For deep composite literal nests, this can + // lead to an exponential algorithm. Remember previous + // results to prune the recursion (was issue 1628). 
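+ // The lookup and the two stores below implement the memoization in two
+ // phases: the size is first recorded pessimistically as maxSize+1 so
+ // that recursive invocations on the same node terminate, and it is
+ // overwritten with the actual size once the node is known to fit.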
+ if size, found := p.nodeSizes[n]; found { + return size + } + + size = maxSize + 1 // assume n doesn't fit + p.nodeSizes[n] = size + + // nodeSize computation must be independent of particular + // style so that we always get the same decision; print + // in RawFormat + cfg := Config{Mode: RawFormat} + var buf bytes.Buffer + if _, err := cfg.fprint(&buf, p.fset, n, p.nodeSizes); err != nil { + return + } + if buf.Len() <= maxSize { + for _, ch := range buf.Bytes() { + if ch < ' ' { + return + } + } + size = buf.Len() // n fits + p.nodeSizes[n] = size + } + return +} + +func (p *printer) isOneLineFunc(b *ast.BlockStmt, headerSize int) bool { + pos1 := b.Pos() + pos2 := b.Rbrace + if pos1.IsValid() && pos2.IsValid() && p.fset.Position(pos1).Line != p.fset.Position(pos2).Line { + // opening and closing brace are on different lines - don't make it a one-liner + return false + } + if len(b.List) > 5 || p.commentBefore(p.fset.Position(pos2)) { + // too many statements or there is a comment inside - don't make it a one-liner + return false + } + // otherwise, estimate body size + const maxSize = 100 + bodySize := 0 + for i, s := range b.List { + if i > 0 { + bodySize += 2 // space for a semicolon and blank + } + bodySize += p.nodeSize(s, maxSize) + } + return headerSize+bodySize <= maxSize +} + +// Sets multiLine to true if the function body spans multiple lines. +func (p *printer) funcBody(b *ast.BlockStmt, headerSize int, isLit bool, multiLine *bool) { + if b == nil { + return + } + + if p.isOneLineFunc(b, headerSize) { + sep := vtab + if isLit { + sep = blank + } + p.print(sep, b.Lbrace, token.LBRACE) + if len(b.List) > 0 { + p.print(blank) + for i, s := range b.List { + if i > 0 { + p.print(token.SEMICOLON, blank) + } + p.stmt(s, i == len(b.List)-1, ignoreMultiLine) + } + p.print(blank) + } + p.print(b.Rbrace, token.RBRACE) + return + } + + p.print(blank) + p.block(b, 1) + *multiLine = true +} + +// distance returns the column difference between from and to if both +// are on the same line; if they are on different lines (or unknown) +// the result is infinity. +func (p *printer) distance(from0 token.Pos, to token.Position) int { + from := p.fset.Position(from0) + if from.IsValid() && to.IsValid() && from.Line == to.Line { + return to.Column - from.Column + } + return infinity +} + +// Sets multiLine to true if the declaration spans multiple lines. +func (p *printer) funcDecl(d *ast.FuncDecl, multiLine *bool) { + p.setComment(d.Doc) + p.print(d.Pos(), token.FUNC, blank) + if d.Recv != nil { + p.parameters(d.Recv, multiLine) // method: print receiver + p.print(blank) + } + p.expr(d.Name, multiLine) + p.signature(d.Type.Params, d.Type.Results, multiLine) + p.funcBody(d.Body, p.distance(d.Pos(), p.pos), false, multiLine) +} + +// Sets multiLine to true if the declaration spans multiple lines. 
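+// (Illustrative summary of the heuristic above: funcBody keeps a body on
+// a single line only if its braces were on one source line, it contains
+// at most five statements and no interior comments, and header plus body
+// fit within 100 characters.)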
+func (p *printer) decl(decl ast.Decl, multiLine *bool) { + switch d := decl.(type) { + case *ast.BadDecl: + p.print(d.Pos(), "BadDecl") + case *ast.GenDecl: + p.genDecl(d, multiLine) + case *ast.FuncDecl: + p.funcDecl(d, multiLine) + default: + panic("unreachable") + } +} + +// ---------------------------------------------------------------------------- +// Files + +func declToken(decl ast.Decl) (tok token.Token) { + tok = token.ILLEGAL + switch d := decl.(type) { + case *ast.GenDecl: + tok = d.Tok + case *ast.FuncDecl: + tok = token.FUNC + } + return +} + +func (p *printer) file(src *ast.File) { + p.setComment(src.Doc) + p.print(src.Pos(), token.PACKAGE, blank) + p.expr(src.Name, ignoreMultiLine) + + if len(src.Decls) > 0 { + tok := token.ILLEGAL + for _, d := range src.Decls { + prev := tok + tok = declToken(d) + // if the declaration token changed (e.g., from CONST to TYPE) + // print an empty line between top-level declarations + min := 1 + if prev != tok { + min = 2 + } + p.linebreak(p.fset.Position(d.Pos()).Line, min, ignore, false) + p.decl(d, ignoreMultiLine) + } + } + + p.print(newline) +} diff --git a/src/pkg/go/printer/performance_test.go b/src/pkg/go/printer/performance_test.go new file mode 100644 index 000000000..84fb2808e --- /dev/null +++ b/src/pkg/go/printer/performance_test.go @@ -0,0 +1,58 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements a simple printer performance benchmark: +// gotest -bench=BenchmarkPrint + +package printer + +import ( + "bytes" + "go/ast" + "go/parser" + "io" + "io/ioutil" + "log" + "testing" +) + +var testfile *ast.File + +func testprint(out io.Writer, file *ast.File) { + if _, err := (&Config{TabIndent | UseSpaces, 8}).Fprint(out, fset, file); err != nil { + log.Fatalf("print error: %s", err) + } +} + +// cannot initialize in init because (printer) Fprint launches goroutines. +func initialize() { + const filename = "testdata/parser.go" + + src, err := ioutil.ReadFile(filename) + if err != nil { + log.Fatalf("%s", err) + } + + file, err := parser.ParseFile(fset, filename, src, parser.ParseComments) + if err != nil { + log.Fatalf("%s", err) + } + + var buf bytes.Buffer + testprint(&buf, file) + if !bytes.Equal(buf.Bytes(), src) { + log.Fatalf("print error: %s not idempotent", filename) + } + + testfile = file +} + +func BenchmarkPrint(b *testing.B) { + if testfile == nil { + initialize() + } + for i := 0; i < b.N; i++ { + testprint(ioutil.Discard, testfile) + } +} diff --git a/src/pkg/go/printer/printer.go b/src/pkg/go/printer/printer.go new file mode 100644 index 000000000..871fefa0c --- /dev/null +++ b/src/pkg/go/printer/printer.go @@ -0,0 +1,1012 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package printer implements printing of AST nodes. 
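+// A typical use (an illustrative sketch mirroring the performance test
+// above; fset is a *token.FileSet and file a parsed *ast.File):
+//
+//	var buf bytes.Buffer
+//	cfg := &Config{TabIndent | UseSpaces, 8}
+//	if _, err := cfg.Fprint(&buf, fset, file); err != nil {
+//		log.Fatalf("print error: %s", err)
+//	}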
+package printer + +import ( + "bytes" + "fmt" + "go/ast" + "go/token" + "io" + "os" + "path/filepath" + "runtime" + "tabwriter" +) + +const debug = false // enable for debugging + + +type whiteSpace int + +const ( + ignore = whiteSpace(0) + blank = whiteSpace(' ') + vtab = whiteSpace('\v') + newline = whiteSpace('\n') + formfeed = whiteSpace('\f') + indent = whiteSpace('>') + unindent = whiteSpace('<') +) + +var ( + esc = []byte{tabwriter.Escape} + htab = []byte{'\t'} + htabs = []byte("\t\t\t\t\t\t\t\t") + newlines = []byte("\n\n\n\n\n\n\n\n") // more than the max determined by nlines + formfeeds = []byte("\f\f\f\f\f\f\f\f") // more than the max determined by nlines +) + +// Special positions +var noPos token.Position // use noPos when a position is needed but not known +var infinity = 1 << 30 + +// Use ignoreMultiLine if the multiLine information is not important. +var ignoreMultiLine = new(bool) + +// A pmode value represents the current printer mode. +type pmode int + +const ( + inLiteral pmode = 1 << iota + noExtraLinebreak +) + +type printer struct { + // Configuration (does not change after initialization) + output io.Writer + Config + fset *token.FileSet + errors chan os.Error + + // Current state + written int // number of bytes written + indent int // current indentation + mode pmode // current printer mode + lastTok token.Token // the last token printed (token.ILLEGAL if it's whitespace) + + // Reused buffers + wsbuf []whiteSpace // delayed white space + litbuf bytes.Buffer // for creation of escaped literals and comments + + // The (possibly estimated) position in the generated output; + // in AST space (i.e., pos is set whenever a token position is + // known accurately, and updated depending on what has been + // written). + pos token.Position + + // The value of pos immediately after the last item has been + // written using writeItem. + last token.Position + + // The list of all source comments, in order of appearance. + comments []*ast.CommentGroup // may be nil + cindex int // current comment index + useNodeComments bool // if not set, ignore lead and line comments of nodes + + // Cache of already computed node sizes. + nodeSizes map[ast.Node]int +} + +func (p *printer) init(output io.Writer, cfg *Config, fset *token.FileSet, nodeSizes map[ast.Node]int) { + p.output = output + p.Config = *cfg + p.fset = fset + p.errors = make(chan os.Error) + p.wsbuf = make([]whiteSpace, 0, 16) // whitespace sequences are short + p.nodeSizes = nodeSizes +} + +func (p *printer) internalError(msg ...interface{}) { + if debug { + fmt.Print(p.pos.String() + ": ") + fmt.Println(msg...) + panic("go/printer") + } +} + +// escape escapes string s by bracketing it with tabwriter.Escape. +// Escaped strings pass through tabwriter unchanged. (Note that +// valid Go programs cannot contain tabwriter.Escape bytes since +// they do not appear in legal UTF-8 sequences). +// +func (p *printer) escape(s string) string { + p.litbuf.Reset() + p.litbuf.WriteByte(tabwriter.Escape) + p.litbuf.WriteString(s) + p.litbuf.WriteByte(tabwriter.Escape) + return p.litbuf.String() +} + +// nlines returns the adjusted number of linebreaks given the desired number +// of breaks n such that min <= result <= max. +// +func (p *printer) nlines(n, min int) int { + const max = 2 // max. number of newlines + switch { + case n < min: + return min + case n > max: + return max + } + return n +} + +// write0 writes raw (uninterpreted) data to p.output and handles errors. 
+// write0 does not indent after newlines, and does not HTML-escape or update p.pos. +// +func (p *printer) write0(data []byte) { + if len(data) > 0 { + n, err := p.output.Write(data) + p.written += n + if err != nil { + p.errors <- err + runtime.Goexit() + } + } +} + +// write interprets data and writes it to p.output. It inserts indentation +// after a line break unless in a tabwriter escape sequence. +// It updates p.pos as a side-effect. +// +func (p *printer) write(data []byte) { + i0 := 0 + for i, b := range data { + switch b { + case '\n', '\f': + // write segment ending in b + p.write0(data[i0 : i+1]) + + // update p.pos + p.pos.Offset += i + 1 - i0 + p.pos.Line++ + p.pos.Column = 1 + + if p.mode&inLiteral == 0 { + // write indentation + // use "hard" htabs - indentation columns + // must not be discarded by the tabwriter + j := p.indent + for ; j > len(htabs); j -= len(htabs) { + p.write0(htabs) + } + p.write0(htabs[0:j]) + + // update p.pos + p.pos.Offset += p.indent + p.pos.Column += p.indent + } + + // next segment start + i0 = i + 1 + + case tabwriter.Escape: + p.mode ^= inLiteral + + // ignore escape chars introduced by printer - they are + // invisible and must not affect p.pos (was issue #1089) + p.pos.Offset-- + p.pos.Column-- + } + } + + // write remaining segment + p.write0(data[i0:]) + + // update p.pos + d := len(data) - i0 + p.pos.Offset += d + p.pos.Column += d +} + +func (p *printer) writeNewlines(n int, useFF bool) { + if n > 0 { + n = p.nlines(n, 0) + if useFF { + p.write(formfeeds[0:n]) + } else { + p.write(newlines[0:n]) + } + } +} + +// writeItem writes data at position pos. data is the text corresponding to +// a single lexical token, but may also be comment text. pos is the actual +// (or at least very accurately estimated) position of the data in the original +// source text. writeItem updates p.last to the position immediately following +// the data. +// +func (p *printer) writeItem(pos token.Position, data string) { + if pos.IsValid() { + // continue with previous position if we don't have a valid pos + if p.last.IsValid() && p.last.Filename != pos.Filename { + // the file has changed - reset state + // (used when printing merged ASTs of different files + // e.g., the result of ast.MergePackageFiles) + p.indent = 0 + p.mode = 0 + p.wsbuf = p.wsbuf[0:0] + } + p.pos = pos + } + if debug { + // do not update p.pos - use write0 + _, filename := filepath.Split(pos.Filename) + p.write0([]byte(fmt.Sprintf("[%s:%d:%d]", filename, pos.Line, pos.Column))) + } + p.write([]byte(data)) + p.last = p.pos +} + +// writeCommentPrefix writes the whitespace before a comment. +// If there is any pending whitespace, it consumes as much of +// it as is likely to help position the comment nicely. +// pos is the comment position, next the position of the item +// after all pending comments, prev is the previous comment in +// a group of comments (or nil), and isKeyword indicates if the +// next item is a keyword. 
+// +func (p *printer) writeCommentPrefix(pos, next token.Position, prev *ast.Comment, isKeyword bool) { + if p.written == 0 { + // the comment is the first item to be printed - don't write any whitespace + return + } + + if pos.IsValid() && pos.Filename != p.last.Filename { + // comment in a different file - separate with newlines (writeNewlines will limit the number) + p.writeNewlines(10, true) + return + } + + if pos.Line == p.last.Line && (prev == nil || prev.Text[1] != '/') { + // comment on the same line as last item: + // separate with at least one separator + hasSep := false + if prev == nil { + // first comment of a comment group + j := 0 + for i, ch := range p.wsbuf { + switch ch { + case blank: + // ignore any blanks before a comment + p.wsbuf[i] = ignore + continue + case vtab: + // respect existing tabs - important + // for proper formatting of commented structs + hasSep = true + continue + case indent: + // apply pending indentation + continue + } + j = i + break + } + p.writeWhitespace(j) + } + // make sure there is at least one separator + if !hasSep { + if pos.Line == next.Line { + // next item is on the same line as the comment + // (which must be a /*-style comment): separate + // with a blank instead of a tab + p.write([]byte{' '}) + } else { + p.write(htab) + } + } + + } else { + // comment on a different line: + // separate with at least one line break + if prev == nil { + // first comment of a comment group + j := 0 + for i, ch := range p.wsbuf { + switch ch { + case blank, vtab: + // ignore any horizontal whitespace before line breaks + p.wsbuf[i] = ignore + continue + case indent: + // apply pending indentation + continue + case unindent: + // if the next token is a keyword, apply the outdent + // if it appears that the comment is aligned with the + // keyword; otherwise assume the outdent is part of a + // closing block and stop (this scenario appears with + // comments before a case label where the comments + // apply to the next case instead of the current one) + if isKeyword && pos.Column == next.Column { + continue + } + case newline, formfeed: + // TODO(gri): may want to keep formfeed info in some cases + p.wsbuf[i] = ignore + } + j = i + break + } + p.writeWhitespace(j) + } + // use formfeeds to break columns before a comment; + // this is analogous to using formfeeds to separate + // individual lines of /*-style comments - but make + // sure there is at least one line break if the previous + // comment was a line comment + n := pos.Line - p.last.Line // if !pos.IsValid(), pos.Line == 0, and n will be 0 + if n <= 0 && prev != nil && prev.Text[1] == '/' { + n = 1 + } + p.writeNewlines(n, true) + } +} + +// TODO(gri): It should be possible to convert the code below from using +// []byte to string and in the process eliminate some conversions. 
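+
+// Editorial sketch (not part of the upstream sources): the functions above
+// never emit whitespace eagerly. Blanks, tabs, line breaks, and indent
+// changes accumulate in p.wsbuf and are only flushed - possibly rewritten
+// by the comment placement heuristics - once the next non-whitespace item
+// is printed. The same buffering idea in miniature, with hypothetical
+// names (assumes "bytes"):
+//
+//	type buffered struct {
+//		pending []byte // delayed whitespace, not yet written
+//	}
+//
+//	func (b *buffered) ws(c byte) { b.pending = append(b.pending, c) }
+//
+//	// item flushes the pending whitespace, then emits the item text;
+//	// a real printer may drop or reorder pending entries first.
+//	func (b *buffered) item(text string, out *bytes.Buffer) {
+//		out.Write(b.pending)
+//		b.pending = b.pending[:0]
+//		out.WriteString(text)
+//	}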
+ +// Split comment text into lines +func split(text []byte) [][]byte { + // count lines (comment text never ends in a newline) + n := 1 + for _, c := range text { + if c == '\n' { + n++ + } + } + + // split + lines := make([][]byte, n) + n = 0 + i := 0 + for j, c := range text { + if c == '\n' { + lines[n] = text[i:j] // exclude newline + i = j + 1 // discard newline + n++ + } + } + lines[n] = text[i:] + + return lines +} + +func isBlank(s []byte) bool { + for _, b := range s { + if b > ' ' { + return false + } + } + return true +} + +func commonPrefix(a, b []byte) []byte { + i := 0 + for i < len(a) && i < len(b) && a[i] == b[i] && (a[i] <= ' ' || a[i] == '*') { + i++ + } + return a[0:i] +} + +func stripCommonPrefix(lines [][]byte) { + if len(lines) < 2 { + return // at most one line - nothing to do + } + // len(lines) >= 2 + + // The heuristic in this function tries to handle a few + // common patterns of /*-style comments: Comments where + // the opening /* and closing */ are aligned and the + // rest of the comment text is aligned and indented with + // blanks or tabs, cases with a vertical "line of stars" + // on the left, and cases where the closing */ is on the + // same line as the last comment text. + + // Compute maximum common white prefix of all but the first, + // last, and blank lines, and replace blank lines with empty + // lines (the first line starts with /* and has no prefix). + // In case of two-line comments, consider the last line for + // the prefix computation since otherwise the prefix would + // be empty. + // + // Note that the first and last line are never empty (they + // contain the opening /* and closing */ respectively) and + // thus they can be ignored by the blank line check. + var prefix []byte + if len(lines) > 2 { + for i, line := range lines[1 : len(lines)-1] { + switch { + case isBlank(line): + lines[1+i] = nil // range starts at line 1 + case prefix == nil: + prefix = commonPrefix(line, line) + default: + prefix = commonPrefix(prefix, line) + } + } + } else { // len(lines) == 2 + line := lines[1] + prefix = commonPrefix(line, line) + } + + /* + * Check for vertical "line of stars" and correct prefix accordingly. + */ + lineOfStars := false + if i := bytes.Index(prefix, []byte{'*'}); i >= 0 { + // Line of stars present. + if i > 0 && prefix[i-1] == ' ' { + i-- // remove trailing blank from prefix so stars remain aligned + } + prefix = prefix[0:i] + lineOfStars = true + } else { + // No line of stars present. + // Determine the white space on the first line after the /* + // and before the beginning of the comment text, assume two + // blanks instead of the /* unless the first character after + // the /* is a tab. If the first comment line is empty but + // for the opening /*, assume up to 3 blanks or a tab. This + // whitespace may be found as suffix in the common prefix. 
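+		//
+		// Editorial illustration (not part of the upstream sources):
+		// given a comment such as
+		//
+		//	/* first line
+		//	   more text
+		//	*/
+		//
+		// the interior lines share a whitespace prefix that includes the
+		// blanks standing in for the opening /*. The code below trims
+		// that stand-in from the prefix, so the text keeps only its
+		// indentation relative to the /* when the comment is reprinted
+		// at a new position.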
+ first := lines[0] + if isBlank(first[2:]) { + // no comment text on the first line: + // reduce prefix by up to 3 blanks or a tab + // if present - this keeps comment text indented + // relative to the /* and */'s if it was indented + // in the first place + i := len(prefix) + for n := 0; n < 3 && i > 0 && prefix[i-1] == ' '; n++ { + i-- + } + if i == len(prefix) && i > 0 && prefix[i-1] == '\t' { + i-- + } + prefix = prefix[0:i] + } else { + // comment text on the first line + suffix := make([]byte, len(first)) + n := 2 // start after opening /* + for n < len(first) && first[n] <= ' ' { + suffix[n] = first[n] + n++ + } + if n > 2 && suffix[2] == '\t' { + // assume the '\t' compensates for the /* + suffix = suffix[2:n] + } else { + // otherwise assume two blanks + suffix[0], suffix[1] = ' ', ' ' + suffix = suffix[0:n] + } + // Shorten the computed common prefix by the length of + // suffix, if it is found as suffix of the prefix. + if bytes.HasSuffix(prefix, suffix) { + prefix = prefix[0 : len(prefix)-len(suffix)] + } + } + } + + // Handle last line: If it only contains a closing */, align it + // with the opening /*, otherwise align the text with the other + // lines. + last := lines[len(lines)-1] + closing := []byte("*/") + i := bytes.Index(last, closing) + if isBlank(last[0:i]) { + // last line only contains closing */ + var sep []byte + if lineOfStars { + // insert an aligning blank + sep = []byte{' '} + } + lines[len(lines)-1] = bytes.Join([][]byte{prefix, closing}, sep) + } else { + // last line contains more comment text - assume + // it is aligned like the other lines + prefix = commonPrefix(prefix, last) + } + + // Remove the common prefix from all but the first and empty lines. + for i, line := range lines[1:] { + if len(line) != 0 { + lines[1+i] = line[len(prefix):] // range starts at line 1 + } + } +} + +func (p *printer) writeComment(comment *ast.Comment) { + text := comment.Text + + // shortcut common case of //-style comments + if text[1] == '/' { + p.writeItem(p.fset.Position(comment.Pos()), p.escape(text)) + return + } + + // for /*-style comments, print line by line and let the + // write function take care of the proper indentation + lines := split([]byte(text)) + stripCommonPrefix(lines) + + // write comment lines, separated by formfeed, + // without a line break after the last line + linebreak := formfeeds[0:1] + pos := p.fset.Position(comment.Pos()) + for i, line := range lines { + if i > 0 { + p.write(linebreak) + pos = p.pos + } + if len(line) > 0 { + p.writeItem(pos, p.escape(string(line))) + } + } +} + +// writeCommentSuffix writes a line break after a comment if indicated +// and processes any leftover indentation information. If a line break +// is needed, the kind of break (newline vs formfeed) depends on the +// pending whitespace. writeCommentSuffix returns true if a pending +// formfeed was dropped from the whitespace buffer. 
+//
+func (p *printer) writeCommentSuffix(needsLinebreak bool) (droppedFF bool) {
+	for i, ch := range p.wsbuf {
+		switch ch {
+		case blank, vtab:
+			// ignore trailing whitespace
+			p.wsbuf[i] = ignore
+		case indent, unindent:
+			// don't lose indentation information
+		case newline, formfeed:
+			// if we need a line break, keep exactly one
+			// but remember if we dropped any formfeeds
+			if needsLinebreak {
+				needsLinebreak = false
+			} else {
+				if ch == formfeed {
+					droppedFF = true
+				}
+				p.wsbuf[i] = ignore
+			}
+		}
+	}
+	p.writeWhitespace(len(p.wsbuf))
+
+	// make sure we have a line break
+	if needsLinebreak {
+		p.write([]byte{'\n'})
+	}
+
+	return
+}
+
+// intersperseComments consumes all comments that appear before the next token
+// tok and prints them together with the buffered whitespace (i.e., the whitespace
+// that needs to be written before the next token). A heuristic is used to mix
+// the comments and whitespace. intersperseComments returns true if a pending
+// formfeed was dropped from the whitespace buffer.
+//
+func (p *printer) intersperseComments(next token.Position, tok token.Token) (droppedFF bool) {
+	var last *ast.Comment
+	for ; p.commentBefore(next); p.cindex++ {
+		for _, c := range p.comments[p.cindex].List {
+			p.writeCommentPrefix(p.fset.Position(c.Pos()), next, last, tok.IsKeyword())
+			p.writeComment(c)
+			last = c
+		}
+	}
+
+	if last != nil {
+		if last.Text[1] == '*' && p.fset.Position(last.Pos()).Line == next.Line {
+			// the last comment is a /*-style comment and the next item
+			// follows on the same line: separate with an extra blank
+			p.write([]byte{' '})
+		}
+		// ensure that there is a line break after a //-style comment,
+		// before a closing '}' unless explicitly disabled, or at eof
+		needsLinebreak :=
+			last.Text[1] == '/' ||
+				tok == token.RBRACE && p.mode&noExtraLinebreak == 0 ||
+				tok == token.EOF
+		return p.writeCommentSuffix(needsLinebreak)
+	}
+
+	// no comment was written - we should never reach here since
+	// intersperseComments should not be called in that case
+	p.internalError("intersperseComments called without pending comments")
+	return false
+}
+
+// writeWhitespace writes the first n whitespace entries.
+func (p *printer) writeWhitespace(n int) {
+	// write entries
+	var data [1]byte
+	for i := 0; i < n; i++ {
+		switch ch := p.wsbuf[i]; ch {
+		case ignore:
+			// ignore!
+		case indent:
+			p.indent++
+		case unindent:
+			p.indent--
+			if p.indent < 0 {
+				p.internalError("negative indentation:", p.indent)
+				p.indent = 0
+			}
+		case newline, formfeed:
+			// A line break immediately followed by a "correcting"
+			// unindent is swapped with the unindent - this permits
+			// proper label positioning. If a comment is between
+			// the line break and the label, the unindent is not
+			// part of the comment whitespace prefix and the comment
+			// will be positioned correctly indented.
+			if i+1 < n && p.wsbuf[i+1] == unindent {
+				// Use a formfeed to terminate the current section.
+				// Otherwise, a long label name on the next line leading
+				// to a wide column may increase the indentation column
+				// of lines before the label; effectively leading to wrong
+				// indentation.
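+			//
+			// Editorial illustration (not part of the upstream sources):
+			// for a labeled statement such as
+			//
+			//	L:
+			//		f()
+			//
+			// the buffer may hold [newline, unindent] before the label;
+			// writing the entries in that order would place the label in
+			// the indented column. Rewritten to [unindent, formfeed], the
+			// label moves to the outer column and the formfeed terminates
+			// the current tabwriter section, as explained above.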
+ p.wsbuf[i], p.wsbuf[i+1] = unindent, formfeed + i-- // do it again + continue + } + fallthrough + default: + data[0] = byte(ch) + p.write(data[0:]) + } + } + + // shift remaining entries down + i := 0 + for ; n < len(p.wsbuf); n++ { + p.wsbuf[i] = p.wsbuf[n] + i++ + } + p.wsbuf = p.wsbuf[0:i] +} + +// ---------------------------------------------------------------------------- +// Printing interface + + +func mayCombine(prev token.Token, next byte) (b bool) { + switch prev { + case token.INT: + b = next == '.' // 1. + case token.ADD: + b = next == '+' // ++ + case token.SUB: + b = next == '-' // -- + case token.QUO: + b = next == '*' // /* + case token.LSS: + b = next == '-' || next == '<' // <- or << + case token.AND: + b = next == '&' || next == '^' // && or &^ + } + return +} + +// print prints a list of "items" (roughly corresponding to syntactic +// tokens, but also including whitespace and formatting information). +// It is the only print function that should be called directly from +// any of the AST printing functions in nodes.go. +// +// Whitespace is accumulated until a non-whitespace token appears. Any +// comments that need to appear before that token are printed first, +// taking into account the amount and structure of any pending white- +// space for best comment placement. Then, any leftover whitespace is +// printed, followed by the actual token. +// +func (p *printer) print(args ...interface{}) { + for _, f := range args { + next := p.pos // estimated position of next item + var data string + var tok token.Token + + switch x := f.(type) { + case pmode: + // toggle printer mode + p.mode ^= x + case whiteSpace: + if x == ignore { + // don't add ignore's to the buffer; they + // may screw up "correcting" unindents (see + // LabeledStmt) + break + } + i := len(p.wsbuf) + if i == cap(p.wsbuf) { + // Whitespace sequences are very short so this should + // never happen. Handle gracefully (but possibly with + // bad comment placement) if it does happen. + p.writeWhitespace(i) + i = 0 + } + p.wsbuf = p.wsbuf[0 : i+1] + p.wsbuf[i] = x + case *ast.Ident: + data = x.Name + tok = token.IDENT + case *ast.BasicLit: + data = p.escape(x.Value) + tok = x.Kind + case token.Token: + s := x.String() + if mayCombine(p.lastTok, s[0]) { + // the previous and the current token must be + // separated by a blank otherwise they combine + // into a different incorrect token sequence + // (except for token.INT followed by a '.' this + // should never happen because it is taken care + // of via binary expression formatting) + if len(p.wsbuf) != 0 { + p.internalError("whitespace buffer not empty") + } + p.wsbuf = p.wsbuf[0:1] + p.wsbuf[0] = ' ' + } + data = s + tok = x + case token.Pos: + if x.IsValid() { + next = p.fset.Position(x) // accurate position of next item + } + tok = p.lastTok + default: + fmt.Fprintf(os.Stderr, "print: unsupported argument type %T\n", f) + panic("go/printer type") + } + p.lastTok = tok + p.pos = next + + if data != "" { + droppedFF := p.flush(next, tok) + + // intersperse extra newlines if present in the source + // (don't do this in flush as it will cause extra newlines + // at the end of a file) - use formfeeds if we dropped one + // before + p.writeNewlines(next.Line-p.pos.Line, droppedFF) + + p.writeItem(next, data) + } + } +} + +// commentBefore returns true iff the current comment occurs +// before the next position in the source code. 
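+
+// Editorial note (not part of the upstream sources): mayCombine above guards
+// against accidental token pasting: printing token.LSS ("<") directly before
+// an expression starting with '-' would produce "<-", which rescans as the
+// channel operator instead of a comparison, so print inserts a blank between
+// the two tokens. The other cases ("++", "--", "/*", "<<", "&&", "&^", and
+// an INT followed by '.') are handled the same way.
+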
+// +func (p *printer) commentBefore(next token.Position) bool { + return p.cindex < len(p.comments) && p.fset.Position(p.comments[p.cindex].List[0].Pos()).Offset < next.Offset +} + +// Flush prints any pending comments and whitespace occurring +// textually before the position of the next token tok. Flush +// returns true if a pending formfeed character was dropped +// from the whitespace buffer as a result of interspersing +// comments. +// +func (p *printer) flush(next token.Position, tok token.Token) (droppedFF bool) { + if p.commentBefore(next) { + // if there are comments before the next item, intersperse them + droppedFF = p.intersperseComments(next, tok) + } else { + // otherwise, write any leftover whitespace + p.writeWhitespace(len(p.wsbuf)) + } + return +} + +// ---------------------------------------------------------------------------- +// Trimmer + +// A trimmer is an io.Writer filter for stripping tabwriter.Escape +// characters, trailing blanks and tabs, and for converting formfeed +// and vtab characters into newlines and htabs (in case no tabwriter +// is used). Text bracketed by tabwriter.Escape characters is passed +// through unchanged. +// +type trimmer struct { + output io.Writer + state int + space bytes.Buffer +} + +// trimmer is implemented as a state machine. +// It can be in one of the following states: +const ( + inSpace = iota // inside space + inEscape // inside text bracketed by tabwriter.Escapes + inText // inside text +) + +// Design note: It is tempting to eliminate extra blanks occurring in +// whitespace in this function as it could simplify some +// of the blanks logic in the node printing functions. +// However, this would mess up any formatting done by +// the tabwriter. + +func (p *trimmer) Write(data []byte) (n int, err os.Error) { + // invariants: + // p.state == inSpace: + // p.space is unwritten + // p.state == inEscape, inText: + // data[m:n] is unwritten + m := 0 + var b byte + for n, b = range data { + if b == '\v' { + b = '\t' // convert to htab + } + switch p.state { + case inSpace: + switch b { + case '\t', ' ': + p.space.WriteByte(b) // WriteByte returns no errors + case '\n', '\f': + p.space.Reset() // discard trailing space + _, err = p.output.Write(newlines[0:1]) // write newline + case tabwriter.Escape: + _, err = p.output.Write(p.space.Bytes()) + p.state = inEscape + m = n + 1 // +1: skip tabwriter.Escape + default: + _, err = p.output.Write(p.space.Bytes()) + p.state = inText + m = n + } + case inEscape: + if b == tabwriter.Escape { + _, err = p.output.Write(data[m:n]) + p.state = inSpace + p.space.Reset() + } + case inText: + switch b { + case '\t', ' ': + _, err = p.output.Write(data[m:n]) + p.state = inSpace + p.space.Reset() + p.space.WriteByte(b) // WriteByte returns no errors + case '\n', '\f': + _, err = p.output.Write(data[m:n]) + p.state = inSpace + p.space.Reset() + _, err = p.output.Write(newlines[0:1]) // write newline + case tabwriter.Escape: + _, err = p.output.Write(data[m:n]) + p.state = inEscape + m = n + 1 // +1: skip tabwriter.Escape + } + default: + panic("unreachable") + } + if err != nil { + return + } + } + n = len(data) + + switch p.state { + case inEscape, inText: + _, err = p.output.Write(data[m:n]) + p.state = inSpace + p.space.Reset() + } + + return +} + +// ---------------------------------------------------------------------------- +// Public interface + +// General printing is controlled with these Config.Mode flags. 
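+//
+// Editorial example (not part of the upstream sources): the benchmark in
+// performance_test.go requests tab indentation with space-based alignment
+// by combining two of the flags below:
+//
+//	cfg := Config{TabIndent | UseSpaces, 8}
+//
+// whereas RawFormat bypasses the tabwriter entirely, so columns are never
+// aligned and UseSpaces has no effect.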
+const (
+	RawFormat uint = 1 << iota // do not use a tabwriter; if set, UseSpaces is ignored
+	TabIndent                  // use tabs for indentation independent of UseSpaces
+	UseSpaces                  // use spaces instead of tabs for alignment
+)
+
+// A Config node controls the output of Fprint.
+type Config struct {
+	Mode     uint // default: 0
+	Tabwidth int  // default: 8
+}
+
+// fprint implements Fprint and takes a nodeSizes map for setting up the printer state.
+func (cfg *Config) fprint(output io.Writer, fset *token.FileSet, node interface{}, nodeSizes map[ast.Node]int) (int, os.Error) {
+	// redirect output through a trimmer to eliminate trailing whitespace
+	// (Input to a tabwriter must be untrimmed since trailing tabs provide
+	// formatting information. The tabwriter could provide trimming
+	// functionality but no tabwriter is used when RawFormat is set.)
+	output = &trimmer{output: output}
+
+	// setup tabwriter if needed and redirect output
+	var tw *tabwriter.Writer
+	if cfg.Mode&RawFormat == 0 {
+		minwidth := cfg.Tabwidth
+
+		padchar := byte('\t')
+		if cfg.Mode&UseSpaces != 0 {
+			padchar = ' '
+		}
+
+		twmode := tabwriter.DiscardEmptyColumns
+		if cfg.Mode&TabIndent != 0 {
+			minwidth = 0
+			twmode |= tabwriter.TabIndent
+		}
+
+		tw = tabwriter.NewWriter(output, minwidth, cfg.Tabwidth, 1, padchar, twmode)
+		output = tw
+	}
+
+	// setup printer and print node
+	var p printer
+	p.init(output, cfg, fset, nodeSizes)
+	go func() {
+		switch n := node.(type) {
+		case ast.Expr:
+			p.useNodeComments = true
+			p.expr(n, ignoreMultiLine)
+		case ast.Stmt:
+			p.useNodeComments = true
+			// A labeled statement will un-indent to position the
+			// label. Set indent to 1 so we don't get indent "underflow".
+			if _, labeledStmt := n.(*ast.LabeledStmt); labeledStmt {
+				p.indent = 1
+			}
+			p.stmt(n, false, ignoreMultiLine)
+		case ast.Decl:
+			p.useNodeComments = true
+			p.decl(n, ignoreMultiLine)
+		case ast.Spec:
+			p.useNodeComments = true
+			p.spec(n, 1, false, ignoreMultiLine)
+		case *ast.File:
+			p.comments = n.Comments
+			p.useNodeComments = n.Comments == nil
+			p.file(n)
+		default:
+			p.errors <- fmt.Errorf("printer.Fprint: unsupported node type %T", n)
+			runtime.Goexit()
+		}
+		p.flush(token.Position{Offset: infinity, Line: infinity}, token.EOF)
+		p.errors <- nil // no errors
+	}()
+	err := <-p.errors // wait for completion of goroutine
+
+	// flush tabwriter, if any
+	if tw != nil {
+		tw.Flush() // ignore errors
+	}
+
+	return p.written, err
+}
+
+// Fprint "pretty-prints" an AST node to output and returns the number
+// of bytes written and an error (if any) for a given configuration cfg.
+// Position information is interpreted relative to the file set fset.
+// The node type must be *ast.File, or assignment-compatible to ast.Expr,
+// ast.Decl, ast.Spec, or ast.Stmt.
+//
+func (cfg *Config) Fprint(output io.Writer, fset *token.FileSet, node interface{}) (int, os.Error) {
+	return cfg.fprint(output, fset, node, make(map[ast.Node]int))
+}
+
+// Fprint "pretty-prints" an AST node to output.
+// It calls Config.Fprint with default settings.
+//
+func Fprint(output io.Writer, fset *token.FileSet, node interface{}) os.Error {
+	_, err := (&Config{Tabwidth: 8}).Fprint(output, fset, node) // don't care about number of bytes written
+	return err
+}
diff --git a/src/pkg/go/printer/printer_test.go b/src/pkg/go/printer/printer_test.go
new file mode 100644
index 000000000..ff2d906b5
--- /dev/null
+++ b/src/pkg/go/printer/printer_test.go
@@ -0,0 +1,194 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package printer
+
+import (
+	"bytes"
+	"flag"
+	"io/ioutil"
+	"go/ast"
+	"go/parser"
+	"go/token"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+const (
+	dataDir  = "testdata"
+	tabwidth = 8
+)
+
+var update = flag.Bool("update", false, "update golden files")
+
+var fset = token.NewFileSet()
+
+func lineString(text []byte, i int) string {
+	i0 := i
+	for i < len(text) && text[i] != '\n' {
+		i++
+	}
+	return string(text[i0:i])
+}
+
+type checkMode uint
+
+const (
+	export checkMode = 1 << iota
+	rawFormat
+)
+
+func runcheck(t *testing.T, source, golden string, mode checkMode) {
+	// parse source
+	prog, err := parser.ParseFile(fset, source, nil, parser.ParseComments)
+	if err != nil {
+		t.Error(err)
+		return
+	}
+
+	// filter exports if necessary
+	if mode&export != 0 {
+		ast.FileExports(prog) // ignore result
+		prog.Comments = nil   // don't print comments that are not in AST
+	}
+
+	// determine printer configuration
+	cfg := Config{Tabwidth: tabwidth}
+	if mode&rawFormat != 0 {
+		cfg.Mode |= RawFormat
+	}
+
+	// format source
+	var buf bytes.Buffer
+	if _, err := cfg.Fprint(&buf, fset, prog); err != nil {
+		t.Error(err)
+	}
+	res := buf.Bytes()
+
+	// update golden files if necessary
+	if *update {
+		if err := ioutil.WriteFile(golden, res, 0644); err != nil {
+			t.Error(err)
+		}
+		return
+	}
+
+	// get golden
+	gld, err := ioutil.ReadFile(golden)
+	if err != nil {
+		t.Error(err)
+		return
+	}
+
+	// compare lengths
+	if len(res) != len(gld) {
+		t.Errorf("len = %d, expected %d (= len(%s))", len(res), len(gld), golden)
+	}
+
+	// compare contents
+	for i, line, offs := 0, 1, 0; i < len(res) && i < len(gld); i++ {
+		ch := res[i]
+		if ch != gld[i] {
+			t.Errorf("%s:%d:%d: %s", source, line, i-offs+1, lineString(res, offs))
+			t.Errorf("%s:%d:%d: %s", golden, line, i-offs+1, lineString(gld, offs))
+			t.Error()
+			return
+		}
+		if ch == '\n' {
+			line++
+			offs = i + 1
+		}
+	}
+}
+
+func check(t *testing.T, source, golden string, mode checkMode) {
+	// start a timer to produce a time-out signal
+	tc := make(chan int)
+	go func() {
+		time.Sleep(10e9) // plenty of safety margin, even for very slow machines
+		tc <- 0
+	}()
+
+	// run the test
+	cc := make(chan int)
+	go func() {
+		runcheck(t, source, golden, mode)
+		cc <- 0
+	}()
+
+	// wait for the first finisher
+	select {
+	case <-tc:
+		// test running past time out
+		t.Errorf("%s: running too slowly", source)
+	case <-cc:
+		// test finished within allotted time margin
+	}
+}
+
+type entry struct {
+	source, golden string
+	mode           checkMode
+}
+
+// Use gotest -update to create/update the respective golden files.
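+//
+// Editorial note (not part of the upstream sources): each entry below pairs
+// an input file with a golden file; runcheck pretty-prints the input and
+// compares the result byte-for-byte against the golden file. A minimal
+// client-side use of the same API, with hypothetical file names (assumes
+// "os" for os.Stdout):
+//
+//	fset := token.NewFileSet()
+//	file, err := parser.ParseFile(fset, "x.go", nil, parser.ParseComments)
+//	if err != nil {
+//		// handle error
+//	}
+//	_, err = (&Config{Tabwidth: 8}).Fprint(os.Stdout, fset, file)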
+var data = []entry{ + {"empty.input", "empty.golden", 0}, + {"comments.input", "comments.golden", 0}, + {"comments.input", "comments.x", export}, + {"linebreaks.input", "linebreaks.golden", 0}, + {"expressions.input", "expressions.golden", 0}, + {"expressions.input", "expressions.raw", rawFormat}, + {"declarations.input", "declarations.golden", 0}, + {"statements.input", "statements.golden", 0}, + {"slow.input", "slow.golden", 0}, +} + +func TestFiles(t *testing.T) { + for i, e := range data { + source := filepath.Join(dataDir, e.source) + golden := filepath.Join(dataDir, e.golden) + check(t, source, golden, e.mode) + // TODO(gri) check that golden is idempotent + //check(t, golden, golden, e.mode) + if testing.Short() && i >= 3 { + break + } + } +} + +// TestLineComments, using a simple test case, checks that consequtive line +// comments are properly terminated with a newline even if the AST position +// information is incorrect. +// +func TestLineComments(t *testing.T) { + const src = `// comment 1 + // comment 2 + // comment 3 + package main + ` + + fset := token.NewFileSet() + ast1, err1 := parser.ParseFile(fset, "", src, parser.ParseComments) + if err1 != nil { + panic(err1) + } + + var buf bytes.Buffer + fset = token.NewFileSet() // use the wrong file set + Fprint(&buf, fset, ast1) + + nlines := 0 + for _, ch := range buf.Bytes() { + if ch == '\n' { + nlines++ + } + } + + const expected = 3 + if nlines < expected { + t.Errorf("got %d, expected %d\n", nlines, expected) + } +} diff --git a/src/pkg/go/printer/testdata/comments.golden b/src/pkg/go/printer/testdata/comments.golden new file mode 100644 index 000000000..7b332252c --- /dev/null +++ b/src/pkg/go/printer/testdata/comments.golden @@ -0,0 +1,473 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This is a package for testing comment placement by go/printer. +// +package main + +import "fmt" // fmt + +const c0 = 0 // zero +const ( + c1 = iota // c1 + c2 // c2 +) + +// Alignment of comments in declarations> +const ( + _ T = iota // comment + _ // comment + _ // comment + _ = iota + 10 + _ // comments + + _ = 10 // comment + _ T = 20 // comment +) + +const ( + _____ = iota // foo + _ // bar + _ = 0 // bal + _ // bat +) + +const ( + _ T = iota // comment + _ // comment + _ // comment + _ = iota + 10 + _ // comment + _ = 10 + _ = 20 // comment + _ T = 0 // comment +) + +// The SZ struct; it is empty. +type SZ struct{} + +// The S0 struct; no field is exported. +type S0 struct { + int + x, y, z int // 3 unexported fields +} + +// The S1 struct; some fields are not exported. +type S1 struct { + S0 + A, B, C float // 3 exported fields + D, b, c int // 2 unexported fields +} + +// The S2 struct; all fields are exported. +type S2 struct { + S1 + A, B, C float // 3 exported fields +} + +// The IZ interface; it is empty. +type SZ interface{} + +// The I0 interface; no method is exported. +type I0 interface { + f(x int) int // unexported method +} + +// The I1 interface; some methods are not exported. +type I1 interface { + I0 + F(x float) float // exported methods + g(x int) int // unexported method +} + +// The I2 interface; all methods are exported. +type I2 interface { + I0 + F(x float) float // exported method + G(x float) float // exported method +} + +// The S3 struct; all comments except for the last one must appear in the export. 
+type S3 struct { + // lead comment for F1 + F1 int // line comment for F1 + // lead comment for F2 + F2 int // line comment for F2 + f3 int // f3 is not exported +} + +// This comment group should be separated +// with a newline from the next comment +// group. + +// This comment should NOT be associated with the next declaration. + +var x int // x +var () + +// This comment SHOULD be associated with the next declaration. +func f0() { + const pi = 3.14 // pi + var s1 struct{} /* an empty struct */ /* foo */ + // a struct constructor + // -------------------- + var s2 struct{} = struct{}{} + x := pi +} +// +// NO SPACE HERE +// +func f1() { + f0() + /* 1 */ + // 2 + /* 3 */ + /* 4 */ + f0() +} + +func _() { + // this comment should be properly indented +} + +func _(x int) int { + if x < 0 { // the tab printed before this comment's // must not affect the remaining lines + return -x // this statement should be properly indented + } + if x < 0 { /* the tab printed before this comment's /* must not affect the remaining lines */ + return -x // this statement should be properly indented + } + return x +} + +func typeswitch(x interface{}) { + switch v := x.(type) { + case bool, int, float: + case string: + default: + } + + switch x.(type) { + } + + switch v0, ok := x.(int); v := x.(type) { + } + + switch v0, ok := x.(int); x.(type) { + case byte: // this comment should be on the same line as the keyword + // this comment should be normally indented + _ = 0 + case bool, int, float: + // this comment should be indented + case string: + default: + // this comment should be indented + } + // this comment should not be indented +} + +func _() { + /* freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + aligned line */ +} + +func _() { + /* freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + aligned line */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line */ +} + +func _() { + /* freestanding comment + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line */ +} + +func _() { + /* freestanding comment + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line */ +} + +func _() { + /* + freestanding comment + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line */ +} + +func _() { + /* + freestanding comment + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line */ +} + +/* + * line + * of + * stars + */ + +/* another line + * of + * stars */ + +/* and another line + * of + * stars */ + +/* a line of + * stars */ + +/* and another line of + * stars */ + +/* a line of 
stars + */ + +/* and another line of + */ + +/* a line of stars + */ + +/* and another line of + */ + +/* +aligned in middle +here + not here +*/ + +/* +blank line in middle: + +with no leading spaces on blank line. +*/ + +/* + aligned in middle + here + not here +*/ + +/* + blank line in middle: + + with no leading spaces on blank line. +*/ + +func _() { + /* + * line + * of + * stars + */ + + /* + aligned in middle + here + not here + */ + + /* + blank line in middle: + + with no leading spaces on blank line. + */ +} + +// Some interesting interspersed comments +func _( /* this */ x /* is */ /* an */ int) { +} + +func _( /* no params */ ) {} + +func _() { + f( /* no args */ ) +} + +func ( /* comment1 */ T /* comment2 */ ) _() {} + +func _() { /* one-line functions with comments are formatted as multi-line functions */ +} + +func _() { + _ = 0 + /* closing curly brace should be on new line */ +} + +func _() { + _ = []int{0, 1 /* don't introduce a newline after this comment - was issue 1365 */ } +} + +// Comments immediately adjacent to punctuation (for which the go/printer +// may only have estimated position information) must remain after the punctuation. +func _() { + _ = T{ + 1, // comment after comma + 2, /* comment after comma */ + 3, // comment after comma + } + _ = T{ + 1, // comment after comma + 2, /* comment after comma */ + 3, // comment after comma + } + _ = T{ + /* comment before literal */ 1, + 2, /* comment before comma - ok to move after comma */ + 3, /* comment before comma - ok to move after comma */ + } + + for i = 0; // comment after semicolon + i < 9; /* comment after semicolon */ + i++ { // comment after opening curly brace + } + + // TODO(gri) the last comment in this example should be aligned */ + for i = 0; // comment after semicolon + i < 9; /* comment before semicolon - ok to move after semicolon */ + i++ /* comment before opening curly brace */ { + } +} + +// Line comments with tabs +func _() { + var finput *bufio.Reader // input file + var stderr *bufio.Writer + var ftable *bufio.Writer // y.go file + var foutput *bufio.Writer // y.output file + + var oflag string // -o [y.go] - y.go file + var vflag string // -v [y.output] - y.output file + var lflag bool // -l - disable line directives +} + +/* This comment is the last entry in this file. It must be printed and should be followed by a newline */ diff --git a/src/pkg/go/printer/testdata/comments.input b/src/pkg/go/printer/testdata/comments.input new file mode 100644 index 000000000..2a9a86b68 --- /dev/null +++ b/src/pkg/go/printer/testdata/comments.input @@ -0,0 +1,483 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This is a package for testing comment placement by go/printer. +// +package main + +import "fmt" // fmt + +const c0 = 0 // zero +const ( + c1 = iota // c1 + c2 // c2 +) + +// Alignment of comments in declarations> +const ( + _ T = iota // comment + _ // comment + _ // comment + _ = iota+10 + _ // comments + + _ = 10 // comment + _ T = 20 // comment +) + +const ( + _____ = iota // foo + _ // bar + _ = 0 // bal + _ // bat +) + +const ( + _ T = iota // comment + _ // comment + _ // comment + _ = iota + 10 + _ // comment + _ = 10 + _ = 20 // comment + _ T = 0 // comment +) + +// The SZ struct; it is empty. +type SZ struct {} + +// The S0 struct; no field is exported. +type S0 struct { + int + x, y, z int // 3 unexported fields +} + +// The S1 struct; some fields are not exported. 
+type S1 struct { + S0 + A, B, C float // 3 exported fields + D, b, c int // 2 unexported fields +} + +// The S2 struct; all fields are exported. +type S2 struct { + S1 + A, B, C float // 3 exported fields +} + +// The IZ interface; it is empty. +type SZ interface {} + +// The I0 interface; no method is exported. +type I0 interface { + f(x int) int // unexported method +} + +// The I1 interface; some methods are not exported. +type I1 interface { + I0 + F(x float) float // exported methods + g(x int) int // unexported method +} + +// The I2 interface; all methods are exported. +type I2 interface { + I0 + F(x float) float // exported method + G(x float) float // exported method +} + +// The S3 struct; all comments except for the last one must appear in the export. +type S3 struct { + // lead comment for F1 + F1 int // line comment for F1 + // lead comment for F2 + F2 int // line comment for F2 + f3 int // f3 is not exported +} + +// This comment group should be separated +// with a newline from the next comment +// group. + +// This comment should NOT be associated with the next declaration. + +var x int // x +var () + + +// This comment SHOULD be associated with the next declaration. +func f0() { + const pi = 3.14 // pi + var s1 struct {} /* an empty struct */ /* foo */ + // a struct constructor + // -------------------- + var s2 struct {} = struct {}{} + x := pi +} +// +// NO SPACE HERE +// +func f1() { + f0() + /* 1 */ + // 2 + /* 3 */ + /* 4 */ + f0() +} + + +func _() { + // this comment should be properly indented +} + + +func _(x int) int { + if x < 0 { // the tab printed before this comment's // must not affect the remaining lines + return -x // this statement should be properly indented + } + if x < 0 { /* the tab printed before this comment's /* must not affect the remaining lines */ + return -x // this statement should be properly indented + } + return x +} + + +func typeswitch(x interface{}) { + switch v := x.(type) { + case bool, int, float: + case string: + default: + } + + switch x.(type) { + } + + switch v0, ok := x.(int); v := x.(type) { + } + + switch v0, ok := x.(int); x.(type) { + case byte: // this comment should be on the same line as the keyword + // this comment should be normally indented + _ = 0 + case bool, int, float: + // this comment should be indented + case string: + default: + // this comment should be indented + } + // this comment should not be indented +} + +func _() { + /* freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + aligned line */ +} + +func _() { + /* freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + aligned line */ +} + + +func _() { + /* + freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + aligned line */ +} + +func _() { + /* freestanding comment + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line 
*/ +} + +func _() { + /* freestanding comment + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line + */ +} + +func _() { + /* freestanding comment + aligned line */ +} + + +func _() { + /* + freestanding comment + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line */ +} + +func _() { + /* + freestanding comment + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line + */ +} + +func _() { + /* + freestanding comment + aligned line */ +} + +/* + * line + * of + * stars + */ + +/* another line + * of + * stars */ + +/* and another line + * of + * stars */ + +/* a line of + * stars */ + +/* and another line of + * stars */ + +/* a line of stars +*/ + +/* and another line of +*/ + +/* a line of stars + */ + +/* and another line of + */ + +/* +aligned in middle +here + not here +*/ + +/* +blank line in middle: + +with no leading spaces on blank line. +*/ + +/* + aligned in middle + here + not here +*/ + +/* + blank line in middle: + + with no leading spaces on blank line. +*/ + +func _() { + /* + * line + * of + * stars + */ + + /* + aligned in middle + here + not here + */ + + /* + blank line in middle: + + with no leading spaces on blank line. +*/ +} + + +// Some interesting interspersed comments +func _(/* this */x/* is *//* an */ int) { +} + +func _(/* no params */) {} + +func _() { + f(/* no args */) +} + +func (/* comment1 */ T /* comment2 */) _() {} + +func _() { /* one-line functions with comments are formatted as multi-line functions */ } + +func _() { + _ = 0 + /* closing curly brace should be on new line */ } + +func _() { + _ = []int{0, 1 /* don't introduce a newline after this comment - was issue 1365 */} +} + + +// Comments immediately adjacent to punctuation (for which the go/printer +// may only have estimated position information) must remain after the punctuation. +func _() { + _ = T{ + 1, // comment after comma + 2, /* comment after comma */ + 3 , // comment after comma + } + _ = T{ + 1 ,// comment after comma + 2 ,/* comment after comma */ + 3,// comment after comma + } + _ = T{ + /* comment before literal */1, + 2/* comment before comma - ok to move after comma */, + 3 /* comment before comma - ok to move after comma */ , + } + + for + i=0;// comment after semicolon + i<9;/* comment after semicolon */ + i++{// comment after opening curly brace + } + + // TODO(gri) the last comment in this example should be aligned */ + for + i=0;// comment after semicolon + i<9/* comment before semicolon - ok to move after semicolon */; + i++ /* comment before opening curly brace */ { + } +} + + +// Line comments with tabs +func _() { +var finput *bufio.Reader // input file +var stderr *bufio.Writer +var ftable *bufio.Writer // y.go file +var foutput *bufio.Writer // y.output file + +var oflag string // -o [y.go] - y.go file +var vflag string // -v [y.output] - y.output file +var lflag bool // -l - disable line directives +} + + +/* This comment is the last entry in this file. It must be printed and should be followed by a newline */ diff --git a/src/pkg/go/printer/testdata/comments.x b/src/pkg/go/printer/testdata/comments.x new file mode 100644 index 000000000..ae7729286 --- /dev/null +++ b/src/pkg/go/printer/testdata/comments.x @@ -0,0 +1,56 @@ +// This is a package for testing comment placement by go/printer. +// +package main + +// The SZ struct; it is empty. +type SZ struct{} + +// The S0 struct; no field is exported. 
+type S0 struct { + // contains filtered or unexported fields +} + +// The S1 struct; some fields are not exported. +type S1 struct { + S0 + A, B, C float // 3 exported fields + D int // 2 unexported fields + // contains filtered or unexported fields +} + +// The S2 struct; all fields are exported. +type S2 struct { + S1 + A, B, C float // 3 exported fields +} + +// The IZ interface; it is empty. +type SZ interface{} + +// The I0 interface; no method is exported. +type I0 interface { + // contains filtered or unexported methods +} + +// The I1 interface; some methods are not exported. +type I1 interface { + I0 + F(x float) float // exported methods + // contains filtered or unexported methods +} + +// The I2 interface; all methods are exported. +type I2 interface { + I0 + F(x float) float // exported method + G(x float) float // exported method +} + +// The S3 struct; all comments except for the last one must appear in the export. +type S3 struct { + // lead comment for F1 + F1 int // line comment for F1 + // lead comment for F2 + F2 int // line comment for F2 + // contains filtered or unexported fields +} diff --git a/src/pkg/go/printer/testdata/declarations.golden b/src/pkg/go/printer/testdata/declarations.golden new file mode 100644 index 000000000..970533e8c --- /dev/null +++ b/src/pkg/go/printer/testdata/declarations.golden @@ -0,0 +1,747 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package imports + +import "io" + +import ( + _ "io" +) + +import _ "io" + +import ( + "io" + "io" + "io" +) + +import ( + "io" + aLongRename "io" + + b "io" +) + +import ( + "unrenamed" + renamed "renameMe" + . "io" + _ "io" + "io" + . "os" +) + +// no newlines between consecutive single imports, but +// respect extra line breaks in the source (at most one empty line) +import _ "io" +import _ "io" +import _ "io" + +import _ "os" +import _ "os" +import _ "os" + +import _ "fmt" +import _ "fmt" +import _ "fmt" + +import "foo" // a comment +import "bar" // a comment + +import ( + _ "foo" + // a comment + "bar" + "foo" // a comment + "bar" // a comment +) + +// comments + renames +import ( + "unrenamed" // a comment + renamed "renameMe" + . "io" /* a comment */ + _ "io/ioutil" // a comment + "io" // testing alignment + . "os" + // a comment +) + +// a case that caused problems in the past (comment placement) +import ( + . "fmt" + "io" + "malloc" // for the malloc count test only + "math" + "strings" + "testing" +) + +// more import examples +import ( + "xxx" + "much longer name" // comment + "short name" // comment +) + +import ( + _ "xxx" + "much longer name" // comment +) + +import ( + mymath "math" + "/foo/bar/long_package_path" // a comment +) + +import ( + "package_a" // comment + "package_b" + my_better_c "package_c" // comment + "package_d" // comment + my_e "package_e" // comment + + "package_a" // comment + "package_bb" + "package_ccc" // comment + "package_dddd" // comment +) + +// at least one empty line between declarations of different kind +import _ "io" + +var _ int + +// printing of constant literals +const ( + _ = "foobar" + _ = "a۰۱۸" + _ = "foo६४" + _ = "bar9876" + _ = 0 + _ = 1 + _ = 123456789012345678890 + _ = 01234567 + _ = 0xcafebabe + _ = 0. 
+ _ = .0 + _ = 3.14159265 + _ = 1e0 + _ = 1e+100 + _ = 1e-100 + _ = 2.71828e-1000 + _ = 0i + _ = 1i + _ = 012345678901234567889i + _ = 123456789012345678890i + _ = 0.i + _ = .0i + _ = 3.14159265i + _ = 1e0i + _ = 1e+100i + _ = 1e-100i + _ = 2.71828e-1000i + _ = 'a' + _ = '\000' + _ = '\xFF' + _ = '\uff16' + _ = '\U0000ff16' + _ = `foobar` + _ = `foo +--- +--- +bar` +) + +func _() { + type _ int + type _ *int + type _ []int + type _ map[string]int + type _ chan int + type _ func() int + + var _ int + var _ *int + var _ []int + var _ map[string]int + var _ chan int + var _ func() int + + type _ struct{} + type _ *struct{} + type _ []struct{} + type _ map[string]struct{} + type _ chan struct{} + type _ func() struct{} + + type _ interface{} + type _ *interface{} + type _ []interface{} + type _ map[string]interface{} + type _ chan interface{} + type _ func() interface{} + + var _ struct{} + var _ *struct{} + var _ []struct{} + var _ map[string]struct{} + var _ chan struct{} + var _ func() struct{} + + var _ interface{} + var _ *interface{} + var _ []interface{} + var _ map[string]interface{} + var _ chan interface{} + var _ func() interface{} +} + +// don't lose blank lines in grouped declarations +const ( + _ int = 0 + _ float = 1 + + _ string = "foo" + + _ = iota + _ + + // a comment + _ + + _ +) + +type ( + _ int + _ struct{} + + _ interface{} + + // a comment + _ map[string]int +) + +var ( + _ int = 0 + _ float = 1 + + _ string = "foo" + + _ bool + + // a comment + _ bool +) + +// don't lose blank lines in this struct +type _ struct { + String struct { + Str, Len int + } + Slice struct { + Array, Len, Cap int + } + Eface struct { + Typ, Ptr int + } + + UncommonType struct { + Name, PkgPath int + } + CommonType struct { + Size, Hash, Alg, Align, FieldAlign, String, UncommonType int + } + Type struct { + Typ, Ptr int + } + StructField struct { + Name, PkgPath, Typ, Tag, Offset int + } + StructType struct { + Fields int + } + PtrType struct { + Elem int + } + SliceType struct { + Elem int + } + ArrayType struct { + Elem, Len int + } + + Stktop struct { + Stackguard, Stackbase, Gobuf int + } + Gobuf struct { + Sp, Pc, G int + } + G struct { + Stackbase, Sched, Status, Alllink int + } +} + +// no tabs for single or ungrouped decls +func _() { + const xxxxxx = 0 + type x int + var xxx int + var yyyy float = 3.14 + var zzzzz = "bar" + + const ( + xxxxxx = 0 + ) + type ( + x int + ) + var ( + xxx int + ) + var ( + yyyy float = 3.14 + ) + var ( + zzzzz = "bar" + ) +} + +// tabs for multiple or grouped decls +func _() { + // no entry has a type + const ( + zzzzzz = 1 + z = 2 + zzz = 3 + ) + // some entries have a type + const ( + xxxxxx = 1 + x = 2 + xxx = 3 + yyyyyyyy float = iota + yyyy = "bar" + yyy + yy = 2 + ) +} + +func _() { + // no entry has a type + var ( + zzzzzz = 1 + z = 2 + zzz = 3 + ) + // no entry has a value + var ( + _ int + _ float + _ string + + _ int // comment + _ float // comment + _ string // comment + ) + // some entries have a type + var ( + xxxxxx int + x float + xxx string + yyyyyyyy int = 1234 + y float = 3.14 + yyyy = "bar" + yyy string = "foo" + ) + // mixed entries - all comments should be aligned + var ( + a, b, c int + x = 10 + d int // comment + y = 20 // comment + f, ff, fff, ffff int = 0, 1, 2, 3 // comment + ) + // respect original line breaks + var _ = []T{ + T{0x20, "Telugu"}, + } + var _ = []T{ + // respect original line breaks + T{0x20, "Telugu"}, + } +} + +func _() { + type ( + xxxxxx int + x float + xxx string + xxxxx []x + xx struct{} + xxxxxxx struct { + 
_, _ int + _ float + } + xxxx chan<- string + ) +} + +// alignment of "=" in consecutive lines (extended example from issue 1414) +const ( + umax uint = ^uint(0) // maximum value for a uint + bpu = 1 << (5 + umax>>63) // bits per uint + foo + bar = -1 +) + +// typical enum +const ( + a MyType = iota + abcd + b + c + def +) + +// excerpt from godoc.go +var ( + goroot = flag.String("goroot", runtime.GOROOT(), "Go root directory") + testDir = flag.String("testdir", "", "Go root subdirectory - for testing only (faster startups)") + pkgPath = flag.String("path", "", "additional package directories (colon-separated)") + filter = flag.String("filter", "", "filter file containing permitted package directory paths") + filterMin = flag.Int("filter_minutes", 0, "filter file update interval in minutes; disabled if <= 0") + filterDelay delayTime // actual filter update interval in minutes; usually filterDelay == filterMin, but filterDelay may back off exponentially +) + +// formatting of structs +type _ struct{} + +type _ struct { /* this comment should be visible */ +} + +type _ struct { + // this comment should be visible and properly indented +} + +type _ struct { // this comment must not change indentation + f int + f, ff, fff, ffff int +} + +type _ struct { + string +} + +type _ struct { + string // comment +} + +type _ struct { + string "tag" +} + +type _ struct { + string "tag" // comment +} + +type _ struct { + f int +} + +type _ struct { + f int // comment +} + +type _ struct { + f int "tag" +} + +type _ struct { + f int "tag" // comment +} + +type _ struct { + bool + a, b, c int + int "tag" + ES // comment + float "tag" // comment + f int // comment + f, ff, fff, ffff int // comment + g float "tag" + h float "tag" // comment +} + +type _ struct { + a, b, + c, d int // this line should be indented + u, v, w, x float // this line should be indented + p, q, + r, s float // this line should be indented +} + +// difficult cases +type _ struct { + bool // comment + text []byte // comment +} + +// formatting of interfaces +type EI interface{} + +type _ interface { + EI +} + +type _ interface { + f() + fffff() +} + +type _ interface { + EI + f() + fffffg() +} + +type _ interface { // this comment must not change indentation + EI // here's a comment + f() // no blank between identifier and () + fffff() // no blank between identifier and () + gggggggggggg(x, y, z int) // hurray +} + +// formatting of variable declarations +func _() { + type day struct { + n int + short, long string + } + var ( + Sunday = day{0, "SUN", "Sunday"} + Monday = day{1, "MON", "Monday"} + Tuesday = day{2, "TUE", "Tuesday"} + Wednesday = day{3, "WED", "Wednesday"} + Thursday = day{4, "THU", "Thursday"} + Friday = day{5, "FRI", "Friday"} + Saturday = day{6, "SAT", "Saturday"} + ) +} + +// formatting of multi-line variable declarations +var a1, b1, c1 int // all on one line + +var a2, b2, + c2 int // this line should be indented + +var ( + a3, b3, + c3, d3 int // this line should be indented + a4, b4, c4 int // this line should be indented +) + +func _() { + var privateKey2 = &Block{Type: "RSA PRIVATE KEY", + Headers: map[string]string{}, + Bytes: []uint8{0x30, 0x82, 0x1, 0x3a, 0x2, 0x1, 0x0, 0x2, + 0x41, 0x0, 0xb2, 0x99, 0xf, 0x49, 0xc4, 0x7d, 0xfa, 0x8c, + 0xd4, 0x0, 0xae, 0x6a, 0x4d, 0x1b, 0x8a, 0x3b, 0x6a, 0x13, + 0x64, 0x2b, 0x23, 0xf2, 0x8b, 0x0, 0x3b, 0xfb, 0x97, 0x79, + }, + } +} + +func _() { + var Universe = Scope{ + Names: map[string]*Ident{ + // basic types + "bool": nil, + "byte": nil, + "int8": nil, + "int16": nil, 
+ "int32": nil, + "int64": nil, + "uint8": nil, + "uint16": nil, + "uint32": nil, + "uint64": nil, + "float32": nil, + "float64": nil, + "string": nil, + + // convenience types + "int": nil, + "uint": nil, + "uintptr": nil, + "float": nil, + + // constants + "false": nil, + "true": nil, + "iota": nil, + "nil": nil, + + // functions + "cap": nil, + "len": nil, + "new": nil, + "make": nil, + "panic": nil, + "panicln": nil, + "print": nil, + "println": nil, + }, + } +} + +// alignment of map composite entries +var _ = map[int]int{ + // small key sizes: always align even if size ratios are large + a: a, + abcdefghabcdefgh: a, + ab: a, + abc: a, + abcdefgabcdefg: a, + abcd: a, + abcde: a, + abcdef: a, + + // mixed key sizes: align when key sizes change within accepted ratio + abcdefgh: a, + abcdefghabcdefg: a, + abcdefghij: a, + abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij: a, // outlier - do not align with previous line + abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij: a, // align with previous line + + ab: a, // do not align with previous line + abcde: a, // align with previous line +} + +func _() { + var _ = T{ + a, // must introduce trailing comma + } +} + +// formatting of function results +func _() func() {} +func _() func(int) { return nil } +func _() func(int) int { return nil } +func _() func(int) func(int) func() { return nil } + +// formatting of consecutive single-line functions +func _() {} +func _() {} +func _() {} + +func _() {} // an empty line before this function +func _() {} +func _() {} + +func _() { f(1, 2, 3) } +func _(x int) int { y := x; return y + 1 } +func _() int { type T struct{}; var x T; return x } + +// these must remain multi-line since they are multi-line in the source +func _() { + f(1, 2, 3) +} +func _(x int) int { + y := x + return y + 1 +} +func _() int { + type T struct{} + var x T + return x +} + +// making function declarations safe for new semicolon rules +func _() { /* multi-line func because of comment */ +} + +func _() { + /* multi-line func because block is on multiple lines */ +} + +// ellipsis parameters +func _(...int) +func _(...*int) +func _(...[]int) +func _(...struct{}) +func _(bool, ...interface{}) +func _(bool, ...func()) +func _(bool, ...func(...int)) +func _(bool, ...map[string]int) +func _(bool, ...chan int) + +func _(b bool, x ...int) +func _(b bool, x ...*int) +func _(b bool, x ...[]int) +func _(b bool, x ...struct{}) +func _(x ...interface{}) +func _(x ...func()) +func _(x ...func(...int)) +func _(x ...map[string]int) +func _(x ...chan int) + +// these parameter lists must remain multi-line since they are multi-line in the source +func _(bool, +int) { +} +func _(x bool, +y int) { +} +func _(x, +y bool) { +} +func _(bool, // comment +int) { +} +func _(x bool, // comment +y int) { +} +func _(x, // comment +y bool) { +} +func _(bool, // comment +// comment +int) { +} +func _(x bool, // comment +// comment +y int) { +} +func _(x, // comment +// comment +y bool) { +} +func _(bool, +// comment +int) { +} +func _(x bool, +// comment +y int) { +} +func _(x, +// comment +y bool) { +} +func _(x, // comment +y, // comment +z bool) { +} +func _(x, // comment +y, // comment +z bool) { +} +func _(x int, // comment +y float, // comment +z bool) { +} diff --git a/src/pkg/go/printer/testdata/declarations.input b/src/pkg/go/printer/testdata/declarations.input new file mode 100644 index 000000000..c6134096b --- /dev/null +++ b/src/pkg/go/printer/testdata/declarations.input @@ -0,0 +1,757 @@ +// Copyright 2009 The 
Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package imports + +import "io" + +import ( + _ "io" +) + +import _ "io" + +import ( + "io" + "io" + "io" +) + +import ( + "io" + aLongRename "io" + + b "io" +) + +import ( + "unrenamed" + renamed "renameMe" + . "io" + _ "io" + "io" + . "os" +) + +// no newlines between consecutive single imports, but +// respect extra line breaks in the source (at most one empty line) +import _ "io" +import _ "io" +import _ "io" + +import _ "os" +import _ "os" +import _ "os" + + +import _ "fmt" +import _ "fmt" +import _ "fmt" + +import "foo" // a comment +import "bar" // a comment + +import ( + _ "foo" + // a comment + "bar" + "foo" // a comment + "bar" // a comment +) + +// comments + renames +import ( + "unrenamed" // a comment + renamed "renameMe" + . "io" /* a comment */ + _ "io/ioutil" // a comment + "io" // testing alignment + . "os" + // a comment +) + +// a case that caused problems in the past (comment placement) +import ( + . "fmt" + "io" + "malloc" // for the malloc count test only + "math" + "strings" + "testing" +) + +// more import examples +import ( + "xxx" + "much longer name" // comment + "short name" // comment +) + +import ( + _ "xxx" + "much longer name" // comment +) + +import ( + mymath "math" + "/foo/bar/long_package_path" // a comment +) + +import ( + "package_a" // comment + "package_b" + my_better_c "package_c" // comment + "package_d" // comment + my_e "package_e" // comment + + "package_a" // comment + "package_bb" + "package_ccc" // comment + "package_dddd" // comment +) + +// at least one empty line between declarations of different kind +import _ "io" +var _ int + + +// printing of constant literals +const ( + _ = "foobar" + _ = "a۰۱۸" + _ = "foo६४" + _ = "bar9876" + _ = 0 + _ = 1 + _ = 123456789012345678890 + _ = 01234567 + _ = 0xcafebabe + _ = 0. 
+ _ = .0 + _ = 3.14159265 + _ = 1e0 + _ = 1e+100 + _ = 1e-100 + _ = 2.71828e-1000 + _ = 0i + _ = 1i + _ = 012345678901234567889i + _ = 123456789012345678890i + _ = 0.i + _ = .0i + _ = 3.14159265i + _ = 1e0i + _ = 1e+100i + _ = 1e-100i + _ = 2.71828e-1000i + _ = 'a' + _ = '\000' + _ = '\xFF' + _ = '\uff16' + _ = '\U0000ff16' + _ = `foobar` + _ = `foo +--- +--- +bar` +) + + +func _() { + type _ int + type _ *int + type _ []int + type _ map[string]int + type _ chan int + type _ func() int + + var _ int + var _ *int + var _ []int + var _ map[string]int + var _ chan int + var _ func() int + + type _ struct{} + type _ *struct{} + type _ []struct{} + type _ map[string]struct{} + type _ chan struct{} + type _ func() struct{} + + type _ interface{} + type _ *interface{} + type _ []interface{} + type _ map[string]interface{} + type _ chan interface{} + type _ func() interface{} + + var _ struct{} + var _ *struct{} + var _ []struct{} + var _ map[string]struct{} + var _ chan struct{} + var _ func() struct{} + + var _ interface{} + var _ *interface{} + var _ []interface{} + var _ map[string]interface{} + var _ chan interface{} + var _ func() interface{} +} + + +// don't lose blank lines in grouped declarations +const ( + _ int = 0 + _ float = 1 + + _ string = "foo" + + _ = iota + _ + + // a comment + _ + + _ +) + + +type ( + _ int + _ struct {} + + _ interface{} + + // a comment + _ map[string]int +) + + +var ( + _ int = 0 + _ float = 1 + + _ string = "foo" + + _ bool + + // a comment + _ bool +) + + +// don't lose blank lines in this struct +type _ struct { + String struct { + Str, Len int + } + Slice struct { + Array, Len, Cap int + } + Eface struct { + Typ, Ptr int + } + + UncommonType struct { + Name, PkgPath int + } + CommonType struct { + Size, Hash, Alg, Align, FieldAlign, String, UncommonType int + } + Type struct { + Typ, Ptr int + } + StructField struct { + Name, PkgPath, Typ, Tag, Offset int + } + StructType struct { + Fields int + } + PtrType struct { + Elem int + } + SliceType struct { + Elem int + } + ArrayType struct { + Elem, Len int + } + + Stktop struct { + Stackguard, Stackbase, Gobuf int + } + Gobuf struct { + Sp, Pc, G int + } + G struct { + Stackbase, Sched, Status, Alllink int + } +} + + +// no tabs for single or ungrouped decls +func _() { + const xxxxxx = 0 + type x int + var xxx int + var yyyy float = 3.14 + var zzzzz = "bar" + + const ( + xxxxxx = 0 + ) + type ( + x int + ) + var ( + xxx int + ) + var ( + yyyy float = 3.14 + ) + var ( + zzzzz = "bar" + ) +} + +// tabs for multiple or grouped decls +func _() { + // no entry has a type + const ( + zzzzzz = 1 + z = 2 + zzz = 3 + ) + // some entries have a type + const ( + xxxxxx = 1 + x = 2 + xxx = 3 + yyyyyyyy float = iota + yyyy = "bar" + yyy + yy = 2 + ) +} + +func _() { + // no entry has a type + var ( + zzzzzz = 1 + z = 2 + zzz = 3 + ) + // no entry has a value + var ( + _ int + _ float + _ string + + _ int // comment + _ float // comment + _ string // comment + ) + // some entries have a type + var ( + xxxxxx int + x float + xxx string + yyyyyyyy int = 1234 + y float = 3.14 + yyyy = "bar" + yyy string = "foo" + ) + // mixed entries - all comments should be aligned + var ( + a, b, c int + x = 10 + d int // comment + y = 20 // comment + f, ff, fff, ffff int = 0, 1, 2, 3 // comment + ) + // respect original line breaks + var _ = []T { + T{0x20, "Telugu"}, + } + var _ = []T { + // respect original line breaks + T{0x20, "Telugu"}, + } +} + +func _() { + type ( + xxxxxx int + x float + xxx string + xxxxx []x + xx struct{} + 
xxxxxxx struct { + _, _ int + _ float + } + xxxx chan<- string + ) +} + +// alignment of "=" in consecutive lines (extended example from issue 1414) +const ( + umax uint = ^uint(0) // maximum value for a uint + bpu = 1 << (5 + umax>>63) // bits per uint + foo + bar = -1 +) + +// typical enum +const ( + a MyType = iota + abcd + b + c + def +) + +// excerpt from godoc.go +var ( + goroot = flag.String("goroot", runtime.GOROOT(), "Go root directory") + testDir = flag.String("testdir", "", "Go root subdirectory - for testing only (faster startups)") + pkgPath = flag.String("path", "", "additional package directories (colon-separated)") + filter = flag.String("filter", "", "filter file containing permitted package directory paths") + filterMin = flag.Int("filter_minutes", 0, "filter file update interval in minutes; disabled if <= 0") + filterDelay delayTime // actual filter update interval in minutes; usually filterDelay == filterMin, but filterDelay may back off exponentially +) + + +// formatting of structs +type _ struct{} + +type _ struct{ /* this comment should be visible */ } + +type _ struct{ + // this comment should be visible and properly indented +} + +type _ struct { // this comment must not change indentation + f int + f, ff, fff, ffff int +} + +type _ struct { + string +} + +type _ struct { + string // comment +} + +type _ struct { + string "tag" +} + +type _ struct { + string "tag" // comment +} + +type _ struct { + f int +} + +type _ struct { + f int // comment +} + +type _ struct { + f int "tag" +} + +type _ struct { + f int "tag" // comment +} + +type _ struct { + bool + a, b, c int + int "tag" + ES // comment + float "tag" // comment + f int // comment + f, ff, fff, ffff int // comment + g float "tag" + h float "tag" // comment +} + +type _ struct { a, b, +c, d int // this line should be indented +u, v, w, x float // this line should be indented +p, q, +r, s float // this line should be indented +} + + +// difficult cases +type _ struct { + bool // comment + text []byte // comment +} + + +// formatting of interfaces +type EI interface{} + +type _ interface { + EI +} + +type _ interface { + f() + fffff() +} + +type _ interface { + EI + f() + fffffg() +} + +type _ interface { // this comment must not change indentation + EI // here's a comment + f() // no blank between identifier and () + fffff() // no blank between identifier and () + gggggggggggg(x, y, z int) () // hurray +} + + +// formatting of variable declarations +func _() { + type day struct { n int; short, long string } + var ( + Sunday = day{ 0, "SUN", "Sunday" } + Monday = day{ 1, "MON", "Monday" } + Tuesday = day{ 2, "TUE", "Tuesday" } + Wednesday = day{ 3, "WED", "Wednesday" } + Thursday = day{ 4, "THU", "Thursday" } + Friday = day{ 5, "FRI", "Friday" } + Saturday = day{ 6, "SAT", "Saturday" } + ) +} + + +// formatting of multi-line variable declarations +var a1, b1, c1 int // all on one line + +var a2, b2, +c2 int // this line should be indented + +var (a3, b3, +c3, d3 int // this line should be indented +a4, b4, c4 int // this line should be indented +) + + +func _() { + var privateKey2 = &Block{Type: "RSA PRIVATE KEY", + Headers: map[string]string{}, + Bytes: []uint8{0x30, 0x82, 0x1, 0x3a, 0x2, 0x1, 0x0, 0x2, + 0x41, 0x0, 0xb2, 0x99, 0xf, 0x49, 0xc4, 0x7d, 0xfa, 0x8c, + 0xd4, 0x0, 0xae, 0x6a, 0x4d, 0x1b, 0x8a, 0x3b, 0x6a, 0x13, + 0x64, 0x2b, 0x23, 0xf2, 0x8b, 0x0, 0x3b, 0xfb, 0x97, 0x79, + }, + } +} + + +func _() { + var Universe = Scope { + Names: map[string]*Ident { + // basic types + "bool": nil, + "byte": nil, 
+ "int8": nil, + "int16": nil, + "int32": nil, + "int64": nil, + "uint8": nil, + "uint16": nil, + "uint32": nil, + "uint64": nil, + "float32": nil, + "float64": nil, + "string": nil, + + // convenience types + "int": nil, + "uint": nil, + "uintptr": nil, + "float": nil, + + // constants + "false": nil, + "true": nil, + "iota": nil, + "nil": nil, + + // functions + "cap": nil, + "len": nil, + "new": nil, + "make": nil, + "panic": nil, + "panicln": nil, + "print": nil, + "println": nil, + }, + } +} + + +// alignment of map composite entries +var _ = map[int]int{ + // small key sizes: always align even if size ratios are large + a: a, + abcdefghabcdefgh: a, + ab: a, + abc: a, + abcdefgabcdefg: a, + abcd: a, + abcde: a, + abcdef: a, + + // mixed key sizes: align when key sizes change within accepted ratio + abcdefgh: a, + abcdefghabcdefg: a, + abcdefghij: a, + abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij: a, // outlier - do not align with previous line + abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij: a, // align with previous line + + ab: a, // do not align with previous line + abcde: a, // align with previous line +} + + +func _() { + var _ = T{ + a, // must introduce trailing comma + } +} + + +// formatting of function results +func _() func() {} +func _() func(int) { return nil } +func _() func(int) int { return nil } +func _() func(int) func(int) func() { return nil } + + +// formatting of consecutive single-line functions +func _() {} +func _() {} +func _() {} + +func _() {} // an empty line before this function +func _() {} +func _() {} + +func _() { f(1, 2, 3) } +func _(x int) int { y := x; return y+1 } +func _() int { type T struct{}; var x T; return x } + +// these must remain multi-line since they are multi-line in the source +func _() { + f(1, 2, 3) +} +func _(x int) int { + y := x; return y+1 +} +func _() int { + type T struct{}; var x T; return x +} + + +// making function declarations safe for new semicolon rules +func _() { /* multi-line func because of comment */ } + +func _() { +/* multi-line func because block is on multiple lines */ } + + +// ellipsis parameters +func _(...int) +func _(...*int) +func _(...[]int) +func _(...struct{}) +func _(bool, ...interface{}) +func _(bool, ...func()) +func _(bool, ...func(...int)) +func _(bool, ...map[string]int) +func _(bool, ...chan int) + +func _(b bool, x ...int) +func _(b bool, x ...*int) +func _(b bool, x ...[]int) +func _(b bool, x ...struct{}) +func _(x ...interface{}) +func _(x ...func()) +func _(x ...func(...int)) +func _(x ...map[string]int) +func _(x ...chan int) + + +// these parameter lists must remain multi-line since they are multi-line in the source +func _(bool, +int) { +} +func _(x bool, +y int) { +} +func _(x, +y bool) { +} +func _(bool, // comment +int) { +} +func _(x bool, // comment +y int) { +} +func _(x, // comment +y bool) { +} +func _(bool, // comment +// comment +int) { +} +func _(x bool, // comment +// comment +y int) { +} +func _(x, // comment +// comment +y bool) { +} +func _(bool, +// comment +int) { +} +func _(x bool, +// comment +y int) { +} +func _(x, +// comment +y bool) { +} +func _(x, // comment +y,// comment +z bool) { +} +func _(x, // comment + y,// comment + z bool) { +} +func _(x int, // comment + y float, // comment + z bool) { +} diff --git a/src/pkg/go/printer/testdata/empty.golden b/src/pkg/go/printer/testdata/empty.golden new file mode 100644 index 000000000..a055f4758 --- /dev/null +++ b/src/pkg/go/printer/testdata/empty.golden @@ -0,0 +1,5 @@ +// a 
comment at the beginning of the file + +package empty + +// a comment at the end of the file diff --git a/src/pkg/go/printer/testdata/empty.input b/src/pkg/go/printer/testdata/empty.input new file mode 100644 index 000000000..a055f4758 --- /dev/null +++ b/src/pkg/go/printer/testdata/empty.input @@ -0,0 +1,5 @@ +// a comment at the beginning of the file + +package empty + +// a comment at the end of the file diff --git a/src/pkg/go/printer/testdata/expressions.golden b/src/pkg/go/printer/testdata/expressions.golden new file mode 100644 index 000000000..d0cf24ad6 --- /dev/null +++ b/src/pkg/go/printer/testdata/expressions.golden @@ -0,0 +1,627 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package expressions + +type T struct { + x, y, z int +} + +var ( + a, b, c, d, e int + under_bar int + longIdentifier1, longIdentifier2, longIdentifier3 int + t0, t1, t2 T + s string + p *int +) + +func _() { + // no spaces around simple or parenthesized expressions + _ = (a + 0) + _ = a + b + _ = a + b + c + _ = a + b - c + _ = a - b - c + _ = a + (b * c) + _ = a + (b / c) + _ = a - (b % c) + _ = 1 + a + _ = a + 1 + _ = a + b + 1 + _ = s[a] + _ = s[a:] + _ = s[:b] + _ = s[1:2] + _ = s[a:b] + _ = s[0:len(s)] + _ = s[0] << 1 + _ = (s[0] << 1) & 0xf + _ = s[0]<<2 | s[1]>>4 + _ = "foo" + s + _ = s + "foo" + _ = 'a' + 'b' + _ = len(s) / 2 + _ = len(t0.x) / a + + // spaces around expressions of different precedence or expressions containing spaces + _ = a + -b + _ = a - ^b + _ = a / *p + _ = a + b*c + _ = 1 + b*c + _ = a + 2*c + _ = a + c*2 + _ = 1 + 2*3 + _ = s[1 : 2*3] + _ = s[a : b-c] + _ = s[0:] + _ = s[a+b] + _ = s[:b-c] + _ = s[a+b:] + _ = a[a<<b+1] + _ = a[a<<b+1:] + _ = s[a+b : len(s)] + _ = s[len(s):-a] + _ = s[a : len(s)+1] + _ = s[a:len(s)+1] + s + + // spaces around operators with equal or lower precedence than comparisons + _ = a == b + _ = a != b + _ = a > b + _ = a >= b + _ = a < b + _ = a <= b + _ = a < b && c > d + _ = a < b || c > d + + // spaces around "long" operands + _ = a + longIdentifier1 + _ = longIdentifier1 + a + _ = longIdentifier1 + longIdentifier2*longIdentifier3 + _ = s + "a longer string" + + // some selected cases + _ = a + t0.x + _ = a + t0.x + t1.x*t2.x + _ = a + b + c + d + e + 2*3 + _ = a + b + c + 2*3 + d + e + _ = (a + b + c) * 2 + _ = a - b + c - d + (a + b + c) + d&e + _ = under_bar - 1 + _ = Open(dpath+"/file", O_WRONLY|O_CREAT, 0666) + _ = int(c0&_Mask4)<<18 | int(c1&_Maskx)<<12 | int(c2&_Maskx)<<6 | int(c3&_Maskx) + + // the parser does not restrict expressions that may appear as statements + true + 42 + "foo" + x + (x) + a + b + a + b + c + a + (b * c) + a + (b / c) + 1 + a + a + 1 + s[a] + x << 1 + (s[0] << 1) & 0xf + "foo" + s + x == y + x < y || z > 42 +} + +func _() { + _ = a + b + _ = a + b + c + _ = a + b*c + _ = a + (b * c) + _ = (a + b) * c + _ = a + (b * c * d) + _ = a + (b*c + d) + + _ = 1 << x + _ = -1 << x + _ = 1<<x - 1 + _ = -1<<x - 1 + + _ = f(a + b) + _ = f(a + b + c) + _ = f(a + b*c) + _ = f(a + (b * c)) + _ = f(1<<x-1, 1<<x-2) + + _ = 1<<d.logWindowSize - 1 + + buf = make(x, 2*cap(b.buf)+n) + + dst[i*3+2] = dbuf[0] << 2 + dst[i*3+2] = dbuf[0]<<2 | dbuf[1]>>4 + + b.buf = b.buf[0 : b.off+m+n] + b.buf = b.buf[0 : b.off+m*n] + f(b.buf[0 : b.off+m+n]) + + signed += ' ' * 8 + tw.octal(header[148:155], chksum) + + _ = x > 0 && i >= 0 + + x1, x0 := x>>w2, x&m2 + z0 = t1<<w2 + t0 + z1 = (t1 + t0>>w2) >> w2 + q1, r1 := x1/d1, x1%d1 + r1 = 
r1*b2 | x0>>w2 + x1 = (x1 << z) | (x0 >> (uint(w) - z)) + x1 = x1<<z | x0>>(uint(w)-z) + + _ = buf[0 : len(buf)+1] + _ = buf[0 : n+1] + + a, b = b, a + a = b + c + a = b*c + d + _ = a*b + c + _ = a - b - c + _ = a - (b - c) + _ = a - b*c + _ = a - (b * c) + _ = a * b / c + _ = a / *b + _ = x[a|^b] + _ = x[a / *b] + _ = a & ^b + _ = a + +b + _ = a - -b + _ = x[a*-b] + _ = x[a + +b] + _ = x ^ y ^ z + _ = b[a>>24] ^ b[(a>>16)&0xFF] ^ b[(a>>8)&0xFF] ^ b[a&0xFF] + _ = len(longVariableName) * 2 + + _ = token(matchType + xlength<<lengthShift + xoffset) +} + +func f(x int, args ...int) { + f(0, args...) + f(1, args) + f(2, args[0]) + + // make sure syntactically legal code remains syntactically legal + f(3, 42 ...) // a blank must remain between 42 and ... + f(4, 42....) + f(5, 42....) + f(6, 42.0...) + f(7, 42.0...) + f(8, .42...) + f(9, .42...) + f(10, 42e0...) + f(11, 42e0...) + + _ = 42 .x // a blank must remain between 42 and .x + _ = 42..x + _ = 42..x + _ = 42.0.x + _ = 42.0.x + _ = .42.x + _ = .42.x + _ = 42e0.x + _ = 42e0.x + + // a blank must remain between the binary operator and the 2nd operand + _ = x / *y + _ = x < -1 + _ = x < <-1 + _ = x + +1 + _ = x - -1 + _ = x & &x + _ = x & ^x + + _ = f(x / *y, x < -1, x < <-1, x + +1, x - -1, x & &x, x & ^x) +} + +func _() { + _ = T{} + _ = struct{}{} + _ = [10]T{} + _ = [...]T{} + _ = []T{} + _ = map[int]T{} +} + +// one-line structs/interfaces in composite literals (up to a threshold) +func _() { + _ = struct{}{} + _ = struct{ x int }{0} + _ = struct{ x, y, z int }{0, 1, 2} + _ = struct{ int }{0} + _ = struct{ s struct{ int } }{struct{ int }{0}} +} + +func _() { + // do not modify literals + _ = "tab1 tab2 tab3 end" // string contains 3 tabs + _ = "tab1 tab2 tab3 end" // same string with 3 blanks - may be unaligned because editors see tabs in strings + _ = "" // this comment should be aligned with the one on the previous line + _ = `` + _ = ` +` + _ = `foo + bar` + _ = `three spaces before the end of the line starting here: +they must not be removed` +} + +func _() { + // smart handling of indentation for multi-line raw strings + var _ = `` + var _ = `foo` + var _ = `foo +bar` + + var _ = `` + var _ = `foo` + var _ = + // the next line should remain indented + `foo +bar` + + var _ = // comment + `` + var _ = // comment + `foo` + var _ = // comment + // the next line should remain indented + `foo +bar` + + var _ = /* comment */ `` + var _ = /* comment */ `foo` + var _ = /* comment */ `foo +bar` + + var _ = /* comment */ + `` + var _ = /* comment */ + `foo` + var _ = /* comment */ + // the next line should remain indented + `foo +bar` + + var board = []int( + `........... +........... +....●●●.... +....●●●.... +..●●●●●●●.. +..●●●○●●●.. +..●●●●●●●.. +....●●●.... +....●●●.... +........... +........... +`) + + var state = S{ + "foo", + // the next line should remain indented + `........... +........... +....●●●.... +....●●●.... +..●●●●●●●.. +..●●●○●●●.. +..●●●●●●●.. +....●●●.... +....●●●.... +........... +........... 
+`, + "bar", + } +} + +func _() { + // one-line function literals (body is on a single line) + _ = func() {} + _ = func() int { return 0 } + _ = func(x, y int) bool { m := (x + y) / 2; return m < 0 } + + // multi-line function literals (body is not on one line) + _ = func() { + } + _ = func() int { + return 0 + } + _ = func(x, y int) bool { + m := (x + y) / 2 + return x < y + } + + f(func() { + }) + f(func() int { + return 0 + }) + f(func(x, y int) bool { + m := (x + y) / 2 + return x < y + }) +} + +func _() { + _ = [][]int{ + []int{1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int{ + {1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int{ + {1}, + {1, 2}, + {1, 2, 3}, + } + _ = [][]int{{1}, {1, 2}, {1, 2, 3}} +} + +// various multi-line expressions +func _() { + // do not add extra indentation to multi-line string lists + _ = "foo" + "bar" + _ = "foo" + + "bar" + + "bah" + _ = []string{ + "abc" + + "def", + "foo" + + "bar", + } +} + +const _ = F1 + + `string = "%s";` + + `ptr = *;` + + `datafmt.T2 = s ["-" p "-"];` + +const _ = `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` + +const _ = `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` + +func _() { + _ = F1 + + `string = "%s";` + + `ptr = *;` + + `datafmt.T2 = s ["-" p "-"];` + + _ = + `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` + + _ = `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` +} + +func _() { + // respect source lines in multi-line expressions + _ = a + + b + + c + _ = a < b || + b < a + _ = "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000" // 100! + _ = "170141183460469231731687303715884105727" // prime +} + +// Alignment after overlong lines +const ( + _ = "991" + _ = "2432902008176640000" // 20! + _ = "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000" // 100! + _ = "170141183460469231731687303715884105727" // prime +) + +// Correct placement of operators and comments in multi-line expressions +func _() { + _ = a + // comment + b + // comment + c + _ = "a" + + "b" + // comment + "c" + _ = "ba0408" + "7265717569726564" // field 71, encoding 2, string "required" +} + +// Correct placement of terminating comma/closing parentheses in multi-line calls. +func _() { + f(1, + 2, + 3) + f(1, + 2, + 3, + ) + f(1, + 2, + 3) // comment + f(1, + 2, + 3, // comment + ) + f(1, + 2, + 3) // comment + f(1, + 2, + 3, // comment + ) +} + +// Align comments in multi-line lists of single-line expressions. 
+var txpix = [NCOL]draw.Color{ + draw.Yellow, // yellow + draw.Cyan, // cyan + draw.Green, // lime green + draw.GreyBlue, // slate + draw.Red, /* red */ + draw.GreyGreen, /* olive green */ + draw.Blue, /* blue */ + draw.Color(0xFF55AAFF), /* pink */ + draw.Color(0xFFAAFFFF), /* lavender */ + draw.Color(0xBB005DFF), /* maroon */ +} + +func same(t, u *Time) bool { + // respect source lines in multi-line expressions + return t.Year == u.Year && + t.Month == u.Month && + t.Day == u.Day && + t.Hour == u.Hour && + t.Minute == u.Minute && + t.Second == u.Second && + t.Weekday == u.Weekday && + t.ZoneOffset == u.ZoneOffset && + t.Zone == u.Zone +} + +func (p *parser) charClass() { + // respect source lines in multi-line expressions + if cc.negate && len(cc.ranges) == 2 && + cc.ranges[0] == '\n' && cc.ranges[1] == '\n' { + nl := new(_NotNl) + p.re.add(nl) + } +} + +func addState(s []state, inst instr, match []int) { + // handle comments correctly in multi-line expressions + for i := 0; i < l; i++ { + if s[i].inst.index() == index && // same instruction + s[i].match[0] < pos { // earlier match already going; leftmost wins + return s + } + } +} + +func (self *T) foo(x int) *T { return self } + +func _() { module.Func1().Func2() } + +func _() { + _ = new(T). + foo(1). + foo(2). + foo(3) + + _ = new(T). + foo(1). + foo(2). // inline comments + foo(3) + + _ = new(T).foo(1).foo(2).foo(3) + + // handle multiline argument list correctly + _ = new(T). + foo( + 1). + foo(2) + + _ = new(T).foo( + 1).foo(2) + + _ = Array[3+ + 4] + + _ = Method(1, 2, + 3) + + _ = new(T). + foo(). + bar().(*Type) + + _ = new(T). + foo(). + bar().(*Type). + baz() + + _ = new(T). + foo(). + bar()["idx"] + + _ = new(T). + foo(). + bar()["idx"]. + baz() + + _ = new(T). + foo(). + bar()[1:2] + + _ = new(T). + foo(). + bar()[1:2]. + baz() + + _ = new(T). + Field. + Array[3+ + 4]. + Table["foo"]. + Blob.(*Type). + Slices[1:4]. + Method(1, 2, + 3). + Thingy + + _ = a.b.c + _ = a. + b. + c + _ = a.b().c + _ = a. + b(). + c + _ = a.b[0].c + _ = a. + b[0]. + c + _ = a.b[0:].c + _ = a. + b[0:]. + c + _ = a.b.(T).c + _ = a. + b.(T). + c +} + +// Don't introduce extra newlines in strangely formatted expression lists. +func f() { + // os.Open parameters should remain on two lines + if writer, err = os.Open(outfile, s.O_WRONLY|os.O_CREATE| + os.O_TRUNC, 0666); err != nil { + log.Fatal(err) + } +} diff --git a/src/pkg/go/printer/testdata/expressions.input b/src/pkg/go/printer/testdata/expressions.input new file mode 100644 index 000000000..d11314983 --- /dev/null +++ b/src/pkg/go/printer/testdata/expressions.input @@ -0,0 +1,656 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
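The expressions.golden file above records the output the printer is expected to produce for the expressions.input source that follows: binary expressions are spaced by relative precedence, so the tight product in "a+b*c" stays unspaced while blanks are inserted around the looser "+". A minimal sketch of that behavior, using today's go/parser and go/printer packages rather than the pre-Go 1 tree imported by this commit:

package main

import (
	"go/parser"
	"go/printer"
	"go/token"
	"os"
)

func main() {
	// The printer keeps the higher-precedence subexpression tight and
	// inserts blanks around the looser operator, as in the golden file.
	src := "package p\nvar _ = a+b*c\n"
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "x.go", src, 0)
	if err != nil {
		panic(err)
	}
	printer.Fprint(os.Stdout, fset, f) // prints: var _ = a + b*c
}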
+ +package expressions + +type T struct { + x, y, z int +} + +var ( + a, b, c, d, e int + under_bar int + longIdentifier1, longIdentifier2, longIdentifier3 int + t0, t1, t2 T + s string + p *int +) + + +func _() { + // no spaces around simple or parenthesized expressions + _ = (a+0) + _ = a+b + _ = a+b+c + _ = a+b-c + _ = a-b-c + _ = a+(b*c) + _ = a+(b/c) + _ = a-(b%c) + _ = 1+a + _ = a+1 + _ = a+b+1 + _ = s[a] + _ = s[a:] + _ = s[:b] + _ = s[1:2] + _ = s[a:b] + _ = s[0:len(s)] + _ = s[0]<<1 + _ = (s[0]<<1)&0xf + _ = s[0] << 2 | s[1] >> 4 + _ = "foo"+s + _ = s+"foo" + _ = 'a'+'b' + _ = len(s)/2 + _ = len(t0.x)/a + + // spaces around expressions of different precedence or expressions containing spaces + _ = a + -b + _ = a - ^b + _ = a / *p + _ = a + b*c + _ = 1 + b*c + _ = a + 2*c + _ = a + c*2 + _ = 1 + 2*3 + _ = s[1 : 2*3] + _ = s[a : b-c] + _ = s[0:] + _ = s[a+b] + _ = s[: b-c] + _ = s[a+b :] + _ = a[a<<b+1] + _ = a[a<<b+1 :] + _ = s[a+b : len(s)] + _ = s[len(s) : -a] + _ = s[a : len(s)+1] + _ = s[a : len(s)+1]+s + + // spaces around operators with equal or lower precedence than comparisons + _ = a == b + _ = a != b + _ = a > b + _ = a >= b + _ = a < b + _ = a <= b + _ = a < b && c > d + _ = a < b || c > d + + // spaces around "long" operands + _ = a + longIdentifier1 + _ = longIdentifier1 + a + _ = longIdentifier1 + longIdentifier2 * longIdentifier3 + _ = s + "a longer string" + + // some selected cases + _ = a + t0.x + _ = a + t0.x + t1.x * t2.x + _ = a + b + c + d + e + 2*3 + _ = a + b + c + 2*3 + d + e + _ = (a+b+c)*2 + _ = a - b + c - d + (a+b+c) + d&e + _ = under_bar-1 + _ = Open(dpath + "/file", O_WRONLY | O_CREAT, 0666) + _ = int(c0&_Mask4)<<18 | int(c1&_Maskx)<<12 | int(c2&_Maskx)<<6 | int(c3&_Maskx) + + // the parser does not restrict expressions that may appear as statements + true + 42 + "foo" + x + (x) + a+b + a+b+c + a+(b*c) + a+(b/c) + 1+a + a+1 + s[a] + x<<1 + (s[0]<<1)&0xf + "foo"+s + x == y + x < y || z > 42 +} + + +func _() { + _ = a+b + _ = a+b+c + _ = a+b*c + _ = a+(b*c) + _ = (a+b)*c + _ = a+(b*c*d) + _ = a+(b*c+d) + + _ = 1<<x + _ = -1<<x + _ = 1<<x-1 + _ = -1<<x-1 + + _ = f(a+b) + _ = f(a+b+c) + _ = f(a+b*c) + _ = f(a+(b*c)) + _ = f(1<<x-1, 1<<x-2) + + _ = 1<<d.logWindowSize-1 + + buf = make(x, 2*cap(b.buf) + n) + + dst[i*3+2] = dbuf[0]<<2 + dst[i*3+2] = dbuf[0]<<2 | dbuf[1]>>4 + + b.buf = b.buf[0:b.off+m+n] + b.buf = b.buf[0:b.off+m*n] + f(b.buf[0:b.off+m+n]) + + signed += ' '*8 + tw.octal(header[148:155], chksum) + + _ = x > 0 && i >= 0 + + x1, x0 := x>>w2, x&m2 + z0 = t1<<w2+t0 + z1 = (t1+t0>>w2)>>w2 + q1, r1 := x1/d1, x1%d1 + r1 = r1*b2 | x0>>w2 + x1 = (x1<<z)|(x0>>(uint(w)-z)) + x1 = x1<<z | x0>>(uint(w)-z) + + _ = buf[0:len(buf)+1] + _ = buf[0:n+1] + + a,b = b,a + a = b+c + a = b*c+d + _ = a*b+c + _ = a-b-c + _ = a-(b-c) + _ = a-b*c + _ = a-(b*c) + _ = a*b/c + _ = a/ *b + _ = x[a|^b] + _ = x[a/ *b] + _ = a& ^b + _ = a+ +b + _ = a- -b + _ = x[a*-b] + _ = x[a+ +b] + _ = x^y^z + _ = b[a>>24] ^ b[(a>>16)&0xFF] ^ b[(a>>8)&0xFF] ^ b[a&0xFF] + _ = len(longVariableName)*2 + + _ = token(matchType + xlength<<lengthShift + xoffset) +} + + +func f(x int, args ...int) { + f(0, args...) + f(1, args) + f(2, args[0]) + + // make sure syntactically legal code remains syntactically legal + f(3, 42 ...) // a blank must remain between 42 and ... + f(4, 42. ...) + f(5, 42....) + f(6, 42.0 ...) + f(7, 42.0...) + f(8, .42 ...) + f(9, .42...) + f(10, 42e0 ...) + f(11, 42e0...) + + _ = 42 .x // a blank must remain between 42 and .x + _ = 42. 
.x + _ = 42..x + _ = 42.0 .x + _ = 42.0.x + _ = .42 .x + _ = .42.x + _ = 42e0 .x + _ = 42e0.x + + // a blank must remain between the binary operator and the 2nd operand + _ = x/ *y + _ = x< -1 + _ = x< <-1 + _ = x+ +1 + _ = x- -1 + _ = x& &x + _ = x& ^x + + _ = f(x/ *y, x< -1, x< <-1, x+ +1, x- -1, x& &x, x& ^x) +} + + +func _() { + _ = T{} + _ = struct{}{} + _ = [10]T{} + _ = [...]T{} + _ = []T{} + _ = map[int]T{} +} + + +// one-line structs/interfaces in composite literals (up to a threshold) +func _() { + _ = struct{}{} + _ = struct{ x int }{0} + _ = struct{ x, y, z int }{0, 1, 2} + _ = struct{ int }{0} + _ = struct{ s struct { int } }{struct{ int}{0} } +} + + +func _() { + // do not modify literals + _ = "tab1 tab2 tab3 end" // string contains 3 tabs + _ = "tab1 tab2 tab3 end" // same string with 3 blanks - may be unaligned because editors see tabs in strings + _ = "" // this comment should be aligned with the one on the previous line + _ = `` + _ = ` +` +_ = `foo + bar` + _ = `three spaces before the end of the line starting here: +they must not be removed` +} + + +func _() { + // smart handling of indentation for multi-line raw strings + var _ = `` + var _ = `foo` + var _ = `foo +bar` + + +var _ = + `` +var _ = + `foo` +var _ = + // the next line should remain indented + `foo +bar` + + + var _ = // comment + `` + var _ = // comment + `foo` + var _ = // comment + // the next line should remain indented + `foo +bar` + + +var _ = /* comment */ `` +var _ = /* comment */ `foo` +var _ = /* comment */ `foo +bar` + + + var _ = /* comment */ + `` + var _ = /* comment */ + `foo` + var _ = /* comment */ + // the next line should remain indented + `foo +bar` + + +var board = []int( + `........... +........... +....●●●.... +....●●●.... +..●●●●●●●.. +..●●●○●●●.. +..●●●●●●●.. +....●●●.... +....●●●.... +........... +........... +`) + + + var state = S{ + "foo", + // the next line should remain indented + `........... +........... +....●●●.... +....●●●.... +..●●●●●●●.. +..●●●○●●●.. +..●●●●●●●.. +....●●●.... +....●●●.... +........... +........... 
+`, + "bar", + } +} + + +func _() { + // one-line function literals (body is on a single line) + _ = func() {} + _ = func() int { return 0 } + _ = func(x, y int) bool { m := (x+y)/2; return m < 0 } + + // multi-line function literals (body is not on one line) + _ = func() { + } + _ = func() int { + return 0 + } + _ = func(x, y int) bool { + m := (x+y)/2; return x < y } + + f(func() { + }) + f(func() int { + return 0 + }) + f(func(x, y int) bool { + m := (x+y)/2; return x < y }) +} + + +func _() { + _ = [][]int { + []int{1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int { + {1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int { + {1}, + {1, 2}, + {1, 2, 3}, + } + _ = [][]int {{1}, {1, 2}, {1, 2, 3}} +} + + +// various multi-line expressions +func _() { + // do not add extra indentation to multi-line string lists + _ = "foo" + "bar" + _ = "foo" + + "bar" + + "bah" + _ = []string { + "abc" + + "def", + "foo" + + "bar", + } +} + + +const _ = F1 + + `string = "%s";` + + `ptr = *;` + + `datafmt.T2 = s ["-" p "-"];` + + +const _ = + `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` + + +const _ = `datafmt "datafmt";` + +`default = "%v";` + +`array = *;` + +`datafmt.T3 = s {" " a a / ","};` + + +func _() { + _ = F1 + + `string = "%s";` + + `ptr = *;` + + `datafmt.T2 = s ["-" p "-"];` + + _ = + `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` + + _ = `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` +} + + +func _() { + // respect source lines in multi-line expressions + _ = a+ + b+ + c + _ = a < b || + b < a + _ = "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000" // 100! + _ = "170141183460469231731687303715884105727" // prime +} + + +// Alignment after overlong lines +const ( + _ = "991" + _ = "2432902008176640000" // 20! + _ = "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000" // 100! + _ = "170141183460469231731687303715884105727" // prime +) + + +// Correct placement of operators and comments in multi-line expressions +func _() { + _ = a + // comment + b + // comment + c + _ = "a" + + "b" + // comment + "c" + _ = "ba0408" + "7265717569726564" // field 71, encoding 2, string "required" +} + + +// Correct placement of terminating comma/closing parentheses in multi-line calls. +func _() { + f(1, + 2, + 3) + f(1, + 2, + 3, + ) + f(1, + 2, + 3) // comment + f(1, + 2, + 3, // comment + ) + f(1, + 2, + 3)// comment + f(1, + 2, + 3,// comment + ) +} + + +// Align comments in multi-line lists of single-line expressions. 
+var txpix = [NCOL]draw.Color{ + draw.Yellow, // yellow + draw.Cyan, // cyan + draw.Green, // lime green + draw.GreyBlue, // slate + draw.Red, /* red */ + draw.GreyGreen, /* olive green */ + draw.Blue, /* blue */ + draw.Color(0xFF55AAFF), /* pink */ + draw.Color(0xFFAAFFFF), /* lavender */ + draw.Color(0xBB005DFF), /* maroon */ +} + + +func same(t, u *Time) bool { + // respect source lines in multi-line expressions + return t.Year == u.Year && + t.Month == u.Month && + t.Day == u.Day && + t.Hour == u.Hour && + t.Minute == u.Minute && + t.Second == u.Second && + t.Weekday == u.Weekday && + t.ZoneOffset == u.ZoneOffset && + t.Zone == u.Zone +} + + +func (p *parser) charClass() { + // respect source lines in multi-line expressions + if cc.negate && len(cc.ranges) == 2 && + cc.ranges[0] == '\n' && cc.ranges[1] == '\n' { + nl := new(_NotNl) + p.re.add(nl) + } +} + + +func addState(s []state, inst instr, match []int) { + // handle comments correctly in multi-line expressions + for i := 0; i < l; i++ { + if s[i].inst.index() == index && // same instruction + s[i].match[0] < pos { // earlier match already going; leftmost wins + return s + } + } +} + +func (self *T) foo(x int) *T { return self } + +func _() { module.Func1().Func2() } + +func _() { + _ = new(T). + foo(1). + foo(2). + foo(3) + + _ = new(T). + foo(1). + foo(2). // inline comments + foo(3) + + _ = new(T).foo(1).foo(2).foo(3) + + // handle multiline argument list correctly + _ = new(T). + foo( + 1). + foo(2) + + _ = new(T).foo( + 1).foo(2) + + _ = Array[3 + +4] + + _ = Method(1, 2, + 3) + + _ = new(T). + foo(). + bar() . (*Type) + + _ = new(T). +foo(). +bar().(*Type). +baz() + + _ = new(T). + foo(). + bar()["idx"] + + _ = new(T). + foo(). + bar()["idx"] . + baz() + + _ = new(T). + foo(). + bar()[1:2] + + _ = new(T). + foo(). + bar()[1:2]. + baz() + + _ = new(T). + Field. + Array[3+ + 4]. + Table ["foo"]. + Blob. (*Type). + Slices[1:4]. + Method(1, 2, + 3). + Thingy + + _ = a.b.c + _ = a. + b. + c + _ = a.b().c + _ = a. + b(). + c + _ = a.b[0].c + _ = a. + b[0]. + c + _ = a.b[0:].c + _ = a. + b[0:]. + c + _ = a.b.(T).c + _ = a. + b. + (T). + c +} + + +// Don't introduce extra newlines in strangely formatted expression lists. +func f() { + // os.Open parameters should remain on two lines + if writer, err = os.Open(outfile, s.O_WRONLY|os.O_CREATE| + os.O_TRUNC, 0666); err != nil { + log.Fatal(err) + } +} diff --git a/src/pkg/go/printer/testdata/expressions.raw b/src/pkg/go/printer/testdata/expressions.raw new file mode 100644 index 000000000..d7819a3ba --- /dev/null +++ b/src/pkg/go/printer/testdata/expressions.raw @@ -0,0 +1,627 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
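The expressions.raw file below appears to be the same expected output rendered in the printer's raw mode, i.e. without the tabwriter pass that aligns declarations into columns; that mapping of the ".raw" suffix to the RawFormat flag is an inference from the filename, not stated in this diff. A short sketch of selecting that mode with the current go/printer API:

package main

import (
	"go/parser"
	"go/printer"
	"go/token"
	"os"
)

func main() {
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "x.go", "package p\nvar (\n\ta int\n\tlongerName string\n)\n", 0)
	if err != nil {
		panic(err)
	}
	// RawFormat bypasses the tabwriter: value specs are indented with
	// plain tabs instead of being aligned into columns.
	cfg := printer.Config{Mode: printer.RawFormat, Tabwidth: 8}
	cfg.Fprint(os.Stdout, fset, f)
}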
+ +package expressions + +type T struct { + x, y, z int +} + +var ( + a, b, c, d, e int + under_bar int + longIdentifier1, longIdentifier2, longIdentifier3 int + t0, t1, t2 T + s string + p *int +) + +func _() { + // no spaces around simple or parenthesized expressions + _ = (a + 0) + _ = a + b + _ = a + b + c + _ = a + b - c + _ = a - b - c + _ = a + (b * c) + _ = a + (b / c) + _ = a - (b % c) + _ = 1 + a + _ = a + 1 + _ = a + b + 1 + _ = s[a] + _ = s[a:] + _ = s[:b] + _ = s[1:2] + _ = s[a:b] + _ = s[0:len(s)] + _ = s[0] << 1 + _ = (s[0] << 1) & 0xf + _ = s[0]<<2 | s[1]>>4 + _ = "foo" + s + _ = s + "foo" + _ = 'a' + 'b' + _ = len(s) / 2 + _ = len(t0.x) / a + + // spaces around expressions of different precedence or expressions containing spaces + _ = a + -b + _ = a - ^b + _ = a / *p + _ = a + b*c + _ = 1 + b*c + _ = a + 2*c + _ = a + c*2 + _ = 1 + 2*3 + _ = s[1 : 2*3] + _ = s[a : b-c] + _ = s[0:] + _ = s[a+b] + _ = s[:b-c] + _ = s[a+b:] + _ = a[a<<b+1] + _ = a[a<<b+1:] + _ = s[a+b : len(s)] + _ = s[len(s):-a] + _ = s[a : len(s)+1] + _ = s[a:len(s)+1] + s + + // spaces around operators with equal or lower precedence than comparisons + _ = a == b + _ = a != b + _ = a > b + _ = a >= b + _ = a < b + _ = a <= b + _ = a < b && c > d + _ = a < b || c > d + + // spaces around "long" operands + _ = a + longIdentifier1 + _ = longIdentifier1 + a + _ = longIdentifier1 + longIdentifier2*longIdentifier3 + _ = s + "a longer string" + + // some selected cases + _ = a + t0.x + _ = a + t0.x + t1.x*t2.x + _ = a + b + c + d + e + 2*3 + _ = a + b + c + 2*3 + d + e + _ = (a + b + c) * 2 + _ = a - b + c - d + (a + b + c) + d&e + _ = under_bar - 1 + _ = Open(dpath+"/file", O_WRONLY|O_CREAT, 0666) + _ = int(c0&_Mask4)<<18 | int(c1&_Maskx)<<12 | int(c2&_Maskx)<<6 | int(c3&_Maskx) + + // the parser does not restrict expressions that may appear as statements + true + 42 + "foo" + x + (x) + a + b + a + b + c + a + (b * c) + a + (b / c) + 1 + a + a + 1 + s[a] + x << 1 + (s[0] << 1) & 0xf + "foo" + s + x == y + x < y || z > 42 +} + +func _() { + _ = a + b + _ = a + b + c + _ = a + b*c + _ = a + (b * c) + _ = (a + b) * c + _ = a + (b * c * d) + _ = a + (b*c + d) + + _ = 1 << x + _ = -1 << x + _ = 1<<x - 1 + _ = -1<<x - 1 + + _ = f(a + b) + _ = f(a + b + c) + _ = f(a + b*c) + _ = f(a + (b * c)) + _ = f(1<<x-1, 1<<x-2) + + _ = 1<<d.logWindowSize - 1 + + buf = make(x, 2*cap(b.buf)+n) + + dst[i*3+2] = dbuf[0] << 2 + dst[i*3+2] = dbuf[0]<<2 | dbuf[1]>>4 + + b.buf = b.buf[0 : b.off+m+n] + b.buf = b.buf[0 : b.off+m*n] + f(b.buf[0 : b.off+m+n]) + + signed += ' ' * 8 + tw.octal(header[148:155], chksum) + + _ = x > 0 && i >= 0 + + x1, x0 := x>>w2, x&m2 + z0 = t1<<w2 + t0 + z1 = (t1 + t0>>w2) >> w2 + q1, r1 := x1/d1, x1%d1 + r1 = r1*b2 | x0>>w2 + x1 = (x1 << z) | (x0 >> (uint(w) - z)) + x1 = x1<<z | x0>>(uint(w)-z) + + _ = buf[0 : len(buf)+1] + _ = buf[0 : n+1] + + a, b = b, a + a = b + c + a = b*c + d + _ = a*b + c + _ = a - b - c + _ = a - (b - c) + _ = a - b*c + _ = a - (b * c) + _ = a * b / c + _ = a / *b + _ = x[a|^b] + _ = x[a / *b] + _ = a & ^b + _ = a + +b + _ = a - -b + _ = x[a*-b] + _ = x[a + +b] + _ = x ^ y ^ z + _ = b[a>>24] ^ b[(a>>16)&0xFF] ^ b[(a>>8)&0xFF] ^ b[a&0xFF] + _ = len(longVariableName) * 2 + + _ = token(matchType + xlength<<lengthShift + xoffset) +} + +func f(x int, args ...int) { + f(0, args...) + f(1, args) + f(2, args[0]) + + // make sure syntactically legal code remains syntactically legal + f(3, 42 ...) // a blank must remain between 42 and ... + f(4, 42....) + f(5, 42....) + f(6, 42.0...) 
+ f(7, 42.0...) + f(8, .42...) + f(9, .42...) + f(10, 42e0...) + f(11, 42e0...) + + _ = 42 .x // a blank must remain between 42 and .x + _ = 42..x + _ = 42..x + _ = 42.0.x + _ = 42.0.x + _ = .42.x + _ = .42.x + _ = 42e0.x + _ = 42e0.x + + // a blank must remain between the binary operator and the 2nd operand + _ = x / *y + _ = x < -1 + _ = x < <-1 + _ = x + +1 + _ = x - -1 + _ = x & &x + _ = x & ^x + + _ = f(x / *y, x < -1, x < <-1, x + +1, x - -1, x & &x, x & ^x) +} + +func _() { + _ = T{} + _ = struct{}{} + _ = [10]T{} + _ = [...]T{} + _ = []T{} + _ = map[int]T{} +} + +// one-line structs/interfaces in composite literals (up to a threshold) +func _() { + _ = struct{}{} + _ = struct{ x int }{0} + _ = struct{ x, y, z int }{0, 1, 2} + _ = struct{ int }{0} + _ = struct{ s struct{ int } }{struct{ int }{0}} +} + +func _() { + // do not modify literals + _ = "tab1 tab2 tab3 end" // string contains 3 tabs + _ = "tab1 tab2 tab3 end" // same string with 3 blanks - may be unaligned because editors see tabs in strings + _ = "" // this comment should be aligned with the one on the previous line + _ = `` + _ = ` +` + _ = `foo + bar` + _ = `three spaces before the end of the line starting here: +they must not be removed` +} + +func _() { + // smart handling of indentation for multi-line raw strings + var _ = `` + var _ = `foo` + var _ = `foo +bar` + + var _ = `` + var _ = `foo` + var _ = + // the next line should remain indented + `foo +bar` + + var _ = // comment + `` + var _ = // comment + `foo` + var _ = // comment + // the next line should remain indented + `foo +bar` + + var _ = /* comment */ `` + var _ = /* comment */ `foo` + var _ = /* comment */ `foo +bar` + + var _ = /* comment */ + `` + var _ = /* comment */ + `foo` + var _ = /* comment */ + // the next line should remain indented + `foo +bar` + + var board = []int( + `........... +........... +....●●●.... +....●●●.... +..●●●●●●●.. +..●●●○●●●.. +..●●●●●●●.. +....●●●.... +....●●●.... +........... +........... +`) + + var state = S{ + "foo", + // the next line should remain indented + `........... +........... +....●●●.... +....●●●.... +..●●●●●●●.. +..●●●○●●●.. +..●●●●●●●.. +....●●●.... +....●●●.... +........... +........... 
+`, + "bar", + } +} + +func _() { + // one-line function literals (body is on a single line) + _ = func() {} + _ = func() int { return 0 } + _ = func(x, y int) bool { m := (x + y) / 2; return m < 0 } + + // multi-line function literals (body is not on one line) + _ = func() { + } + _ = func() int { + return 0 + } + _ = func(x, y int) bool { + m := (x + y) / 2 + return x < y + } + + f(func() { + }) + f(func() int { + return 0 + }) + f(func(x, y int) bool { + m := (x + y) / 2 + return x < y + }) +} + +func _() { + _ = [][]int{ + []int{1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int{ + {1}, + []int{1, 2}, + []int{1, 2, 3}, + } + _ = [][]int{ + {1}, + {1, 2}, + {1, 2, 3}, + } + _ = [][]int{{1}, {1, 2}, {1, 2, 3}} +} + +// various multi-line expressions +func _() { + // do not add extra indentation to multi-line string lists + _ = "foo" + "bar" + _ = "foo" + + "bar" + + "bah" + _ = []string{ + "abc" + + "def", + "foo" + + "bar", + } +} + +const _ = F1 + + `string = "%s";` + + `ptr = *;` + + `datafmt.T2 = s ["-" p "-"];` + +const _ = `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` + +const _ = `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` + +func _() { + _ = F1 + + `string = "%s";` + + `ptr = *;` + + `datafmt.T2 = s ["-" p "-"];` + + _ = + `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` + + _ = `datafmt "datafmt";` + + `default = "%v";` + + `array = *;` + + `datafmt.T3 = s {" " a a / ","};` +} + +func _() { + // respect source lines in multi-line expressions + _ = a + + b + + c + _ = a < b || + b < a + _ = "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000" // 100! + _ = "170141183460469231731687303715884105727" // prime +} + +// Alignment after overlong lines +const ( + _ = "991" + _ = "2432902008176640000" // 20! + _ = "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000" // 100! + _ = "170141183460469231731687303715884105727" // prime +) + +// Correct placement of operators and comments in multi-line expressions +func _() { + _ = a + // comment + b + // comment + c + _ = "a" + + "b" + // comment + "c" + _ = "ba0408" + "7265717569726564" // field 71, encoding 2, string "required" +} + +// Correct placement of terminating comma/closing parentheses in multi-line calls. +func _() { + f(1, + 2, + 3) + f(1, + 2, + 3, + ) + f(1, + 2, + 3) // comment + f(1, + 2, + 3, // comment + ) + f(1, + 2, + 3) // comment + f(1, + 2, + 3, // comment + ) +} + +// Align comments in multi-line lists of single-line expressions. 
+var txpix = [NCOL]draw.Color{ + draw.Yellow, // yellow + draw.Cyan, // cyan + draw.Green, // lime green + draw.GreyBlue, // slate + draw.Red, /* red */ + draw.GreyGreen, /* olive green */ + draw.Blue, /* blue */ + draw.Color(0xFF55AAFF), /* pink */ + draw.Color(0xFFAAFFFF), /* lavender */ + draw.Color(0xBB005DFF), /* maroon */ +} + +func same(t, u *Time) bool { + // respect source lines in multi-line expressions + return t.Year == u.Year && + t.Month == u.Month && + t.Day == u.Day && + t.Hour == u.Hour && + t.Minute == u.Minute && + t.Second == u.Second && + t.Weekday == u.Weekday && + t.ZoneOffset == u.ZoneOffset && + t.Zone == u.Zone +} + +func (p *parser) charClass() { + // respect source lines in multi-line expressions + if cc.negate && len(cc.ranges) == 2 && + cc.ranges[0] == '\n' && cc.ranges[1] == '\n' { + nl := new(_NotNl) + p.re.add(nl) + } +} + +func addState(s []state, inst instr, match []int) { + // handle comments correctly in multi-line expressions + for i := 0; i < l; i++ { + if s[i].inst.index() == index && // same instruction + s[i].match[0] < pos { // earlier match already going; leftmost wins + return s + } + } +} + +func (self *T) foo(x int) *T { return self } + +func _() { module.Func1().Func2() } + +func _() { + _ = new(T). + foo(1). + foo(2). + foo(3) + + _ = new(T). + foo(1). + foo(2). // inline comments + foo(3) + + _ = new(T).foo(1).foo(2).foo(3) + + // handle multiline argument list correctly + _ = new(T). + foo( + 1). + foo(2) + + _ = new(T).foo( + 1).foo(2) + + _ = Array[3+ + 4] + + _ = Method(1, 2, + 3) + + _ = new(T). + foo(). + bar().(*Type) + + _ = new(T). + foo(). + bar().(*Type). + baz() + + _ = new(T). + foo(). + bar()["idx"] + + _ = new(T). + foo(). + bar()["idx"]. + baz() + + _ = new(T). + foo(). + bar()[1:2] + + _ = new(T). + foo(). + bar()[1:2]. + baz() + + _ = new(T). + Field. + Array[3+ + 4]. + Table["foo"]. + Blob.(*Type). + Slices[1:4]. + Method(1, 2, + 3). + Thingy + + _ = a.b.c + _ = a. + b. + c + _ = a.b().c + _ = a. + b(). + c + _ = a.b[0].c + _ = a. + b[0]. + c + _ = a.b[0:].c + _ = a. + b[0:]. + c + _ = a.b.(T).c + _ = a. + b.(T). + c +} + +// Don't introduce extra newlines in strangely formatted expression lists. +func f() { + // os.Open parameters should remain on two lines + if writer, err = os.Open(outfile, s.O_WRONLY|os.O_CREATE| + os.O_TRUNC, 0666); err != nil { + log.Fatal(err) + } +} diff --git a/src/pkg/go/printer/testdata/linebreaks.golden b/src/pkg/go/printer/testdata/linebreaks.golden new file mode 100644 index 000000000..be780da67 --- /dev/null +++ b/src/pkg/go/printer/testdata/linebreaks.golden @@ -0,0 +1,223 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
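The linebreaks testdata below exercises the rule the earlier fixtures state repeatedly ("respect source lines in multi-line expressions", "at most one empty line"): line breaks present in the source are kept, so multi-line composite literals stay multi-line, while runs of blank lines collapse to a single empty line. A small sketch, again with the current go/parser and go/printer APIs:

package main

import (
	"go/parser"
	"go/printer"
	"go/token"
	"os"
)

func main() {
	// The literal stays on multiple lines because the source put it
	// there; the run of blank lines before y collapses to one.
	src := "package p\n\nvar xs = []int{\n\t1, 2,\n\t3,\n}\n\n\n\nvar y = 0\n"
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "x.go", src, 0)
	if err != nil {
		panic(err)
	}
	printer.Fprint(os.Stdout, fset, f)
}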
+ +package linebreaks + +import ( + "bytes" + "fmt" + "io" + "os" + "reflect" + "strings" + "testing" +) + +type writerTestEntry struct { + header *Header + contents string +} + +type writerTest struct { + file string // filename of expected output + entries []*writerTestEntry +} + +var writerTests = []*writerTest{ + &writerTest{ + file: "testdata/writer.tar", + entries: []*writerTestEntry{ + &writerTestEntry{ + header: &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1246508266, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Kilts", + }, + &writerTestEntry{ + header: &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1245217492, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Google.com\n", + }, + }, + }, + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt + // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar + &writerTest{ + file: "testdata/writer-big.tar", + entries: []*writerTestEntry{ + &writerTestEntry{ + header: &Header{ + Name: "tmp/16gig.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 16 << 30, + Mtime: 1254699560, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + // no contents + }, + }, + }, +} + +type untarTest struct { + file string + headers []*Header +} + +var untarTests = []*untarTest{ + &untarTest{ + file: "testdata/gnu.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244428340, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244436044, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + }, + }, + &untarTest{ + file: "testdata/star.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244592783, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Atime: 1244592783, + Ctime: 1244592783, + }, + &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244592783, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Atime: 1244592783, + Ctime: 1244592783, + }, + }, + }, + &untarTest{ + file: "testdata/v7.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244593104, + Typeflag: '\x00', + }, + &Header{ + Name: "small2.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244593104, + Typeflag: '\x00', + }, + }, + }, +} + +var facts = map[int]string{ + 0: "1", + 1: "1", + 2: "2", + 10: "3628800", + 20: "2432902008176640000", + 100: "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000", +} + +func usage() { + fmt.Fprintf(os.Stderr, + // TODO(gri): the 2nd string of this string list should not be indented + "usage: godoc package [name ...]\n"+ + " godoc -http=:6060\n") + flag.PrintDefaults() + os.Exit(2) +} + +func TestReader(t *testing.T) { +testLoop: + for i, test := range untarTests { + f, err := os.Open(test.file, os.O_RDONLY, 0444) + if err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + continue + } + tr := NewReader(f) + for j, header := range test.headers { + hdr, err := tr.Next() + if err != nil || hdr == nil { + 
t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) + f.Close() + continue testLoop + } + if !reflect.DeepEqual(hdr, header) { + t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", + i, j, *hdr, *header) + } + } + hdr, err := tr.Next() + if hdr != nil || err != nil { + t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, err) + } + f.Close() + } +} + +// There should be exactly one linebreak after this comment. diff --git a/src/pkg/go/printer/testdata/linebreaks.input b/src/pkg/go/printer/testdata/linebreaks.input new file mode 100644 index 000000000..457b491e6 --- /dev/null +++ b/src/pkg/go/printer/testdata/linebreaks.input @@ -0,0 +1,223 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package linebreaks + +import ( + "bytes" + "fmt" + "io" + "os" + "reflect" + "strings" + "testing" +) + +type writerTestEntry struct { + header *Header + contents string +} + +type writerTest struct { + file string // filename of expected output + entries []*writerTestEntry +} + +var writerTests = []*writerTest{ + &writerTest{ + file: "testdata/writer.tar", + entries: []*writerTestEntry{ + &writerTestEntry{ + header: &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1246508266, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Kilts", + }, + &writerTestEntry{ + header: &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1245217492, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Google.com\n", + }, + }, + }, + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt + // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar + &writerTest{ + file: "testdata/writer-big.tar", + entries: []*writerTestEntry{ + &writerTestEntry{ + header: &Header{ + Name: "tmp/16gig.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 16 << 30, + Mtime: 1254699560, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + // no contents + }, + }, + }, +} + +type untarTest struct { + file string + headers []*Header +} + +var untarTests = []*untarTest{ + &untarTest{ + file: "testdata/gnu.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244428340, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244436044, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + }, + }, + &untarTest{ + file: "testdata/star.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244592783, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Atime: 1244592783, + Ctime: 1244592783, + }, + &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244592783, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Atime: 1244592783, + Ctime: 1244592783, + }, + }, + }, + &untarTest{ + file: "testdata/v7.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244593104, + Typeflag: '\x00', + }, + &Header{ + Name: "small2.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244593104, + Typeflag: '\x00', + }, + }, + }, +} + +var 
facts = map[int] string { + 0: "1", + 1: "1", + 2: "2", + 10: "3628800", + 20: "2432902008176640000", + 100: "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000", +} + +func usage() { + fmt.Fprintf(os.Stderr, + // TODO(gri): the 2nd string of this string list should not be indented + "usage: godoc package [name ...]\n" + + " godoc -http=:6060\n") + flag.PrintDefaults() + os.Exit(2) +} + +func TestReader(t *testing.T) { +testLoop: + for i, test := range untarTests { + f, err := os.Open(test.file, os.O_RDONLY, 0444) + if err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + continue + } + tr := NewReader(f) + for j, header := range test.headers { + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) + f.Close() + continue testLoop + } + if !reflect.DeepEqual(hdr, header) { + t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", + i, j, *hdr, *header) + } + } + hdr, err := tr.Next() + if hdr != nil || err != nil { + t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, err) + } + f.Close() + } +} + +// There should be exactly one linebreak after this comment. diff --git a/src/pkg/go/printer/testdata/parser.go b/src/pkg/go/printer/testdata/parser.go new file mode 100644 index 000000000..2d27af499 --- /dev/null +++ b/src/pkg/go/printer/testdata/parser.go @@ -0,0 +1,2153 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package parser implements a parser for Go source files. Input may be +// provided in a variety of forms (see the various Parse* functions); the +// output is an abstract syntax tree (AST) representing the Go source. The +// parser is invoked through one of the Parse* functions. +// +package parser + +import ( + "fmt" + "go/ast" + "go/scanner" + "go/token" +) + +// The mode parameter to the Parse* functions is a set of flags (or 0). +// They control the amount of source code parsed and other optional +// parser functionality. +// +const ( + PackageClauseOnly uint = 1 << iota // parsing stops after package clause + ImportsOnly // parsing stops after import declarations + ParseComments // parse comments and add them to AST + Trace // print a trace of parsed productions + DeclarationErrors // report declaration errors +) + +// The parser structure holds the parser's internal state. 
+type parser struct { + file *token.File + scanner.ErrorVector + scanner scanner.Scanner + + // Tracing/debugging + mode uint // parsing mode + trace bool // == (mode & Trace != 0) + indent uint // indentation used for tracing output + + // Comments + comments []*ast.CommentGroup + leadComment *ast.CommentGroup // last lead comment + lineComment *ast.CommentGroup // last line comment + + // Next token + pos token.Pos // token position + tok token.Token // one token look-ahead + lit string // token literal + + // Non-syntactic parser control + exprLev int // < 0: in control clause, >= 0: in expression + + // Ordinary identifier scopes + pkgScope *ast.Scope // pkgScope.Outer == nil + topScope *ast.Scope // top-most scope; may be pkgScope + unresolved []*ast.Ident // unresolved identifiers + imports []*ast.ImportSpec // list of imports + + // Label scope + // (maintained by open/close LabelScope) + labelScope *ast.Scope // label scope for current function + targetStack [][]*ast.Ident // stack of unresolved labels +} + +// scannerMode returns the scanner mode bits given the parser's mode bits. +func scannerMode(mode uint) uint { + var m uint = scanner.InsertSemis + if mode&ParseComments != 0 { + m |= scanner.ScanComments + } + return m +} + +func (p *parser) init(fset *token.FileSet, filename string, src []byte, mode uint) { + p.file = fset.AddFile(filename, fset.Base(), len(src)) + p.scanner.Init(p.file, src, p, scannerMode(mode)) + + p.mode = mode + p.trace = mode&Trace != 0 // for convenience (p.trace is used frequently) + + p.next() + + // set up the pkgScope here (as opposed to in parseFile) because + // there are other parser entry points (ParseExpr, etc.) + p.openScope() + p.pkgScope = p.topScope + + // for the same reason, set up a label scope + p.openLabelScope() +} + +// ---------------------------------------------------------------------------- +// Scoping support + +func (p *parser) openScope() { + p.topScope = ast.NewScope(p.topScope) +} + +func (p *parser) closeScope() { + p.topScope = p.topScope.Outer +} + +func (p *parser) openLabelScope() { + p.labelScope = ast.NewScope(p.labelScope) + p.targetStack = append(p.targetStack, nil) +} + +func (p *parser) closeLabelScope() { + // resolve labels + n := len(p.targetStack) - 1 + scope := p.labelScope + for _, ident := range p.targetStack[n] { + ident.Obj = scope.Lookup(ident.Name) + if ident.Obj == nil && p.mode&DeclarationErrors != 0 { + p.error(ident.Pos(), fmt.Sprintf("label %s undefined", ident.Name)) + } + } + // pop label scope + p.targetStack = p.targetStack[0:n] + p.labelScope = p.labelScope.Outer +} + +func (p *parser) declare(decl interface{}, scope *ast.Scope, kind ast.ObjKind, idents ...*ast.Ident) { + for _, ident := range idents { + assert(ident.Obj == nil, "identifier already declared or resolved") + if ident.Name != "_" { + obj := ast.NewObj(kind, ident.Name) + // remember the corresponding declaration for redeclaration + // errors and global variable resolution/typechecking phase + obj.Decl = decl + if alt := scope.Insert(obj); alt != nil && p.mode&DeclarationErrors != 0 { + prevDecl := "" + if pos := alt.Pos(); pos.IsValid() { + prevDecl = fmt.Sprintf("\n\tprevious declaration at %s", p.file.Position(pos)) + } + p.error(ident.Pos(), fmt.Sprintf("%s redeclared in this block%s", ident.Name, prevDecl)) + } + ident.Obj = obj + } + } +} + +func (p *parser) shortVarDecl(idents []*ast.Ident) { + // Go spec: A short variable declaration may redeclare variables + // provided they were originally declared in the same block
with + // the same type, and at least one of the non-blank variables is new. + n := 0 // number of new variables + for _, ident := range idents { + assert(ident.Obj == nil, "identifier already declared or resolved") + if ident.Name != "_" { + obj := ast.NewObj(ast.Var, ident.Name) + // short var declarations cannot have redeclaration errors + // and are not global => no need to remember the respective + // declaration + alt := p.topScope.Insert(obj) + if alt == nil { + n++ // new declaration + alt = obj + } + ident.Obj = alt + } + } + if n == 0 && p.mode&DeclarationErrors != 0 { + p.error(idents[0].Pos(), "no new variables on left side of :=") + } +} + +// The unresolved object is a sentinel to mark identifiers that have been added +// to the list of unresolved identifiers. The sentinel is only used for verifying +// internal consistency. +var unresolved = new(ast.Object) + +func (p *parser) resolve(x ast.Expr) { + // nothing to do if x is not an identifier or the blank identifier + ident, _ := x.(*ast.Ident) + if ident == nil { + return + } + assert(ident.Obj == nil, "identifier already declared or resolved") + if ident.Name == "_" { + return + } + // try to resolve the identifier + for s := p.topScope; s != nil; s = s.Outer { + if obj := s.Lookup(ident.Name); obj != nil { + ident.Obj = obj + return + } + } + // all local scopes are known, so any unresolved identifier + // must be found either in the file scope, package scope + // (perhaps in another file), or universe scope --- collect + // them so that they can be resolved later + ident.Obj = unresolved + p.unresolved = append(p.unresolved, ident) +} + +// ---------------------------------------------------------------------------- +// Parsing support + +func (p *parser) printTrace(a ...interface{}) { + const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + + ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + const n = uint(len(dots)) + pos := p.file.Position(p.pos) + fmt.Printf("%5d:%3d: ", pos.Line, pos.Column) + i := 2 * p.indent + for ; i > n; i -= n { + fmt.Print(dots) + } + fmt.Print(dots[0:i]) + fmt.Println(a...) +} + +func trace(p *parser, msg string) *parser { + p.printTrace(msg, "(") + p.indent++ + return p +} + +// Usage pattern: defer un(trace(p, "...")); +func un(p *parser) { + p.indent-- + p.printTrace(")") +} + +// Advance to the next token. +func (p *parser) next0() { + // Because of one-token look-ahead, print the previous token + // when tracing as it provides a more readable output. The + // very first token (!p.pos.IsValid()) is not initialized + // (it is token.ILLEGAL), so don't print it . + if p.trace && p.pos.IsValid() { + s := p.tok.String() + switch { + case p.tok.IsLiteral(): + p.printTrace(s, p.lit) + case p.tok.IsOperator(), p.tok.IsKeyword(): + p.printTrace("\"" + s + "\"") + default: + p.printTrace(s) + } + } + + p.pos, p.tok, p.lit = p.scanner.Scan() +} + +// Consume a comment and return it and the line on which it ends. +func (p *parser) consumeComment() (comment *ast.Comment, endline int) { + // /*-style comments may end on a different line than where they start. + // Scan the comment for '\n' chars and adjust endline accordingly. 
+ endline = p.file.Line(p.pos) + if p.lit[1] == '*' { + // don't use range here - no need to decode Unicode code points + for i := 0; i < len(p.lit); i++ { + if p.lit[i] == '\n' { + endline++ + } + } + } + + comment = &ast.Comment{p.pos, p.lit} + p.next0() + + return +} + +// Consume a group of adjacent comments, add it to the parser's +// comments list, and return it together with the line at which +// the last comment in the group ends. An empty line or non-comment +// token terminates a comment group. +// +func (p *parser) consumeCommentGroup() (comments *ast.CommentGroup, endline int) { + var list []*ast.Comment + endline = p.file.Line(p.pos) + for p.tok == token.COMMENT && endline+1 >= p.file.Line(p.pos) { + var comment *ast.Comment + comment, endline = p.consumeComment() + list = append(list, comment) + } + + // add comment group to the comments list + comments = &ast.CommentGroup{list} + p.comments = append(p.comments, comments) + + return +} + +// Advance to the next non-comment token. In the process, collect +// any comment groups encountered, and remember the last lead +// and line comments. +// +// A lead comment is a comment group that starts and ends in a +// line without any other tokens and that is followed by a non-comment +// token on the line immediately after the comment group. +// +// A line comment is a comment group that follows a non-comment +// token on the same line, and that has no tokens after it on the line +// where it ends. +// +// Lead and line comments may be considered documentation that is +// stored in the AST. +// +func (p *parser) next() { + p.leadComment = nil + p.lineComment = nil + line := p.file.Line(p.pos) // current line + p.next0() + + if p.tok == token.COMMENT { + var comment *ast.CommentGroup + var endline int + + if p.file.Line(p.pos) == line { + // The comment is on same line as the previous token; it + // cannot be a lead comment but may be a line comment. + comment, endline = p.consumeCommentGroup() + if p.file.Line(p.pos) != endline { + // The next token is on a different line, thus + // the last comment group is a line comment. + p.lineComment = comment + } + } + + // consume successor comments, if any + endline = -1 + for p.tok == token.COMMENT { + comment, endline = p.consumeCommentGroup() + } + + if endline+1 == p.file.Line(p.pos) { + // The next token is following on the line immediately after the + // comment group, thus the last comment group is a lead comment.
+ p.leadComment = comment + } + } +} + +func (p *parser) error(pos token.Pos, msg string) { + p.Error(p.file.Position(pos), msg) +} + +func (p *parser) errorExpected(pos token.Pos, msg string) { + msg = "expected " + msg + if pos == p.pos { + // the error happened at the current position; + // make the error message more specific + if p.tok == token.SEMICOLON && p.lit[0] == '\n' { + msg += ", found newline" + } else { + msg += ", found '" + p.tok.String() + "'" + if p.tok.IsLiteral() { + msg += " " + p.lit + } + } + } + p.error(pos, msg) +} + +func (p *parser) expect(tok token.Token) token.Pos { + pos := p.pos + if p.tok != tok { + p.errorExpected(pos, "'"+tok.String()+"'") + } + p.next() // make progress + return pos +} + +func (p *parser) expectSemi() { + if p.tok != token.RPAREN && p.tok != token.RBRACE { + p.expect(token.SEMICOLON) + } +} + +func assert(cond bool, msg string) { + if !cond { + panic("go/parser internal error: " + msg) + } +} + +// ---------------------------------------------------------------------------- +// Identifiers + +func (p *parser) parseIdent() *ast.Ident { + pos := p.pos + name := "_" + if p.tok == token.IDENT { + name = p.lit + p.next() + } else { + p.expect(token.IDENT) // use expect() error handling + } + return &ast.Ident{pos, name, nil} +} + +func (p *parser) parseIdentList() (list []*ast.Ident) { + if p.trace { + defer un(trace(p, "IdentList")) + } + + list = append(list, p.parseIdent()) + for p.tok == token.COMMA { + p.next() + list = append(list, p.parseIdent()) + } + + return +} + +// ---------------------------------------------------------------------------- +// Common productions + +// If lhs is set, result list elements which are identifiers are not resolved. +func (p *parser) parseExprList(lhs bool) (list []ast.Expr) { + if p.trace { + defer un(trace(p, "ExpressionList")) + } + + list = append(list, p.parseExpr(lhs)) + for p.tok == token.COMMA { + p.next() + list = append(list, p.parseExpr(lhs)) + } + + return +} + +func (p *parser) parseLhsList() []ast.Expr { + list := p.parseExprList(true) + switch p.tok { + case token.DEFINE: + // lhs of a short variable declaration + p.shortVarDecl(p.makeIdentList(list)) + case token.COLON: + // lhs of a label declaration or a communication clause of a select + // statement (parseLhsList is not called when parsing the case clause + // of a switch statement): + // - labels are declared by the caller of parseLhsList + // - for communication clauses, if there is a stand-alone identifier + // followed by a colon, we have a syntax error; there is no need + // to resolve the identifier in that case + default: + // identifiers must be declared elsewhere + for _, x := range list { + p.resolve(x) + } + } + return list +} + +func (p *parser) parseRhsList() []ast.Expr { + return p.parseExprList(false) +} + +// ---------------------------------------------------------------------------- +// Types + +func (p *parser) parseType() ast.Expr { + if p.trace { + defer un(trace(p, "Type")) + } + + typ := p.tryType() + + if typ == nil { + pos := p.pos + p.errorExpected(pos, "type") + p.next() // make progress + return &ast.BadExpr{pos, p.pos} + } + + return typ +} + +// If the result is an identifier, it is not resolved. 
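(Aside, before parseTypeName below: expect and errorExpected above implement the classic report-and-advance recovery scheme: on a mismatch the parser emits a diagnostic but still consumes a token, so it cannot loop forever on bad input. Below is a runnable toy version of the same pattern over a hand-rolled token slice; every name in it is invented for illustration.)

package main

import "fmt"

type tok int

const (
	IDENT tok = iota // prints as 0 in this toy
	COMMA            // prints as 1
	EOF              // prints as 2
)

type toyParser struct {
	toks []tok
	pos  int
	errs int
}

func (p *toyParser) cur() tok { return p.toks[p.pos] }

// expect mirrors the pattern above: report a mismatch but advance anyway.
func (p *toyParser) expect(t tok) {
	if p.cur() != t {
		p.errs++
		fmt.Printf("pos %d: expected %v, found %v\n", p.pos, t, p.cur())
	}
	p.pos++ // make progress regardless
}

// identList parses IDENT { COMMA IDENT }, like parseIdentList above.
func (p *toyParser) identList() {
	p.expect(IDENT)
	for p.cur() == COMMA {
		p.pos++
		p.expect(IDENT)
	}
}

func main() {
	p := &toyParser{toks: []tok{IDENT, COMMA, COMMA, IDENT, EOF}}
	p.identList()
	fmt.Println("errors:", p.errs) // errors: 1
}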
+func (p *parser) parseTypeName() ast.Expr { + if p.trace { + defer un(trace(p, "TypeName")) + } + + ident := p.parseIdent() + // don't resolve ident yet - it may be a parameter or field name + + if p.tok == token.PERIOD { + // ident is a package name + p.next() + p.resolve(ident) + sel := p.parseIdent() + return &ast.SelectorExpr{ident, sel} + } + + return ident +} + +func (p *parser) parseArrayType(ellipsisOk bool) ast.Expr { + if p.trace { + defer un(trace(p, "ArrayType")) + } + + lbrack := p.expect(token.LBRACK) + var len ast.Expr + if ellipsisOk && p.tok == token.ELLIPSIS { + len = &ast.Ellipsis{p.pos, nil} + p.next() + } else if p.tok != token.RBRACK { + len = p.parseRhs() + } + p.expect(token.RBRACK) + elt := p.parseType() + + return &ast.ArrayType{lbrack, len, elt} +} + +func (p *parser) makeIdentList(list []ast.Expr) []*ast.Ident { + idents := make([]*ast.Ident, len(list)) + for i, x := range list { + ident, isIdent := x.(*ast.Ident) + if !isIdent { + pos := x.(ast.Expr).Pos() + p.errorExpected(pos, "identifier") + ident = &ast.Ident{pos, "_", nil} + } + idents[i] = ident + } + return idents +} + +func (p *parser) parseFieldDecl(scope *ast.Scope) *ast.Field { + if p.trace { + defer un(trace(p, "FieldDecl")) + } + + doc := p.leadComment + + // fields + list, typ := p.parseVarList(false) + + // optional tag + var tag *ast.BasicLit + if p.tok == token.STRING { + tag = &ast.BasicLit{p.pos, p.tok, p.lit} + p.next() + } + + // analyze case + var idents []*ast.Ident + if typ != nil { + // IdentifierList Type + idents = p.makeIdentList(list) + } else { + // ["*"] TypeName (AnonymousField) + typ = list[0] // we always have at least one element + p.resolve(typ) + if n := len(list); n > 1 || !isTypeName(deref(typ)) { + pos := typ.Pos() + p.errorExpected(pos, "anonymous field") + typ = &ast.BadExpr{pos, list[n-1].End()} + } + } + + p.expectSemi() // call before accessing p.linecomment + + field := &ast.Field{doc, idents, typ, tag, p.lineComment} + p.declare(field, scope, ast.Var, idents...) + + return field +} + +func (p *parser) parseStructType() *ast.StructType { + if p.trace { + defer un(trace(p, "StructType")) + } + + pos := p.expect(token.STRUCT) + lbrace := p.expect(token.LBRACE) + scope := ast.NewScope(nil) // struct scope + var list []*ast.Field + for p.tok == token.IDENT || p.tok == token.MUL || p.tok == token.LPAREN { + // a field declaration cannot start with a '(' but we accept + // it here for more robust parsing and better error messages + // (parseFieldDecl will check and complain if necessary) + list = append(list, p.parseFieldDecl(scope)) + } + rbrace := p.expect(token.RBRACE) + + // TODO(gri): store struct scope in AST + return &ast.StructType{pos, &ast.FieldList{lbrace, list, rbrace}, false} +} + +func (p *parser) parsePointerType() *ast.StarExpr { + if p.trace { + defer un(trace(p, "PointerType")) + } + + star := p.expect(token.MUL) + base := p.parseType() + + return &ast.StarExpr{star, base} +} + +func (p *parser) tryVarType(isParam bool) ast.Expr { + if isParam && p.tok == token.ELLIPSIS { + pos := p.pos + p.next() + typ := p.tryIdentOrType(isParam) // don't use parseType so we can provide better error message + if typ == nil { + p.error(pos, "'...' parameter is missing type") + typ = &ast.BadExpr{pos, p.pos} + } + if p.tok != token.RPAREN { + p.error(pos, "can use '...' 
with last parameter type only") + } + return &ast.Ellipsis{pos, typ} + } + return p.tryIdentOrType(false) +} + +func (p *parser) parseVarType(isParam bool) ast.Expr { + typ := p.tryVarType(isParam) + if typ == nil { + pos := p.pos + p.errorExpected(pos, "type") + p.next() // make progress + typ = &ast.BadExpr{pos, p.pos} + } + return typ +} + +func (p *parser) parseVarList(isParam bool) (list []ast.Expr, typ ast.Expr) { + if p.trace { + defer un(trace(p, "VarList")) + } + + // a list of identifiers looks like a list of type names + for { + // parseVarType accepts any type (including parenthesized ones) + // even though the syntax does not permit them here: we + // accept them all for more robust parsing and complain + // afterwards + list = append(list, p.parseVarType(isParam)) + if p.tok != token.COMMA { + break + } + p.next() + } + + // if we had a list of identifiers, it must be followed by a type + typ = p.tryVarType(isParam) + if typ != nil { + p.resolve(typ) + } + + return +} + +func (p *parser) parseParameterList(scope *ast.Scope, ellipsisOk bool) (params []*ast.Field) { + if p.trace { + defer un(trace(p, "ParameterList")) + } + + list, typ := p.parseVarList(ellipsisOk) + if typ != nil { + // IdentifierList Type + idents := p.makeIdentList(list) + field := &ast.Field{nil, idents, typ, nil, nil} + params = append(params, field) + // Go spec: The scope of an identifier denoting a function + // parameter or result variable is the function body. + p.declare(field, scope, ast.Var, idents...) + if p.tok == token.COMMA { + p.next() + } + + for p.tok != token.RPAREN && p.tok != token.EOF { + idents := p.parseIdentList() + typ := p.parseVarType(ellipsisOk) + field := &ast.Field{nil, idents, typ, nil, nil} + params = append(params, field) + // Go spec: The scope of an identifier denoting a function + // parameter or result variable is the function body. + p.declare(field, scope, ast.Var, idents...) 
+ if p.tok != token.COMMA { + break + } + p.next() + } + + } else { + // Type { "," Type } (anonymous parameters) + params = make([]*ast.Field, len(list)) + for i, x := range list { + p.resolve(x) + params[i] = &ast.Field{Type: x} + } + } + + return +} + +func (p *parser) parseParameters(scope *ast.Scope, ellipsisOk bool) *ast.FieldList { + if p.trace { + defer un(trace(p, "Parameters")) + } + + var params []*ast.Field + lparen := p.expect(token.LPAREN) + if p.tok != token.RPAREN { + params = p.parseParameterList(scope, ellipsisOk) + } + rparen := p.expect(token.RPAREN) + + return &ast.FieldList{lparen, params, rparen} +} + +func (p *parser) parseResult(scope *ast.Scope) *ast.FieldList { + if p.trace { + defer un(trace(p, "Result")) + } + + if p.tok == token.LPAREN { + return p.parseParameters(scope, false) + } + + typ := p.tryType() + if typ != nil { + list := make([]*ast.Field, 1) + list[0] = &ast.Field{Type: typ} + return &ast.FieldList{List: list} + } + + return nil +} + +func (p *parser) parseSignature(scope *ast.Scope) (params, results *ast.FieldList) { + if p.trace { + defer un(trace(p, "Signature")) + } + + params = p.parseParameters(scope, true) + results = p.parseResult(scope) + + return +} + +func (p *parser) parseFuncType() (*ast.FuncType, *ast.Scope) { + if p.trace { + defer un(trace(p, "FuncType")) + } + + pos := p.expect(token.FUNC) + scope := ast.NewScope(p.topScope) // function scope + params, results := p.parseSignature(scope) + + return &ast.FuncType{pos, params, results}, scope +} + +func (p *parser) parseMethodSpec(scope *ast.Scope) *ast.Field { + if p.trace { + defer un(trace(p, "MethodSpec")) + } + + doc := p.leadComment + var idents []*ast.Ident + var typ ast.Expr + x := p.parseTypeName() + if ident, isIdent := x.(*ast.Ident); isIdent && p.tok == token.LPAREN { + // method + idents = []*ast.Ident{ident} + scope := ast.NewScope(nil) // method scope + params, results := p.parseSignature(scope) + typ = &ast.FuncType{token.NoPos, params, results} + } else { + // embedded interface + typ = x + } + p.expectSemi() // call before accessing p.linecomment + + spec := &ast.Field{doc, idents, typ, nil, p.lineComment} + p.declare(spec, scope, ast.Fun, idents...) + + return spec +} + +func (p *parser) parseInterfaceType() *ast.InterfaceType { + if p.trace { + defer un(trace(p, "InterfaceType")) + } + + pos := p.expect(token.INTERFACE) + lbrace := p.expect(token.LBRACE) + scope := ast.NewScope(nil) // interface scope + var list []*ast.Field + for p.tok == token.IDENT { + list = append(list, p.parseMethodSpec(scope)) + } + rbrace := p.expect(token.RBRACE) + + // TODO(gri): store interface scope in AST + return &ast.InterfaceType{pos, &ast.FieldList{lbrace, list, rbrace}, false} +} + +func (p *parser) parseMapType() *ast.MapType { + if p.trace { + defer un(trace(p, "MapType")) + } + + pos := p.expect(token.MAP) + p.expect(token.LBRACK) + key := p.parseType() + p.expect(token.RBRACK) + value := p.parseType() + + return &ast.MapType{pos, key, value} +} + +func (p *parser) parseChanType() *ast.ChanType { + if p.trace { + defer un(trace(p, "ChanType")) + } + + pos := p.pos + dir := ast.SEND | ast.RECV + if p.tok == token.CHAN { + p.next() + if p.tok == token.ARROW { + p.next() + dir = ast.SEND + } + } else { + p.expect(token.ARROW) + p.expect(token.CHAN) + dir = ast.RECV + } + value := p.parseType() + + return &ast.ChanType{pos, dir, value} +} + +// If the result is an identifier, it is not resolved. 
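(Aside on parseChanType above, before the type dispatch continues below: the three channel notations are encoded as a direction bitmask, ast.SEND|ast.RECV for a bidirectional chan T, ast.SEND alone for chan<- T, and ast.RECV alone for <-chan T. Here is a standalone sketch with stand-in constants; the real values live in go/ast and are assumed, not quoted.)

package main

import "fmt"

// Stand-in direction bits, assumed to mirror ast.SEND and ast.RECV.
const (
	SEND = 1 << iota
	RECV
)

func dirString(dir int) string {
	switch dir {
	case SEND | RECV:
		return "chan T (bidirectional)"
	case SEND:
		return "chan<- T (send-only)"
	case RECV:
		return "<-chan T (receive-only)"
	}
	return "invalid direction"
}

func main() {
	fmt.Println(dirString(SEND | RECV))
	fmt.Println(dirString(SEND))
	fmt.Println(dirString(RECV))
}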
+func (p *parser) tryIdentOrType(ellipsisOk bool) ast.Expr { + switch p.tok { + case token.IDENT: + return p.parseTypeName() + case token.LBRACK: + return p.parseArrayType(ellipsisOk) + case token.STRUCT: + return p.parseStructType() + case token.MUL: + return p.parsePointerType() + case token.FUNC: + typ, _ := p.parseFuncType() + return typ + case token.INTERFACE: + return p.parseInterfaceType() + case token.MAP: + return p.parseMapType() + case token.CHAN, token.ARROW: + return p.parseChanType() + case token.LPAREN: + lparen := p.pos + p.next() + typ := p.parseType() + rparen := p.expect(token.RPAREN) + return &ast.ParenExpr{lparen, typ, rparen} + } + + // no type found + return nil +} + +func (p *parser) tryType() ast.Expr { + typ := p.tryIdentOrType(false) + if typ != nil { + p.resolve(typ) + } + return typ +} + +// ---------------------------------------------------------------------------- +// Blocks + +func (p *parser) parseStmtList() (list []ast.Stmt) { + if p.trace { + defer un(trace(p, "StatementList")) + } + + for p.tok != token.CASE && p.tok != token.DEFAULT && p.tok != token.RBRACE && p.tok != token.EOF { + list = append(list, p.parseStmt()) + } + + return +} + +func (p *parser) parseBody(scope *ast.Scope) *ast.BlockStmt { + if p.trace { + defer un(trace(p, "Body")) + } + + lbrace := p.expect(token.LBRACE) + p.topScope = scope // open function scope + p.openLabelScope() + list := p.parseStmtList() + p.closeLabelScope() + p.closeScope() + rbrace := p.expect(token.RBRACE) + + return &ast.BlockStmt{lbrace, list, rbrace} +} + +func (p *parser) parseBlockStmt() *ast.BlockStmt { + if p.trace { + defer un(trace(p, "BlockStmt")) + } + + lbrace := p.expect(token.LBRACE) + p.openScope() + list := p.parseStmtList() + p.closeScope() + rbrace := p.expect(token.RBRACE) + + return &ast.BlockStmt{lbrace, list, rbrace} +} + +// ---------------------------------------------------------------------------- +// Expressions + +func (p *parser) parseFuncTypeOrLit() ast.Expr { + if p.trace { + defer un(trace(p, "FuncTypeOrLit")) + } + + typ, scope := p.parseFuncType() + if p.tok != token.LBRACE { + // function type only + return typ + } + + p.exprLev++ + body := p.parseBody(scope) + p.exprLev-- + + return &ast.FuncLit{typ, body} +} + +// parseOperand may return an expression or a raw type (incl. array +// types of the form [...]T). Callers must verify the result. +// If lhs is set and the result is an identifier, it is not resolved.
+// +func (p *parser) parseOperand(lhs bool) ast.Expr { + if p.trace { + defer un(trace(p, "Operand")) + } + + switch p.tok { + case token.IDENT: + x := p.parseIdent() + if !lhs { + p.resolve(x) + } + return x + + case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING: + x := &ast.BasicLit{p.pos, p.tok, p.lit} + p.next() + return x + + case token.LPAREN: + lparen := p.pos + p.next() + p.exprLev++ + x := p.parseRhs() + p.exprLev-- + rparen := p.expect(token.RPAREN) + return &ast.ParenExpr{lparen, x, rparen} + + case token.FUNC: + return p.parseFuncTypeOrLit() + + default: + if typ := p.tryIdentOrType(true); typ != nil { + // could be type for composite literal or conversion + _, isIdent := typ.(*ast.Ident) + assert(!isIdent, "type cannot be identifier") + return typ + } + } + + pos := p.pos + p.errorExpected(pos, "operand") + p.next() // make progress + return &ast.BadExpr{pos, p.pos} +} + +func (p *parser) parseSelector(x ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "Selector")) + } + + sel := p.parseIdent() + + return &ast.SelectorExpr{x, sel} +} + +func (p *parser) parseTypeAssertion(x ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "TypeAssertion")) + } + + p.expect(token.LPAREN) + var typ ast.Expr + if p.tok == token.TYPE { + // type switch: typ == nil + p.next() + } else { + typ = p.parseType() + } + p.expect(token.RPAREN) + + return &ast.TypeAssertExpr{x, typ} +} + +func (p *parser) parseIndexOrSlice(x ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "IndexOrSlice")) + } + + lbrack := p.expect(token.LBRACK) + p.exprLev++ + var low, high ast.Expr + isSlice := false + if p.tok != token.COLON { + low = p.parseRhs() + } + if p.tok == token.COLON { + isSlice = true + p.next() + if p.tok != token.RBRACK { + high = p.parseRhs() + } + } + p.exprLev-- + rbrack := p.expect(token.RBRACK) + + if isSlice { + return &ast.SliceExpr{x, lbrack, low, high, rbrack} + } + return &ast.IndexExpr{x, lbrack, low, rbrack} +} + +func (p *parser) parseCallOrConversion(fun ast.Expr) *ast.CallExpr { + if p.trace { + defer un(trace(p, "CallOrConversion")) + } + + lparen := p.expect(token.LPAREN) + p.exprLev++ + var list []ast.Expr + var ellipsis token.Pos + for p.tok != token.RPAREN && p.tok != token.EOF && !ellipsis.IsValid() { + list = append(list, p.parseRhs()) + if p.tok == token.ELLIPSIS { + ellipsis = p.pos + p.next() + } + if p.tok != token.COMMA { + break + } + p.next() + } + p.exprLev-- + rparen := p.expect(token.RPAREN) + + return &ast.CallExpr{fun, lparen, list, ellipsis, rparen} +} + +func (p *parser) parseElement(keyOk bool) ast.Expr { + if p.trace { + defer un(trace(p, "Element")) + } + + if p.tok == token.LBRACE { + return p.parseLiteralValue(nil) + } + + x := p.parseExpr(keyOk) // don't resolve if map key + if keyOk { + if p.tok == token.COLON { + colon := p.pos + p.next() + return &ast.KeyValueExpr{x, colon, p.parseElement(false)} + } + p.resolve(x) // not a map key + } + + return x +} + +func (p *parser) parseElementList() (list []ast.Expr) { + if p.trace { + defer un(trace(p, "ElementList")) + } + + for p.tok != token.RBRACE && p.tok != token.EOF { + list = append(list, p.parseElement(true)) + if p.tok != token.COMMA { + break + } + p.next() + } + + return +} + +func (p *parser) parseLiteralValue(typ ast.Expr) ast.Expr { + if p.trace { + defer un(trace(p, "LiteralValue")) + } + + lbrace := p.expect(token.LBRACE) + var elts []ast.Expr + p.exprLev++ + if p.tok != token.RBRACE { + elts = p.parseElementList() + } + p.exprLev-- + rbrace := p.expect(token.RBRACE) 
+ return &ast.CompositeLit{typ, lbrace, elts, rbrace} +} + +// checkExpr checks that x is an expression (and not a type). +func (p *parser) checkExpr(x ast.Expr) ast.Expr { + switch t := unparen(x).(type) { + case *ast.BadExpr: + case *ast.Ident: + case *ast.BasicLit: + case *ast.FuncLit: + case *ast.CompositeLit: + case *ast.ParenExpr: + panic("unreachable") + case *ast.SelectorExpr: + case *ast.IndexExpr: + case *ast.SliceExpr: + case *ast.TypeAssertExpr: + if t.Type == nil { + // the form X.(type) is only allowed in type switch expressions + p.errorExpected(x.Pos(), "expression") + x = &ast.BadExpr{x.Pos(), x.End()} + } + case *ast.CallExpr: + case *ast.StarExpr: + case *ast.UnaryExpr: + if t.Op == token.RANGE { + // the range operator is only allowed at the top of a for statement + p.errorExpected(x.Pos(), "expression") + x = &ast.BadExpr{x.Pos(), x.End()} + } + case *ast.BinaryExpr: + default: + // all other nodes are not proper expressions + p.errorExpected(x.Pos(), "expression") + x = &ast.BadExpr{x.Pos(), x.End()} + } + return x +} + +// isTypeName returns true iff x is a (qualified) TypeName. +func isTypeName(x ast.Expr) bool { + switch t := x.(type) { + case *ast.BadExpr: + case *ast.Ident: + case *ast.SelectorExpr: + _, isIdent := t.X.(*ast.Ident) + return isIdent + default: + return false // all other nodes are not type names + } + return true +} + +// isLiteralType returns true iff x is a legal composite literal type. +func isLiteralType(x ast.Expr) bool { + switch t := x.(type) { + case *ast.BadExpr: + case *ast.Ident: + case *ast.SelectorExpr: + _, isIdent := t.X.(*ast.Ident) + return isIdent + case *ast.ArrayType: + case *ast.StructType: + case *ast.MapType: + default: + return false // all other nodes are not legal composite literal types + } + return true +} + +// If x is of the form *T, deref returns T, otherwise it returns x. +func deref(x ast.Expr) ast.Expr { + if p, isPtr := x.(*ast.StarExpr); isPtr { + x = p.X + } + return x +} + +// If x is of the form (T), unparen returns unparen(T), otherwise it returns x. +func unparen(x ast.Expr) ast.Expr { + if p, isParen := x.(*ast.ParenExpr); isParen { + x = unparen(p.X) + } + return x +} + +// checkExprOrType checks that x is an expression or a type +// (and not a raw type such as [...]T). +// +func (p *parser) checkExprOrType(x ast.Expr) ast.Expr { + switch t := unparen(x).(type) { + case *ast.ParenExpr: + panic("unreachable") + case *ast.UnaryExpr: + if t.Op == token.RANGE { + // the range operator is only allowed at the top of a for statement + p.errorExpected(x.Pos(), "expression") + x = &ast.BadExpr{x.Pos(), x.End()} + } + case *ast.ArrayType: + if len, isEllipsis := t.Len.(*ast.Ellipsis); isEllipsis { + p.error(len.Pos(), "expected array length, found '...'") + x = &ast.BadExpr{x.Pos(), x.End()} + } + } + + // all other nodes are expressions or types + return x +} + +// If lhs is set and the result is an identifier, it is not resolved. 
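(Aside, before parsePrimaryExpr below: unparen and deref above are tiny structural normalizers, stripping redundant parentheses and a leading star before further checks. The same recursion is demonstrated here on a toy expression tree; all of the types below are invented for illustration.)

package main

import "fmt"

type expr interface{ String() string }

type ident struct{ name string }
type paren struct{ x expr }
type star struct{ x expr }

func (e ident) String() string { return e.name }
func (e paren) String() string { return "(" + e.x.String() + ")" }
func (e star) String() string  { return "*" + e.x.String() }

// unparen strips any number of nested parentheses, like the helper above.
func unparen(x expr) expr {
	if p, ok := x.(paren); ok {
		return unparen(p.x)
	}
	return x
}

// deref strips one leading star, like the helper above.
func deref(x expr) expr {
	if s, ok := x.(star); ok {
		return s.x
	}
	return x
}

func main() {
	x := paren{paren{star{ident{"T"}}}} // ((*T))
	fmt.Println(unparen(x))             // *T
	fmt.Println(deref(unparen(x)))      // T
}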
+func (p *parser) parsePrimaryExpr(lhs bool) ast.Expr { + if p.trace { + defer un(trace(p, "PrimaryExpr")) + } + + x := p.parseOperand(lhs) +L: + for { + switch p.tok { + case token.PERIOD: + p.next() + if lhs { + p.resolve(x) + } + switch p.tok { + case token.IDENT: + x = p.parseSelector(p.checkExpr(x)) + case token.LPAREN: + x = p.parseTypeAssertion(p.checkExpr(x)) + default: + pos := p.pos + p.next() // make progress + p.errorExpected(pos, "selector or type assertion") + x = &ast.BadExpr{pos, p.pos} + } + case token.LBRACK: + if lhs { + p.resolve(x) + } + x = p.parseIndexOrSlice(p.checkExpr(x)) + case token.LPAREN: + if lhs { + p.resolve(x) + } + x = p.parseCallOrConversion(p.checkExprOrType(x)) + case token.LBRACE: + if isLiteralType(x) && (p.exprLev >= 0 || !isTypeName(x)) { + if lhs { + p.resolve(x) + } + x = p.parseLiteralValue(x) + } else { + break L + } + default: + break L + } + lhs = false // no need to try to resolve again + } + + return x +} + +// If lhs is set and the result is an identifier, it is not resolved. +func (p *parser) parseUnaryExpr(lhs bool) ast.Expr { + if p.trace { + defer un(trace(p, "UnaryExpr")) + } + + switch p.tok { + case token.ADD, token.SUB, token.NOT, token.XOR, token.AND, token.RANGE: + pos, op := p.pos, p.tok + p.next() + x := p.parseUnaryExpr(false) + return &ast.UnaryExpr{pos, op, p.checkExpr(x)} + + case token.ARROW: + // channel type or receive expression + pos := p.pos + p.next() + if p.tok == token.CHAN { + p.next() + value := p.parseType() + return &ast.ChanType{pos, ast.RECV, value} + } + + x := p.parseUnaryExpr(false) + return &ast.UnaryExpr{pos, token.ARROW, p.checkExpr(x)} + + case token.MUL: + // pointer type or unary "*" expression + pos := p.pos + p.next() + x := p.parseUnaryExpr(false) + return &ast.StarExpr{pos, p.checkExprOrType(x)} + } + + return p.parsePrimaryExpr(lhs) +} + +// If lhs is set and the result is an identifier, it is not resolved. +func (p *parser) parseBinaryExpr(lhs bool, prec1 int) ast.Expr { + if p.trace { + defer un(trace(p, "BinaryExpr")) + } + + x := p.parseUnaryExpr(lhs) + for prec := p.tok.Precedence(); prec >= prec1; prec-- { + for p.tok.Precedence() == prec { + pos, op := p.pos, p.tok + p.next() + if lhs { + p.resolve(x) + lhs = false + } + y := p.parseBinaryExpr(false, prec+1) + x = &ast.BinaryExpr{p.checkExpr(x), pos, op, p.checkExpr(y)} + } + } + + return x +} + +// If lhs is set and the result is an identifier, it is not resolved. 
+// TODO(gri): parseExpr may return a type or even a raw type ([..]int) - +// should reject when a type/raw type is obviously not allowed +func (p *parser) parseExpr(lhs bool) ast.Expr { + if p.trace { + defer un(trace(p, "Expression")) + } + + return p.parseBinaryExpr(lhs, token.LowestPrec+1) +} + +func (p *parser) parseRhs() ast.Expr { + return p.parseExpr(false) +} + +// ---------------------------------------------------------------------------- +// Statements + +func (p *parser) parseSimpleStmt(labelOk bool) ast.Stmt { + if p.trace { + defer un(trace(p, "SimpleStmt")) + } + + x := p.parseLhsList() + + switch p.tok { + case + token.DEFINE, token.ASSIGN, token.ADD_ASSIGN, + token.SUB_ASSIGN, token.MUL_ASSIGN, token.QUO_ASSIGN, + token.REM_ASSIGN, token.AND_ASSIGN, token.OR_ASSIGN, + token.XOR_ASSIGN, token.SHL_ASSIGN, token.SHR_ASSIGN, token.AND_NOT_ASSIGN: + // assignment statement + pos, tok := p.pos, p.tok + p.next() + y := p.parseRhsList() + return &ast.AssignStmt{x, pos, tok, y} + } + + if len(x) > 1 { + p.errorExpected(x[0].Pos(), "1 expression") + // continue with first expression + } + + switch p.tok { + case token.COLON: + // labeled statement + colon := p.pos + p.next() + if label, isIdent := x[0].(*ast.Ident); labelOk && isIdent { + // Go spec: The scope of a label is the body of the function + // in which it is declared and excludes the body of any nested + // function. + stmt := &ast.LabeledStmt{label, colon, p.parseStmt()} + p.declare(stmt, p.labelScope, ast.Lbl, label) + return stmt + } + p.error(x[0].Pos(), "illegal label declaration") + return &ast.BadStmt{x[0].Pos(), colon + 1} + + case token.ARROW: + // send statement + arrow := p.pos + p.next() // consume "<-" + y := p.parseRhs() + return &ast.SendStmt{x[0], arrow, y} + + case token.INC, token.DEC: + // increment or decrement + s := &ast.IncDecStmt{x[0], p.pos, p.tok} + p.next() // consume "++" or "--" + return s + } + + // expression + return &ast.ExprStmt{x[0]} +} + +func (p *parser) parseCallExpr() *ast.CallExpr { + x := p.parseRhs() + if call, isCall := x.(*ast.CallExpr); isCall { + return call + } + p.errorExpected(x.Pos(), "function/method call") + return nil +} + +func (p *parser) parseGoStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "GoStmt")) + } + + pos := p.expect(token.GO) + call := p.parseCallExpr() + p.expectSemi() + if call == nil { + return &ast.BadStmt{pos, pos + 2} // len("go") + } + + return &ast.GoStmt{pos, call} +} + +func (p *parser) parseDeferStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "DeferStmt")) + } + + pos := p.expect(token.DEFER) + call := p.parseCallExpr() + p.expectSemi() + if call == nil { + return &ast.BadStmt{pos, pos + 5} // len("defer") + } + + return &ast.DeferStmt{pos, call} +} + +func (p *parser) parseReturnStmt() *ast.ReturnStmt { + if p.trace { + defer un(trace(p, "ReturnStmt")) + } + + pos := p.pos + p.expect(token.RETURN) + var x []ast.Expr + if p.tok != token.SEMICOLON && p.tok != token.RBRACE { + x = p.parseRhsList() + } + p.expectSemi() + + return &ast.ReturnStmt{pos, x} +} + +func (p *parser) parseBranchStmt(tok token.Token) *ast.BranchStmt { + if p.trace { + defer un(trace(p, "BranchStmt")) + } + + pos := p.expect(tok) + var label *ast.Ident + if tok != token.FALLTHROUGH && p.tok == token.IDENT { + label = p.parseIdent() + // add to list of unresolved targets + n := len(p.targetStack) - 1 + p.targetStack[n] = append(p.targetStack[n], label) + } + p.expectSemi() + + return &ast.BranchStmt{pos, tok, label} +} + +func (p *parser) makeExpr(s ast.Stmt) 
ast.Expr { + if s == nil { + return nil + } + if es, isExpr := s.(*ast.ExprStmt); isExpr { + return p.checkExpr(es.X) + } + p.error(s.Pos(), "expected condition, found simple statement") + return &ast.BadExpr{s.Pos(), s.End()} +} + +func (p *parser) parseIfStmt() *ast.IfStmt { + if p.trace { + defer un(trace(p, "IfStmt")) + } + + pos := p.expect(token.IF) + p.openScope() + defer p.closeScope() + + var s ast.Stmt + var x ast.Expr + { + prevLev := p.exprLev + p.exprLev = -1 + if p.tok == token.SEMICOLON { + p.next() + x = p.parseRhs() + } else { + s = p.parseSimpleStmt(false) + if p.tok == token.SEMICOLON { + p.next() + x = p.parseRhs() + } else { + x = p.makeExpr(s) + s = nil + } + } + p.exprLev = prevLev + } + + body := p.parseBlockStmt() + var else_ ast.Stmt + if p.tok == token.ELSE { + p.next() + else_ = p.parseStmt() + } else { + p.expectSemi() + } + + return &ast.IfStmt{pos, s, x, body, else_} +} + +func (p *parser) parseTypeList() (list []ast.Expr) { + if p.trace { + defer un(trace(p, "TypeList")) + } + + list = append(list, p.parseType()) + for p.tok == token.COMMA { + p.next() + list = append(list, p.parseType()) + } + + return +} + +func (p *parser) parseCaseClause(exprSwitch bool) *ast.CaseClause { + if p.trace { + defer un(trace(p, "CaseClause")) + } + + pos := p.pos + var list []ast.Expr + if p.tok == token.CASE { + p.next() + if exprSwitch { + list = p.parseRhsList() + } else { + list = p.parseTypeList() + } + } else { + p.expect(token.DEFAULT) + } + + colon := p.expect(token.COLON) + p.openScope() + body := p.parseStmtList() + p.closeScope() + + return &ast.CaseClause{pos, list, colon, body} +} + +func isExprSwitch(s ast.Stmt) bool { + if s == nil { + return true + } + if e, ok := s.(*ast.ExprStmt); ok { + if a, ok := e.X.(*ast.TypeAssertExpr); ok { + return a.Type != nil // regular type assertion + } + return true + } + return false +} + +func (p *parser) parseSwitchStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "SwitchStmt")) + } + + pos := p.expect(token.SWITCH) + p.openScope() + defer p.closeScope() + + var s1, s2 ast.Stmt + if p.tok != token.LBRACE { + prevLev := p.exprLev + p.exprLev = -1 + if p.tok != token.SEMICOLON { + s2 = p.parseSimpleStmt(false) + } + if p.tok == token.SEMICOLON { + p.next() + s1 = s2 + s2 = nil + if p.tok != token.LBRACE { + s2 = p.parseSimpleStmt(false) + } + } + p.exprLev = prevLev + } + + exprSwitch := isExprSwitch(s2) + lbrace := p.expect(token.LBRACE) + var list []ast.Stmt + for p.tok == token.CASE || p.tok == token.DEFAULT { + list = append(list, p.parseCaseClause(exprSwitch)) + } + rbrace := p.expect(token.RBRACE) + p.expectSemi() + body := &ast.BlockStmt{lbrace, list, rbrace} + + if exprSwitch { + return &ast.SwitchStmt{pos, s1, p.makeExpr(s2), body} + } + // type switch + // TODO(gri): do all the checks! 
+ return &ast.TypeSwitchStmt{pos, s1, s2, body} +} + +func (p *parser) parseCommClause() *ast.CommClause { + if p.trace { + defer un(trace(p, "CommClause")) + } + + p.openScope() + pos := p.pos + var comm ast.Stmt + if p.tok == token.CASE { + p.next() + lhs := p.parseLhsList() + if p.tok == token.ARROW { + // SendStmt + if len(lhs) > 1 { + p.errorExpected(lhs[0].Pos(), "1 expression") + // continue with first expression + } + arrow := p.pos + p.next() + rhs := p.parseRhs() + comm = &ast.SendStmt{lhs[0], arrow, rhs} + } else { + // RecvStmt + pos := p.pos + tok := p.tok + var rhs ast.Expr + if tok == token.ASSIGN || tok == token.DEFINE { + // RecvStmt with assignment + if len(lhs) > 2 { + p.errorExpected(lhs[0].Pos(), "1 or 2 expressions") + // continue with first two expressions + lhs = lhs[0:2] + } + p.next() + rhs = p.parseRhs() + } else { + // rhs must be single receive operation + if len(lhs) > 1 { + p.errorExpected(lhs[0].Pos(), "1 expression") + // continue with first expression + } + rhs = lhs[0] + lhs = nil // there is no lhs + } + if x, isUnary := rhs.(*ast.UnaryExpr); !isUnary || x.Op != token.ARROW { + p.errorExpected(rhs.Pos(), "send or receive operation") + rhs = &ast.BadExpr{rhs.Pos(), rhs.End()} + } + if lhs != nil { + comm = &ast.AssignStmt{lhs, pos, tok, []ast.Expr{rhs}} + } else { + comm = &ast.ExprStmt{rhs} + } + } + } else { + p.expect(token.DEFAULT) + } + + colon := p.expect(token.COLON) + body := p.parseStmtList() + p.closeScope() + + return &ast.CommClause{pos, comm, colon, body} +} + +func (p *parser) parseSelectStmt() *ast.SelectStmt { + if p.trace { + defer un(trace(p, "SelectStmt")) + } + + pos := p.expect(token.SELECT) + lbrace := p.expect(token.LBRACE) + var list []ast.Stmt + for p.tok == token.CASE || p.tok == token.DEFAULT { + list = append(list, p.parseCommClause()) + } + rbrace := p.expect(token.RBRACE) + p.expectSemi() + body := &ast.BlockStmt{lbrace, list, rbrace} + + return &ast.SelectStmt{pos, body} +} + +func (p *parser) parseForStmt() ast.Stmt { + if p.trace { + defer un(trace(p, "ForStmt")) + } + + pos := p.expect(token.FOR) + p.openScope() + defer p.closeScope() + + var s1, s2, s3 ast.Stmt + if p.tok != token.LBRACE { + prevLev := p.exprLev + p.exprLev = -1 + if p.tok != token.SEMICOLON { + s2 = p.parseSimpleStmt(false) + } + if p.tok == token.SEMICOLON { + p.next() + s1 = s2 + s2 = nil + if p.tok != token.SEMICOLON { + s2 = p.parseSimpleStmt(false) + } + p.expectSemi() + if p.tok != token.LBRACE { + s3 = p.parseSimpleStmt(false) + } + } + p.exprLev = prevLev + } + + body := p.parseBlockStmt() + p.expectSemi() + + if as, isAssign := s2.(*ast.AssignStmt); isAssign { + // possibly a for statement with a range clause; check assignment operator + if as.Tok != token.ASSIGN && as.Tok != token.DEFINE { + p.errorExpected(as.TokPos, "'=' or ':='") + return &ast.BadStmt{pos, body.End()} + } + // check lhs + var key, value ast.Expr + switch len(as.Lhs) { + case 2: + key, value = as.Lhs[0], as.Lhs[1] + case 1: + key = as.Lhs[0] + default: + p.errorExpected(as.Lhs[0].Pos(), "1 or 2 expressions") + return &ast.BadStmt{pos, body.End()} + } + // check rhs + if len(as.Rhs) != 1 { + p.errorExpected(as.Rhs[0].Pos(), "1 expression") + return &ast.BadStmt{pos, body.End()} + } + if rhs, isUnary := as.Rhs[0].(*ast.UnaryExpr); isUnary && rhs.Op == token.RANGE { + // rhs is range expression + // (any short variable declaration was handled by parseSimpleStmt above) + return &ast.RangeStmt{pos, key, value, as.TokPos, as.Tok, rhs.X, body} + } + p.errorExpected(s2.Pos(), "range
clause") + return &ast.BadStmt{pos, body.End()} + } + + // regular for statement + return &ast.ForStmt{pos, s1, p.makeExpr(s2), s3, body} +} + +func (p *parser) parseStmt() (s ast.Stmt) { + if p.trace { + defer un(trace(p, "Statement")) + } + + switch p.tok { + case token.CONST, token.TYPE, token.VAR: + s = &ast.DeclStmt{p.parseDecl()} + case + // tokens that may start a top-level expression + token.IDENT, token.INT, token.FLOAT, token.CHAR, token.STRING, token.FUNC, token.LPAREN, // operand + token.LBRACK, token.STRUCT, // composite type + token.MUL, token.AND, token.ARROW, token.ADD, token.SUB, token.XOR: // unary operators + s = p.parseSimpleStmt(true) + // because of the required look-ahead, labeled statements are + // parsed by parseSimpleStmt - don't expect a semicolon after + // them + if _, isLabeledStmt := s.(*ast.LabeledStmt); !isLabeledStmt { + p.expectSemi() + } + case token.GO: + s = p.parseGoStmt() + case token.DEFER: + s = p.parseDeferStmt() + case token.RETURN: + s = p.parseReturnStmt() + case token.BREAK, token.CONTINUE, token.GOTO, token.FALLTHROUGH: + s = p.parseBranchStmt(p.tok) + case token.LBRACE: + s = p.parseBlockStmt() + p.expectSemi() + case token.IF: + s = p.parseIfStmt() + case token.SWITCH: + s = p.parseSwitchStmt() + case token.SELECT: + s = p.parseSelectStmt() + case token.FOR: + s = p.parseForStmt() + case token.SEMICOLON: + s = &ast.EmptyStmt{p.pos} + p.next() + case token.RBRACE: + // a semicolon may be omitted before a closing "}" + s = &ast.EmptyStmt{p.pos} + default: + // no statement found + pos := p.pos + p.errorExpected(pos, "statement") + p.next() // make progress + s = &ast.BadStmt{pos, p.pos} + } + + return +} + +// ---------------------------------------------------------------------------- +// Declarations + +type parseSpecFunction func(p *parser, doc *ast.CommentGroup, iota int) ast.Spec + +func parseImportSpec(p *parser, doc *ast.CommentGroup, _ int) ast.Spec { + if p.trace { + defer un(trace(p, "ImportSpec")) + } + + var ident *ast.Ident + switch p.tok { + case token.PERIOD: + ident = &ast.Ident{p.pos, ".", nil} + p.next() + case token.IDENT: + ident = p.parseIdent() + } + + var path *ast.BasicLit + if p.tok == token.STRING { + path = &ast.BasicLit{p.pos, p.tok, p.lit} + p.next() + } else { + p.expect(token.STRING) // use expect() error handling + } + p.expectSemi() // call before accessing p.linecomment + + // collect imports + spec := &ast.ImportSpec{doc, ident, path, p.lineComment} + p.imports = append(p.imports, spec) + + return spec +} + +func parseConstSpec(p *parser, doc *ast.CommentGroup, iota int) ast.Spec { + if p.trace { + defer un(trace(p, "ConstSpec")) + } + + idents := p.parseIdentList() + typ := p.tryType() + var values []ast.Expr + if typ != nil || p.tok == token.ASSIGN || iota == 0 { + p.expect(token.ASSIGN) + values = p.parseRhsList() + } + p.expectSemi() // call before accessing p.linecomment + + // Go spec: The scope of a constant or variable identifier declared inside + // a function begins at the end of the ConstSpec or VarSpec and ends at + // the end of the innermost containing block. + // (Global identifiers are resolved in a separate phase after parsing.) + spec := &ast.ValueSpec{doc, idents, typ, values, p.lineComment} + p.declare(spec, p.topScope, ast.Con, idents...) 
+ + return spec +} + +func parseTypeSpec(p *parser, doc *ast.CommentGroup, _ int) ast.Spec { + if p.trace { + defer un(trace(p, "TypeSpec")) + } + + ident := p.parseIdent() + + // Go spec: The scope of a type identifier declared inside a function begins + // at the identifier in the TypeSpec and ends at the end of the innermost + // containing block. + // (Global identifiers are resolved in a separate phase after parsing.) + spec := &ast.TypeSpec{doc, ident, nil, nil} + p.declare(spec, p.topScope, ast.Typ, ident) + + spec.Type = p.parseType() + p.expectSemi() // call before accessing p.linecomment + spec.Comment = p.lineComment + + return spec +} + +func parseVarSpec(p *parser, doc *ast.CommentGroup, _ int) ast.Spec { + if p.trace { + defer un(trace(p, "VarSpec")) + } + + idents := p.parseIdentList() + typ := p.tryType() + var values []ast.Expr + if typ == nil || p.tok == token.ASSIGN { + p.expect(token.ASSIGN) + values = p.parseRhsList() + } + p.expectSemi() // call before accessing p.linecomment + + // Go spec: The scope of a constant or variable identifier declared inside + // a function begins at the end of the ConstSpec or VarSpec and ends at + // the end of the innermost containing block. + // (Global identifiers are resolved in a separate phase after parsing.) + spec := &ast.ValueSpec{doc, idents, typ, values, p.lineComment} + p.declare(spec, p.topScope, ast.Var, idents...) + + return spec +} + +func (p *parser) parseGenDecl(keyword token.Token, f parseSpecFunction) *ast.GenDecl { + if p.trace { + defer un(trace(p, "GenDecl("+keyword.String()+")")) + } + + doc := p.leadComment + pos := p.expect(keyword) + var lparen, rparen token.Pos + var list []ast.Spec + if p.tok == token.LPAREN { + lparen = p.pos + p.next() + for iota := 0; p.tok != token.RPAREN && p.tok != token.EOF; iota++ { + list = append(list, f(p, p.leadComment, iota)) + } + rparen = p.expect(token.RPAREN) + p.expectSemi() + } else { + list = append(list, f(p, nil, 0)) + } + + return &ast.GenDecl{doc, pos, keyword, lparen, list, rparen} +} + +func (p *parser) parseReceiver(scope *ast.Scope) *ast.FieldList { + if p.trace { + defer un(trace(p, "Receiver")) + } + + pos := p.pos + par := p.parseParameters(scope, false) + + // must have exactly one receiver + if par.NumFields() != 1 { + p.errorExpected(pos, "exactly one receiver") + // TODO determine a better range for BadExpr below + par.List = []*ast.Field{&ast.Field{Type: &ast.BadExpr{pos, pos}}} + return par + } + + // recv type must be of the form ["*"] identifier + recv := par.List[0] + base := deref(recv.Type) + if _, isIdent := base.(*ast.Ident); !isIdent { + p.errorExpected(base.Pos(), "(unqualified) identifier") + par.List = []*ast.Field{&ast.Field{Type: &ast.BadExpr{recv.Pos(), recv.End()}}} + } + + return par +} + +func (p *parser) parseFuncDecl() *ast.FuncDecl { + if p.trace { + defer un(trace(p, "FunctionDecl")) + } + + doc := p.leadComment + pos := p.expect(token.FUNC) + scope := ast.NewScope(p.topScope) // function scope + + var recv *ast.FieldList + if p.tok == token.LPAREN { + recv = p.parseReceiver(scope) + } + + ident := p.parseIdent() + + params, results := p.parseSignature(scope) + + var body *ast.BlockStmt + if p.tok == token.LBRACE { + body = p.parseBody(scope) + } + p.expectSemi() + + decl := &ast.FuncDecl{doc, recv, ident, &ast.FuncType{pos, params, results}, body} + if recv == nil { + // Go spec: The scope of an identifier denoting a constant, type, + // variable, or function (but not method) declared at top level + // (outside any function) is the 
package block. + // + // init() functions cannot be referred to and there may + // be more than one - don't put them in the pkgScope + if ident.Name != "init" { + p.declare(decl, p.pkgScope, ast.Fun, ident) + } + } + + return decl +} + +func (p *parser) parseDecl() ast.Decl { + if p.trace { + defer un(trace(p, "Declaration")) + } + + var f parseSpecFunction + switch p.tok { + case token.CONST: + f = parseConstSpec + + case token.TYPE: + f = parseTypeSpec + + case token.VAR: + f = parseVarSpec + + case token.FUNC: + return p.parseFuncDecl() + + default: + pos := p.pos + p.errorExpected(pos, "declaration") + p.next() // make progress + decl := &ast.BadDecl{pos, p.pos} + return decl + } + + return p.parseGenDecl(p.tok, f) +} + +func (p *parser) parseDeclList() (list []ast.Decl) { + if p.trace { + defer un(trace(p, "DeclList")) + } + + for p.tok != token.EOF { + list = append(list, p.parseDecl()) + } + + return +} + +// ---------------------------------------------------------------------------- +// Source files + +func (p *parser) parseFile() *ast.File { + if p.trace { + defer un(trace(p, "File")) + } + + // package clause + doc := p.leadComment + pos := p.expect(token.PACKAGE) + // Go spec: The package clause is not a declaration; + // the package name does not appear in any scope. + ident := p.parseIdent() + if ident.Name == "_" { + p.error(p.pos, "invalid package name _") + } + p.expectSemi() + + var decls []ast.Decl + + // Don't bother parsing the rest if we had errors already. + // Likely not a Go source file at all. + + if p.ErrorCount() == 0 && p.mode&PackageClauseOnly == 0 { + // import decls + for p.tok == token.IMPORT { + decls = append(decls, p.parseGenDecl(token.IMPORT, parseImportSpec)) + } + + if p.mode&ImportsOnly == 0 { + // rest of package body + for p.tok != token.EOF { + decls = append(decls, p.parseDecl()) + } + } + } + + assert(p.topScope == p.pkgScope, "imbalanced scopes") + + // resolve global identifiers within the same file + i := 0 + for _, ident := range p.unresolved { + // i <= index for current ident + assert(ident.Obj == unresolved, "object already resolved") + ident.Obj = p.pkgScope.Lookup(ident.Name) // also removes unresolved sentinel + if ident.Obj == nil { + p.unresolved[i] = ident + i++ + } + } + + // TODO(gri): store p.imports in AST + return &ast.File{doc, pos, ident, decls, p.pkgScope, p.imports, p.unresolved[0:i], p.comments} +} diff --git a/src/pkg/go/printer/testdata/slow.golden b/src/pkg/go/printer/testdata/slow.golden new file mode 100644 index 000000000..43a15cb1d --- /dev/null +++ b/src/pkg/go/printer/testdata/slow.golden @@ -0,0 +1,85 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
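(One detail of parseFile above is worth isolating before this testdata file continues: the loop over p.unresolved is the standard in-place slice filter. Keep an output index i, copy the survivors down, then reslice to [0:i]. Below is a generic sketch of the idiom; the function and data are invented.)

package main

import "fmt"

// filterInPlace keeps the elements satisfying keep, reusing the backing
// array exactly like the p.unresolved[0:i] loop in parseFile above.
func filterInPlace(s []string, keep func(string) bool) []string {
	i := 0
	for _, x := range s {
		if keep(x) {
			s[i] = x
			i++
		}
	}
	return s[0:i]
}

func main() {
	ids := []string{"fmt", "undeclared", "os", "alsoUndeclared"}
	still := filterInPlace(ids, func(s string) bool { return len(s) > 3 })
	fmt.Println(still) // [undeclared alsoUndeclared]
}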
+ +package deepequal_test + +import ( + "testing" + "google3/spam/archer/frontend/deepequal" +) + +func TestTwoNilValues(t *testing.T) { + if err := deepequal.Check(nil, nil); err != nil { + t.Errorf("expected nil, saw %v", err) + } +} + +type Foo struct { + bar *Bar + bang *Bar +} + +type Bar struct { + baz *Baz + foo []*Foo +} + +type Baz struct { + entries map[int]interface{} + whatever string +} + +func newFoo() *Foo { + return &Foo{bar: &Bar{baz: &Baz{ + entries: map[int]interface{}{ + 42: &Foo{}, + 21: &Bar{}, + 11: &Baz{whatever: "it's just a test"}}}}, + bang: &Bar{foo: []*Foo{ + &Foo{bar: &Bar{baz: &Baz{ + entries: map[int]interface{}{ + 43: &Foo{}, + 22: &Bar{}, + 13: &Baz{whatever: "this is nuts"}}}}, + bang: &Bar{foo: []*Foo{ + &Foo{bar: &Bar{baz: &Baz{ + entries: map[int]interface{}{ + 61: &Foo{}, + 71: &Bar{}, + 11: &Baz{whatever: "no, it's Go"}}}}, + bang: &Bar{foo: []*Foo{ + &Foo{bar: &Bar{baz: &Baz{ + entries: map[int]interface{}{ + 0: &Foo{}, + -2: &Bar{}, + -11: &Baz{whatever: "we need to go deeper"}}}}, + bang: &Bar{foo: []*Foo{ + &Foo{bar: &Bar{baz: &Baz{ + entries: map[int]interface{}{ + -2: &Foo{}, + -5: &Bar{}, + -7: &Baz{whatever: "are you serious?"}}}}, + bang: &Bar{foo: []*Foo{}}}, + &Foo{bar: &Bar{baz: &Baz{ + entries: map[int]interface{}{ + -100: &Foo{}, + 50: &Bar{}, + 20: &Baz{whatever: "na, not really ..."}}}}, + bang: &Bar{foo: []*Foo{}}}}}}}}}, + &Foo{bar: &Bar{baz: &Baz{ + entries: map[int]interface{}{ + 2: &Foo{}, + 1: &Bar{}, + -1: &Baz{whatever: "... it's just a test."}}}}, + bang: &Bar{foo: []*Foo{}}}}}}}}} +} + +func TestElaborate(t *testing.T) { + a := newFoo() + b := newFoo() + + if err := deepequal.Check(a, b); err != nil { + t.Errorf("expected nil, saw %v", err) + } +} diff --git a/src/pkg/go/printer/testdata/slow.input b/src/pkg/go/printer/testdata/slow.input new file mode 100644 index 000000000..0e5a23d88 --- /dev/null +++ b/src/pkg/go/printer/testdata/slow.input @@ -0,0 +1,85 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package deepequal_test + +import ( + "testing" + "google3/spam/archer/frontend/deepequal" +) + +func TestTwoNilValues(t *testing.T) { + if err := deepequal.Check(nil, nil); err != nil { + t.Errorf("expected nil, saw %v", err) + } +} + +type Foo struct { + bar *Bar + bang *Bar +} + +type Bar struct { + baz *Baz + foo []*Foo +} + +type Baz struct { + entries map[int]interface{} + whatever string +} + +func newFoo() (*Foo) { +return &Foo{bar: &Bar{ baz: &Baz{ +entries: map[int]interface{}{ +42: &Foo{}, +21: &Bar{}, +11: &Baz{ whatever: "it's just a test" }}}}, + bang: &Bar{foo: []*Foo{ +&Foo{bar: &Bar{ baz: &Baz{ +entries: map[int]interface{}{ +43: &Foo{}, +22: &Bar{}, +13: &Baz{ whatever: "this is nuts" }}}}, + bang: &Bar{foo: []*Foo{ +&Foo{bar: &Bar{ baz: &Baz{ +entries: map[int]interface{}{ +61: &Foo{}, +71: &Bar{}, +11: &Baz{ whatever: "no, it's Go" }}}}, + bang: &Bar{foo: []*Foo{ +&Foo{bar: &Bar{ baz: &Baz{ +entries: map[int]interface{}{ +0: &Foo{}, +-2: &Bar{}, +-11: &Baz{ whatever: "we need to go deeper" }}}}, + bang: &Bar{foo: []*Foo{ +&Foo{bar: &Bar{ baz: &Baz{ +entries: map[int]interface{}{ +-2: &Foo{}, +-5: &Bar{}, +-7: &Baz{ whatever: "are you serious?" }}}}, + bang: &Bar{foo: []*Foo{}}}, +&Foo{bar: &Bar{ baz: &Baz{ +entries: map[int]interface{}{ +-100: &Foo{}, +50: &Bar{}, +20: &Baz{ whatever: "na, not really ..." 
}}}}, + bang: &Bar{foo: []*Foo{}}}}}}}}}, +&Foo{bar: &Bar{ baz: &Baz{ +entries: map[int]interface{}{ +2: &Foo{}, +1: &Bar{}, +-1: &Baz{ whatever: "... it's just a test." }}}}, + bang: &Bar{foo: []*Foo{}}}}}}}}} +} + +func TestElaborate(t *testing.T) { + a := newFoo() + b := newFoo() + + if err := deepequal.Check(a, b); err != nil { + t.Errorf("expected nil, saw %v", err) + } +} diff --git a/src/pkg/go/printer/testdata/statements.golden b/src/pkg/go/printer/testdata/statements.golden new file mode 100644 index 000000000..a6d85107f --- /dev/null +++ b/src/pkg/go/printer/testdata/statements.golden @@ -0,0 +1,412 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package statements + +var expr bool + +func use(x interface{}) {} + +// Formatting of if-statement headers. +func _() { + if true { + } + if true { + } // no semicolon printed + if expr { + } + if expr { + } // no semicolon printed + if expr { + } // no parens printed + if expr { + } // no semicolon and parens printed + if x := expr; true { + use(x) + } + if x := expr; expr { + use(x) + } +} + +// Formatting of switch-statement headers. +func _() { + switch { + } + switch { + } // no semicolon printed + switch expr { + } + switch expr { + } // no semicolon printed + switch expr { + } // no parens printed + switch expr { + } // no semicolon and parens printed + switch x := expr; { + default: + use( + x) + } + switch x := expr; expr { + default: + use(x) + } +} + +// Formatting of switch statement bodies. +func _() { + switch { + } + + switch x := 0; x { + case 1: + use(x) + use(x) // followed by an empty line + + case 2: // followed by an empty line + + use(x) // followed by an empty line + + case 3: // no empty lines + use(x) + use(x) + } + + switch x { + case 0: + use(x) + case 1: // this comment should have no effect on the previous or next line + use(x) + } + + switch x := 0; x { + case 1: + x = 0 + // this comment should be indented + case 2: + x = 0 + // this comment should not be indented, it is aligned with the next case + case 3: + x = 0 + /* indented comment + aligned + aligned + */ + // bla + /* and more */ + case 4: + x = 0 + /* not indented comment + aligned + aligned + */ + // bla + /* and more */ + case 5: + } +} + +// Formatting of selected select statements. +func _() { + select {} + select { /* this comment should not be tab-aligned because the closing } is on the same line */ + } + select { /* this comment should be tab-aligned */ + } + select { // this comment should be tab-aligned + } + select { + case <-c: + } +} + +// Formatting of for-statement headers. +func _() { + for { + } + for expr { + } + for expr { + } // no parens printed + for { + } // no semicolons printed + for x := expr; ; { + use(x) + } + for expr { + } // no semicolons printed + for expr { + } // no semicolons and parens printed + for ; ; expr = false { + } + for x := expr; expr; { + use(x) + } + for x := expr; ; expr = false { + use(x) + } + for ; expr; expr = false { + } + for x := expr; expr; expr = false { + use(x) + } + for x := range []int{} { + use(x) + } + for x := range []int{} { + use(x) + } // no parens printed +} + +// Don't remove mandatory parentheses around composite literals in control clauses. 
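(The cases below show the printed side of this rule. As a hedged sketch of how such golden output can be reproduced in principle: parse a fragment and print it back through go/printer. Only the documented parser.ParseFile and printer.Fprint entry points are used; the file name and source text are made up, and the exact output depends on the printer configuration.)

package main

import (
	"go/parser"
	"go/printer"
	"go/token"
	"os"
)

func main() {
	src := "package p\nfunc f() { if (x) {}; for ;; {} }\n"
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "src.go", src, 0)
	if err != nil {
		panic(err)
	}
	// Printing normalizes the statement headers: the redundant parentheses
	// around x and the empty semicolons in the for clause are dropped.
	printer.Fprint(os.Stdout, fset, f)
}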
+func _() { + // strip parentheses - no composite literals or composite literals don't start with a type name + if x { + } + if x { + } + if []T{} { + } + if []T{} { + } + if []T{} { + } + + for x { + } + for x { + } + for []T{} { + } + for []T{} { + } + for []T{} { + } + + switch x { + } + switch x { + } + switch []T{} { + } + switch []T{} { + } + + for _ = range []T{T{42}} { + } + + // leave parentheses - composite literals start with a type name + if (T{}) { + } + if (T{}) { + } + if (T{}) { + } + + for (T{}) { + } + for (T{}) { + } + for (T{}) { + } + + switch (T{}) { + } + switch (T{}) { + } + + for _ = range (T1{T{42}}) { + } + + if x == (T{42}[0]) { + } + if (x == T{42}[0]) { + } + if x == (T{42}[0]) { + } + if x == (T{42}[0]) { + } + if x == (T{42}[0]) { + } + if x == a+b*(T{42}[0]) { + } + if (x == a+b*T{42}[0]) { + } + if x == a+b*(T{42}[0]) { + } + if x == a+(b * (T{42}[0])) { + } + if x == a+b*(T{42}[0]) { + } + if (a + b*(T{42}[0])) == x { + } + if (a + b*(T{42}[0])) == x { + } + + if struct{ x bool }{false}.x { + } + if (struct{ x bool }{false}.x) == false { + } + if struct{ x bool }{false}.x == false { + } +} + +// Extra empty lines inside functions. Do respect source code line +// breaks between statement boundaries but print at most one empty +// line at a time. +func _() { + + const _ = 0 + + const _ = 1 + type _ int + type _ float + + var _ = 0 + var x = 1 + + // Each use(x) call below should have at most one empty line before and after. + // Known bug: The first use call may have more than one empty line before + // (see go/printer/nodes.go, func linebreak). + + + use(x) + + if x < x { + + use(x) + + } else { + + use(x) + + } +} + +// Formatting around labels. +func _() { +L: +} + +func _() { + // this comment should be indented +L: // no semicolon needed +} + +func _() { + switch 0 { + case 0: + L0: + ; // semicolon required + case 1: + L1: + ; // semicolon required + default: + L2: // no semicolon needed + } +} + +func _() { + f() +L1: + f() +L2: + ; +L3: +} + +func _() { + // this comment should be indented +L: +} + +func _() { +L: + _ = 0 +} + +func _() { + // this comment should be indented +L: + _ = 0 +} + +func _() { + for { + L1: + _ = 0 + L2: + _ = 0 + } +} + +func _() { + // this comment should be indented + for { + L1: + _ = 0 + L2: + _ = 0 + } +} + +func _() { + if true { + _ = 0 + } + _ = 0 // the indentation here should not be affected by the long label name +AnOverlongLabel: + _ = 0 + + if true { + _ = 0 + } + _ = 0 + +L: + _ = 0 +} + +func _() { + for { + goto L + } +L: + + MoreCode() +} + +func _() { + for { + goto L + } +L: // A comment on the same line as the label, followed by a single empty line. + // Known bug: There may be more than one empty line before MoreCode() + // (see go/printer/nodes.go, func linebreak). + + + MoreCode() +} + +func _() { + for { + goto L + } +L: + + // There should be a single empty line before this comment. + MoreCode() +} + +func _() { + for { + goto AVeryLongLabelThatShouldNotAffectFormatting + } +AVeryLongLabelThatShouldNotAffectFormatting: + // There should be a single empty line after this comment. + + // There should be a single empty line before this comment. + MoreCode() +} diff --git a/src/pkg/go/printer/testdata/statements.input b/src/pkg/go/printer/testdata/statements.input new file mode 100644 index 000000000..86a753c5a --- /dev/null +++ b/src/pkg/go/printer/testdata/statements.input @@ -0,0 +1,351 @@ +// Copyright 2009 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package statements + +var expr bool + +func use(x interface{}) {} + +// Formatting of if-statement headers. +func _() { + if true {} + if; true {} // no semicolon printed + if expr{} + if;expr{} // no semicolon printed + if (expr){} // no parens printed + if;((expr)){} // no semicolon and parens printed + if x:=expr;true{ + use(x)} + if x:=expr; expr {use(x)} +} + + +// Formatting of switch-statement headers. +func _() { + switch {} + switch;{} // no semicolon printed + switch expr {} + switch;expr{} // no semicolon printed + switch (expr) {} // no parens printed + switch;((expr)){} // no semicolon and parens printed + switch x := expr; { default:use( +x) + } + switch x := expr; expr {default:use(x)} +} + + +// Formatting of switch statement bodies. +func _() { + switch { + } + + switch x := 0; x { + case 1: + use(x) + use(x) // followed by an empty line + + case 2: // followed by an empty line + + use(x) // followed by an empty line + + case 3: // no empty lines + use(x) + use(x) + } + + switch x { + case 0: + use(x) + case 1: // this comment should have no effect on the previous or next line + use(x) + } + + switch x := 0; x { + case 1: + x = 0 + // this comment should be indented + case 2: + x = 0 + // this comment should not be indented, it is aligned with the next case + case 3: + x = 0 + /* indented comment + aligned + aligned + */ + // bla + /* and more */ + case 4: + x = 0 + /* not indented comment + aligned + aligned + */ + // bla + /* and more */ + case 5: + } +} + + +// Formatting of selected select statements. +func _() { + select { + } + select { /* this comment should not be tab-aligned because the closing } is on the same line */ } + select { /* this comment should be tab-aligned */ + } + select { // this comment should be tab-aligned + } + select { case <-c: } +} + + +// Formatting of for-statement headers. +func _() { + for{} + for expr {} + for (expr) {} // no parens printed + for;;{} // no semicolons printed + for x :=expr;; {use( x)} + for; expr;{} // no semicolons printed + for; ((expr));{} // no semicolons and parens printed + for; ; expr = false {} + for x :=expr; expr; {use(x)} + for x := expr;; expr=false {use(x)} + for;expr;expr =false { + } + for x := expr;expr;expr = false { use(x) } + for x := range []int{} { use(x) } + for x := range (([]int{})) { use(x) } // no parens printed +} + + +// Don't remove mandatory parentheses around composite literals in control clauses. 
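+//
+// (Illustrative note, not from the original source: this is the unformatted
+// counterpart of statements.golden above. Redundant parentheses such as in
+// "if (((x))) {}" are stripped by the printer, while parentheses around
+// literals such as (T{}) must survive formatting, as the golden file shows.)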
+func _() { + // strip parentheses - no composite literals or composite literals don't start with a type name + if (x) {} + if (((x))) {} + if ([]T{}) {} + if (([]T{})) {} + if ; (((([]T{})))) {} + + for (x) {} + for (((x))) {} + for ([]T{}) {} + for (([]T{})) {} + for ; (((([]T{})))) ; {} + + switch (x) {} + switch (((x))) {} + switch ([]T{}) {} + switch ; (((([]T{})))) {} + + for _ = range ((([]T{T{42}}))) {} + + // leave parentheses - composite literals start with a type name + if (T{}) {} + if ((T{})) {} + if ; ((((T{})))) {} + + for (T{}) {} + for ((T{})) {} + for ; ((((T{})))) ; {} + + switch (T{}) {} + switch ; ((((T{})))) {} + + for _ = range (((T1{T{42}}))) {} + + if x == (T{42}[0]) {} + if (x == T{42}[0]) {} + if (x == (T{42}[0])) {} + if (x == (((T{42}[0])))) {} + if (((x == (T{42}[0])))) {} + if x == a + b*(T{42}[0]) {} + if (x == a + b*T{42}[0]) {} + if (x == a + b*(T{42}[0])) {} + if (x == a + ((b * (T{42}[0])))) {} + if (((x == a + b * (T{42}[0])))) {} + if (((a + b * (T{42}[0])) == x)) {} + if (((a + b * (T{42}[0])))) == x {} + + if (struct{x bool}{false}.x) {} + if (struct{x bool}{false}.x) == false {} + if (struct{x bool}{false}.x == false) {} +} + + +// Extra empty lines inside functions. Do respect source code line +// breaks between statement boundaries but print at most one empty +// line at a time. +func _() { + + const _ = 0 + + const _ = 1 + type _ int + type _ float + + var _ = 0 + var x = 1 + + // Each use(x) call below should have at most one empty line before and after. + // Known bug: The first use call may have more than one empty line before + // (see go/printer/nodes.go, func linebreak). + + + + use(x) + + if x < x { + + use(x) + + } else { + + use(x) + + } +} + + +// Formatting around labels. +func _() { + L: +} + + +func _() { + // this comment should be indented + L: ; // no semicolon needed +} + + +func _() { + switch 0 { + case 0: + L0: ; // semicolon required + case 1: + L1: ; // semicolon required + default: + L2: ; // no semicolon needed + } +} + + +func _() { + f() +L1: + f() +L2: + ; +L3: +} + + +func _() { + // this comment should be indented + L: +} + + +func _() { + L: _ = 0 +} + + +func _() { + // this comment should be indented + L: _ = 0 +} + + +func _() { + for { + L1: _ = 0 + L2: + _ = 0 + } +} + + +func _() { + // this comment should be indented + for { + L1: _ = 0 + L2: + _ = 0 + } +} + + +func _() { + if true { + _ = 0 + } + _ = 0 // the indentation here should not be affected by the long label name +AnOverlongLabel: + _ = 0 + + if true { + _ = 0 + } + _ = 0 + +L: _ = 0 +} + + +func _() { + for { + goto L + } +L: + + MoreCode() +} + + +func _() { + for { + goto L + } +L: // A comment on the same line as the label, followed by a single empty line. + // Known bug: There may be more than one empty line before MoreCode() + // (see go/printer/nodes.go, func linebreak). + + + + + MoreCode() +} + + +func _() { + for { + goto L + } +L: + + + + + // There should be a single empty line before this comment. + MoreCode() +} + + +func _() { + for { + goto AVeryLongLabelThatShouldNotAffectFormatting + } +AVeryLongLabelThatShouldNotAffectFormatting: + // There should be a single empty line after this comment. + + // There should be a single empty line before this comment. + MoreCode() +} diff --git a/src/pkg/go/scanner/Makefile b/src/pkg/go/scanner/Makefile new file mode 100644 index 000000000..453faac00 --- /dev/null +++ b/src/pkg/go/scanner/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. 
+# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/scanner +GOFILES=\ + errors.go\ + scanner.go\ + +include ../../../Make.pkg diff --git a/src/pkg/go/scanner/errors.go b/src/pkg/go/scanner/errors.go new file mode 100644 index 000000000..a0927e416 --- /dev/null +++ b/src/pkg/go/scanner/errors.go @@ -0,0 +1,168 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package scanner + +import ( + "fmt" + "go/token" + "io" + "os" + "sort" +) + +// An implementation of an ErrorHandler may be provided to the Scanner. +// If a syntax error is encountered and a handler was installed, Error +// is called with a position and an error message. The position points +// to the beginning of the offending token. +// +type ErrorHandler interface { + Error(pos token.Position, msg string) +} + +// ErrorVector implements the ErrorHandler interface. It maintains a list +// of errors which can be retrieved with GetErrorList and GetError. The +// zero value for an ErrorVector is an empty ErrorVector ready to use. +// +// A common usage pattern is to embed an ErrorVector alongside a +// scanner in a data structure that uses the scanner. By passing a +// reference to an ErrorVector to the scanner's Init call, default +// error handling is obtained. +// +type ErrorVector struct { + errors []*Error +} + +// Reset resets an ErrorVector to no errors. +func (h *ErrorVector) Reset() { h.errors = h.errors[:0] } + +// ErrorCount returns the number of errors collected. +func (h *ErrorVector) ErrorCount() int { return len(h.errors) } + +// Within ErrorVector, an error is represented by an Error node. The +// position Pos, if valid, points to the beginning of the offending +// token, and the error condition is described by Msg. +// +type Error struct { + Pos token.Position + Msg string +} + +func (e *Error) String() string { + if e.Pos.Filename != "" || e.Pos.IsValid() { + // don't print "<unknown position>" + // TODO(gri) reconsider the semantics of Position.IsValid + return e.Pos.String() + ": " + e.Msg + } + return e.Msg +} + +// An ErrorList is a (possibly sorted) list of Errors. +type ErrorList []*Error + +// ErrorList implements the sort Interface. +func (p ErrorList) Len() int { return len(p) } +func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func (p ErrorList) Less(i, j int) bool { + e := &p[i].Pos + f := &p[j].Pos + // Note that it is not sufficient to simply compare file offsets because + // the offsets do not reflect modified line information (through //line + // comments). + if e.Filename < f.Filename { + return true + } + if e.Filename == f.Filename { + if e.Line < f.Line { + return true + } + if e.Line == f.Line { + return e.Column < f.Column + } + } + return false +} + +func (p ErrorList) String() string { + switch len(p) { + case 0: + return "unspecified error" + case 1: + return p[0].String() + } + return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p)-1) +} + +// These constants control the construction of the ErrorList +// returned by GetErrors. +// +const ( + Raw = iota // leave error list unchanged + Sorted // sort error list by file, line, and column number + NoMultiples // sort error list and leave only the first error per line +) + +// GetErrorList returns the list of errors collected by an ErrorVector. 
+// The construction of the ErrorList returned is controlled by the mode +// parameter. If there are no errors, the result is nil. +// +func (h *ErrorVector) GetErrorList(mode int) ErrorList { + if len(h.errors) == 0 { + return nil + } + + list := make(ErrorList, len(h.errors)) + copy(list, h.errors) + + if mode >= Sorted { + sort.Sort(list) + } + + if mode >= NoMultiples { + var last token.Position // initial last.Line is != any legal error line + i := 0 + for _, e := range list { + if e.Pos.Filename != last.Filename || e.Pos.Line != last.Line { + last = e.Pos + list[i] = e + i++ + } + } + list = list[0:i] + } + + return list +} + +// GetError is like GetErrorList, but it returns an os.Error instead +// so that a nil result can be assigned to an os.Error variable and +// remains nil. +// +func (h *ErrorVector) GetError(mode int) os.Error { + if len(h.errors) == 0 { + return nil + } + + return h.GetErrorList(mode) +} + +// ErrorVector implements the ErrorHandler interface. +func (h *ErrorVector) Error(pos token.Position, msg string) { + h.errors = append(h.errors, &Error{pos, msg}) +} + +// PrintError is a utility function that prints a list of errors to w, +// one error per line, if the err parameter is an ErrorList. Otherwise +// it prints the err string. +// +func PrintError(w io.Writer, err os.Error) { + if list, ok := err.(ErrorList); ok { + for _, e := range list { + fmt.Fprintf(w, "%s\n", e) + } + } else { + fmt.Fprintf(w, "%s\n", err) + } +} diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go new file mode 100644 index 000000000..7f3dd2373 --- /dev/null +++ b/src/pkg/go/scanner/scanner.go @@ -0,0 +1,670 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package scanner implements a scanner for Go source text. Takes a []byte as +// source which can then be tokenized through repeated calls to the Scan +// function. Typical use: +// +// var s Scanner +// fset := token.NewFileSet() // position information is relative to fset +// file := fset.AddFile(filename, fset.Base(), len(src)) // register file +// s.Init(file, src, nil /* no error handler */, 0) +// for { +// pos, tok, lit := s.Scan() +// if tok == token.EOF { +// break +// } +// // do something here with pos, tok, and lit +// } +// +package scanner + +import ( + "bytes" + "fmt" + "go/token" + "path/filepath" + "strconv" + "unicode" + "utf8" +) + +// A Scanner holds the scanner's internal state while processing +// a given text. It can be allocated as part of another data +// structure but must be initialized via Init before use. +// +type Scanner struct { + // immutable state + file *token.File // source file handle + dir string // directory portion of file.Name() + src []byte // source + err ErrorHandler // error reporting; or nil + mode uint // scanning mode + + // scanning state + ch int // current character + offset int // character offset + rdOffset int // reading offset (position after current character) + lineOffset int // current line offset + insertSemi bool // insert a semicolon before next newline + + // public state - ok to modify + ErrorCount int // number of errors encountered +} + +// Read the next Unicode char into S.ch. +// S.ch < 0 means end-of-file. 
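+// A NUL byte and malformed UTF-8 are reported via S.error, but scanning
+// continues past them so that a single bad byte cannot stall the scanner.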
+// +func (S *Scanner) next() { + if S.rdOffset < len(S.src) { + S.offset = S.rdOffset + if S.ch == '\n' { + S.lineOffset = S.offset + S.file.AddLine(S.offset) + } + r, w := int(S.src[S.rdOffset]), 1 + switch { + case r == 0: + S.error(S.offset, "illegal character NUL") + case r >= 0x80: + // not ASCII + r, w = utf8.DecodeRune(S.src[S.rdOffset:]) + if r == utf8.RuneError && w == 1 { + S.error(S.offset, "illegal UTF-8 encoding") + } + } + S.rdOffset += w + S.ch = r + } else { + S.offset = len(S.src) + if S.ch == '\n' { + S.lineOffset = S.offset + S.file.AddLine(S.offset) + } + S.ch = -1 // eof + } +} + +// The mode parameter to the Init function is a set of flags (or 0). +// They control scanner behavior. +// +const ( + ScanComments = 1 << iota // return comments as COMMENT tokens + AllowIllegalChars // do not report an error for illegal chars + InsertSemis // automatically insert semicolons +) + +// Init prepares the scanner S to tokenize the text src by setting the +// scanner at the beginning of src. The scanner uses the file set file +// for position information and it adds line information for each line. +// It is ok to re-use the same file when re-scanning the same file as +// line information which is already present is ignored. Init causes a +// panic if the file size does not match the src size. +// +// Calls to Scan will use the error handler err if they encounter a +// syntax error and err is not nil. Also, for each error encountered, +// the Scanner field ErrorCount is incremented by one. The mode parameter +// determines how comments, illegal characters, and semicolons are handled. +// +// Note that Init may call err if there is an error in the first character +// of the file. +// +func (S *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode uint) { + // Explicitly initialize all fields since a scanner may be reused. 
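+	// Reject inconsistent input first: the token.File registered with the
+	// file set must cover exactly len(src) bytes (this is the panic
+	// documented above).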
+ if file.Size() != len(src) { + panic("file size does not match src len") + } + S.file = file + S.dir, _ = filepath.Split(file.Name()) + S.src = src + S.err = err + S.mode = mode + + S.ch = ' ' + S.offset = 0 + S.rdOffset = 0 + S.lineOffset = 0 + S.insertSemi = false + S.ErrorCount = 0 + + S.next() +} + +func (S *Scanner) error(offs int, msg string) { + if S.err != nil { + S.err.Error(S.file.Position(S.file.Pos(offs)), msg) + } + S.ErrorCount++ +} + +var prefix = []byte("//line ") + +func (S *Scanner) interpretLineComment(text []byte) { + if bytes.HasPrefix(text, prefix) { + // get filename and line number, if any + if i := bytes.LastIndex(text, []byte{':'}); i > 0 { + if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 { + // valid //line filename:line comment; + filename := filepath.Clean(string(text[len(prefix):i])) + if !filepath.IsAbs(filename) { + // make filename relative to current directory + filename = filepath.Join(S.dir, filename) + } + // update scanner position + S.file.AddLineInfo(S.lineOffset, filename, line-1) // -1 since comment applies to next line + } + } + } +} + +func (S *Scanner) scanComment() { + // initial '/' already consumed; S.ch == '/' || S.ch == '*' + offs := S.offset - 1 // position of initial '/' + + if S.ch == '/' { + //-style comment + S.next() + for S.ch != '\n' && S.ch >= 0 { + S.next() + } + if offs == S.lineOffset { + // comment starts at the beginning of the current line + S.interpretLineComment(S.src[offs:S.offset]) + } + return + } + + /*-style comment */ + S.next() + for S.ch >= 0 { + ch := S.ch + S.next() + if ch == '*' && S.ch == '/' { + S.next() + return + } + } + + S.error(offs, "comment not terminated") +} + +func (S *Scanner) findLineEnd() bool { + // initial '/' already consumed + + defer func(offs int) { + // reset scanner state to where it was upon calling findLineEnd + S.ch = '/' + S.offset = offs + S.rdOffset = offs + 1 + S.next() // consume initial '/' again + }(S.offset - 1) + + // read ahead until a newline, EOF, or non-comment token is found + for S.ch == '/' || S.ch == '*' { + if S.ch == '/' { + //-style comment always contains a newline + return true + } + /*-style comment: look for newline */ + S.next() + for S.ch >= 0 { + ch := S.ch + if ch == '\n' { + return true + } + S.next() + if ch == '*' && S.ch == '/' { + S.next() + break + } + } + S.skipWhitespace() // S.insertSemi is set + if S.ch < 0 || S.ch == '\n' { + return true + } + if S.ch != '/' { + // non-comment token + return false + } + S.next() // consume '/' + } + + return false +} + +func isLetter(ch int) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) +} + +func isDigit(ch int) bool { + return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) +} + +func (S *Scanner) scanIdentifier() token.Token { + offs := S.offset + for isLetter(S.ch) || isDigit(S.ch) { + S.next() + } + return token.Lookup(S.src[offs:S.offset]) +} + +func digitVal(ch int) int { + switch { + case '0' <= ch && ch <= '9': + return ch - '0' + case 'a' <= ch && ch <= 'f': + return ch - 'a' + 10 + case 'A' <= ch && ch <= 'F': + return ch - 'A' + 10 + } + return 16 // larger than any legal digit val +} + +func (S *Scanner) scanMantissa(base int) { + for digitVal(S.ch) < base { + S.next() + } +} + +func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token { + // digitVal(S.ch) < 10 + tok := token.INT + + if seenDecimalPoint { + tok = token.FLOAT + S.scanMantissa(10) + goto exponent + } + + if S.ch == '0' { + 
// int or float + offs := S.offset + S.next() + if S.ch == 'x' || S.ch == 'X' { + // hexadecimal int + S.next() + S.scanMantissa(16) + if S.offset-offs <= 2 { + // only scanned "0x" or "0X" + S.error(offs, "illegal hexadecimal number") + } + } else { + // octal int or float + seenDecimalDigit := false + S.scanMantissa(8) + if S.ch == '8' || S.ch == '9' { + // illegal octal int or float + seenDecimalDigit = true + S.scanMantissa(10) + } + if S.ch == '.' || S.ch == 'e' || S.ch == 'E' || S.ch == 'i' { + goto fraction + } + // octal int + if seenDecimalDigit { + S.error(offs, "illegal octal number") + } + } + goto exit + } + + // decimal int or float + S.scanMantissa(10) + +fraction: + if S.ch == '.' { + tok = token.FLOAT + S.next() + S.scanMantissa(10) + } + +exponent: + if S.ch == 'e' || S.ch == 'E' { + tok = token.FLOAT + S.next() + if S.ch == '-' || S.ch == '+' { + S.next() + } + S.scanMantissa(10) + } + + if S.ch == 'i' { + tok = token.IMAG + S.next() + } + +exit: + return tok +} + +func (S *Scanner) scanEscape(quote int) { + offs := S.offset + + var i, base, max uint32 + switch S.ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: + S.next() + return + case '0', '1', '2', '3', '4', '5', '6', '7': + i, base, max = 3, 8, 255 + case 'x': + S.next() + i, base, max = 2, 16, 255 + case 'u': + S.next() + i, base, max = 4, 16, unicode.MaxRune + case 'U': + S.next() + i, base, max = 8, 16, unicode.MaxRune + default: + S.next() // always make progress + S.error(offs, "unknown escape sequence") + return + } + + var x uint32 + for ; i > 0 && S.ch != quote && S.ch >= 0; i-- { + d := uint32(digitVal(S.ch)) + if d >= base { + S.error(S.offset, "illegal character in escape sequence") + break + } + x = x*base + d + S.next() + } + // in case of an error, consume remaining chars + for ; i > 0 && S.ch != quote && S.ch >= 0; i-- { + S.next() + } + if x > max || 0xd800 <= x && x < 0xe000 { + S.error(offs, "escape sequence is invalid Unicode code point") + } +} + +func (S *Scanner) scanChar() { + // '\'' opening already consumed + offs := S.offset - 1 + + n := 0 + for S.ch != '\'' { + ch := S.ch + n++ + S.next() + if ch == '\n' || ch < 0 { + S.error(offs, "character literal not terminated") + n = 1 + break + } + if ch == '\\' { + S.scanEscape('\'') + } + } + + S.next() + + if n != 1 { + S.error(offs, "illegal character literal") + } +} + +func (S *Scanner) scanString() { + // '"' opening already consumed + offs := S.offset - 1 + + for S.ch != '"' { + ch := S.ch + S.next() + if ch == '\n' || ch < 0 { + S.error(offs, "string not terminated") + break + } + if ch == '\\' { + S.scanEscape('"') + } + } + + S.next() +} + +func (S *Scanner) scanRawString() { + // '`' opening already consumed + offs := S.offset - 1 + + for S.ch != '`' { + ch := S.ch + S.next() + if ch < 0 { + S.error(offs, "string not terminated") + break + } + } + + S.next() +} + +func (S *Scanner) skipWhitespace() { + for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' { + S.next() + } +} + +// Helper functions for scanning multi-byte tokens such as >> += >>= . +// Different routines recognize different length tok_i based on matches +// of ch_i. If a token ends in '=', the result is tok1 or tok3 +// respectively. Otherwise, the result is tok0 if there was no other +// matching character, or tok2 if the matching character was ch2. 
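+//
+// For illustration (using the '<' case from Scan below): after '<' has
+// been consumed,
+//
+//	S.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
+//
+// returns LSS for "<", LEQ for "<=", SHL for "<<", and SHL_ASSIGN for "<<=".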
+ +func (S *Scanner) switch2(tok0, tok1 token.Token) token.Token { + if S.ch == '=' { + S.next() + return tok1 + } + return tok0 +} + +func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 int, tok2 token.Token) token.Token { + if S.ch == '=' { + S.next() + return tok1 + } + if S.ch == ch2 { + S.next() + return tok2 + } + return tok0 +} + +func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Token) token.Token { + if S.ch == '=' { + S.next() + return tok1 + } + if S.ch == ch2 { + S.next() + if S.ch == '=' { + S.next() + return tok3 + } + return tok2 + } + return tok0 +} + +// Scan scans the next token and returns the token position, +// the token, and the literal string corresponding to the +// token. The source end is indicated by token.EOF. +// +// If the returned token is token.SEMICOLON, the corresponding +// literal string is ";" if the semicolon was present in the source, +// and "\n" if the semicolon was inserted because of a newline or +// at EOF. +// +// For more tolerant parsing, Scan will return a valid token if +// possible even if a syntax error was encountered. Thus, even +// if the resulting token sequence contains no illegal tokens, +// a client may not assume that no error occurred. Instead it +// must check the scanner's ErrorCount or the number of calls +// of the error handler, if there was one installed. +// +// Scan adds line information to the file added to the file +// set with Init. Token positions are relative to that file +// and thus relative to the file set. +// +func (S *Scanner) Scan() (token.Pos, token.Token, string) { +scanAgain: + S.skipWhitespace() + + // current token start + insertSemi := false + offs := S.offset + tok := token.ILLEGAL + + // determine token value + switch ch := S.ch; { + case isLetter(ch): + tok = S.scanIdentifier() + switch tok { + case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN: + insertSemi = true + } + case digitVal(ch) < 10: + insertSemi = true + tok = S.scanNumber(false) + default: + S.next() // always make progress + switch ch { + case -1: + if S.insertSemi { + S.insertSemi = false // EOF consumed + return S.file.Pos(offs), token.SEMICOLON, "\n" + } + tok = token.EOF + case '\n': + // we only reach here if S.insertSemi was + // set in the first place and exited early + // from S.skipWhitespace() + S.insertSemi = false // newline consumed + return S.file.Pos(offs), token.SEMICOLON, "\n" + case '"': + insertSemi = true + tok = token.STRING + S.scanString() + case '\'': + insertSemi = true + tok = token.CHAR + S.scanChar() + case '`': + insertSemi = true + tok = token.STRING + S.scanRawString() + case ':': + tok = S.switch2(token.COLON, token.DEFINE) + case '.': + if digitVal(S.ch) < 10 { + insertSemi = true + tok = S.scanNumber(true) + } else if S.ch == '.' { + S.next() + if S.ch == '.' 
{ + S.next() + tok = token.ELLIPSIS + } + } else { + tok = token.PERIOD + } + case ',': + tok = token.COMMA + case ';': + tok = token.SEMICOLON + case '(': + tok = token.LPAREN + case ')': + insertSemi = true + tok = token.RPAREN + case '[': + tok = token.LBRACK + case ']': + insertSemi = true + tok = token.RBRACK + case '{': + tok = token.LBRACE + case '}': + insertSemi = true + tok = token.RBRACE + case '+': + tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC) + if tok == token.INC { + insertSemi = true + } + case '-': + tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC) + if tok == token.DEC { + insertSemi = true + } + case '*': + tok = S.switch2(token.MUL, token.MUL_ASSIGN) + case '/': + if S.ch == '/' || S.ch == '*' { + // comment + if S.insertSemi && S.findLineEnd() { + // reset position to the beginning of the comment + S.ch = '/' + S.offset = offs + S.rdOffset = offs + 1 + S.insertSemi = false // newline consumed + return S.file.Pos(offs), token.SEMICOLON, "\n" + } + S.scanComment() + if S.mode&ScanComments == 0 { + // skip comment + S.insertSemi = false // newline consumed + goto scanAgain + } + tok = token.COMMENT + } else { + tok = S.switch2(token.QUO, token.QUO_ASSIGN) + } + case '%': + tok = S.switch2(token.REM, token.REM_ASSIGN) + case '^': + tok = S.switch2(token.XOR, token.XOR_ASSIGN) + case '<': + if S.ch == '-' { + S.next() + tok = token.ARROW + } else { + tok = S.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN) + } + case '>': + tok = S.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN) + case '=': + tok = S.switch2(token.ASSIGN, token.EQL) + case '!': + tok = S.switch2(token.NOT, token.NEQ) + case '&': + if S.ch == '^' { + S.next() + tok = S.switch2(token.AND_NOT, token.AND_NOT_ASSIGN) + } else { + tok = S.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND) + } + case '|': + tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR) + default: + if S.mode&AllowIllegalChars == 0 { + S.error(offs, fmt.Sprintf("illegal character %#U", ch)) + } + insertSemi = S.insertSemi // preserve insertSemi info + } + } + + if S.mode&InsertSemis != 0 { + S.insertSemi = insertSemi + } + + // TODO(gri): The scanner API should change such that the literal string + // is only valid if an actual literal was scanned. This will + // permit a more efficient implementation. + return S.file.Pos(offs), tok, string(S.src[offs:S.offset]) +} diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go new file mode 100644 index 000000000..eb9e1cb81 --- /dev/null +++ b/src/pkg/go/scanner/scanner_test.go @@ -0,0 +1,672 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
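+
+// This file tests token recognition and position tracking (TestScan),
+// automatic semicolon insertion (TestSemis), //line comment handling
+// (TestLineComments), scanner re-initialization (TestInit), illegal
+// character modes (TestIllegalChars), and error collection and reporting
+// (TestStdErrorHander, TestScanErrors).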
+ +package scanner + +import ( + "go/token" + "os" + "path/filepath" + "runtime" + "testing" +) + +var fset = token.NewFileSet() + +const /* class */ ( + special = iota + literal + operator + keyword +) + +func tokenclass(tok token.Token) int { + switch { + case tok.IsLiteral(): + return literal + case tok.IsOperator(): + return operator + case tok.IsKeyword(): + return keyword + } + return special +} + +type elt struct { + tok token.Token + lit string + class int +} + +var tokens = [...]elt{ + // Special tokens + {token.COMMENT, "/* a comment */", special}, + {token.COMMENT, "// a comment \n", special}, + + // Identifiers and basic type literals + {token.IDENT, "foobar", literal}, + {token.IDENT, "a۰۱۸", literal}, + {token.IDENT, "foo६४", literal}, + {token.IDENT, "bar9876", literal}, + {token.INT, "0", literal}, + {token.INT, "1", literal}, + {token.INT, "123456789012345678890", literal}, + {token.INT, "01234567", literal}, + {token.INT, "0xcafebabe", literal}, + {token.FLOAT, "0.", literal}, + {token.FLOAT, ".0", literal}, + {token.FLOAT, "3.14159265", literal}, + {token.FLOAT, "1e0", literal}, + {token.FLOAT, "1e+100", literal}, + {token.FLOAT, "1e-100", literal}, + {token.FLOAT, "2.71828e-1000", literal}, + {token.IMAG, "0i", literal}, + {token.IMAG, "1i", literal}, + {token.IMAG, "012345678901234567889i", literal}, + {token.IMAG, "123456789012345678890i", literal}, + {token.IMAG, "0.i", literal}, + {token.IMAG, ".0i", literal}, + {token.IMAG, "3.14159265i", literal}, + {token.IMAG, "1e0i", literal}, + {token.IMAG, "1e+100i", literal}, + {token.IMAG, "1e-100i", literal}, + {token.IMAG, "2.71828e-1000i", literal}, + {token.CHAR, "'a'", literal}, + {token.CHAR, "'\\000'", literal}, + {token.CHAR, "'\\xFF'", literal}, + {token.CHAR, "'\\uff16'", literal}, + {token.CHAR, "'\\U0000ff16'", literal}, + {token.STRING, "`foobar`", literal}, + {token.STRING, "`" + `foo + bar` + + "`", + literal, + }, + + // Operators and delimiters + {token.ADD, "+", operator}, + {token.SUB, "-", operator}, + {token.MUL, "*", operator}, + {token.QUO, "/", operator}, + {token.REM, "%", operator}, + + {token.AND, "&", operator}, + {token.OR, "|", operator}, + {token.XOR, "^", operator}, + {token.SHL, "<<", operator}, + {token.SHR, ">>", operator}, + {token.AND_NOT, "&^", operator}, + + {token.ADD_ASSIGN, "+=", operator}, + {token.SUB_ASSIGN, "-=", operator}, + {token.MUL_ASSIGN, "*=", operator}, + {token.QUO_ASSIGN, "/=", operator}, + {token.REM_ASSIGN, "%=", operator}, + + {token.AND_ASSIGN, "&=", operator}, + {token.OR_ASSIGN, "|=", operator}, + {token.XOR_ASSIGN, "^=", operator}, + {token.SHL_ASSIGN, "<<=", operator}, + {token.SHR_ASSIGN, ">>=", operator}, + {token.AND_NOT_ASSIGN, "&^=", operator}, + + {token.LAND, "&&", operator}, + {token.LOR, "||", operator}, + {token.ARROW, "<-", operator}, + {token.INC, "++", operator}, + {token.DEC, "--", operator}, + + {token.EQL, "==", operator}, + {token.LSS, "<", operator}, + {token.GTR, ">", operator}, + {token.ASSIGN, "=", operator}, + {token.NOT, "!", operator}, + + {token.NEQ, "!=", operator}, + {token.LEQ, "<=", operator}, + {token.GEQ, ">=", operator}, + {token.DEFINE, ":=", operator}, + {token.ELLIPSIS, "...", operator}, + + {token.LPAREN, "(", operator}, + {token.LBRACK, "[", operator}, + {token.LBRACE, "{", operator}, + {token.COMMA, ",", operator}, + {token.PERIOD, ".", operator}, + + {token.RPAREN, ")", operator}, + {token.RBRACK, "]", operator}, + {token.RBRACE, "}", operator}, + {token.SEMICOLON, ";", operator}, + {token.COLON, ":", operator}, + + // 
Keywords + {token.BREAK, "break", keyword}, + {token.CASE, "case", keyword}, + {token.CHAN, "chan", keyword}, + {token.CONST, "const", keyword}, + {token.CONTINUE, "continue", keyword}, + + {token.DEFAULT, "default", keyword}, + {token.DEFER, "defer", keyword}, + {token.ELSE, "else", keyword}, + {token.FALLTHROUGH, "fallthrough", keyword}, + {token.FOR, "for", keyword}, + + {token.FUNC, "func", keyword}, + {token.GO, "go", keyword}, + {token.GOTO, "goto", keyword}, + {token.IF, "if", keyword}, + {token.IMPORT, "import", keyword}, + + {token.INTERFACE, "interface", keyword}, + {token.MAP, "map", keyword}, + {token.PACKAGE, "package", keyword}, + {token.RANGE, "range", keyword}, + {token.RETURN, "return", keyword}, + + {token.SELECT, "select", keyword}, + {token.STRUCT, "struct", keyword}, + {token.SWITCH, "switch", keyword}, + {token.TYPE, "type", keyword}, + {token.VAR, "var", keyword}, +} + +const whitespace = " \t \n\n\n" // to separate tokens + +type testErrorHandler struct { + t *testing.T +} + +func (h *testErrorHandler) Error(pos token.Position, msg string) { + h.t.Errorf("Error() called (msg = %s)", msg) +} + +func newlineCount(s string) int { + n := 0 + for i := 0; i < len(s); i++ { + if s[i] == '\n' { + n++ + } + } + return n +} + +func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) { + pos := fset.Position(p) + if pos.Filename != expected.Filename { + t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) + } + if pos.Offset != expected.Offset { + t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) + } + if pos.Line != expected.Line { + t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) + } + if pos.Column != expected.Column { + t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) + } +} + +// Verify that calling Scan() provides the correct results. 
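+// The test source is built by concatenating each expected literal with a
+// fixed whitespace separator, so the expected offset, line, and column of
+// every token can be advanced incrementally as tokens are consumed.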
+func TestScan(t *testing.T) { + // make source + var src string + for _, e := range tokens { + src += e.lit + whitespace + } + src_linecount := newlineCount(src) + whitespace_linecount := newlineCount(whitespace) + + // verify scan + var s Scanner + s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), &testErrorHandler{t}, ScanComments) + index := 0 + epos := token.Position{"", 0, 1, 1} // expected position + for { + pos, tok, lit := s.Scan() + e := elt{token.EOF, "", special} + if index < len(tokens) { + e = tokens[index] + } + if tok == token.EOF { + lit = "<EOF>" + epos.Line = src_linecount + epos.Column = 2 + } + checkPos(t, lit, pos, epos) + if tok != e.tok { + t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String()) + } + if e.tok.IsLiteral() && lit != e.lit { + t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit) + } + if tokenclass(tok) != e.class { + t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) + } + epos.Offset += len(lit) + len(whitespace) + epos.Line += newlineCount(lit) + whitespace_linecount + if tok == token.COMMENT && lit[1] == '/' { + // correct for unaccounted '/n' in //-style comment + epos.Offset++ + epos.Line++ + } + index++ + if tok == token.EOF { + break + } + } + if s.ErrorCount != 0 { + t.Errorf("found %d errors", s.ErrorCount) + } +} + +func checkSemi(t *testing.T, line string, mode uint) { + var S Scanner + file := fset.AddFile("TestSemis", fset.Base(), len(line)) + S.Init(file, []byte(line), nil, mode) + pos, tok, lit := S.Scan() + for tok != token.EOF { + if tok == token.ILLEGAL { + // the illegal token literal indicates what + // kind of semicolon literal to expect + semiLit := "\n" + if lit[0] == '#' { + semiLit = ";" + } + // next token must be a semicolon + semiPos := file.Position(pos) + semiPos.Offset++ + semiPos.Column++ + pos, tok, lit = S.Scan() + if tok == token.SEMICOLON { + if lit != semiLit { + t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit) + } + checkPos(t, line, pos, semiPos) + } else { + t.Errorf("bad token for %q: got %s, expected ;", line, tok.String()) + } + } else if tok == token.SEMICOLON { + t.Errorf("bad token for %q: got ;, expected no ;", line) + } + pos, tok, lit = S.Scan() + } +} + +var lines = []string{ + // # indicates a semicolon present in the source + // $ indicates an automatically inserted semicolon + "", + "#;", + "foo$\n", + "123$\n", + "1.2$\n", + "'x'$\n", + `"x"` + "$\n", + "`x`$\n", + + "+\n", + "-\n", + "*\n", + "/\n", + "%\n", + + "&\n", + "|\n", + "^\n", + "<<\n", + ">>\n", + "&^\n", + + "+=\n", + "-=\n", + "*=\n", + "/=\n", + "%=\n", + + "&=\n", + "|=\n", + "^=\n", + "<<=\n", + ">>=\n", + "&^=\n", + + "&&\n", + "||\n", + "<-\n", + "++$\n", + "--$\n", + + "==\n", + "<\n", + ">\n", + "=\n", + "!\n", + + "!=\n", + "<=\n", + ">=\n", + ":=\n", + "...\n", + + "(\n", + "[\n", + "{\n", + ",\n", + ".\n", + + ")$\n", + "]$\n", + "}$\n", + "#;\n", + ":\n", + + "break$\n", + "case\n", + "chan\n", + "const\n", + "continue$\n", + + "default\n", + "defer\n", + "else\n", + "fallthrough$\n", + "for\n", + + "func\n", + "go\n", + "goto\n", + "if\n", + "import\n", + + "interface\n", + "map\n", + "package\n", + "range\n", + "return$\n", + + "select\n", + "struct\n", + "switch\n", + "type\n", + "var\n", + + "foo$//comment\n", + "foo$//comment", + "foo$/*comment*/\n", + "foo$/*\n*/", + "foo$/*comment*/ \n", + "foo$/*\n*/ ", + + "foo $// comment\n", + "foo $// comment", + "foo $/*comment*/\n", + "foo $/*\n*/", + "foo $/* */ /* \n 
*/ bar$/**/\n", + "foo $/*0*/ /*1*/ /*2*/\n", + + "foo $/*comment*/ \n", + "foo $/*0*/ /*1*/ /*2*/ \n", + "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n", + "foo $/* an EOF terminates a line */", + "foo $/* an EOF terminates a line */ /*", + "foo $/* an EOF terminates a line */ //", + + "package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n", + "package main$", +} + +func TestSemis(t *testing.T) { + for _, line := range lines { + checkSemi(t, line, AllowIllegalChars|InsertSemis) + checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments) + + // if the input ended in newlines, the input must tokenize the + // same with or without those newlines + for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { + checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis) + checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis|ScanComments) + } + } +} + +type segment struct { + srcline string // a line of source text + filename string // filename for current token + line int // line number for current token +} + +var segments = []segment{ + // exactly one token per line since the test consumes one token per segment + {" line1", filepath.Join("dir", "TestLineComments"), 1}, + {"\nline2", filepath.Join("dir", "TestLineComments"), 2}, + {"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored + {"\nline4", filepath.Join("dir", "TestLineComments"), 4}, + {"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100}, + {"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200}, + {"\n//line :1\n line1", "dir", 1}, + {"\n//line foo:42\n line42", filepath.Join("dir", "foo"), 42}, + {"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44}, // bad line comment, ignored + {"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46}, // bad line comment, ignored + {"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored + {"\n//line ./foo:42\n line42", filepath.Join("dir", "foo"), 42}, + {"\n//line a/b/c/File1.go:100\n line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100}, +} + +var unixsegments = []segment{ + {"\n//line /bar:42\n line42", "/bar", 42}, +} + +var winsegments = []segment{ + {"\n//line c:\\bar:42\n line42", "c:\\bar", 42}, + {"\n//line c:\\dir\\File1.go:100\n line100", "c:\\dir\\File1.go", 100}, +} + +// Verify that comments of the form "//line filename:line" are interpreted correctly. +func TestLineComments(t *testing.T) { + segs := segments + if runtime.GOOS == "windows" { + segs = append(segs, winsegments...) + } else { + segs = append(segs, unixsegments...) + } + + // make source + var src string + for _, e := range segs { + src += e.srcline + } + + // verify scan + var S Scanner + file := fset.AddFile(filepath.Join("dir", "TestLineComments"), fset.Base(), len(src)) + S.Init(file, []byte(src), nil, 0) + for _, s := range segs { + p, _, lit := S.Scan() + pos := file.Position(p) + checkPos(t, lit, p, token.Position{s.filename, pos.Offset, s.line, pos.Column}) + } + + if S.ErrorCount != 0 { + t.Errorf("found %d errors", S.ErrorCount) + } +} + +// Verify that initializing the same scanner more then once works correctly. 
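+// (Re-use works because Init explicitly resets all scanner state rather
+// than assuming zeroed fields; see the comment at the top of Init.)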
+func TestInit(t *testing.T) { + var s Scanner + + // 1st init + src1 := "if true { }" + f1 := fset.AddFile("src1", fset.Base(), len(src1)) + s.Init(f1, []byte(src1), nil, 0) + if f1.Size() != len(src1) { + t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) + } + s.Scan() // if + s.Scan() // true + _, tok, _ := s.Scan() // { + if tok != token.LBRACE { + t.Errorf("bad token: got %s, expected %s", tok.String(), token.LBRACE) + } + + // 2nd init + src2 := "go true { ]" + f2 := fset.AddFile("src2", fset.Base(), len(src2)) + s.Init(f2, []byte(src2), nil, 0) + if f2.Size() != len(src2) { + t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) + } + _, tok, _ = s.Scan() // go + if tok != token.GO { + t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO) + } + + if s.ErrorCount != 0 { + t.Errorf("found %d errors", s.ErrorCount) + } +} + +func TestIllegalChars(t *testing.T) { + var s Scanner + + const src = "*?*$*@*" + file := fset.AddFile("", fset.Base(), len(src)) + s.Init(file, []byte(src), &testErrorHandler{t}, AllowIllegalChars) + for offs, ch := range src { + pos, tok, lit := s.Scan() + if poffs := file.Offset(pos); poffs != offs { + t.Errorf("bad position for %s: got %d, expected %d", lit, poffs, offs) + } + if tok == token.ILLEGAL && lit != string(ch) { + t.Errorf("bad token: got %s, expected %s", lit, string(ch)) + } + } + + if s.ErrorCount != 0 { + t.Errorf("found %d errors", s.ErrorCount) + } +} + +func TestStdErrorHander(t *testing.T) { + const src = "@\n" + // illegal character, cause an error + "@ @\n" + // two errors on the same line + "//line File2:20\n" + + "@\n" + // different file, but same line + "//line File2:1\n" + + "@ @\n" + // same file, decreasing line number + "//line File1:1\n" + + "@ @ @" // original file, line 1 again + + v := new(ErrorVector) + var s Scanner + s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), v, 0) + for { + if _, tok, _ := s.Scan(); tok == token.EOF { + break + } + } + + list := v.GetErrorList(Raw) + if len(list) != 9 { + t.Errorf("found %d raw errors, expected 9", len(list)) + PrintError(os.Stderr, list) + } + + list = v.GetErrorList(Sorted) + if len(list) != 9 { + t.Errorf("found %d sorted errors, expected 9", len(list)) + PrintError(os.Stderr, list) + } + + list = v.GetErrorList(NoMultiples) + if len(list) != 4 { + t.Errorf("found %d one-per-line errors, expected 4", len(list)) + PrintError(os.Stderr, list) + } + + if v.ErrorCount() != s.ErrorCount { + t.Errorf("found %d errors, expected %d", v.ErrorCount(), s.ErrorCount) + } +} + +type errorCollector struct { + cnt int // number of errors encountered + msg string // last error message encountered + pos token.Position // last error position encountered +} + +func (h *errorCollector) Error(pos token.Position, msg string) { + h.cnt++ + h.msg = msg + h.pos = pos +} + +func checkError(t *testing.T, src string, tok token.Token, pos int, err string) { + var s Scanner + var h errorCollector + s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), &h, ScanComments) + _, tok0, _ := s.Scan() + _, tok1, _ := s.Scan() + if tok0 != tok { + t.Errorf("%q: got %s, expected %s", src, tok0, tok) + } + if tok1 != token.EOF { + t.Errorf("%q: got %s, expected EOF", src, tok1) + } + cnt := 0 + if err != "" { + cnt = 1 + } + if h.cnt != cnt { + t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt) + } + if h.msg != err { + t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) + } + if h.pos.Offset != pos { + t.Errorf("%q: got offset %d, expected 
%d", src, h.pos.Offset, pos) + } +} + +var errors = []struct { + src string + tok token.Token + pos int + err string +}{ + {"\a", token.ILLEGAL, 0, "illegal character U+0007"}, + {`#`, token.ILLEGAL, 0, "illegal character U+0023 '#'"}, + {`…`, token.ILLEGAL, 0, "illegal character U+2026 '…'"}, + {`' '`, token.CHAR, 0, ""}, + {`''`, token.CHAR, 0, "illegal character literal"}, + {`'\8'`, token.CHAR, 2, "unknown escape sequence"}, + {`'\08'`, token.CHAR, 3, "illegal character in escape sequence"}, + {`'\x0g'`, token.CHAR, 4, "illegal character in escape sequence"}, + {`'\Uffffffff'`, token.CHAR, 2, "escape sequence is invalid Unicode code point"}, + {`'`, token.CHAR, 0, "character literal not terminated"}, + {`""`, token.STRING, 0, ""}, + {`"`, token.STRING, 0, "string not terminated"}, + {"``", token.STRING, 0, ""}, + {"`", token.STRING, 0, "string not terminated"}, + {"/**/", token.COMMENT, 0, ""}, + {"/*", token.COMMENT, 0, "comment not terminated"}, + {"077", token.INT, 0, ""}, + {"078.", token.FLOAT, 0, ""}, + {"07801234567.", token.FLOAT, 0, ""}, + {"078e0", token.FLOAT, 0, ""}, + {"078", token.INT, 0, "illegal octal number"}, + {"07800000009", token.INT, 0, "illegal octal number"}, + {"0x", token.INT, 0, "illegal hexadecimal number"}, + {"0X", token.INT, 0, "illegal hexadecimal number"}, + {"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"}, + {"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"}, +} + +func TestScanErrors(t *testing.T) { + for _, e := range errors { + checkError(t, e.src, e.tok, e.pos, e.err) + } +} diff --git a/src/pkg/go/token/Makefile b/src/pkg/go/token/Makefile new file mode 100644 index 000000000..4a4e64dc8 --- /dev/null +++ b/src/pkg/go/token/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/token +GOFILES=\ + position.go\ + token.go\ + +include ../../../Make.pkg diff --git a/src/pkg/go/token/position.go b/src/pkg/go/token/position.go new file mode 100644 index 000000000..c559e19f8 --- /dev/null +++ b/src/pkg/go/token/position.go @@ -0,0 +1,424 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO(gri) consider making this a separate package outside the go directory. + +package token + +import ( + "fmt" + "sort" + "sync" +) + +// Position describes an arbitrary source position +// including the file, line, and column location. +// A Position is valid if the line number is > 0. +// +type Position struct { + Filename string // filename, if any + Offset int // offset, starting at 0 + Line int // line number, starting at 1 + Column int // column number, starting at 1 (character count) +} + +// IsValid returns true if the position is valid. +func (pos *Position) IsValid() bool { return pos.Line > 0 } + +// String returns a string in one of several forms: +// +// file:line:column valid position with file name +// line:column valid position without file name +// file invalid position with file name +// - invalid position without file name +// +func (pos Position) String() string { + s := pos.Filename + if pos.IsValid() { + if s != "" { + s += ":" + } + s += fmt.Sprintf("%d:%d", pos.Line, pos.Column) + } + if s == "" { + s = "-" + } + return s +} + +// Pos is a compact encoding of a source position within a file set. 
+// It can be converted into a Position for a more convenient, but much +// larger, representation. +// +// The Pos value for a given file is a number in the range [base, base+size], +// where base and size are specified when adding the file to the file set via +// AddFile. +// +// To create the Pos value for a specific source offset, first add +// the respective file to the current file set (via FileSet.AddFile) +// and then call File.Pos(offset) for that file. Given a Pos value p +// for a specific file set fset, the corresponding Position value is +// obtained by calling fset.Position(p). +// +// Pos values can be compared directly with the usual comparison operators: +// If two Pos values p and q are in the same file, comparing p and q is +// equivalent to comparing the respective source file offsets. If p and q +// are in different files, p < q is true if the file implied by p was added +// to the respective file set before the file implied by q. +// +type Pos int + +// The zero value for Pos is NoPos; there is no file and line information +// associated with it, and NoPos().IsValid() is false. NoPos is always +// smaller than any other Pos value. The corresponding Position value +// for NoPos is the zero value for Position. +// +const NoPos Pos = 0 + +// IsValid returns true if the position is valid. +func (p Pos) IsValid() bool { + return p != NoPos +} + +func searchFiles(a []*File, x int) int { + return sort.Search(len(a), func(i int) bool { return a[i].base > x }) - 1 +} + +func (s *FileSet) file(p Pos) *File { + if f := s.last; f != nil && f.base <= int(p) && int(p) <= f.base+f.size { + return f + } + if i := searchFiles(s.files, int(p)); i >= 0 { + f := s.files[i] + // f.base <= int(p) by definition of searchFiles + if int(p) <= f.base+f.size { + s.last = f + return f + } + } + return nil +} + +// File returns the file which contains the position p. +// If no such file is found (for instance for p == NoPos), +// the result is nil. +// +func (s *FileSet) File(p Pos) (f *File) { + if p != NoPos { + s.mutex.RLock() + f = s.file(p) + s.mutex.RUnlock() + } + return +} + +func (f *File) position(p Pos) (pos Position) { + offset := int(p) - f.base + pos.Offset = offset + pos.Filename, pos.Line, pos.Column = f.info(offset) + return +} + +// Position converts a Pos in the fileset into a general Position. +func (s *FileSet) Position(p Pos) (pos Position) { + if p != NoPos { + // TODO(gri) consider optimizing the case where p + // is in the last file added, or perhaps + // looked at - will eliminate one level + // of search + s.mutex.RLock() + if f := s.file(p); f != nil { + pos = f.position(p) + } + s.mutex.RUnlock() + } + return +} + +type lineInfo struct { + offset int + filename string + line int +} + +// AddLineInfo adds alternative file and line number information for +// a given file offset. The offset must be larger than the offset for +// the previously added alternative line info and smaller than the +// file size; otherwise the information is ignored. +// +// AddLineInfo is typically used to register alternative position +// information for //line filename:line comments in source files. +// +func (f *File) AddLineInfo(offset int, filename string, line int) { + f.set.mutex.Lock() + if i := len(f.infos); i == 0 || f.infos[i-1].offset < offset && offset < f.size { + f.infos = append(f.infos, lineInfo{offset, filename, line}) + } + f.set.mutex.Unlock() +} + +// A File is a handle for a file belonging to a FileSet. +// A File has a name, size, and line offset table. 
+// +type File struct { + set *FileSet + name string // file name as provided to AddFile + base int // Pos value range for this file is [base...base+size] + size int // file size as provided to AddFile + + // lines and infos are protected by set.mutex + lines []int + infos []lineInfo +} + +// Name returns the file name of file f as registered with AddFile. +func (f *File) Name() string { + return f.name +} + +// Base returns the base offset of file f as registered with AddFile. +func (f *File) Base() int { + return f.base +} + +// Size returns the size of file f as registered with AddFile. +func (f *File) Size() int { + return f.size +} + +// LineCount returns the number of lines in file f. +func (f *File) LineCount() int { + f.set.mutex.RLock() + n := len(f.lines) + f.set.mutex.RUnlock() + return n +} + +// AddLine adds the line offset for a new line. +// The line offset must be larger than the offset for the previous line +// and smaller than the file size; otherwise the line offset is ignored. +// +func (f *File) AddLine(offset int) { + f.set.mutex.Lock() + if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size { + f.lines = append(f.lines, offset) + } + f.set.mutex.Unlock() +} + +// SetLines sets the line offsets for a file and returns true if successful. +// The line offsets are the offsets of the first character of each line; +// for instance for the content "ab\nc\n" the line offsets are {0, 3}. +// An empty file has an empty line offset table. +// Each line offset must be larger than the offset for the previous line +// and smaller than the file size; otherwise SetLines fails and returns +// false. +// +func (f *File) SetLines(lines []int) bool { + // verify validity of lines table + size := f.size + for i, offset := range lines { + if i > 0 && offset <= lines[i-1] || size <= offset { + return false + } + } + + // set lines table + f.set.mutex.Lock() + f.lines = lines + f.set.mutex.Unlock() + return true +} + +// SetLinesForContent sets the line offsets for the given file content. +func (f *File) SetLinesForContent(content []byte) { + var lines []int + line := 0 + for offset, b := range content { + if line >= 0 { + lines = append(lines, line) + } + line = -1 + if b == '\n' { + line = offset + 1 + } + } + + // set lines table + f.set.mutex.Lock() + f.lines = lines + f.set.mutex.Unlock() +} + +// Pos returns the Pos value for the given file offset; +// the offset must be <= f.Size(). +// f.Pos(f.Offset(p)) == p. +// +func (f *File) Pos(offset int) Pos { + if offset > f.size { + panic("illegal file offset") + } + return Pos(f.base + offset) +} + +// Offset returns the offset for the given file position p; +// p must be a valid Pos value in that file. +// f.Offset(f.Pos(offset)) == offset. +// +func (f *File) Offset(p Pos) int { + if int(p) < f.base || int(p) > f.base+f.size { + panic("illegal Pos value") + } + return int(p) - f.base +} + +// Line returns the line number for the given file position p; +// p must be a Pos value in that file or NoPos. +// +func (f *File) Line(p Pos) int { + // TODO(gri) this can be implemented much more efficiently + return f.Position(p).Line +} + +// Position returns the Position value for the given file position p; +// p must be a Pos value in that file or NoPos. 
+// +func (f *File) Position(p Pos) (pos Position) { + if p != NoPos { + if int(p) < f.base || int(p) > f.base+f.size { + panic("illegal Pos value") + } + pos = f.position(p) + } + return +} + +func searchInts(a []int, x int) int { + // This function body is a manually inlined version of: + // + // return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1 + // + // With better compiler optimizations, this may not be needed in the + // future, but at the moment this change improves the go/printer + // benchmark performance by ~30%. This has a direct impact on the + // speed of gofmt and thus seems worthwhile (2011-04-29). + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 // avoid overflow when computing h + // i ≤ h < j + if a[h] <= x { + i = h + 1 + } else { + j = h + } + } + return i - 1 +} + +func searchLineInfos(a []lineInfo, x int) int { + return sort.Search(len(a), func(i int) bool { return a[i].offset > x }) - 1 +} + +// info returns the file name, line, and column number for a file offset. +func (f *File) info(offset int) (filename string, line, column int) { + filename = f.name + if i := searchInts(f.lines, offset); i >= 0 { + line, column = i+1, offset-f.lines[i]+1 + } + if len(f.infos) > 0 { + // almost no files have extra line infos + if i := searchLineInfos(f.infos, offset); i >= 0 { + alt := &f.infos[i] + filename = alt.filename + if i := searchInts(f.lines, alt.offset); i >= 0 { + line += alt.line - i - 1 + } + } + } + return +} + +// A FileSet represents a set of source files. +// Methods of file sets are synchronized; multiple goroutines +// may invoke them concurrently. +// +type FileSet struct { + mutex sync.RWMutex // protects the file set + base int // base offset for the next file + files []*File // list of files in the order added to the set + last *File // cache of last file looked up +} + +// NewFileSet creates a new file set. +func NewFileSet() *FileSet { + s := new(FileSet) + s.base = 1 // 0 == NoPos + return s +} + +// Base returns the minimum base offset that must be provided to +// AddFile when adding the next file. +// +func (s *FileSet) Base() int { + s.mutex.RLock() + b := s.base + s.mutex.RUnlock() + return b + +} + +// AddFile adds a new file with a given filename, base offset, and file size +// to the file set s and returns the file. Multiple files may have the same +// name. The base offset must not be smaller than the FileSet's Base(), and +// size must not be negative. +// +// Adding the file will set the file set's Base() value to base + size + 1 +// as the minimum base value for the next file. The following relationship +// exists between a Pos value p for a given file offset offs: +// +// int(p) = base + offs +// +// with offs in the range [0, size] and thus p in the range [base, base+size]. +// For convenience, File.Pos may be used to create file-specific position +// values from a file offset. +// +func (s *FileSet) AddFile(filename string, base, size int) *File { + s.mutex.Lock() + defer s.mutex.Unlock() + if base < s.base || size < 0 { + panic("illegal base or size") + } + // base >= s.base && size >= 0 + f := &File{s, filename, base, size, []int{0}, nil} + base += size + 1 // +1 because EOF also has a position + if base < 0 { + panic("token.Pos offset overflow (> 2G of source code in file set)") + } + // add the file to the file set + s.base = base + s.files = append(s.files, f) + s.last = f + return f +} + +// Files returns the files added to the file set. 
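+// They are delivered on the returned channel in the order in which
+// they were added. A sketch of typical use:
+//
+//	for f := range fset.Files() {
+//		fmt.Println(f.Name())
+//	}
+//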
+func (s *FileSet) Files() <-chan *File { + ch := make(chan *File) + go func() { + for i := 0; ; i++ { + var f *File + s.mutex.RLock() + if i < len(s.files) { + f = s.files[i] + } + s.mutex.RUnlock() + if f == nil { + break + } + ch <- f + } + close(ch) + }() + return ch +} diff --git a/src/pkg/go/token/position_test.go b/src/pkg/go/token/position_test.go new file mode 100644 index 000000000..30bec5991 --- /dev/null +++ b/src/pkg/go/token/position_test.go @@ -0,0 +1,180 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package token + +import ( + "fmt" + "testing" +) + +func checkPos(t *testing.T, msg string, p, q Position) { + if p.Filename != q.Filename { + t.Errorf("%s: expected filename = %q; got %q", msg, q.Filename, p.Filename) + } + if p.Offset != q.Offset { + t.Errorf("%s: expected offset = %d; got %d", msg, q.Offset, p.Offset) + } + if p.Line != q.Line { + t.Errorf("%s: expected line = %d; got %d", msg, q.Line, p.Line) + } + if p.Column != q.Column { + t.Errorf("%s: expected column = %d; got %d", msg, q.Column, p.Column) + } +} + +func TestNoPos(t *testing.T) { + if NoPos.IsValid() { + t.Errorf("NoPos should not be valid") + } + var fset *FileSet + checkPos(t, "nil NoPos", fset.Position(NoPos), Position{}) + fset = NewFileSet() + checkPos(t, "fset NoPos", fset.Position(NoPos), Position{}) +} + +var tests = []struct { + filename string + source []byte // may be nil + size int + lines []int +}{ + {"a", []byte{}, 0, []int{}}, + {"b", []byte("01234"), 5, []int{0}}, + {"c", []byte("\n\n\n\n\n\n\n\n\n"), 9, []int{0, 1, 2, 3, 4, 5, 6, 7, 8}}, + {"d", nil, 100, []int{0, 5, 10, 20, 30, 70, 71, 72, 80, 85, 90, 99}}, + {"e", nil, 777, []int{0, 80, 100, 120, 130, 180, 267, 455, 500, 567, 620}}, + {"f", []byte("package p\n\nimport \"fmt\""), 23, []int{0, 10, 11}}, + {"g", []byte("package p\n\nimport \"fmt\"\n"), 24, []int{0, 10, 11}}, + {"h", []byte("package p\n\nimport \"fmt\"\n "), 25, []int{0, 10, 11, 24}}, +} + +func linecol(lines []int, offs int) (int, int) { + prevLineOffs := 0 + for line, lineOffs := range lines { + if offs < lineOffs { + return line, offs - prevLineOffs + 1 + } + prevLineOffs = lineOffs + } + return len(lines), offs - prevLineOffs + 1 +} + +func verifyPositions(t *testing.T, fset *FileSet, f *File, lines []int) { + for offs := 0; offs < f.Size(); offs++ { + p := f.Pos(offs) + offs2 := f.Offset(p) + if offs2 != offs { + t.Errorf("%s, Offset: expected offset %d; got %d", f.Name(), offs, offs2) + } + line, col := linecol(lines, offs) + msg := fmt.Sprintf("%s (offs = %d, p = %d)", f.Name(), offs, p) + checkPos(t, msg, f.Position(f.Pos(offs)), Position{f.Name(), offs, line, col}) + checkPos(t, msg, fset.Position(p), Position{f.Name(), offs, line, col}) + } +} + +func makeTestSource(size int, lines []int) []byte { + src := make([]byte, size) + for _, offs := range lines { + if offs > 0 { + src[offs-1] = '\n' + } + } + return src +} + +func TestPositions(t *testing.T) { + const delta = 7 // a non-zero base offset increment + fset := NewFileSet() + for _, test := range tests { + // verify consistency of test case + if test.source != nil && len(test.source) != test.size { + t.Errorf("%s: inconsistent test case: expected file size %d; got %d", test.filename, test.size, len(test.source)) + } + + // add file and verify name and size + f := fset.AddFile(test.filename, fset.Base()+delta, test.size) + if f.Name() != test.filename { + t.Errorf("expected filename %q; 
got %q", test.filename, f.Name()) + } + if f.Size() != test.size { + t.Errorf("%s: expected file size %d; got %d", f.Name(), test.size, f.Size()) + } + if fset.File(f.Pos(0)) != f { + t.Errorf("%s: f.Pos(0) was not found in f", f.Name()) + } + + // add lines individually and verify all positions + for i, offset := range test.lines { + f.AddLine(offset) + if f.LineCount() != i+1 { + t.Errorf("%s, AddLine: expected line count %d; got %d", f.Name(), i+1, f.LineCount()) + } + // adding the same offset again should be ignored + f.AddLine(offset) + if f.LineCount() != i+1 { + t.Errorf("%s, AddLine: expected unchanged line count %d; got %d", f.Name(), i+1, f.LineCount()) + } + verifyPositions(t, fset, f, test.lines[0:i+1]) + } + + // add lines with SetLines and verify all positions + if ok := f.SetLines(test.lines); !ok { + t.Errorf("%s: SetLines failed", f.Name()) + } + if f.LineCount() != len(test.lines) { + t.Errorf("%s, SetLines: expected line count %d; got %d", f.Name(), len(test.lines), f.LineCount()) + } + verifyPositions(t, fset, f, test.lines) + + // add lines with SetLinesForContent and verify all positions + src := test.source + if src == nil { + // no test source available - create one from scratch + src = makeTestSource(test.size, test.lines) + } + f.SetLinesForContent(src) + if f.LineCount() != len(test.lines) { + t.Errorf("%s, SetLinesForContent: expected line count %d; got %d", f.Name(), len(test.lines), f.LineCount()) + } + verifyPositions(t, fset, f, test.lines) + } +} + +func TestLineInfo(t *testing.T) { + fset := NewFileSet() + f := fset.AddFile("foo", fset.Base(), 500) + lines := []int{0, 42, 77, 100, 210, 220, 277, 300, 333, 401} + // add lines individually and provide alternative line information + for _, offs := range lines { + f.AddLine(offs) + f.AddLineInfo(offs, "bar", 42) + } + // verify positions for all offsets + for offs := 0; offs <= f.Size(); offs++ { + p := f.Pos(offs) + _, col := linecol(lines, offs) + msg := fmt.Sprintf("%s (offs = %d, p = %d)", f.Name(), offs, p) + checkPos(t, msg, f.Position(f.Pos(offs)), Position{"bar", offs, 42, col}) + checkPos(t, msg, fset.Position(p), Position{"bar", offs, 42, col}) + } +} + +func TestFiles(t *testing.T) { + fset := NewFileSet() + for i, test := range tests { + fset.AddFile(test.filename, fset.Base(), test.size) + j := 0 + for g := range fset.Files() { + if g.Name() != tests[j].filename { + t.Errorf("expected filename = %s; got %s", tests[j].filename, g.Name()) + } + j++ + } + if j != i+1 { + t.Errorf("expected %d files; got %d", i+1, j) + } + } +} diff --git a/src/pkg/go/token/token.go b/src/pkg/go/token/token.go new file mode 100644 index 000000000..557374052 --- /dev/null +++ b/src/pkg/go/token/token.go @@ -0,0 +1,310 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package token defines constants representing the lexical tokens of the Go +// programming language and basic operations on tokens (printing, predicates). +// +package token + +import "strconv" + +// Token is the set of lexical tokens of the Go programming language. +type Token int + +// The list of tokens. 
+const ( + // Special tokens + ILLEGAL Token = iota + EOF + COMMENT + + literal_beg + // Identifiers and basic type literals + // (these tokens stand for classes of literals) + IDENT // main + INT // 12345 + FLOAT // 123.45 + IMAG // 123.45i + CHAR // 'a' + STRING // "abc" + literal_end + + operator_beg + // Operators and delimiters + ADD // + + SUB // - + MUL // * + QUO // / + REM // % + + AND // & + OR // | + XOR // ^ + SHL // << + SHR // >> + AND_NOT // &^ + + ADD_ASSIGN // += + SUB_ASSIGN // -= + MUL_ASSIGN // *= + QUO_ASSIGN // /= + REM_ASSIGN // %= + + AND_ASSIGN // &= + OR_ASSIGN // |= + XOR_ASSIGN // ^= + SHL_ASSIGN // <<= + SHR_ASSIGN // >>= + AND_NOT_ASSIGN // &^= + + LAND // && + LOR // || + ARROW // <- + INC // ++ + DEC // -- + + EQL // == + LSS // < + GTR // > + ASSIGN // = + NOT // ! + + NEQ // != + LEQ // <= + GEQ // >= + DEFINE // := + ELLIPSIS // ... + + LPAREN // ( + LBRACK // [ + LBRACE // { + COMMA // , + PERIOD // . + + RPAREN // ) + RBRACK // ] + RBRACE // } + SEMICOLON // ; + COLON // : + operator_end + + keyword_beg + // Keywords + BREAK + CASE + CHAN + CONST + CONTINUE + + DEFAULT + DEFER + ELSE + FALLTHROUGH + FOR + + FUNC + GO + GOTO + IF + IMPORT + + INTERFACE + MAP + PACKAGE + RANGE + RETURN + + SELECT + STRUCT + SWITCH + TYPE + VAR + keyword_end +) + +var tokens = [...]string{ + ILLEGAL: "ILLEGAL", + + EOF: "EOF", + COMMENT: "COMMENT", + + IDENT: "IDENT", + INT: "INT", + FLOAT: "FLOAT", + IMAG: "IMAG", + CHAR: "CHAR", + STRING: "STRING", + + ADD: "+", + SUB: "-", + MUL: "*", + QUO: "/", + REM: "%", + + AND: "&", + OR: "|", + XOR: "^", + SHL: "<<", + SHR: ">>", + AND_NOT: "&^", + + ADD_ASSIGN: "+=", + SUB_ASSIGN: "-=", + MUL_ASSIGN: "*=", + QUO_ASSIGN: "/=", + REM_ASSIGN: "%=", + + AND_ASSIGN: "&=", + OR_ASSIGN: "|=", + XOR_ASSIGN: "^=", + SHL_ASSIGN: "<<=", + SHR_ASSIGN: ">>=", + AND_NOT_ASSIGN: "&^=", + + LAND: "&&", + LOR: "||", + ARROW: "<-", + INC: "++", + DEC: "--", + + EQL: "==", + LSS: "<", + GTR: ">", + ASSIGN: "=", + NOT: "!", + + NEQ: "!=", + LEQ: "<=", + GEQ: ">=", + DEFINE: ":=", + ELLIPSIS: "...", + + LPAREN: "(", + LBRACK: "[", + LBRACE: "{", + COMMA: ",", + PERIOD: ".", + + RPAREN: ")", + RBRACK: "]", + RBRACE: "}", + SEMICOLON: ";", + COLON: ":", + + BREAK: "break", + CASE: "case", + CHAN: "chan", + CONST: "const", + CONTINUE: "continue", + + DEFAULT: "default", + DEFER: "defer", + ELSE: "else", + FALLTHROUGH: "fallthrough", + FOR: "for", + + FUNC: "func", + GO: "go", + GOTO: "goto", + IF: "if", + IMPORT: "import", + + INTERFACE: "interface", + MAP: "map", + PACKAGE: "package", + RANGE: "range", + RETURN: "return", + + SELECT: "select", + STRUCT: "struct", + SWITCH: "switch", + TYPE: "type", + VAR: "var", +} + +// String returns the string corresponding to the token tok. +// For operators, delimiters, and keywords the string is the actual +// token character sequence (e.g., for the token ADD, the string is +// "+"). For all other tokens the string corresponds to the token +// constant name (e.g. for the token IDENT, the string is "IDENT"). +// +func (tok Token) String() string { + s := "" + if 0 <= tok && tok < Token(len(tokens)) { + s = tokens[tok] + } + if s == "" { + s = "token(" + strconv.Itoa(int(tok)) + ")" + } + return s +} + +// A set of constants for precedence-based expression parsing. +// Non-operators have lowest precedence, followed by operators +// starting with precedence 1 up to unary operators. 
The highest
+// precedence serves as "catch-all" precedence for
+// selector, indexing, and other operator and delimiter tokens.
+//
+const (
+	LowestPrec  = 0 // non-operators
+	UnaryPrec   = 6
+	HighestPrec = 7
+)
+
+// Precedence returns the operator precedence of the binary
+// operator op. If op is not a binary operator, the result
+// is LowestPrec.
+//
+func (op Token) Precedence() int {
+	switch op {
+	case LOR:
+		return 1
+	case LAND:
+		return 2
+	case EQL, NEQ, LSS, LEQ, GTR, GEQ:
+		return 3
+	case ADD, SUB, OR, XOR:
+		return 4
+	case MUL, QUO, REM, SHL, SHR, AND, AND_NOT:
+		return 5
+	}
+	return LowestPrec
+}
+
+var keywords map[string]Token
+
+func init() {
+	keywords = make(map[string]Token)
+	for i := keyword_beg + 1; i < keyword_end; i++ {
+		keywords[tokens[i]] = i
+	}
+}
+
+// Lookup maps an identifier to its keyword token or IDENT (if not a keyword).
+//
+func Lookup(ident []byte) Token {
+	// TODO Maps with []byte key are illegal because []byte does not
+	// support == . Should find a more efficient solution eventually.
+	if tok, is_keyword := keywords[string(ident)]; is_keyword {
+		return tok
+	}
+	return IDENT
+}
+
+// Predicates
+
+// IsLiteral returns true for tokens corresponding to identifiers
+// and basic type literals; returns false otherwise.
+//
+func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_end }
+
+// IsOperator returns true for tokens corresponding to operators and
+// delimiters; returns false otherwise.
+//
+func (tok Token) IsOperator() bool { return operator_beg < tok && tok < operator_end }
+
+// IsKeyword returns true for tokens corresponding to keywords;
+// returns false otherwise.
+//
+func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
diff --git a/src/pkg/go/typechecker/Makefile b/src/pkg/go/typechecker/Makefile
new file mode 100644
index 000000000..83af3ef4e
--- /dev/null
+++ b/src/pkg/go/typechecker/Makefile
@@ -0,0 +1,14 @@
+# Copyright 2010 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=go/typechecker
+GOFILES=\
+	scope.go\
+	type.go\
+	typechecker.go\
+	universe.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/go/typechecker/scope.go b/src/pkg/go/typechecker/scope.go
new file mode 100644
index 000000000..d73d1a450
--- /dev/null
+++ b/src/pkg/go/typechecker/scope.go
@@ -0,0 +1,69 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// DEPRECATED FILE - WILL GO AWAY EVENTUALLY.
+//
+// Scope handling is now done in go/parser.
+// The functionality here is only present to
+// keep the typechecker running for now.
+
+package typechecker
+
+import "go/ast"
+
+func (tc *typechecker) openScope() *ast.Scope {
+	tc.topScope = ast.NewScope(tc.topScope)
+	return tc.topScope
+}
+
+func (tc *typechecker) closeScope() {
+	tc.topScope = tc.topScope.Outer
+}
+
+// declInScope declares an object of a given kind and name in scope and sets the object's Decl and N fields.
+// It returns the newly allocated object. If an object with the same name already exists in scope, an error
+// is reported and the object is not inserted.
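+// A blank identifier ("_") is bound to the new object, but the object
+// is never inserted into the scope.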
+func (tc *typechecker) declInScope(scope *ast.Scope, kind ast.ObjKind, name *ast.Ident, decl interface{}, n int) *ast.Object { + obj := ast.NewObj(kind, name.Name) + obj.Decl = decl + //obj.N = n + name.Obj = obj + if name.Name != "_" { + if alt := scope.Insert(obj); alt != nil { + tc.Errorf(name.Pos(), "%s already declared at %s", name.Name, tc.fset.Position(alt.Pos()).String()) + } + } + return obj +} + +// decl is the same as declInScope(tc.topScope, ...) +func (tc *typechecker) decl(kind ast.ObjKind, name *ast.Ident, decl interface{}, n int) *ast.Object { + return tc.declInScope(tc.topScope, kind, name, decl, n) +} + +// find returns the object with the given name if visible in the current scope hierarchy. +// If no such object is found, an error is reported and a bad object is returned instead. +func (tc *typechecker) find(name *ast.Ident) (obj *ast.Object) { + for s := tc.topScope; s != nil && obj == nil; s = s.Outer { + obj = s.Lookup(name.Name) + } + if obj == nil { + tc.Errorf(name.Pos(), "%s not declared", name.Name) + obj = ast.NewObj(ast.Bad, name.Name) + } + name.Obj = obj + return +} + +// findField returns the object with the given name if visible in the type's scope. +// If no such object is found, an error is reported and a bad object is returned instead. +func (tc *typechecker) findField(typ *Type, name *ast.Ident) (obj *ast.Object) { + // TODO(gri) This is simplistic at the moment and ignores anonymous fields. + obj = typ.Scope.Lookup(name.Name) + if obj == nil { + tc.Errorf(name.Pos(), "%s not declared", name.Name) + obj = ast.NewObj(ast.Bad, name.Name) + } + return +} diff --git a/src/pkg/go/typechecker/testdata/test0.src b/src/pkg/go/typechecker/testdata/test0.src new file mode 100644 index 000000000..4e317f214 --- /dev/null +++ b/src/pkg/go/typechecker/testdata/test0.src @@ -0,0 +1,94 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
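+
+// The declarations below exercise cycle detection: a type may refer to
+// itself through an indirection such as a pointer, slice, map, channel,
+// or function type, but a direct self-reference (as its own definition,
+// an array element, or a struct field) is an illegal cycle. Compare:
+//
+//	t *t	// ok: the pointer introduces indirection
+//	a a	// illegal cycle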
+ +// type declarations + +package P0 + +type ( + B bool + I int32 + A [10]P + T struct { + x, y P + } + P *T + R *R + F func(A) I + Y interface { + f(A) I + } + S []P + M map[I]F + C chan<- I +) + +type ( + a/* ERROR "illegal cycle" */ a + a/* ERROR "already declared" */ int + + b/* ERROR "illegal cycle" */ c + c d + d e + e b /* ERROR "not a type" */ + + t *t + + U V + V W + W *U + + P1 *S2 + P2 P1 + + S1 struct { + a, b, c int + u, v, a/* ERROR "already declared" */ float + } + S2/* ERROR "illegal cycle" */ struct { + x S2 + } + + L1 []L1 + L2 []int + + A1 [10]int + A2/* ERROR "illegal cycle" */ [10]A2 + A3/* ERROR "illegal cycle" */ [10]struct { + x A4 + } + A4 [10]A3 + + F1 func() + F2 func(x, y, z float) + F3 func(x, y, x /* ERROR "already declared" */ float) + F4 func() (x, y, x /* ERROR "already declared" */ float) + F5 func(x int) (x /* ERROR "already declared" */ float) + + I1 interface{} + I2 interface { + m1() + } + I3 interface { + m1() + m1 /* ERROR "already declared" */ () + } + I4 interface { + m1(x, y, x /* ERROR "already declared" */ float) + m2() (x, y, x /* ERROR "already declared" */ float) + m3(x int) (x /* ERROR "already declared" */ float) + } + I5 interface { + m1(I5) + } + + C1 chan int + C2 <-chan int + C3 chan<- C3 + + M1 map[Last]string + M2 map[string]M2 + + Last int +) diff --git a/src/pkg/go/typechecker/testdata/test1.src b/src/pkg/go/typechecker/testdata/test1.src new file mode 100644 index 000000000..b5531fb9f --- /dev/null +++ b/src/pkg/go/typechecker/testdata/test1.src @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// const and var declarations + +package P1 + +const ( + c1 = 0 + c2 int = 0 + c3, c4 = 0 +) diff --git a/src/pkg/go/typechecker/testdata/test3.src b/src/pkg/go/typechecker/testdata/test3.src new file mode 100644 index 000000000..2e1a9fa8f --- /dev/null +++ b/src/pkg/go/typechecker/testdata/test3.src @@ -0,0 +1,41 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package P3 + +// function and method signatures + +func _() {} +func _() {} +func _(x, x /* ERROR "already declared" */ int) {} + +func f() {} +func f /* ERROR "already declared" */ () {} + +func (*foo /* ERROR "invalid receiver" */ ) m() {} +func (bar /* ERROR "not a type" */ ) m() {} + +func f1(x, _, _ int) (_, _ float) {} +func f2(x, y, x /* ERROR "already declared" */ int) {} +func f3(x, y int) (a, b, x /* ERROR "already declared" */ int) {} + +func (x *T) m1() {} +func (x *T) m1 /* ERROR "already declared" */ () {} +func (x T) m1 /* ERROR "already declared" */ () {} +func (T) m1 /* ERROR "already declared" */ () {} + +func (x *T) m2(u, x /* ERROR "already declared" */ int) {} +func (x *T) m3(a, b, c int) (u, x /* ERROR "already declared" */ int) {} +// The following are disabled for now because the typechecker +// in in the process of being rewritten and cannot handle them +// at the moment +//func (T) _(x, x /* "already declared" */ int) {} +//func (T) _() (x, x /* "already declared" */ int) {} + +//func (PT) _() {} + +var bar int + +type T struct{} +type PT (T) diff --git a/src/pkg/go/typechecker/testdata/test4.src b/src/pkg/go/typechecker/testdata/test4.src new file mode 100644 index 000000000..94d3558f9 --- /dev/null +++ b/src/pkg/go/typechecker/testdata/test4.src @@ -0,0 +1,11 @@ +// Copyright 2010 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Constant declarations + +package P4 + +const ( + c0 = 0 +) diff --git a/src/pkg/go/typechecker/type.go b/src/pkg/go/typechecker/type.go new file mode 100644 index 000000000..1b88eb54b --- /dev/null +++ b/src/pkg/go/typechecker/type.go @@ -0,0 +1,118 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typechecker + +import "go/ast" + +// A Type represents a Go type. +type Type struct { + Form Form + Obj *ast.Object // corresponding type name, or nil + Scope *ast.Scope // fields and methods, always present + N uint // basic type id, array length, number of function results, or channel direction + Key, Elt *Type // map key and array, pointer, slice, map or channel element + Params *ast.Scope // function (receiver, input and result) parameters, tuple expressions (results of function calls), or nil + Expr ast.Expr // corresponding AST expression +} + +// NewType creates a new type of a given form. +func NewType(form Form) *Type { + return &Type{Form: form, Scope: ast.NewScope(nil)} +} + +// Form describes the form of a type. +type Form int + +// The list of possible type forms. +const ( + BadType Form = iota // for error handling + Unresolved // type not fully setup + Basic + Array + Struct + Pointer + Function + Method + Interface + Slice + Map + Channel + Tuple +) + +var formStrings = [...]string{ + BadType: "badType", + Unresolved: "unresolved", + Basic: "basic", + Array: "array", + Struct: "struct", + Pointer: "pointer", + Function: "function", + Method: "method", + Interface: "interface", + Slice: "slice", + Map: "map", + Channel: "channel", + Tuple: "tuple", +} + +func (form Form) String() string { return formStrings[form] } + +// The list of basic type id's. +const ( + Bool = iota + Byte + Uint + Int + Float + Complex + Uintptr + String + + Uint8 + Uint16 + Uint32 + Uint64 + + Int8 + Int16 + Int32 + Int64 + + Float32 + Float64 + + Complex64 + Complex128 + + // TODO(gri) ideal types are missing +) + +var BasicTypes = map[uint]string{ + Bool: "bool", + Byte: "byte", + Uint: "uint", + Int: "int", + Float: "float", + Complex: "complex", + Uintptr: "uintptr", + String: "string", + + Uint8: "uint8", + Uint16: "uint16", + Uint32: "uint32", + Uint64: "uint64", + + Int8: "int8", + Int16: "int16", + Int32: "int32", + Int64: "int64", + + Float32: "float32", + Float64: "float64", + + Complex64: "complex64", + Complex128: "complex128", +} diff --git a/src/pkg/go/typechecker/typechecker.go b/src/pkg/go/typechecker/typechecker.go new file mode 100644 index 000000000..24480165b --- /dev/null +++ b/src/pkg/go/typechecker/typechecker.go @@ -0,0 +1,468 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DEPRECATED PACKAGE - SEE go/types INSTEAD. +// This package implements typechecking of a Go AST. +// The result of the typecheck is an augmented AST +// with object and type information for each identifier. +// +package typechecker + +import ( + "fmt" + "go/ast" + "go/token" + "go/scanner" + "os" +) + +// TODO(gri) don't report errors for objects/types that are marked as bad. 
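+
+// A sketch of typical client use, assuming fset and pkg were produced
+// by go/parser and ast.NewPackage (a nil importer causes imports to be
+// ignored):
+//
+//	if err := typechecker.CheckPackage(fset, pkg, nil); err != nil {
+//		scanner.PrintError(os.Stderr, err)
+//	}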
+ + +const debug = true // set for debugging output + +// An importer takes an import path and returns the data describing the +// respective package's exported interface. The data format is TBD. +// +type Importer func(path string) ([]byte, os.Error) + +// CheckPackage typechecks a package and augments the AST by setting +// *ast.Object, *ast.Type, and *ast.Scope fields accordingly. If an +// importer is provided, it is used to handle imports, otherwise they +// are ignored (likely leading to typechecking errors). +// +// If errors are reported, the AST may be incompletely augmented (fields +// may be nil) or contain incomplete object, type, or scope information. +// +func CheckPackage(fset *token.FileSet, pkg *ast.Package, importer Importer) os.Error { + var tc typechecker + tc.fset = fset + tc.importer = importer + tc.checkPackage(pkg) + return tc.GetError(scanner.Sorted) +} + +// CheckFile typechecks a single file, but otherwise behaves like +// CheckPackage. If the complete package consists of more than just +// one file, the file may not typecheck without errors. +// +func CheckFile(fset *token.FileSet, file *ast.File, importer Importer) os.Error { + // create a single-file dummy package + pkg := &ast.Package{file.Name.Name, nil, nil, map[string]*ast.File{fset.Position(file.Name.NamePos).Filename: file}} + return CheckPackage(fset, pkg, importer) +} + +// ---------------------------------------------------------------------------- +// Typechecker state + +type typechecker struct { + fset *token.FileSet + scanner.ErrorVector + importer Importer + globals []*ast.Object // list of global objects + topScope *ast.Scope // current top-most scope + cyclemap map[*ast.Object]bool // for cycle detection + iota int // current value of iota +} + +func (tc *typechecker) Errorf(pos token.Pos, format string, args ...interface{}) { + tc.Error(tc.fset.Position(pos), fmt.Sprintf(format, args...)) +} + +func assert(pred bool) { + if !pred { + panic("internal error") + } +} + +/* +Typechecking is done in several phases: + +phase 1: declare all global objects; also collect all function and method declarations + - all objects have kind, name, decl fields; the decl field permits + quick lookup of an object's declaration + - constant objects have an iota value + - type objects have unresolved types with empty scopes, all others have nil types + - report global double declarations + +phase 2: bind methods to their receiver base types + - receiver base types must be declared in the package, thus for + each method a corresponding (unresolved) type must exist + - report method double declarations and errors with base types + +phase 3: resolve all global objects + - sequentially iterate through all objects in the global scope + - resolve types for all unresolved types and assign types to + all attached methods + - assign types to all other objects, possibly by evaluating + constant and initializer expressions + - resolution may recurse; a cyclemap is used to detect cycles + - report global typing errors + +phase 4: sequentially typecheck function and method bodies + - all global objects are declared and have types and values; + all methods have types + - sequentially process statements in each body; any object + referred to must be fully defined at this point + - report local typing errors +*/ + +func (tc *typechecker) checkPackage(pkg *ast.Package) { + // setup package scope + tc.topScope = Universe + tc.openScope() + defer tc.closeScope() + + // TODO(gri) there's no file scope at the moment since we ignore 
imports + + // phase 1: declare all global objects; also collect all function and method declarations + var funcs []*ast.FuncDecl + for _, file := range pkg.Files { + for _, decl := range file.Decls { + tc.declGlobal(decl) + if f, isFunc := decl.(*ast.FuncDecl); isFunc { + funcs = append(funcs, f) + } + } + } + + // phase 2: bind methods to their receiver base types + for _, m := range funcs { + if m.Recv != nil { + tc.bindMethod(m) + } + } + + // phase 3: resolve all global objects + tc.cyclemap = make(map[*ast.Object]bool) + for _, obj := range tc.globals { + tc.resolve(obj) + } + assert(len(tc.cyclemap) == 0) + + // 4: sequentially typecheck function and method bodies + for _, f := range funcs { + ftype, _ := f.Name.Obj.Type.(*Type) + tc.checkBlock(f.Body.List, ftype) + } + + pkg.Scope = tc.topScope +} + +func (tc *typechecker) declGlobal(global ast.Decl) { + switch d := global.(type) { + case *ast.BadDecl: + // ignore + + case *ast.GenDecl: + iota := 0 + var prev *ast.ValueSpec + for _, spec := range d.Specs { + switch s := spec.(type) { + case *ast.ImportSpec: + // TODO(gri) imports go into file scope + case *ast.ValueSpec: + switch d.Tok { + case token.CONST: + if s.Values == nil { + // create a new spec with type and values from the previous one + if prev != nil { + s = &ast.ValueSpec{s.Doc, s.Names, prev.Type, prev.Values, s.Comment} + } else { + // TODO(gri) this should probably go into the const decl code + tc.Errorf(s.Pos(), "missing initializer for const %s", s.Names[0].Name) + } + } + for _, name := range s.Names { + tc.globals = append(tc.globals, tc.decl(ast.Con, name, s, iota)) + } + case token.VAR: + for _, name := range s.Names { + tc.globals = append(tc.globals, tc.decl(ast.Var, name, s, 0)) + } + default: + panic("unreachable") + } + prev = s + iota++ + case *ast.TypeSpec: + obj := tc.decl(ast.Typ, s.Name, s, 0) + tc.globals = append(tc.globals, obj) + // give all type objects an unresolved type so + // that we can collect methods in the type scope + typ := NewType(Unresolved) + obj.Type = typ + typ.Obj = obj + default: + panic("unreachable") + } + } + + case *ast.FuncDecl: + if d.Recv == nil { + tc.globals = append(tc.globals, tc.decl(ast.Fun, d.Name, d, 0)) + } + + default: + panic("unreachable") + } +} + +// If x is of the form *T, deref returns T, otherwise it returns x. +func deref(x ast.Expr) ast.Expr { + if p, isPtr := x.(*ast.StarExpr); isPtr { + x = p.X + } + return x +} + +func (tc *typechecker) bindMethod(method *ast.FuncDecl) { + // a method is declared in the receiver base type's scope + var scope *ast.Scope + base := deref(method.Recv.List[0].Type) + if name, isIdent := base.(*ast.Ident); isIdent { + // if base is not an *ast.Ident, we had a syntax + // error and the parser reported an error already + obj := tc.topScope.Lookup(name.Name) + if obj == nil { + tc.Errorf(name.Pos(), "invalid receiver: %s is not declared in this package", name.Name) + } else if obj.Kind != ast.Typ { + tc.Errorf(name.Pos(), "invalid receiver: %s is not a type", name.Name) + } else { + typ := obj.Type.(*Type) + assert(typ.Form == Unresolved) + scope = typ.Scope + } + } + if scope == nil { + // no receiver type found; use a dummy scope + // (we still want to type-check the method + // body, so make sure there is a name object + // and type) + // TODO(gri) should we record the scope so + // that we don't lose the receiver for type- + // checking of the method body? 
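+		// (Any method declared into this dummy scope is unreachable
+		// from a receiver type; only its body gets checked.)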
+ scope = ast.NewScope(nil) + } + tc.declInScope(scope, ast.Fun, method.Name, method, 0) +} + +func (tc *typechecker) resolve(obj *ast.Object) { + // check for declaration cycles + if tc.cyclemap[obj] { + tc.Errorf(obj.Pos(), "illegal cycle in declaration of %s", obj.Name) + obj.Kind = ast.Bad + return + } + tc.cyclemap[obj] = true + defer func() { + tc.cyclemap[obj] = false, false + }() + + // resolve non-type objects + typ, _ := obj.Type.(*Type) + if typ == nil { + switch obj.Kind { + case ast.Bad: + // ignore + + case ast.Con: + tc.declConst(obj) + + case ast.Var: + tc.declVar(obj) + obj.Type = tc.typeFor(nil, obj.Decl.(*ast.ValueSpec).Type, false) + + case ast.Fun: + obj.Type = NewType(Function) + t := obj.Decl.(*ast.FuncDecl).Type + tc.declSignature(obj.Type.(*Type), nil, t.Params, t.Results) + + default: + // type objects have non-nil types when resolve is called + if debug { + fmt.Printf("kind = %s\n", obj.Kind) + } + panic("unreachable") + } + return + } + + // resolve type objects + if typ.Form == Unresolved { + tc.typeFor(typ, typ.Obj.Decl.(*ast.TypeSpec).Type, false) + + // provide types for all methods + for _, obj := range typ.Scope.Objects { + if obj.Kind == ast.Fun { + assert(obj.Type == nil) + obj.Type = NewType(Method) + f := obj.Decl.(*ast.FuncDecl) + t := f.Type + tc.declSignature(obj.Type.(*Type), f.Recv, t.Params, t.Results) + } + } + } +} + +func (tc *typechecker) checkBlock(body []ast.Stmt, ftype *Type) { + tc.openScope() + defer tc.closeScope() + + // inject function/method parameters into block scope, if any + if ftype != nil { + for _, par := range ftype.Params.Objects { + if par.Name != "_" { + alt := tc.topScope.Insert(par) + assert(alt == nil) // ftype has no double declarations + } + } + } + + for _, stmt := range body { + tc.checkStmt(stmt) + } +} + +// ---------------------------------------------------------------------------- +// Types + +// unparen removes parentheses around x, if any. 
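+// For instance, given the expression ((x)), the result is x; unparen
+// recurses through nested parentheses.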
+func unparen(x ast.Expr) ast.Expr { + if ux, hasParens := x.(*ast.ParenExpr); hasParens { + return unparen(ux.X) + } + return x +} + +func (tc *typechecker) declFields(scope *ast.Scope, fields *ast.FieldList, ref bool) (n uint) { + if fields != nil { + for _, f := range fields.List { + typ := tc.typeFor(nil, f.Type, ref) + for _, name := range f.Names { + fld := tc.declInScope(scope, ast.Var, name, f, 0) + fld.Type = typ + n++ + } + } + } + return n +} + +func (tc *typechecker) declSignature(typ *Type, recv, params, results *ast.FieldList) { + assert((typ.Form == Method) == (recv != nil)) + typ.Params = ast.NewScope(nil) + tc.declFields(typ.Params, recv, true) + tc.declFields(typ.Params, params, true) + typ.N = tc.declFields(typ.Params, results, true) +} + +func (tc *typechecker) typeFor(def *Type, x ast.Expr, ref bool) (typ *Type) { + x = unparen(x) + + // type name + if t, isIdent := x.(*ast.Ident); isIdent { + obj := tc.find(t) + + if obj.Kind != ast.Typ { + tc.Errorf(t.Pos(), "%s is not a type", t.Name) + if def == nil { + typ = NewType(BadType) + } else { + typ = def + typ.Form = BadType + } + typ.Expr = x + return + } + + if !ref { + tc.resolve(obj) // check for cycles even if type resolved + } + typ = obj.Type.(*Type) + + if def != nil { + // new type declaration: copy type structure + def.Form = typ.Form + def.N = typ.N + def.Key, def.Elt = typ.Key, typ.Elt + def.Params = typ.Params + def.Expr = x + typ = def + } + return + } + + // type literal + typ = def + if typ == nil { + typ = NewType(BadType) + } + typ.Expr = x + + switch t := x.(type) { + case *ast.SelectorExpr: + if debug { + fmt.Println("qualified identifier unimplemented") + } + typ.Form = BadType + + case *ast.StarExpr: + typ.Form = Pointer + typ.Elt = tc.typeFor(nil, t.X, true) + + case *ast.ArrayType: + if t.Len != nil { + typ.Form = Array + // TODO(gri) compute the real length + // (this may call resolve recursively) + (*typ).N = 42 + } else { + typ.Form = Slice + } + typ.Elt = tc.typeFor(nil, t.Elt, t.Len == nil) + + case *ast.StructType: + typ.Form = Struct + tc.declFields(typ.Scope, t.Fields, false) + + case *ast.FuncType: + typ.Form = Function + tc.declSignature(typ, nil, t.Params, t.Results) + + case *ast.InterfaceType: + typ.Form = Interface + tc.declFields(typ.Scope, t.Methods, true) + + case *ast.MapType: + typ.Form = Map + typ.Key = tc.typeFor(nil, t.Key, true) + typ.Elt = tc.typeFor(nil, t.Value, true) + + case *ast.ChanType: + typ.Form = Channel + typ.N = uint(t.Dir) + typ.Elt = tc.typeFor(nil, t.Value, true) + + default: + if debug { + fmt.Printf("x is %T\n", x) + } + panic("unreachable") + } + + return +} + +// ---------------------------------------------------------------------------- +// TODO(gri) implement these place holders + +func (tc *typechecker) declConst(*ast.Object) { +} + +func (tc *typechecker) declVar(*ast.Object) { +} + +func (tc *typechecker) checkStmt(ast.Stmt) { +} diff --git a/src/pkg/go/typechecker/typechecker_test.go b/src/pkg/go/typechecker/typechecker_test.go new file mode 100644 index 000000000..4bad4499a --- /dev/null +++ b/src/pkg/go/typechecker/typechecker_test.go @@ -0,0 +1,163 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements a simple typechecker test harness. Packages found +// in the testDir directory are typechecked. 
Error messages reported by +// the typechecker are compared against the error messages expected for +// the test files. +// +// Expected errors are indicated in the test files by putting a comment +// of the form /* ERROR "rx" */ immediately following an offending token. +// The harness will verify that an error matching the regular expression +// rx is reported at that source position. Consecutive comments may be +// used to indicate multiple errors for the same token position. +// +// For instance, the following test file indicates that a "not declared" +// error should be reported for the undeclared variable x: +// +// package P0 +// func f() { +// _ = x /* ERROR "not declared" */ + 1 +// } +// +// If the -pkg flag is set, only packages with package names matching +// the regular expression provided via the flag value are tested. + +package typechecker + +import ( + "flag" + "fmt" + "go/ast" + "go/parser" + "go/scanner" + "go/token" + "io/ioutil" + "os" + "regexp" + "sort" + "strings" + "testing" +) + +const testDir = "./testdata" // location of test packages + +var fset = token.NewFileSet() + +var ( + pkgPat = flag.String("pkg", ".*", "regular expression to select test packages by package name") + trace = flag.Bool("trace", false, "print package names") +) + +// ERROR comments must be of the form /* ERROR "rx" */ and rx is +// a regular expression that matches the expected error message. +var errRx = regexp.MustCompile(`^/\* *ERROR *"([^"]*)" *\*/$`) + +// expectedErrors collects the regular expressions of ERROR comments +// found in the package files of pkg and returns them in sorted order +// (by filename and position). +func expectedErrors(t *testing.T, pkg *ast.Package) (list scanner.ErrorList) { + // scan all package files + for filename := range pkg.Files { + src, err := ioutil.ReadFile(filename) + if err != nil { + t.Fatalf("expectedErrors(%s): %v", pkg.Name, err) + } + + var s scanner.Scanner + file := fset.AddFile(filename, fset.Base(), len(src)) + s.Init(file, src, nil, scanner.ScanComments) + var prev token.Pos // position of last non-comment token + loop: + for { + pos, tok, lit := s.Scan() + switch tok { + case token.EOF: + break loop + case token.COMMENT: + s := errRx.FindStringSubmatch(lit) + if len(s) == 2 { + list = append(list, &scanner.Error{fset.Position(prev), string(s[1])}) + } + default: + prev = pos + } + } + } + sort.Sort(list) // multiple files may not be sorted + return +} + +func testFilter(f *os.FileInfo) bool { + return strings.HasSuffix(f.Name, ".src") && f.Name[0] != '.' 
+} + +func checkError(t *testing.T, expected, found *scanner.Error) { + rx, err := regexp.Compile(expected.Msg) + if err != nil { + t.Errorf("%s: %v", expected.Pos, err) + return + } + + match := rx.MatchString(found.Msg) + + if expected.Pos.Offset != found.Pos.Offset { + if match { + t.Errorf("%s: expected error should have been at %s", expected.Pos, found.Pos) + } else { + t.Errorf("%s: error matching %q expected", expected.Pos, expected.Msg) + return + } + } + + if !match { + t.Errorf("%s: %q does not match %q", expected.Pos, expected.Msg, found.Msg) + } +} + +func TestTypeCheck(t *testing.T) { + flag.Parse() + pkgRx, err := regexp.Compile(*pkgPat) + if err != nil { + t.Fatalf("illegal flag value %q: %s", *pkgPat, err) + } + + pkgs, err := parser.ParseDir(fset, testDir, testFilter, 0) + if err != nil { + scanner.PrintError(os.Stderr, err) + t.Fatalf("packages in %s contain syntax errors", testDir) + } + + for _, pkg := range pkgs { + if !pkgRx.MatchString(pkg.Name) { + continue // only test selected packages + } + + if *trace { + fmt.Println(pkg.Name) + } + + xlist := expectedErrors(t, pkg) + err := CheckPackage(fset, pkg, nil) + if err != nil { + if elist, ok := err.(scanner.ErrorList); ok { + // verify that errors match + for i := 0; i < len(xlist) && i < len(elist); i++ { + checkError(t, xlist[i], elist[i]) + } + // the correct number or errors must have been found + if len(xlist) != len(elist) { + fmt.Fprintf(os.Stderr, "%s\n", pkg.Name) + scanner.PrintError(os.Stderr, elist) + fmt.Fprintln(os.Stderr) + t.Errorf("TypeCheck(%s): %d errors expected but %d reported", pkg.Name, len(xlist), len(elist)) + } + } else { + t.Errorf("TypeCheck(%s): %v", pkg.Name, err) + } + } else if len(xlist) > 0 { + t.Errorf("TypeCheck(%s): %d errors expected but 0 reported", pkg.Name, len(xlist)) + } + } +} diff --git a/src/pkg/go/typechecker/universe.go b/src/pkg/go/typechecker/universe.go new file mode 100644 index 000000000..81c14a05e --- /dev/null +++ b/src/pkg/go/typechecker/universe.go @@ -0,0 +1,36 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typechecker + +import "go/ast" + +// TODO(gri) should this be in package ast? + +// The Universe scope contains all predeclared identifiers. +var Universe *ast.Scope + +func def(obj *ast.Object) { + alt := Universe.Insert(obj) + if alt != nil { + panic("object declared twice") + } +} + +func init() { + Universe = ast.NewScope(nil) + + // basic types + for n, name := range BasicTypes { + typ := NewType(Basic) + typ.N = n + obj := ast.NewObj(ast.Typ, name) + obj.Type = typ + typ.Obj = obj + def(obj) + } + + // built-in functions + // TODO(gri) implement this +} diff --git a/src/pkg/go/types/Makefile b/src/pkg/go/types/Makefile new file mode 100644 index 000000000..4ca707c73 --- /dev/null +++ b/src/pkg/go/types/Makefile @@ -0,0 +1,16 @@ +# Copyright 2010 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=go/types +GOFILES=\ + check.go\ + const.go\ + exportdata.go\ + gcimporter.go\ + types.go\ + universe.go\ + +include ../../../Make.pkg diff --git a/src/pkg/go/types/check.go b/src/pkg/go/types/check.go new file mode 100644 index 000000000..87e3e93da --- /dev/null +++ b/src/pkg/go/types/check.go @@ -0,0 +1,226 @@ +// Copyright 2011 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements the Check function, which typechecks a package. + +package types + +import ( + "fmt" + "go/ast" + "go/scanner" + "go/token" + "os" + "strconv" +) + +const debug = false + +type checker struct { + fset *token.FileSet + scanner.ErrorVector + types map[ast.Expr]Type +} + +func (c *checker) errorf(pos token.Pos, format string, args ...interface{}) string { + msg := fmt.Sprintf(format, args...) + c.Error(c.fset.Position(pos), msg) + return msg +} + +// collectFields collects struct fields tok = token.STRUCT), interface methods +// (tok = token.INTERFACE), and function arguments/results (tok = token.FUNC). +func (c *checker) collectFields(tok token.Token, list *ast.FieldList, cycleOk bool) (fields ObjList, tags []string, isVariadic bool) { + if list != nil { + for _, field := range list.List { + ftype := field.Type + if t, ok := ftype.(*ast.Ellipsis); ok { + ftype = t.Elt + isVariadic = true + } + typ := c.makeType(ftype, cycleOk) + tag := "" + if field.Tag != nil { + assert(field.Tag.Kind == token.STRING) + tag, _ = strconv.Unquote(field.Tag.Value) + } + if len(field.Names) > 0 { + // named fields + for _, name := range field.Names { + obj := name.Obj + obj.Type = typ + fields = append(fields, obj) + if tok == token.STRUCT { + tags = append(tags, tag) + } + } + } else { + // anonymous field + switch tok { + case token.STRUCT: + tags = append(tags, tag) + fallthrough + case token.FUNC: + obj := ast.NewObj(ast.Var, "") + obj.Type = typ + fields = append(fields, obj) + case token.INTERFACE: + utyp := Underlying(typ) + if typ, ok := utyp.(*Interface); ok { + // TODO(gri) This is not good enough. Check for double declarations! + fields = append(fields, typ.Methods...) + } else if _, ok := utyp.(*Bad); !ok { + // if utyp is Bad, don't complain (the root cause was reported before) + c.errorf(ftype.Pos(), "interface contains embedded non-interface type") + } + default: + panic("unreachable") + } + } + } + } + return +} + +// makeType makes a new type for an AST type specification x or returns +// the type referred to by a type name x. If cycleOk is set, a type may +// refer to itself directly or indirectly; otherwise cycles are errors. +// +func (c *checker) makeType(x ast.Expr, cycleOk bool) (typ Type) { + if debug { + fmt.Printf("makeType (cycleOk = %v)\n", cycleOk) + ast.Print(c.fset, x) + defer func() { + fmt.Printf("-> %T %v\n\n", typ, typ) + }() + } + + switch t := x.(type) { + case *ast.BadExpr: + return &Bad{} + + case *ast.Ident: + // type name + obj := t.Obj + if obj == nil { + // unresolved identifier (error has been reported before) + return &Bad{Msg: "unresolved identifier"} + } + if obj.Kind != ast.Typ { + msg := c.errorf(t.Pos(), "%s is not a type", t.Name) + return &Bad{Msg: msg} + } + c.checkObj(obj, cycleOk) + if !cycleOk && obj.Type.(*Name).Underlying == nil { + // TODO(gri) Enable this message again once its position + // is independent of the underlying map implementation. 
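+			// (checkObj is called while ranging over pkg.Scope.Objects, a map,
+			// so the object at which a cycle is first detected, and hence the
+			// reported position, can vary from run to run.)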
+			// msg := c.errorf(obj.Pos(), "illegal cycle in declaration of %s", obj.Name)
+			msg := "illegal cycle"
+			return &Bad{Msg: msg}
+		}
+		return obj.Type.(Type)
+
+	case *ast.ParenExpr:
+		return c.makeType(t.X, cycleOk)
+
+	case *ast.SelectorExpr:
+		// qualified identifier
+		// TODO (gri) eventually, this code belongs to expression
+		//            type checking - here for the time being
+		if ident, ok := t.X.(*ast.Ident); ok {
+			if obj := ident.Obj; obj != nil {
+				if obj.Kind != ast.Pkg {
+					msg := c.errorf(ident.Pos(), "%s is not a package", obj.Name)
+					return &Bad{Msg: msg}
+				}
+				// TODO(gri) we have a package name but don't
+				// have the mapping from package name to package
+				// scope anymore (created in ast.NewPackage).
+				return &Bad{} // for now
+			}
+		}
+		// TODO(gri) can this really happen (the parser should have excluded this)?
+		msg := c.errorf(t.Pos(), "expected qualified identifier")
+		return &Bad{Msg: msg}
+
+	case *ast.StarExpr:
+		return &Pointer{Base: c.makeType(t.X, true)}
+
+	case *ast.ArrayType:
+		if t.Len != nil {
+			// TODO(gri) compute length
+			return &Array{Elt: c.makeType(t.Elt, cycleOk)}
+		}
+		return &Slice{Elt: c.makeType(t.Elt, true)}
+
+	case *ast.StructType:
+		fields, tags, _ := c.collectFields(token.STRUCT, t.Fields, cycleOk)
+		return &Struct{Fields: fields, Tags: tags}
+
+	case *ast.FuncType:
+		params, _, _ := c.collectFields(token.FUNC, t.Params, true)
+		results, _, isVariadic := c.collectFields(token.FUNC, t.Results, true)
+		return &Func{Recv: nil, Params: params, Results: results, IsVariadic: isVariadic}
+
+	case *ast.InterfaceType:
+		methods, _, _ := c.collectFields(token.INTERFACE, t.Methods, cycleOk)
+		methods.Sort()
+		return &Interface{Methods: methods}
+
+	case *ast.MapType:
+		return &Map{Key: c.makeType(t.Key, true), Elt: c.makeType(t.Value, true)}
+
+	case *ast.ChanType:
+		return &Chan{Dir: t.Dir, Elt: c.makeType(t.Value, true)}
+	}
+
+	panic(fmt.Sprintf("unreachable (%T)", x))
+}
+
+// checkObj type checks an object.
+func (c *checker) checkObj(obj *ast.Object, ref bool) {
+	if obj.Type != nil {
+		// object has already been type checked
+		return
+	}
+
+	switch obj.Kind {
+	case ast.Bad:
+		// ignore
+
+	case ast.Con:
+		// TODO(gri) complete this
+
+	case ast.Typ:
+		typ := &Name{Obj: obj}
+		obj.Type = typ // "mark" object so recursion terminates
+		typ.Underlying = Underlying(c.makeType(obj.Decl.(*ast.TypeSpec).Type, ref))
+
+	case ast.Var:
+		// TODO(gri) complete this
+
+	case ast.Fun:
+		// TODO(gri) complete this
+
+	default:
+		panic("unreachable")
+	}
+}
+
+// Check typechecks a package.
+// It augments the AST by assigning types to all ast.Objects and returns a map
+// of types for all expression nodes in statements, and a scanner.ErrorList if
+// there are errors.
+//
+func Check(fset *token.FileSet, pkg *ast.Package) (types map[ast.Expr]Type, err os.Error) {
+	var c checker
+	c.fset = fset
+	c.types = make(map[ast.Expr]Type)
+
+	for _, obj := range pkg.Scope.Objects {
+		c.checkObj(obj, false)
+	}
+
+	return c.types, c.GetError(scanner.NoMultiples)
+}
diff --git a/src/pkg/go/types/check_test.go b/src/pkg/go/types/check_test.go
new file mode 100644
index 000000000..8be653fcb
--- /dev/null
+++ b/src/pkg/go/types/check_test.go
@@ -0,0 +1,215 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements a typechecker test harness. The packages specified
+// in tests are typechecked. 
Error messages reported by the typechecker are +// compared against the error messages expected in the test files. +// +// Expected errors are indicated in the test files by putting a comment +// of the form /* ERROR "rx" */ immediately following an offending token. +// The harness will verify that an error matching the regular expression +// rx is reported at that source position. Consecutive comments may be +// used to indicate multiple errors for the same token position. +// +// For instance, the following test file indicates that a "not declared" +// error should be reported for the undeclared variable x: +// +// package p +// func f() { +// _ = x /* ERROR "not declared" */ + 1 +// } + +package types + +import ( + "fmt" + "go/ast" + "go/parser" + "go/scanner" + "go/token" + "io/ioutil" + "os" + "regexp" + "testing" +) + +// The test filenames do not end in .go so that they are invisible +// to gofmt since they contain comments that must not change their +// positions relative to surrounding tokens. + +var tests = []struct { + name string + files []string +}{ + {"test0", []string{"testdata/test0.src"}}, +} + +var fset = token.NewFileSet() + +// TODO(gri) This functionality should be in token.Fileset. +func getFile(filename string) *token.File { + for f := range fset.Files() { + if f.Name() == filename { + return f + } + } + return nil +} + +// TODO(gri) This functionality should be in token.Fileset. +func getPos(filename string, offset int) token.Pos { + if f := getFile(filename); f != nil { + return f.Pos(offset) + } + return token.NoPos +} + +// TODO(gri) Need to revisit parser interface. We should be able to use parser.ParseFiles +// or a similar function instead. +func parseFiles(t *testing.T, testname string, filenames []string) (map[string]*ast.File, os.Error) { + files := make(map[string]*ast.File) + var errors scanner.ErrorList + for _, filename := range filenames { + if _, exists := files[filename]; exists { + t.Fatalf("%s: duplicate file %s", testname, filename) + } + file, err := parser.ParseFile(fset, filename, nil, parser.DeclarationErrors) + if file == nil { + t.Fatalf("%s: could not parse file %s", testname, filename) + } + files[filename] = file + if err != nil { + // if the parser returns a non-scanner.ErrorList error + // the file couldn't be read in the first place and + // file == nil; in that case we shouldn't reach here + errors = append(errors, err.(scanner.ErrorList)...) + } + + } + return files, errors +} + +// ERROR comments must be of the form /* ERROR "rx" */ and rx is +// a regular expression that matches the expected error message. +// +var errRx = regexp.MustCompile(`^/\* *ERROR *"([^"]*)" *\*/$`) + +// expectedErrors collects the regular expressions of ERROR comments found +// in files and returns them as a map of error positions to error messages. 
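+// Each key is the position of the last non-comment token preceding the
+// ERROR comment, i.e. the position at which the error is expected to be
+// reported.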
+// +func expectedErrors(t *testing.T, testname string, files map[string]*ast.File) map[token.Pos]string { + errors := make(map[token.Pos]string) + for filename := range files { + src, err := ioutil.ReadFile(filename) + if err != nil { + t.Fatalf("%s: could not read %s", testname, filename) + } + + var s scanner.Scanner + // file was parsed already - do not add it again to the file + // set otherwise the position information returned here will + // not match the position information collected by the parser + s.Init(getFile(filename), src, nil, scanner.ScanComments) + var prev token.Pos // position of last non-comment token + + scanFile: + for { + pos, tok, lit := s.Scan() + switch tok { + case token.EOF: + break scanFile + case token.COMMENT: + s := errRx.FindStringSubmatch(lit) + if len(s) == 2 { + errors[prev] = string(s[1]) + } + default: + prev = pos + } + } + } + return errors +} + +func eliminate(t *testing.T, expected map[token.Pos]string, errors os.Error) { + if errors == nil { + return + } + for _, error := range errors.(scanner.ErrorList) { + // error.Pos is a token.Position, but we want + // a token.Pos so we can do a map lookup + // TODO(gri) Need to move scanner.Errors over + // to use token.Pos and file set info. + pos := getPos(error.Pos.Filename, error.Pos.Offset) + if msg, found := expected[pos]; found { + // we expect a message at pos; check if it matches + rx, err := regexp.Compile(msg) + if err != nil { + t.Errorf("%s: %v", error.Pos, err) + continue + } + if match := rx.MatchString(error.Msg); !match { + t.Errorf("%s: %q does not match %q", error.Pos, error.Msg, msg) + continue + } + // we have a match - eliminate this error + expected[pos] = "", false + } else { + // To keep in mind when analyzing failed test output: + // If the same error position occurs multiple times in errors, + // this message will be triggered (because the first error at + // the position removes this position from the expected errors). + t.Errorf("%s: no (multiple?) error expected, but found: %s", error.Pos, error.Msg) + } + } +} + +func check(t *testing.T, testname string, testfiles []string) { + // TODO(gri) Eventually all these different phases should be + // subsumed into a single function call that takes + // a set of files and creates a fully resolved and + // type-checked AST. + + files, err := parseFiles(t, testname, testfiles) + + // we are expecting the following errors + // (collect these after parsing the files so that + // they are found in the file set) + errors := expectedErrors(t, testname, files) + + // verify errors returned by the parser + eliminate(t, errors, err) + + // verify errors returned after resolving identifiers + pkg, err := ast.NewPackage(fset, files, GcImporter, Universe) + eliminate(t, errors, err) + + // verify errors returned by the typechecker + _, err = Check(fset, pkg) + eliminate(t, errors, err) + + // there should be no expected errors left + if len(errors) > 0 { + t.Errorf("%s: %d errors not reported:", testname, len(errors)) + for pos, msg := range errors { + t.Errorf("%s: %s\n", fset.Position(pos), msg) + } + } +} + +func TestCheck(t *testing.T) { + // For easy debugging w/o changing the testing code, + // if there is a local test file, only test that file. + const testfile = "test.go" + if fi, err := os.Stat(testfile); err == nil && fi.IsRegular() { + fmt.Printf("WARNING: Testing only %s (remove it to run all tests)\n", testfile) + check(t, testfile, []string{testfile}) + return + } + + // Otherwise, run all the tests. 
+ for _, test := range tests { + check(t, test.name, test.files) + } +} diff --git a/src/pkg/go/types/const.go b/src/pkg/go/types/const.go new file mode 100644 index 000000000..1ef95d9f9 --- /dev/null +++ b/src/pkg/go/types/const.go @@ -0,0 +1,332 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements operations on ideal constants. + +package types + +import ( + "big" + "go/token" + "strconv" +) + +// TODO(gri) Consider changing the API so Const is an interface +// and operations on consts don't have to type switch. + +// A Const implements an ideal constant Value. +// The zero value for a Const is not a valid constant value. +type Const struct { + // representation of constant values: + // ideal bool -> bool + // ideal int -> *big.Int + // ideal float -> *big.Rat + // ideal complex -> cmplx + // ideal string -> string + val interface{} +} + +// Representation of complex values. +type cmplx struct { + re, im *big.Rat +} + +func assert(cond bool) { + if !cond { + panic("go/types internal error: assertion failed") + } +} + +// MakeConst makes an ideal constant from a literal +// token and the corresponding literal string. +func MakeConst(tok token.Token, lit string) Const { + switch tok { + case token.INT: + var x big.Int + _, ok := x.SetString(lit, 0) + assert(ok) + return Const{&x} + case token.FLOAT: + var y big.Rat + _, ok := y.SetString(lit) + assert(ok) + return Const{&y} + case token.IMAG: + assert(lit[len(lit)-1] == 'i') + var im big.Rat + _, ok := im.SetString(lit[0 : len(lit)-1]) + assert(ok) + return Const{cmplx{big.NewRat(0, 1), &im}} + case token.CHAR: + assert(lit[0] == '\'' && lit[len(lit)-1] == '\'') + code, _, _, err := strconv.UnquoteChar(lit[1:len(lit)-1], '\'') + assert(err == nil) + return Const{big.NewInt(int64(code))} + case token.STRING: + s, err := strconv.Unquote(lit) + assert(err == nil) + return Const{s} + } + panic("unreachable") +} + +// MakeZero returns the zero constant for the given type. +func MakeZero(typ *Type) Const { + // TODO(gri) fix this + return Const{0} +} + +// Match attempts to match the internal constant representations of x and y. +// If the attempt is successful, the result is the values of x and y, +// if necessary converted to have the same internal representation; otherwise +// the results are invalid. +func (x Const) Match(y Const) (u, v Const) { + switch a := x.val.(type) { + case bool: + if _, ok := y.val.(bool); ok { + u, v = x, y + } + case *big.Int: + switch y.val.(type) { + case *big.Int: + u, v = x, y + case *big.Rat: + var z big.Rat + z.SetInt(a) + u, v = Const{&z}, y + case cmplx: + var z big.Rat + z.SetInt(a) + u, v = Const{cmplx{&z, big.NewRat(0, 1)}}, y + } + case *big.Rat: + switch y.val.(type) { + case *big.Int: + v, u = y.Match(x) + case *big.Rat: + u, v = x, y + case cmplx: + u, v = Const{cmplx{a, big.NewRat(0, 1)}}, y + } + case cmplx: + switch y.val.(type) { + case *big.Int, *big.Rat: + v, u = y.Match(x) + case cmplx: + u, v = x, y + } + case string: + if _, ok := y.val.(string); ok { + u, v = x, y + } + default: + panic("unreachable") + } + return +} + +// Convert attempts to convert the constant x to a given type. +// If the attempt is successful, the result is the new constant; +// otherwise the result is invalid.
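+// (Not yet implemented - see the TODO in the body below. As a sketch of +// the intended behavior: converting the ideal int 1 to a float type +// would replace its *big.Int representation by an equivalent *big.Rat.)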
+func (x Const) Convert(typ *Type) Const { + // TODO(gri) implement this + switch x := x.val.(type) { + case bool: + case *big.Int: + case *big.Rat: + case cmplx: + case string: + } + return x +} + +func (x Const) String() string { + switch x := x.val.(type) { + case bool: + if x { + return "true" + } + return "false" + case *big.Int: + return x.String() + case *big.Rat: + return x.FloatString(10) // 10 digits of precision after decimal point seems fine + case cmplx: + // TODO(gri) don't print 0 components + return x.re.FloatString(10) + " + " + x.im.FloatString(10) + "i" + case string: + return x + } + panic("unreachable") +} + +func (x Const) UnaryOp(op token.Token) Const { + panic("unimplemented") +} + +func (x Const) BinaryOp(op token.Token, y Const) Const { + var z interface{} + switch x := x.val.(type) { + case bool: + z = binaryBoolOp(x, op, y.val.(bool)) + case *big.Int: + z = binaryIntOp(x, op, y.val.(*big.Int)) + case *big.Rat: + z = binaryFloatOp(x, op, y.val.(*big.Rat)) + case cmplx: + z = binaryCmplxOp(x, op, y.val.(cmplx)) + case string: + z = binaryStringOp(x, op, y.val.(string)) + default: + panic("unreachable") + } + return Const{z} +} + +func binaryBoolOp(x bool, op token.Token, y bool) interface{} { + switch op { + case token.EQL: + return x == y + case token.NEQ: + return x != y + } + panic("unreachable") +} + +func binaryIntOp(x *big.Int, op token.Token, y *big.Int) interface{} { + var z big.Int + switch op { + case token.ADD: + return z.Add(x, y) + case token.SUB: + return z.Sub(x, y) + case token.MUL: + return z.Mul(x, y) + case token.QUO: + return z.Quo(x, y) + case token.REM: + return z.Rem(x, y) + case token.AND: + return z.And(x, y) + case token.OR: + return z.Or(x, y) + case token.XOR: + return z.Xor(x, y) + case token.AND_NOT: + return z.AndNot(x, y) + case token.SHL: + panic("unimplemented") + case token.SHR: + panic("unimplemented") + case token.EQL: + return x.Cmp(y) == 0 + case token.NEQ: + return x.Cmp(y) != 0 + case token.LSS: + return x.Cmp(y) < 0 + case token.LEQ: + return x.Cmp(y) <= 0 + case token.GTR: + return x.Cmp(y) > 0 + case token.GEQ: + return x.Cmp(y) >= 0 + } + panic("unreachable") +} + +func binaryFloatOp(x *big.Rat, op token.Token, y *big.Rat) interface{} { + var z big.Rat + switch op { + case token.ADD: + return z.Add(x, y) + case token.SUB: + return z.Sub(x, y) + case token.MUL: + return z.Mul(x, y) + case token.QUO: + return z.Quo(x, y) + case token.EQL: + return x.Cmp(y) == 0 + case token.NEQ: + return x.Cmp(y) != 0 + case token.LSS: + return x.Cmp(y) < 0 + case token.LEQ: + return x.Cmp(y) <= 0 + case token.GTR: + return x.Cmp(y) > 0 + case token.GEQ: + return x.Cmp(y) >= 0 + } + panic("unreachable") +} + +func binaryCmplxOp(x cmplx, op token.Token, y cmplx) interface{} { + a, b := x.re, x.im + c, d := y.re, y.im + switch op { + case token.ADD: + // (a+c) + i(b+d) + var re, im big.Rat + re.Add(a, c) + im.Add(b, d) + return cmplx{&re, &im} + case token.SUB: + // (a-c) + i(b-d) + var re, im big.Rat + re.Sub(a, c) + im.Sub(b, d) + return cmplx{&re, &im} + case token.MUL: + // (ac-bd) + i(bc+ad) + var ac, bd, bc, ad big.Rat + ac.Mul(a, c) + bd.Mul(b, d) + bc.Mul(b, c) + ad.Mul(a, d) + var re, im big.Rat + re.Sub(&ac, &bd) + im.Add(&bc, &ad) + return cmplx{&re, &im} + case token.QUO: + // (ac+bd)/s + i(bc-ad)/s, with s = cc + dd + var ac, bd, bc, ad, s big.Rat + ac.Mul(a, c) + bd.Mul(b, d) + bc.Mul(b, c) + ad.Mul(a, d) + s.Add(c.Mul(c, c), d.Mul(d, d)) + var re, im big.Rat + re.Add(&ac, &bd) + re.Quo(&re, &s) + im.Sub(&bc, &ad) + 
im.Quo(&im, &s) + return cmplx{&re, &im} + case token.EQL: + return a.Cmp(c) == 0 && b.Cmp(d) == 0 + case token.NEQ: + return a.Cmp(c) != 0 || b.Cmp(d) != 0 + } + panic("unreachable") +} + +func binaryStringOp(x string, op token.Token, y string) interface{} { + switch op { + case token.ADD: + return x + y + case token.EQL: + return x == y + case token.NEQ: + return x != y + case token.LSS: + return x < y + case token.LEQ: + return x <= y + case token.GTR: + return x > y + case token.GEQ: + return x >= y + } + panic("unreachable") +} diff --git a/src/pkg/go/types/exportdata.go b/src/pkg/go/types/exportdata.go new file mode 100644 index 000000000..383520320 --- /dev/null +++ b/src/pkg/go/types/exportdata.go @@ -0,0 +1,132 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements ExportData. + +package types + +import ( + "bufio" + "fmt" + "io" + "os" + "strconv" + "strings" +) + +func readGopackHeader(buf *bufio.Reader) (name string, size int, err os.Error) { + // See $GOROOT/include/ar.h. + hdr := make([]byte, 64+12+6+6+8+10+2) + _, err = io.ReadFull(buf, hdr) + if err != nil { + return + } + if trace { + fmt.Printf("header: %s", hdr) + } + s := strings.TrimSpace(string(hdr[64+12+6+6+8:][:10])) + size, err = strconv.Atoi(s) + if err != nil || hdr[len(hdr)-2] != '`' || hdr[len(hdr)-1] != '\n' { + err = os.NewError("invalid archive header") + return + } + name = strings.TrimSpace(string(hdr[:64])) + return +} + +type dataReader struct { + *bufio.Reader + io.Closer +} + +// ExportData returns a readCloser positioned at the beginning of the +// export data section of the given object/archive file, or an error. +// It is the caller's responsibility to close the readCloser. +// +func ExportData(filename string) (rc io.ReadCloser, err os.Error) { + file, err := os.Open(filename) + if err != nil { + return + } + + defer func() { + if err != nil { + file.Close() + // Add file name to error. + err = fmt.Errorf("reading export data: %s: %v", filename, err) + } + }() + + buf := bufio.NewReader(file) + + // Read first line to make sure this is an object file. + line, err := buf.ReadSlice('\n') + if err != nil { + return + } + if string(line) == "!<arch>\n" { + // Archive file. Scan to __.PKGDEF, which should + // be second archive entry. + var name string + var size int + + // First entry should be __.SYMDEF. + // Read and discard. + if name, size, err = readGopackHeader(buf); err != nil { + return + } + if name != "__.SYMDEF" { + err = os.NewError("go archive does not begin with __.SYMDEF") + return + } + const block = 4096 + tmp := make([]byte, block) + for size > 0 { + n := size + if n > block { + n = block + } + _, err = io.ReadFull(buf, tmp[:n]) + if err != nil { + return + } + size -= n + } + + // Second entry should be __.PKGDEF. + if name, size, err = readGopackHeader(buf); err != nil { + return + } + if name != "__.PKGDEF" { + err = os.NewError("go archive is missing __.PKGDEF") + return + } + + // Read first line of __.PKGDEF data, so that line + // is once again the first line of the input. + line, err = buf.ReadSlice('\n') + if err != nil { + return + } + } + + // Now at __.PKGDEF in archive or still at beginning of file. + // Either way, line should begin with "go object ". + if !strings.HasPrefix(string(line), "go object ") { + err = os.NewError("not a go object file") + return + } + + // Skip over object header to export data. 
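+ // A sketch of the layout assumed here (header details vary by + // release): a first line "go object $GOOS $GOARCH ...", followed by + // ignored metadata lines, followed by a line starting with "$$" that + // introduces the export data.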
+ // Begins after first line with $$. + for line[0] != '$' { + line, err = buf.ReadSlice('\n') + if err != nil { + return + } + } + + rc = &dataReader{buf, file} + return +} diff --git a/src/pkg/go/types/gcimporter.go b/src/pkg/go/types/gcimporter.go new file mode 100644 index 000000000..6ab1806b6 --- /dev/null +++ b/src/pkg/go/types/gcimporter.go @@ -0,0 +1,799 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements an ast.Importer for gc generated object files. +// TODO(gri) Eventually move this into a separate package outside types. + +package types + +import ( + "big" + "fmt" + "go/ast" + "go/token" + "io" + "os" + "path/filepath" + "runtime" + "scanner" + "strconv" +) + +const trace = false // set to true for debugging + +var ( + pkgRoot = filepath.Join(runtime.GOROOT(), "pkg", runtime.GOOS+"_"+runtime.GOARCH) + pkgExts = [...]string{".a", ".5", ".6", ".8"} +) + +// findPkg returns the filename and package id for an import path. +// If no file was found, an empty filename is returned. +func findPkg(path string) (filename, id string) { + if len(path) == 0 { + return + } + + id = path + var noext string + switch path[0] { + default: + // "x" -> "$GOROOT/pkg/$GOOS_$GOARCH/x.ext", "x" + noext = filepath.Join(pkgRoot, path) + + case '.': + // "./x" -> "/this/directory/x.ext", "/this/directory/x" + cwd, err := os.Getwd() + if err != nil { + return + } + noext = filepath.Join(cwd, path) + id = noext + + case '/': + // "/x" -> "/x.ext", "/x" + noext = path + } + + // try extensions + for _, ext := range pkgExts { + filename = noext + ext + if f, err := os.Stat(filename); err == nil && f.IsRegular() { + return + } + } + + filename = "" // not found + return +} + +// gcParser parses the exports inside a gc compiler-produced +// object/archive file and populates its scope with the results. +type gcParser struct { + scanner scanner.Scanner + tok int // current token + lit string // literal string; only valid for Ident, Int, String tokens + id string // package id of imported package + imports map[string]*ast.Object // package id -> package object +} + +func (p *gcParser) init(filename, id string, src io.Reader, imports map[string]*ast.Object) { + p.scanner.Init(src) + p.scanner.Error = func(_ *scanner.Scanner, msg string) { p.error(msg) } + p.scanner.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanStrings | scanner.ScanComments | scanner.SkipComments + p.scanner.Whitespace = 1<<'\t' | 1<<' ' + p.scanner.Filename = filename // for good error messages + p.next() + p.id = id + p.imports = imports +} + +func (p *gcParser) next() { + p.tok = p.scanner.Scan() + switch p.tok { + case scanner.Ident, scanner.Int, scanner.String: + p.lit = p.scanner.TokenText() + default: + p.lit = "" + } + if trace { + fmt.Printf("%s: %q -> %q\n", scanner.TokenString(p.tok), p.scanner.TokenText(), p.lit) + } +} + +// GcImporter implements the ast.Importer signature. 
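+// A typical call (sketch; the import path is just an example): +// +// imports := make(map[string]*ast.Object) +// pkg, err := GcImporter(imports, "container/vector") +//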
+func GcImporter(imports map[string]*ast.Object, path string) (pkg *ast.Object, err os.Error) { + if path == "unsafe" { + return Unsafe, nil + } + + defer func() { + if r := recover(); r != nil { + err = r.(importError) // will re-panic if r is not an importError + if trace { + panic(err) // force a stack trace + } + } + }() + + filename, id := findPkg(path) + if filename == "" { + err = os.NewError("can't find import: " + id) + return + } + + if pkg = imports[id]; pkg != nil { + return // package was imported before + } + + buf, err := ExportData(filename) + if err != nil { + return + } + defer buf.Close() + + if trace { + fmt.Printf("importing %s (%s)\n", id, filename) + } + + var p gcParser + p.init(filename, id, buf, imports) + pkg = p.parseExport() + return +} + +// ---------------------------------------------------------------------------- +// Error handling + +// Internal errors are boxed as importErrors. +type importError struct { + pos scanner.Position + err os.Error +} + +func (e importError) String() string { + return fmt.Sprintf("import error %s (byte offset = %d): %s", e.pos, e.pos.Offset, e.err) +} + +func (p *gcParser) error(err interface{}) { + if s, ok := err.(string); ok { + err = os.NewError(s) + } + // panic with a runtime.Error if err is not an os.Error + panic(importError{p.scanner.Pos(), err.(os.Error)}) +} + +func (p *gcParser) errorf(format string, args ...interface{}) { + p.error(fmt.Sprintf(format, args...)) +} + +func (p *gcParser) expect(tok int) string { + lit := p.lit + if p.tok != tok { + p.errorf("expected %q, got %q (%q)", scanner.TokenString(tok), scanner.TokenString(p.tok), lit) + } + p.next() + return lit +} + +func (p *gcParser) expectSpecial(tok string) { + sep := 'x' // not white space + i := 0 + for i < len(tok) && p.tok == int(tok[i]) && sep > ' ' { + sep = p.scanner.Peek() // if sep <= ' ', there is white space before the next token + p.next() + i++ + } + if i < len(tok) { + p.errorf("expected %q, got %q", tok, tok[0:i]) + } +} + +func (p *gcParser) expectKeyword(keyword string) { + lit := p.expect(scanner.Ident) + if lit != keyword { + p.errorf("expected keyword %s, got %q", keyword, lit) + } +} + +// ---------------------------------------------------------------------------- +// Import declarations + +// ImportPath = string_lit . +// +func (p *gcParser) parsePkgId() *ast.Object { + id, err := strconv.Unquote(p.expect(scanner.String)) + if err != nil { + p.error(err) + } + + switch id { + case "": + // id == "" stands for the imported package id + // (only known at time of package installation) + id = p.id + case "unsafe": + // package unsafe is not in the imports map - handle explicitly + return Unsafe + } + + pkg := p.imports[id] + if pkg == nil { + scope = ast.NewScope(nil) + pkg = ast.NewObj(ast.Pkg, "") + pkg.Data = scope + p.imports[id] = pkg + } + + return pkg +} + +// dotIdentifier = ( ident | '·' ) { ident | int | '·' } . +func (p *gcParser) parseDotIdent() string { + ident := "" + if p.tok != scanner.Int { + sep := 'x' // not white space + for (p.tok == scanner.Ident || p.tok == scanner.Int || p.tok == '·') && sep > ' ' { + ident += p.lit + sep = p.scanner.Peek() // if sep <= ' ', there is white space before the next token + p.next() + } + } + if ident == "" { + p.expect(scanner.Ident) // use expect() for error handling + } + return ident +} + +// ExportedName = ImportPath "." dotIdentifier . 
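+// For example (illustrative), the exported name Pos of package go/token +// appears in the export data as: +// +// "go/token".Pos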
+// +func (p *gcParser) parseExportedName(kind ast.ObjKind) *ast.Object { + pkg := p.parsePkgId() + p.expect('.') + name := p.parseDotIdent() + + // a type may have been declared before - if it exists + // already in the respective package scope, return that + // type + scope := pkg.Data.(*ast.Scope) + if kind == ast.Typ { + if obj := scope.Lookup(name); obj != nil { + assert(obj.Kind == ast.Typ) + return obj + } + } + + // any other object must be a newly declared object - + // create it and insert it into the package scope + obj := ast.NewObj(kind, name) + if scope.Insert(obj) != nil { + p.errorf("already declared: %s", obj.Name) + } + + // a new type object is a named type and may be referred + // to before the underlying type is known - set it up + if kind == ast.Typ { + obj.Type = &Name{Obj: obj} + } + + return obj +} + +// ---------------------------------------------------------------------------- +// Types + +// BasicType = identifier . +// +func (p *gcParser) parseBasicType() Type { + obj := Universe.Lookup(p.expect(scanner.Ident)) + if obj == nil || obj.Kind != ast.Typ { + p.errorf("not a basic type: %s", obj.Name) + } + return obj.Type.(Type) +} + +// ArrayType = "[" int_lit "]" Type . +// +func (p *gcParser) parseArrayType() Type { + // "[" already consumed and lookahead known not to be "]" + lit := p.expect(scanner.Int) + p.expect(']') + elt := p.parseType() + n, err := strconv.Atoui64(lit) + if err != nil { + p.error(err) + } + return &Array{Len: n, Elt: elt} +} + +// MapType = "map" "[" Type "]" Type . +// +func (p *gcParser) parseMapType() Type { + p.expectKeyword("map") + p.expect('[') + key := p.parseType() + p.expect(']') + elt := p.parseType() + return &Map{Key: key, Elt: elt} +} + +// Name = identifier | "?" . +// +func (p *gcParser) parseName() (name string) { + switch p.tok { + case scanner.Ident: + name = p.lit + p.next() + case '?': + // anonymous + p.next() + default: + p.error("name expected") + } + return +} + +// Field = Name Type [ ":" string_lit ] . +// +func (p *gcParser) parseField() (fld *ast.Object, tag string) { + name := p.parseName() + ftyp := p.parseType() + if name == "" { + // anonymous field - ftyp must be T or *T and T must be a type name + if _, ok := Deref(ftyp).(*Name); !ok { + p.errorf("anonymous field expected") + } + } + if p.tok == ':' { + p.next() + tag = p.expect(scanner.String) + } + fld = ast.NewObj(ast.Var, name) + fld.Type = ftyp + return +} + +// StructType = "struct" "{" [ FieldList ] "}" . +// FieldList = Field { ";" Field } . +// +func (p *gcParser) parseStructType() Type { + var fields []*ast.Object + var tags []string + + parseField := func() { + fld, tag := p.parseField() + fields = append(fields, fld) + tags = append(tags, tag) + } + + p.expectKeyword("struct") + p.expect('{') + if p.tok != '}' { + parseField() + for p.tok == ';' { + p.next() + parseField() + } + } + p.expect('}') + + return &Struct{Fields: fields, Tags: tags} +} + +// Parameter = ( identifier | "?" ) [ "..." ] Type [ ":" string_lit ] . +// +func (p *gcParser) parseParameter() (par *ast.Object, isVariadic bool) { + name := p.parseName() + if name == "" { + name = "_" // cannot access unnamed identifiers + } + if p.tok == '.' { + p.expectSpecial("...") + isVariadic = true + } + ptyp := p.parseType() + // ignore argument tag + if p.tok == ':' { + p.next() + p.expect(scanner.String) + } + par = ast.NewObj(ast.Var, name) + par.Type = ptyp + return +} + +// Parameters = "(" [ ParameterList ] ")" . +// ParameterList = { Parameter "," } Parameter . 
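+// For instance (illustrative), a list with one named parameter and one +// anonymous variadic parameter could read: +// +// (x int, ? ...string)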
+// +func (p *gcParser) parseParameters() (list []*ast.Object, isVariadic bool) { + parseParameter := func() { + par, variadic := p.parseParameter() + list = append(list, par) + if variadic { + if isVariadic { + p.error("... not on final argument") + } + isVariadic = true + } + } + + p.expect('(') + if p.tok != ')' { + parseParameter() + for p.tok == ',' { + p.next() + parseParameter() + } + } + p.expect(')') + + return +} + +// Signature = Parameters [ Result ] . +// Result = Type | Parameters . +// +func (p *gcParser) parseSignature() *Func { + params, isVariadic := p.parseParameters() + + // optional result type + var results []*ast.Object + switch p.tok { + case scanner.Ident, scanner.String, '[', '*', '<': + // single, unnamed result + result := ast.NewObj(ast.Var, "_") + result.Type = p.parseType() + results = []*ast.Object{result} + case '(': + // named or multiple result(s) + var variadic bool + results, variadic = p.parseParameters() + if variadic { + p.error("... not permitted on result type") + } + } + + return &Func{Params: params, Results: results, IsVariadic: isVariadic} +} + +// MethodSpec = identifier Signature . +// +func (p *gcParser) parseMethodSpec() *ast.Object { + if p.tok == scanner.Ident { + p.expect(scanner.Ident) + } else { + // TODO(gri) should this be parseExportedName here? + p.parsePkgId() + p.expect('.') + p.parseDotIdent() + } + p.parseSignature() + + // TODO(gri) compute method object + return ast.NewObj(ast.Fun, "_") +} + +// InterfaceType = "interface" "{" [ MethodList ] "}" . +// MethodList = MethodSpec { ";" MethodSpec } . +// +func (p *gcParser) parseInterfaceType() Type { + var methods ObjList + + parseMethod := func() { + meth := p.parseMethodSpec() + methods = append(methods, meth) + } + + p.expectKeyword("interface") + p.expect('{') + if p.tok != '}' { + parseMethod() + for p.tok == ';' { + p.next() + parseMethod() + } + } + p.expect('}') + + methods.Sort() + return &Interface{Methods: methods} +} + +// ChanType = ( "chan" [ "<-" ] | "<-" "chan" ) Type . +// +func (p *gcParser) parseChanType() Type { + dir := ast.SEND | ast.RECV + if p.tok == scanner.Ident { + p.expectKeyword("chan") + if p.tok == '<' { + p.expectSpecial("<-") + dir = ast.SEND + } + } else { + p.expectSpecial("<-") + p.expectKeyword("chan") + dir = ast.RECV + } + elt := p.parseType() + return &Chan{Dir: dir, Elt: elt} +} + +// Type = +// BasicType | TypeName | ArrayType | SliceType | StructType | +// PointerType | FuncType | InterfaceType | MapType | ChanType | +// "(" Type ")" . +// BasicType = ident . +// TypeName = ExportedName . +// SliceType = "[" "]" Type . +// PointerType = "*" Type . +// FuncType = "func" Signature . 
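+// Some forms accepted here (illustrative): +// +// []int // SliceType +// *"go/ast".Node // PointerType of a TypeName +// map[string]int // MapType +// <-chan bool // ChanType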
+// +func (p *gcParser) parseType() Type { + switch p.tok { + case scanner.Ident: + switch p.lit { + default: + return p.parseBasicType() + case "struct": + return p.parseStructType() + case "func": + // FuncType + p.next() + return p.parseSignature() + case "interface": + return p.parseInterfaceType() + case "map": + return p.parseMapType() + case "chan": + return p.parseChanType() + } + case scanner.String: + // TypeName + return p.parseExportedName(ast.Typ).Type.(Type) + case '[': + p.next() // look ahead + if p.tok == ']' { + // SliceType + p.next() + return &Slice{Elt: p.parseType()} + } + return p.parseArrayType() + case '*': + // PointerType + p.next() + return &Pointer{Base: p.parseType()} + case '<': + return p.parseChanType() + case '(': + // "(" Type ")" + p.next() + typ := p.parseType() + p.expect(')') + return typ + } + p.errorf("expected type, got %s (%q)", scanner.TokenString(p.tok), p.lit) + return nil +} + +// ---------------------------------------------------------------------------- +// Declarations + +// ImportDecl = "import" identifier string_lit . +// +func (p *gcParser) parseImportDecl() { + p.expectKeyword("import") + // The identifier has no semantic meaning in the import data. + // It exists so that error messages can print the real package + // name: binary.ByteOrder instead of "encoding/binary".ByteOrder. + name := p.expect(scanner.Ident) + pkg := p.parsePkgId() + assert(pkg.Name == "" || pkg.Name == name) + pkg.Name = name +} + +// int_lit = [ "+" | "-" ] { "0" ... "9" } . +// +func (p *gcParser) parseInt() (sign, val string) { + switch p.tok { + case '-': + p.next() + sign = "-" + case '+': + p.next() + } + val = p.expect(scanner.Int) + return +} + +// number = int_lit [ "p" int_lit ] . +// +func (p *gcParser) parseNumber() Const { + // mantissa + sign, val := p.parseInt() + mant, ok := new(big.Int).SetString(sign+val, 10) + assert(ok) + + if p.lit == "p" { + // exponent (base 2) + p.next() + sign, val = p.parseInt() + exp, err := strconv.Atoui(val) + if err != nil { + p.error(err) + } + if sign == "-" { + denom := big.NewInt(1) + denom.Lsh(denom, exp) + return Const{new(big.Rat).SetFrac(mant, denom)} + } + if exp > 0 { + mant.Lsh(mant, exp) + } + return Const{new(big.Rat).SetInt(mant)} + } + + return Const{mant} +} + +// ConstDecl = "const" ExportedName [ Type ] "=" Literal . +// Literal = bool_lit | int_lit | float_lit | complex_lit | string_lit . +// bool_lit = "true" | "false" . +// complex_lit = "(" float_lit "+" float_lit ")" . +// string_lit = `"` { unicode_char } `"` . 
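+// Examples in this form (illustrative; package p is hypothetical): +// +// const "p".B = true +// const "p".N int = -42 +// const "p".F = 314159p-16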
+// +func (p *gcParser) parseConstDecl() { + p.expectKeyword("const") + obj := p.parseExportedName(ast.Con) + var x Const + var typ Type + if p.tok != '=' { + obj.Type = p.parseType() + } + p.expect('=') + switch p.tok { + case scanner.Ident: + // bool_lit + if p.lit != "true" && p.lit != "false" { + p.error("expected true or false") + } + x = Const{p.lit == "true"} + typ = Bool.Underlying + p.next() + case '-', scanner.Int: + // int_lit + x = p.parseNumber() + typ = Int.Underlying + if _, ok := x.val.(*big.Rat); ok { + typ = Float64.Underlying + } + case '(': + // complex_lit + p.next() + re := p.parseNumber() + p.expect('+') + im := p.parseNumber() + p.expect(')') + x = Const{cmplx{re.val.(*big.Rat), im.val.(*big.Rat)}} + typ = Complex128.Underlying + case scanner.String: + // string_lit + x = MakeConst(token.STRING, p.lit) + p.next() + typ = String.Underlying + default: + p.error("expected literal") + } + if obj.Type == nil { + obj.Type = typ + } + obj.Data = x +} + +// TypeDecl = "type" ExportedName Type . +// +func (p *gcParser) parseTypeDecl() { + p.expectKeyword("type") + obj := p.parseExportedName(ast.Typ) + + // The type object may have been imported before and thus already + // have a type associated with it. We still need to parse the type + // structure, but throw it away if the object already has a type. + // This ensures that all imports refer to the same type object for + // a given type declaration. + typ := p.parseType() + + if name := obj.Type.(*Name); name.Underlying == nil { + assert(Underlying(typ) == typ) + name.Underlying = typ + } +} + +// VarDecl = "var" ExportedName Type . +// +func (p *gcParser) parseVarDecl() { + p.expectKeyword("var") + obj := p.parseExportedName(ast.Var) + obj.Type = p.parseType() +} + +// FuncDecl = "func" ExportedName Signature . +// +func (p *gcParser) parseFuncDecl() { + // "func" already consumed + obj := p.parseExportedName(ast.Fun) + obj.Type = p.parseSignature() +} + +// MethodDecl = "func" Receiver identifier Signature . +// Receiver = "(" ( identifier | "?" ) [ "*" ] ExportedName ")" . +// +func (p *gcParser) parseMethodDecl() { + // "func" already consumed + p.expect('(') + p.parseParameter() // receiver + p.expect(')') + p.expect(scanner.Ident) + p.parseSignature() +} + +// Decl = [ ImportDecl | ConstDecl | TypeDecl | VarDecl | FuncDecl | MethodDecl ] "\n" . +// +func (p *gcParser) parseDecl() { + switch p.lit { + case "import": + p.parseImportDecl() + case "const": + p.parseConstDecl() + case "type": + p.parseTypeDecl() + case "var": + p.parseVarDecl() + case "func": + p.next() // look ahead + if p.tok == '(' { + p.parseMethodDecl() + } else { + p.parseFuncDecl() + } + } + p.expect('\n') +} + +// ---------------------------------------------------------------------------- +// Export + +// Export = "PackageClause { Decl } "$$" . +// PackageClause = "package" identifier [ "safe" ] "\n" . +// +func (p *gcParser) parseExport() *ast.Object { + p.expectKeyword("package") + name := p.expect(scanner.Ident) + if p.tok != '\n' { + // A package is safe if it was compiled with the -u flag, + // which disables the unsafe package. 
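+ // For example (illustrative), the package clause of such a package + // would read: + // + // package foo safe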
+ // TODO(gri) remember "safe" package + p.expectKeyword("safe") + } + p.expect('\n') + + assert(p.imports[p.id] == nil) + pkg := ast.NewObj(ast.Pkg, name) + pkg.Data = ast.NewScope(nil) + p.imports[p.id] = pkg + + for p.tok != '$' && p.tok != scanner.EOF { + p.parseDecl() + } + + if ch := p.scanner.Peek(); p.tok != '$' || ch != '$' { + // don't call next()/expect() since reading past the + // export data may cause scanner errors (e.g. NUL chars) + p.errorf("expected '$$', got %s %c", scanner.TokenString(p.tok), ch) + } + + if n := p.scanner.ErrorCount; n != 0 { + p.errorf("expected no scanner errors, got %d", n) + } + + return pkg +} diff --git a/src/pkg/go/types/gcimporter_test.go b/src/pkg/go/types/gcimporter_test.go new file mode 100644 index 000000000..ec87f5d51 --- /dev/null +++ b/src/pkg/go/types/gcimporter_test.go @@ -0,0 +1,101 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package types + +import ( + "exec" + "go/ast" + "io/ioutil" + "path/filepath" + "runtime" + "strings" + "testing" + "time" +) + +var gcName, gcPath string // compiler name and path + +func init() { + // determine compiler + switch runtime.GOARCH { + case "386": + gcName = "8g" + case "amd64": + gcName = "6g" + case "arm": + gcName = "5g" + default: + gcName = "unknown-GOARCH-compiler" + gcPath = gcName + return + } + gcPath, _ = exec.LookPath(gcName) +} + +func compile(t *testing.T, dirname, filename string) { + cmd := exec.Command(gcPath, filename) + cmd.Dir = dirname + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("%s %s failed: %s", gcName, filename, err) + return + } + t.Logf("%s", string(out)) +} + +// Use the same global imports map for all tests. The effect is +// as if all tested packages were imported into a single package. +var imports = make(map[string]*ast.Object) + +func testPath(t *testing.T, path string) bool { + _, err := GcImporter(imports, path) + if err != nil { + t.Errorf("testPath(%s): %s", path, err) + return false + } + return true +} + +const maxTime = 3e9 // maximum allotted testing time in ns + +func testDir(t *testing.T, dir string, endTime int64) (nimports int) { + dirname := filepath.Join(pkgRoot, dir) + list, err := ioutil.ReadDir(dirname) + if err != nil { + t.Errorf("testDir(%s): %s", dirname, err) + } + for _, f := range list { + if time.Nanoseconds() >= endTime { + t.Log("testing time used up") + return + } + switch { + case f.IsRegular(): + // try extensions + for _, ext := range pkgExts { + if strings.HasSuffix(f.Name, ext) { + name := f.Name[0 : len(f.Name)-len(ext)] // remove extension + if testPath(t, filepath.Join(dir, name)) { + nimports++ + } + } + } + case f.IsDirectory(): + nimports += testDir(t, filepath.Join(dir, f.Name), endTime) + } + } + return +} + +func TestGcImport(t *testing.T) { + compile(t, "testdata", "exports.go") + + nimports := 0 + if testPath(t, "./testdata/exports") { + nimports++ + } + nimports += testDir(t, "", time.Nanoseconds()+maxTime) // installed packages + t.Logf("tested %d imports", nimports) +} diff --git a/src/pkg/go/types/testdata/exports.go b/src/pkg/go/types/testdata/exports.go new file mode 100644 index 000000000..ed63bf9ad --- /dev/null +++ b/src/pkg/go/types/testdata/exports.go @@ -0,0 +1,84 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// This file is used to generate an object file which +// serves as test file for gcimporter_test.go. + +package exports + +import ( + "go/ast" +) + +const ( + C0 int = 0 + C1 = 3.14159265 + C2 = 2.718281828i + C3 = -123.456e-789 + C4 = +123.456E+789 + C5 = 1234i + C6 = "foo\n" + C7 = `bar\n` +) + +type ( + T1 int + T2 [10]int + T3 []int + T4 *int + T5 chan int + T6a chan<- int + T6b chan (<-chan int) + T6c chan<- (chan int) + T7 <-chan *ast.File + T8 struct{} + T9 struct { + a int + b, c float32 + d []string `go:"tag"` + } + T10 struct { + T8 + T9 + _ *T10 + } + T11 map[int]string + T12 interface{} + T13 interface { + m1() + m2(int) float32 + } + T14 interface { + T12 + T13 + m3(x ...struct{}) []T9 + } + T15 func() + T16 func(int) + T17 func(x int) + T18 func() float32 + T19 func() (x float32) + T20 func(...interface{}) + T21 struct{ next *T21 } + T22 struct{ link *T23 } + T23 struct{ link *T22 } + T24 *T24 + T25 *T26 + T26 *T27 + T27 *T25 + T28 func(T28) T28 +) + +var ( + V0 int + V1 = -991.0 +) + +func F1() {} +func F2(x int) {} +func F3() int { return 0 } +func F4() float32 { return 0 } +func F5(a, b, c int, u, v, w struct{ x, y T1 }, more ...interface{}) (p, q, r chan<- T10) + +func (p *T1) M1() diff --git a/src/pkg/go/types/testdata/test0.src b/src/pkg/go/types/testdata/test0.src new file mode 100644 index 000000000..84a1abe27 --- /dev/null +++ b/src/pkg/go/types/testdata/test0.src @@ -0,0 +1,154 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// type declarations + +package test0 + +import "unsafe" + +const pi = 3.1415 + +type ( + N undeclared /* ERROR "undeclared" */ + B bool + I int32 + A [10]P + T struct { + x, y P + } + P *T + R (*R) + F func(A) I + Y interface { + f(A) I + } + S [](((P))) + M map[I]F + C chan<- I +) + + +type ( + p1 pi /* ERROR "not a package" */ .foo + p2 unsafe.Pointer +) + + +type ( + Pi pi /* ERROR "not a type" */ + + a /* DISABLED "illegal cycle" */ a + a /* ERROR "redeclared" */ int + + // where the cycle error appears depends on the + // order in which declarations are processed + // (which depends on the order in which a map + // is iterated through) + b c + c /* DISABLED "illegal cycle" */ d + d e + e b + + t *t + + U V + V *W + W U + + P1 *S2 + P2 P1 + + S0 struct { + } + S1 struct { + a, b, c int + u, v, a /* ERROR "redeclared" */ float32 + } + S2 struct { + U // anonymous field + // TODO(gri) recognize double-declaration below + // U /* ERROR "redeclared" */ int + } + S3 struct { + x S2 + } + S4/* DISABLED "illegal cycle" */ struct { + S4 + } + S5 struct { + S6 + } + S6 /* DISABLED "illegal cycle" */ struct { + field S7 + } + S7 struct { + S5 + } + + L1 []L1 + L2 []int + + A1 [10]int + A2 /* DISABLED "illegal cycle" */ [10]A2 + A3 /* DISABLED "illegal cycle" */ [10]struct { + x A4 + } + A4 [10]A3 + + F1 func() + F2 func(x, y, z float32) + F3 func(x, y, x /* ERROR "redeclared" */ float32) + F4 func() (x, y, x /* ERROR "redeclared" */ float32) + F5 func(x int) (x /* ERROR "redeclared" */ float32) + F6 func(x ...int) + + I1 interface{} + I2 interface { + m1() + } + I3 interface { + m1() + m1 /* ERROR "redeclared" */ () + } + I4 interface { + m1(x, y, x /* ERROR "redeclared" */ float32) + m2() (x, y, x /* ERROR "redeclared" */ float32) + m3(x int) (x /* ERROR "redeclared" */ float32) + } + I5 interface { + m1(I5) + } + I6 interface { + S0 /* ERROR "non-interface" */ + } + I7 interface { + I1 + I1 + } + I8 /* DISABLED "illegal cycle" 
*/ interface { + I8 + } + I9 /* DISABLED "illegal cycle" */ interface { + I10 + } + I10 interface { + I11 + } + I11 interface { + I9 + } + + C1 chan int + C2 <-chan int + C3 chan<- C3 + C4 chan C5 + C5 chan C6 + C6 chan C4 + + M1 map[Last]string + M2 map[string]M2 + + Last int +) diff --git a/src/pkg/go/types/types.go b/src/pkg/go/types/types.go new file mode 100644 index 000000000..3aa896892 --- /dev/null +++ b/src/pkg/go/types/types.go @@ -0,0 +1,255 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// PACKAGE UNDER CONSTRUCTION. ANY AND ALL PARTS MAY CHANGE. +// Package types declares the types used to represent Go types. +// +package types + +import ( + "go/ast" + "sort" +) + +// All types implement the Type interface. +type Type interface { + isType() +} + +// All concrete types embed ImplementsType which +// ensures that all types implement the Type interface. +type ImplementsType struct{} + +func (t *ImplementsType) isType() {} + +// A Bad type is a non-nil placeholder type when we don't know a type. +type Bad struct { + ImplementsType + Msg string // for better error reporting/debugging +} + +// A Basic represents a (unnamed) basic type. +type Basic struct { + ImplementsType + // TODO(gri) need a field specifying the exact basic type +} + +// An Array represents an array type [Len]Elt. +type Array struct { + ImplementsType + Len uint64 + Elt Type +} + +// A Slice represents a slice type []Elt. +type Slice struct { + ImplementsType + Elt Type +} + +// A Struct represents a struct type struct{...}. +// Anonymous fields are represented by objects with empty names. +type Struct struct { + ImplementsType + Fields ObjList // struct fields; or nil + Tags []string // corresponding tags; or nil + // TODO(gri) This type needs some rethinking: + // - at the moment anonymous fields are marked with "" object names, + // and their names have to be reconstructed + // - there is no scope for fast lookup (but the parser creates one) +} + +// A Pointer represents a pointer type *Base. +type Pointer struct { + ImplementsType + Base Type +} + +// A Func represents a function type func(...) (...). +// Unnamed parameters are represented by objects with empty names. +type Func struct { + ImplementsType + Recv *ast.Object // nil if not a method + Params ObjList // (incoming) parameters from left to right; or nil + Results ObjList // (outgoing) results from left to right; or nil + IsVariadic bool // true if the last parameter's type is of the form ...T +} + +// An Interface represents an interface type interface{...}. +type Interface struct { + ImplementsType + Methods ObjList // interface methods sorted by name; or nil +} + +// A Map represents a map type map[Key]Elt. +type Map struct { + ImplementsType + Key, Elt Type +} + +// A Chan represents a channel type chan Elt, <-chan Elt, or chan<-Elt. +type Chan struct { + ImplementsType + Dir ast.ChanDir + Elt Type +} + +// A Name represents a named type as declared in a type declaration. +type Name struct { + ImplementsType + Underlying Type // nil if not fully declared + Obj *ast.Object // corresponding declared object + // TODO(gri) need to remember fields and methods. +} + +// If typ is a pointer type, Deref returns the pointer's base type; +// otherwise it returns typ. +func Deref(typ Type) Type { + if typ, ok := typ.(*Pointer); ok { + return typ.Base + } + return typ +} + +// Underlying returns the underlying type of a type. 
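+// For example (sketch): for a declaration type T []int, Underlying(T) is +// the slice type []int; for type T int it is the type name int rather +// than the anonymous *Basic (see the comment in the body below).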
+func Underlying(typ Type) Type { + if typ, ok := typ.(*Name); ok { + utyp := typ.Underlying + if _, ok := utyp.(*Basic); !ok { + return utyp + } + // the underlying type of a type name referring + // to an (untyped) basic type is the basic type + // name + } + return typ +} + +// An ObjList represents an ordered (in some fashion) list of objects. +type ObjList []*ast.Object + +// ObjList implements sort.Interface. +func (list ObjList) Len() int { return len(list) } +func (list ObjList) Less(i, j int) bool { return list[i].Name < list[j].Name } +func (list ObjList) Swap(i, j int) { list[i], list[j] = list[j], list[i] } + +// Sort sorts an object list by object name. +func (list ObjList) Sort() { sort.Sort(list) } + +// identicalTypes returns true if both lists a and b have the +// same length and corresponding objects have identical types. +func identicalTypes(a, b ObjList) bool { + if len(a) == len(b) { + for i, x := range a { + y := b[i] + if !Identical(x.Type.(Type), y.Type.(Type)) { + return false + } + } + return true + } + return false +} + +// Identical returns true if two types are identical. +func Identical(x, y Type) bool { + if x == y { + return true + } + + switch x := x.(type) { + case *Bad: + // A Bad type is always identical to any other type + // (to avoid spurious follow-up errors). + return true + + case *Basic: + if y, ok := y.(*Basic); ok { + panic("unimplemented") + _ = y + } + + case *Array: + // Two array types are identical if they have identical element types + // and the same array length. + if y, ok := y.(*Array); ok { + return x.Len == y.Len && Identical(x.Elt, y.Elt) + } + + case *Slice: + // Two slice types are identical if they have identical element types. + if y, ok := y.(*Slice); ok { + return Identical(x.Elt, y.Elt) + } + + case *Struct: + // Two struct types are identical if they have the same sequence of fields, + // and if corresponding fields have the same names, and identical types, + // and identical tags. Two anonymous fields are considered to have the same + // name. Lower-case field names from different packages are always different. + if y, ok := y.(*Struct); ok { + // TODO(gri) handle structs from different packages + if identicalTypes(x.Fields, y.Fields) { + for i, f := range x.Fields { + g := y.Fields[i] + if f.Name != g.Name || x.Tags[i] != y.Tags[i] { + return false + } + } + return true + } + } + + case *Pointer: + // Two pointer types are identical if they have identical base types. + if y, ok := y.(*Pointer); ok { + return Identical(x.Base, y.Base) + } + + case *Func: + // Two function types are identical if they have the same number of parameters + // and result values, corresponding parameter and result types are identical, + // and either both functions are variadic or neither is. Parameter and result + // names are not required to match. + if y, ok := y.(*Func); ok { + return identicalTypes(x.Params, y.Params) && + identicalTypes(x.Results, y.Results) && + x.IsVariadic == y.IsVariadic + } + + case *Interface: + // Two interface types are identical if they have the same set of methods with + // the same names and identical function types. Lower-case method names from + // different packages are always different. The order of the methods is irrelevant. + if y, ok := y.(*Interface); ok { + return identicalTypes(x.Methods, y.Methods) // methods are sorted + } + + case *Map: + // Two map types are identical if they have identical key and value types. 
+ if y, ok := y.(*Map); ok { + return Identical(x.Key, y.Key) && Identical(x.Elt, y.Elt) + } + + case *Chan: + // Two channel types are identical if they have identical value types + // and the same direction. + if y, ok := y.(*Chan); ok { + return x.Dir == y.Dir && Identical(x.Elt, y.Elt) + } + + case *Name: + // Two named types are identical if their type names originate + // in the same type declaration. + if y, ok := y.(*Name); ok { + return x.Obj == y.Obj || + // permit bad objects to be equal to avoid + // follow up errors + x.Obj != nil && x.Obj.Kind == ast.Bad || + y.Obj != nil && y.Obj.Kind == ast.Bad + } + } + + return false +} diff --git a/src/pkg/go/types/universe.go b/src/pkg/go/types/universe.go new file mode 100644 index 000000000..6ae88e5f9 --- /dev/null +++ b/src/pkg/go/types/universe.go @@ -0,0 +1,108 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// FILE UNDER CONSTRUCTION. ANY AND ALL PARTS MAY CHANGE. +// This file implements the universe and unsafe package scopes. + +package types + +import "go/ast" + +var ( + scope *ast.Scope // current scope to use for initialization + Universe *ast.Scope + Unsafe *ast.Object // package unsafe +) + +func define(kind ast.ObjKind, name string) *ast.Object { + obj := ast.NewObj(kind, name) + if scope.Insert(obj) != nil { + panic("types internal error: double declaration") + } + return obj +} + +func defType(name string) *Name { + obj := define(ast.Typ, name) + typ := &Name{Underlying: &Basic{}, Obj: obj} + obj.Type = typ + return typ +} + +func defConst(name string) { + obj := define(ast.Con, name) + _ = obj // TODO(gri) fill in other properties +} + +func defFun(name string) { + obj := define(ast.Fun, name) + _ = obj // TODO(gri) fill in other properties +} + +var ( + Bool, + Int, + Float64, + Complex128, + String *Name +) + +func init() { + scope = ast.NewScope(nil) + Universe = scope + + Bool = defType("bool") + defType("byte") // TODO(gri) should be an alias for uint8 + defType("complex64") + Complex128 = defType("complex128") + defType("float32") + Float64 = defType("float64") + defType("int8") + defType("int16") + defType("int32") + defType("int64") + String = defType("string") + defType("uint8") + defType("uint16") + defType("uint32") + defType("uint64") + Int = defType("int") + defType("uint") + defType("uintptr") + + defConst("true") + defConst("false") + defConst("iota") + defConst("nil") + + defFun("append") + defFun("cap") + defFun("close") + defFun("complex") + defFun("copy") + defFun("imag") + defFun("len") + defFun("make") + defFun("new") + defFun("panic") + defFun("print") + defFun("println") + defFun("real") + defFun("recover") + + scope = ast.NewScope(nil) + Unsafe = ast.NewObj(ast.Pkg, "unsafe") + Unsafe.Data = scope + + defType("Pointer") + + defFun("Alignof") + defFun("New") + defFun("NewArray") + defFun("Offsetof") + defFun("Reflect") + defFun("Sizeof") + defFun("Typeof") + defFun("Unreflect") +} |